mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
v0.6.36: new chunkers, sockets state machine, google sheets/drive/calendar triggers, docs updates, integrations/models pages improvements
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
---
|
||||
description: Create webhook or polling triggers for a Sim integration
|
||||
argument-hint: <service-name>
|
||||
---
|
||||
|
||||
# Add Trigger
|
||||
|
||||
You are an expert at creating webhook and polling triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, polling infrastructure, and how triggers connect to blocks.
|
||||
|
||||
## Your Task
|
||||
|
||||
1. Research what webhook events the service supports — if the service lacks reliable webhooks, use polling
|
||||
2. Create the trigger files using the generic builder (webhook) or manual config (polling)
|
||||
3. Create a provider handler (webhook) or polling handler (polling)
|
||||
4. Register triggers and connect them to the block
|
||||
|
||||
## Directory Structure
|
||||
@@ -146,23 +146,37 @@ export const TRIGGER_REGISTRY: TriggerRegistry = {
|
||||
|
||||
### Block file (`apps/sim/blocks/blocks/{service}.ts`)
|
||||
|
||||
Wire triggers into the block so the trigger UI appears and `generate-docs.ts` discovers them. Two changes are needed:
|
||||
|
||||
1. **Spread trigger subBlocks** at the end of the block's `subBlocks` array
|
||||
2. **Add `triggers` property** after `outputs` with `enabled: true` and `available: [...]`
|
||||
|
||||
```typescript
|
||||
import { getTrigger } from '@/triggers'
|
||||
|
||||
export const {Service}Block: BlockConfig = {
|
||||
// ...
|
||||
subBlocks: [
|
||||
// Regular tool subBlocks first...
|
||||
...getTrigger('{service}_event_a').subBlocks,
|
||||
...getTrigger('{service}_event_b').subBlocks,
|
||||
],
|
||||
// ... tools, inputs, outputs ...
|
||||
triggers: {
|
||||
enabled: true,
|
||||
available: ['{service}_event_a', '{service}_event_b'],
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**Versioned blocks (V1 + V2):** Many integrations have a hidden V1 block and a visible V2 block. Where you add the trigger wiring depends on how V2 inherits from V1:
|
||||
|
||||
- **V2 uses `...V1Block` spread** (e.g., Google Calendar): Add trigger to V1 — V2 inherits both `subBlocks` and `triggers` automatically.
|
||||
- **V2 defines its own `subBlocks`** (e.g., Google Sheets): Add trigger to V2 (the visible block). V1 is hidden and doesn't need it.
|
||||
- **Single block, no V2** (e.g., Google Drive): Add trigger directly.
|
||||
|
||||
`generate-docs.ts` deduplicates by base type (first match wins). If V1 is processed first without triggers, the V2 triggers won't appear in `integrations.json`. Always verify by checking the output after running the script.
|
||||
|
||||
## Provider Handler
|
||||
|
||||
All provider-specific webhook logic lives in a single handler file: `apps/sim/lib/webhooks/providers/{service}.ts`.
|
||||
@@ -327,6 +341,122 @@ export function buildOutputs(): Record<string, TriggerOutput> {
|
||||
}
|
||||
```
|
||||
|
||||
## Polling Triggers
|
||||
|
||||
Use polling when the service lacks reliable webhooks (e.g., Google Sheets, Google Drive, Google Calendar, Gmail, RSS, IMAP). Polling triggers do NOT use `buildTriggerSubBlocks` — they define subBlocks manually.
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
apps/sim/triggers/{service}/
|
||||
├── index.ts # Barrel export
|
||||
└── poller.ts # TriggerConfig with polling: true
|
||||
|
||||
apps/sim/lib/webhooks/polling/
|
||||
└── {service}.ts # PollingProviderHandler implementation
|
||||
```
|
||||
|
||||
### Polling Handler (`apps/sim/lib/webhooks/polling/{service}.ts`)
|
||||
|
||||
```typescript
|
||||
import { pollingIdempotency } from '@/lib/core/idempotency/service'
|
||||
import type { PollingProviderHandler, PollWebhookContext } from '@/lib/webhooks/polling/types'
|
||||
import { markWebhookFailed, markWebhookSuccess, resolveOAuthCredential, updateWebhookProviderConfig } from '@/lib/webhooks/polling/utils'
|
||||
import { processPolledWebhookEvent } from '@/lib/webhooks/processor'
|
||||
|
||||
export const {service}PollingHandler: PollingProviderHandler = {
|
||||
provider: '{service}',
|
||||
label: '{Service}',
|
||||
|
||||
async pollWebhook(ctx: PollWebhookContext): Promise<'success' | 'failure'> {
|
||||
const { webhookData, workflowData, requestId, logger } = ctx
|
||||
const webhookId = webhookData.id
|
||||
|
||||
try {
|
||||
// For OAuth services:
|
||||
const accessToken = await resolveOAuthCredential(webhookData, '{service}', requestId, logger)
|
||||
const config = webhookData.providerConfig as unknown as {Service}WebhookConfig
|
||||
|
||||
// First poll: seed state, emit nothing
|
||||
if (!config.lastCheckedTimestamp) {
|
||||
await updateWebhookProviderConfig(webhookId, { lastCheckedTimestamp: new Date().toISOString() }, logger)
|
||||
await markWebhookSuccess(webhookId, logger)
|
||||
return 'success'
|
||||
}
|
||||
|
||||
// Fetch changes since last poll, process with idempotency
|
||||
// ...
|
||||
|
||||
await markWebhookSuccess(webhookId, logger)
|
||||
return 'success'
|
||||
} catch (error) {
|
||||
logger.error(`[${requestId}] Error processing {service} webhook ${webhookId}:`, error)
|
||||
await markWebhookFailed(webhookId, logger)
|
||||
return 'failure'
|
||||
}
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**Key patterns:**
|
||||
- First poll seeds state and emits nothing (avoids flooding with existing data)
|
||||
- Use `pollingIdempotency.executeWithIdempotency(provider, key, callback)` for dedup
|
||||
- Use `processPolledWebhookEvent(webhookData, workflowData, payload, requestId)` to fire the workflow
|
||||
- Use `updateWebhookProviderConfig(webhookId, partialConfig, logger)` for read-merge-write on state
|
||||
- Use the latest server-side timestamp from API responses (not wall clock) to avoid clock skew
|
||||
|
||||
### Trigger Config (`apps/sim/triggers/{service}/poller.ts`)
|
||||
|
||||
```typescript
|
||||
import { {Service}Icon } from '@/components/icons'
|
||||
import type { TriggerConfig } from '@/triggers/types'
|
||||
|
||||
export const {service}PollingTrigger: TriggerConfig = {
|
||||
id: '{service}_poller',
|
||||
name: '{Service} Trigger',
|
||||
provider: '{service}',
|
||||
description: 'Triggers when ...',
|
||||
version: '1.0.0',
|
||||
icon: {Service}Icon,
|
||||
polling: true, // REQUIRED — routes to polling infrastructure
|
||||
|
||||
subBlocks: [
|
||||
{ id: 'triggerCredentials', type: 'oauth-input', title: 'Credentials', serviceId: '{service}', requiredScopes: [], required: true, mode: 'trigger', supportsCredentialSets: true },
|
||||
// ... service-specific config fields (dropdowns, inputs, switches) ...
|
||||
{ id: 'triggerSave', type: 'trigger-save', title: '', hideFromPreview: true, mode: 'trigger', triggerId: '{service}_poller' },
|
||||
{ id: 'triggerInstructions', type: 'text', title: 'Setup Instructions', hideFromPreview: true, mode: 'trigger', defaultValue: '...' },
|
||||
],
|
||||
|
||||
outputs: {
|
||||
// Must match the payload shape from processPolledWebhookEvent
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Registration (3 places)
|
||||
|
||||
1. **`apps/sim/triggers/constants.ts`** — add provider to `POLLING_PROVIDERS` Set
|
||||
2. **`apps/sim/lib/webhooks/polling/registry.ts`** — import handler, add to `POLLING_HANDLERS`
|
||||
3. **`apps/sim/triggers/registry.ts`** — import trigger config, add to `TRIGGER_REGISTRY`
|
||||
|
||||
### Helm Cron Job
|
||||
|
||||
Add to `helm/sim/values.yaml` under the existing polling cron jobs:
|
||||
|
||||
```yaml
|
||||
{service}WebhookPoll:
|
||||
schedule: "*/1 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
url: "http://sim:3000/api/webhooks/poll/{service}"
|
||||
```
|
||||
|
||||
### Reference Implementations
|
||||
|
||||
- Simple: `apps/sim/lib/webhooks/polling/rss.ts` + `apps/sim/triggers/rss/poller.ts`
|
||||
- Complex (OAuth, attachments): `apps/sim/lib/webhooks/polling/gmail.ts` + `apps/sim/triggers/gmail/poller.ts`
|
||||
- Cursor-based (changes API): `apps/sim/lib/webhooks/polling/google-drive.ts`
|
||||
- Timestamp-based: `apps/sim/lib/webhooks/polling/google-calendar.ts`
|
||||
|
||||
## Checklist
|
||||
|
||||
### Trigger Definition
|
||||
@@ -352,7 +482,18 @@ export function buildOutputs(): Record<string, TriggerOutput> {
|
||||
- [ ] NO changes to `route.ts`, `provider-subscriptions.ts`, or `deploy.ts`
|
||||
- [ ] API key field uses `password: true`
|
||||
|
||||
### Polling Trigger (if applicable)
|
||||
- [ ] Handler implements `PollingProviderHandler` at `lib/webhooks/polling/{service}.ts`
|
||||
- [ ] Trigger config has `polling: true` and defines subBlocks manually (no `buildTriggerSubBlocks`)
|
||||
- [ ] Provider string matches across: trigger config, handler, `POLLING_PROVIDERS`, polling registry
|
||||
- [ ] `triggerSave` subBlock `triggerId` matches trigger config `id`
|
||||
- [ ] First poll seeds state and emits nothing
|
||||
- [ ] Added provider to `POLLING_PROVIDERS` in `triggers/constants.ts`
|
||||
- [ ] Added handler to `POLLING_HANDLERS` in `lib/webhooks/polling/registry.ts`
|
||||
- [ ] Added cron job to `helm/sim/values.yaml`
|
||||
- [ ] Payload shape matches trigger `outputs` schema
|
||||
|
||||
### Testing
|
||||
- [ ] `bun run type-check` passes
|
||||
- [ ] Manually verify output keys match trigger `outputs` keys
|
||||
- [ ] Trigger UI shows correctly in the block
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# Add Trigger
|
||||
|
||||
You are an expert at creating webhook and polling triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, polling infrastructure, and how triggers connect to blocks.
|
||||
|
||||
## Your Task
|
||||
|
||||
1. Research what webhook events the service supports — if the service lacks reliable webhooks, use polling
|
||||
2. Create the trigger files using the generic builder (webhook) or manual config (polling)
|
||||
3. Create a provider handler (webhook) or polling handler (polling)
|
||||
4. Register triggers and connect them to the block
|
||||
|
||||
## Directory Structure
|
||||
@@ -141,23 +141,37 @@ export const TRIGGER_REGISTRY: TriggerRegistry = {
|
||||
|
||||
### Block file (`apps/sim/blocks/blocks/{service}.ts`)
|
||||
|
||||
Wire triggers into the block so the trigger UI appears and `generate-docs.ts` discovers them. Two changes are needed:
|
||||
|
||||
1. **Spread trigger subBlocks** at the end of the block's `subBlocks` array
|
||||
2. **Add `triggers` property** after `outputs` with `enabled: true` and `available: [...]`
|
||||
|
||||
```typescript
|
||||
import { getTrigger } from '@/triggers'
|
||||
|
||||
export const {Service}Block: BlockConfig = {
|
||||
// ...
|
||||
subBlocks: [
|
||||
// Regular tool subBlocks first...
|
||||
...getTrigger('{service}_event_a').subBlocks,
|
||||
...getTrigger('{service}_event_b').subBlocks,
|
||||
],
|
||||
// ... tools, inputs, outputs ...
|
||||
triggers: {
|
||||
enabled: true,
|
||||
available: ['{service}_event_a', '{service}_event_b'],
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**Versioned blocks (V1 + V2):** Many integrations have a hidden V1 block and a visible V2 block. Where you add the trigger wiring depends on how V2 inherits from V1:
|
||||
|
||||
- **V2 uses `...V1Block` spread** (e.g., Google Calendar): Add trigger to V1 — V2 inherits both `subBlocks` and `triggers` automatically.
|
||||
- **V2 defines its own `subBlocks`** (e.g., Google Sheets): Add trigger to V2 (the visible block). V1 is hidden and doesn't need it.
|
||||
- **Single block, no V2** (e.g., Google Drive): Add trigger directly.
|
||||
|
||||
`generate-docs.ts` deduplicates by base type (first match wins). If V1 is processed first without triggers, the V2 triggers won't appear in `integrations.json`. Always verify by checking the output after running the script.
|
||||
|
||||
## Provider Handler
|
||||
|
||||
All provider-specific webhook logic lives in a single handler file: `apps/sim/lib/webhooks/providers/{service}.ts`.
|
||||
@@ -322,6 +336,122 @@ export function buildOutputs(): Record<string, TriggerOutput> {
|
||||
}
|
||||
```
|
||||
|
||||
## Polling Triggers
|
||||
|
||||
Use polling when the service lacks reliable webhooks (e.g., Google Sheets, Google Drive, Google Calendar, Gmail, RSS, IMAP). Polling triggers do NOT use `buildTriggerSubBlocks` — they define subBlocks manually.
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
apps/sim/triggers/{service}/
|
||||
├── index.ts # Barrel export
|
||||
└── poller.ts # TriggerConfig with polling: true
|
||||
|
||||
apps/sim/lib/webhooks/polling/
|
||||
└── {service}.ts # PollingProviderHandler implementation
|
||||
```
|
||||
|
||||
### Polling Handler (`apps/sim/lib/webhooks/polling/{service}.ts`)
|
||||
|
||||
```typescript
|
||||
import { pollingIdempotency } from '@/lib/core/idempotency/service'
|
||||
import type { PollingProviderHandler, PollWebhookContext } from '@/lib/webhooks/polling/types'
|
||||
import { markWebhookFailed, markWebhookSuccess, resolveOAuthCredential, updateWebhookProviderConfig } from '@/lib/webhooks/polling/utils'
|
||||
import { processPolledWebhookEvent } from '@/lib/webhooks/processor'
|
||||
|
||||
export const {service}PollingHandler: PollingProviderHandler = {
|
||||
provider: '{service}',
|
||||
label: '{Service}',
|
||||
|
||||
async pollWebhook(ctx: PollWebhookContext): Promise<'success' | 'failure'> {
|
||||
const { webhookData, workflowData, requestId, logger } = ctx
|
||||
const webhookId = webhookData.id
|
||||
|
||||
try {
|
||||
// For OAuth services:
|
||||
const accessToken = await resolveOAuthCredential(webhookData, '{service}', requestId, logger)
|
||||
const config = webhookData.providerConfig as unknown as {Service}WebhookConfig
|
||||
|
||||
// First poll: seed state, emit nothing
|
||||
if (!config.lastCheckedTimestamp) {
|
||||
await updateWebhookProviderConfig(webhookId, { lastCheckedTimestamp: new Date().toISOString() }, logger)
|
||||
await markWebhookSuccess(webhookId, logger)
|
||||
return 'success'
|
||||
}
|
||||
|
||||
// Fetch changes since last poll, process with idempotency
|
||||
// ...
|
||||
|
||||
await markWebhookSuccess(webhookId, logger)
|
||||
return 'success'
|
||||
} catch (error) {
|
||||
logger.error(`[${requestId}] Error processing {service} webhook ${webhookId}:`, error)
|
||||
await markWebhookFailed(webhookId, logger)
|
||||
return 'failure'
|
||||
}
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
**Key patterns:**
|
||||
- First poll seeds state and emits nothing (avoids flooding with existing data)
|
||||
- Use `pollingIdempotency.executeWithIdempotency(provider, key, callback)` for dedup
|
||||
- Use `processPolledWebhookEvent(webhookData, workflowData, payload, requestId)` to fire the workflow
|
||||
- Use `updateWebhookProviderConfig(webhookId, partialConfig, logger)` for read-merge-write on state
|
||||
- Use the latest server-side timestamp from API responses (not wall clock) to avoid clock skew
|
||||
|
||||
### Trigger Config (`apps/sim/triggers/{service}/poller.ts`)
|
||||
|
||||
```typescript
|
||||
import { {Service}Icon } from '@/components/icons'
|
||||
import type { TriggerConfig } from '@/triggers/types'
|
||||
|
||||
export const {service}PollingTrigger: TriggerConfig = {
|
||||
id: '{service}_poller',
|
||||
name: '{Service} Trigger',
|
||||
provider: '{service}',
|
||||
description: 'Triggers when ...',
|
||||
version: '1.0.0',
|
||||
icon: {Service}Icon,
|
||||
polling: true, // REQUIRED — routes to polling infrastructure
|
||||
|
||||
subBlocks: [
|
||||
{ id: 'triggerCredentials', type: 'oauth-input', title: 'Credentials', serviceId: '{service}', requiredScopes: [], required: true, mode: 'trigger', supportsCredentialSets: true },
|
||||
// ... service-specific config fields (dropdowns, inputs, switches) ...
|
||||
{ id: 'triggerSave', type: 'trigger-save', title: '', hideFromPreview: true, mode: 'trigger', triggerId: '{service}_poller' },
|
||||
{ id: 'triggerInstructions', type: 'text', title: 'Setup Instructions', hideFromPreview: true, mode: 'trigger', defaultValue: '...' },
|
||||
],
|
||||
|
||||
outputs: {
|
||||
// Must match the payload shape from processPolledWebhookEvent
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Registration (3 places)
|
||||
|
||||
1. **`apps/sim/triggers/constants.ts`** — add provider to `POLLING_PROVIDERS` Set
|
||||
2. **`apps/sim/lib/webhooks/polling/registry.ts`** — import handler, add to `POLLING_HANDLERS`
|
||||
3. **`apps/sim/triggers/registry.ts`** — import trigger config, add to `TRIGGER_REGISTRY`
|
||||
|
||||
### Helm Cron Job
|
||||
|
||||
Add to `helm/sim/values.yaml` under the existing polling cron jobs:
|
||||
|
||||
```yaml
|
||||
{service}WebhookPoll:
|
||||
schedule: "*/1 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
url: "http://sim:3000/api/webhooks/poll/{service}"
|
||||
```
|
||||
|
||||
### Reference Implementations
|
||||
|
||||
- Simple: `apps/sim/lib/webhooks/polling/rss.ts` + `apps/sim/triggers/rss/poller.ts`
|
||||
- Complex (OAuth, attachments): `apps/sim/lib/webhooks/polling/gmail.ts` + `apps/sim/triggers/gmail/poller.ts`
|
||||
- Cursor-based (changes API): `apps/sim/lib/webhooks/polling/google-drive.ts`
|
||||
- Timestamp-based: `apps/sim/lib/webhooks/polling/google-calendar.ts`
|
||||
|
||||
## Checklist
|
||||
|
||||
### Trigger Definition
|
||||
@@ -347,7 +477,18 @@ export function buildOutputs(): Record<string, TriggerOutput> {
|
||||
- [ ] NO changes to `route.ts`, `provider-subscriptions.ts`, or `deploy.ts`
|
||||
- [ ] API key field uses `password: true`
|
||||
|
||||
### Polling Trigger (if applicable)
|
||||
- [ ] Handler implements `PollingProviderHandler` at `lib/webhooks/polling/{service}.ts`
|
||||
- [ ] Trigger config has `polling: true` and defines subBlocks manually (no `buildTriggerSubBlocks`)
|
||||
- [ ] Provider string matches across: trigger config, handler, `POLLING_PROVIDERS`, polling registry
|
||||
- [ ] `triggerSave` subBlock `triggerId` matches trigger config `id`
|
||||
- [ ] First poll seeds state and emits nothing
|
||||
- [ ] Added provider to `POLLING_PROVIDERS` in `triggers/constants.ts`
|
||||
- [ ] Added handler to `POLLING_HANDLERS` in `lib/webhooks/polling/registry.ts`
|
||||
- [ ] Added cron job to `helm/sim/values.yaml`
|
||||
- [ ] Payload shape matches trigger `outputs` schema
|
||||
|
||||
### Testing
|
||||
- [ ] `bun run type-check` passes
|
||||
- [ ] Manually verify output keys match trigger `outputs` keys
|
||||
- [ ] Trigger UI shows correctly in the block
|
||||
|
||||
@@ -21,7 +21,17 @@ Verwenden Sie Ihre eigenen API-Schlüssel für KI-Modellanbieter anstelle der ge
|
||||
| OpenAI | Knowledge Base-Embeddings, Agent-Block |
|
||||
| Anthropic | Agent-Block |
|
||||
| Google | Agent-Block |
|
||||
| Mistral | Knowledge Base OCR |
|
||||
| Mistral | Knowledge Base OCR, Agent-Block |
|
||||
| Fireworks | Agent-Block |
|
||||
| Firecrawl | Web-Scraping, Crawling, Suche und Extraktion |
|
||||
| Exa | KI-gestützte Suche und Recherche |
|
||||
| Serper | Google-Such-API |
|
||||
| Linkup | Websuche und Inhaltsabruf |
|
||||
| Parallel AI | Websuche, Extraktion und tiefgehende Recherche |
|
||||
| Perplexity | KI-gestützter Chat und Websuche |
|
||||
| Jina AI | Web-Lesen und Suche |
|
||||
| Google Cloud | Translate, Maps, PageSpeed und Books APIs |
|
||||
| Brandfetch | Marken-Assets, Logos, Farben und Unternehmensinformationen |
|
||||
|
||||
### Einrichtung
|
||||
|
||||
|
||||
@@ -105,9 +105,108 @@ Die Modellaufschlüsselung zeigt:
|
||||
Die angezeigten Preise entsprechen den Tarifen vom 10. September 2025. Überprüfen Sie die Dokumentation der Anbieter für aktuelle Preise.
|
||||
</Callout>
|
||||
|
||||
## Gehostete Tool-Preise
|
||||
|
||||
Wenn Workflows Tool-Blöcke mit den gehosteten API-Schlüsseln von Sim verwenden, werden die Kosten pro Operation berechnet. Verwenden Sie Ihre eigenen Schlüssel über BYOK, um direkt an die Anbieter zu zahlen.
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - Web-Scraping, Crawling, Suche und Extraktion
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - KI-gestützte Suche und Recherche
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - Google-Such-API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - KI-gestützter Chat und Websuche
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - Websuche und Inhaltsabruf
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - Websuche, Extraktion und tiefgehende Recherche
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - Web-Lesen und Suche
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - Translate, Maps, PageSpeed und Books APIs
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - Marken-Assets, Logos, Farben und Unternehmensinformationen
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Bring Your Own Key (BYOK)
|
||||
|
||||
Sie können Ihre eigenen API-Schlüssel für unterstützte Anbieter (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) unter **Einstellungen → BYOK** verwenden, um Basispreise zu zahlen. Schlüssel werden verschlüsselt und gelten arbeitsbereichsweit.
|
||||
|
||||
## Strategien zur Kostenoptimierung
|
||||
|
||||
|
||||
@@ -110,9 +110,108 @@ The model breakdown shows:
|
||||
Pricing shown reflects rates as of September 10, 2025. Check provider documentation for current pricing.
|
||||
</Callout>
|
||||
|
||||
## Hosted Tool Pricing
|
||||
|
||||
When workflows use tool blocks with Sim's hosted API keys, costs are charged per operation. Use your own keys via BYOK to pay providers directly instead.
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - Web scraping, crawling, search, and extraction
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - AI-powered search and research
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - Google search API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - AI-powered chat and web search
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - Web search and content retrieval
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - Web search, extraction, and deep research
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - Web reading and search
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - Translate, Maps, PageSpeed, and Books APIs
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - Brand assets, logos, colors, and company info
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Bring Your Own Key (BYOK)
|
||||
|
||||
Use your own API keys for supported providers instead of Sim's hosted keys to pay base prices with no markup.
|
||||
|
||||
### Supported Providers
|
||||
|
||||
@@ -121,7 +220,17 @@ Use your own API keys for AI model providers instead of Sim's hosted keys to pay
|
||||
| OpenAI | Knowledge Base embeddings, Agent block |
|
||||
| Anthropic | Agent block |
|
||||
| Google | Agent block |
|
||||
| Mistral | Knowledge Base OCR, Agent block |
|
||||
| Fireworks | Agent block |
|
||||
| Firecrawl | Web scraping, crawling, search, and extraction |
|
||||
| Exa | AI-powered search and research |
|
||||
| Serper | Google search API |
|
||||
| Linkup | Web search and content retrieval |
|
||||
| Parallel AI | Web search, extraction, and deep research |
|
||||
| Perplexity | AI-powered chat and web search |
|
||||
| Jina AI | Web reading and search |
|
||||
| Google Cloud | Translate, Maps, PageSpeed, and Books APIs |
|
||||
| Brandfetch | Brand assets, logos, colors, and company info |
|
||||
|
||||
### Setup
|
||||
|
||||
@@ -152,20 +261,20 @@ Each voice session is billed when it starts. In deployed chat voice mode, each c
|
||||
|
||||
## Plans
|
||||
|
||||
Sim has two paid plan tiers - **Pro** and **Max**. Either can be used individually or with a team. Team plans pool credits across all seats in the organization.
|
||||
|
||||
| Plan | Price | Credits Included | Daily Refresh |
|
||||
|------|-------|------------------|---------------|
|
||||
| **Community** | $0 | 1,000 (one-time) | - |
|
||||
| **Pro** | $25/mo | 6,000/mo | +50/day |
|
||||
| **Max** | $100/mo | 25,000/mo | +200/day |
|
||||
| **Enterprise** | Custom | Custom | - |
|
||||
|
||||
To use Pro or Max with a team, select **Get For Team** in subscription settings and choose the tier and number of seats. Credits are pooled across the organization at the per-seat rate (e.g. Max for Teams with 3 seats = 75,000 credits/mo pooled).
|
||||
|
||||
### Daily Refresh Credits
|
||||
|
||||
Paid plans include a small daily credit allowance that does not count toward your plan limit. Each day, usage up to the daily refresh amount is excluded from billable usage. This allowance resets every 24 hours and does not carry over - use it or lose it.
|
||||
|
||||
| Plan | Daily Refresh |
|
||||
|------|---------------|
|
||||
@@ -252,7 +361,7 @@ Sim uses a **base subscription + overage** billing model:
|
||||
|
||||
### How It Works
|
||||
|
||||
**Pro Plan ($25/month - 6,000 credits):**
|
||||
- Monthly subscription includes 6,000 credits of usage
|
||||
- Usage under 6,000 credits → No additional charges
|
||||
- Usage over 6,000 credits (with on-demand enabled) → Pay the overage at month end
|
||||
|
||||
@@ -21,7 +21,17 @@ Usa tus propias claves API para proveedores de modelos de IA en lugar de las cla
|
||||
| OpenAI | Embeddings de base de conocimiento, bloque Agent |
|
||||
| Anthropic | Bloque Agent |
|
||||
| Google | Bloque Agent |
|
||||
| Mistral | OCR de base de conocimiento |
|
||||
| Mistral | OCR de base de conocimiento, bloque Agent |
|
||||
| Fireworks | Bloque Agent |
|
||||
| Firecrawl | Web scraping, crawling, búsqueda y extracción |
|
||||
| Exa | Búsqueda e investigación impulsada por IA |
|
||||
| Serper | API de búsqueda de Google |
|
||||
| Linkup | Búsqueda web y recuperación de contenido |
|
||||
| Parallel AI | Búsqueda web, extracción e investigación profunda |
|
||||
| Perplexity | Chat y búsqueda web impulsada por IA |
|
||||
| Jina AI | Lectura y búsqueda web |
|
||||
| Google Cloud | APIs de Translate, Maps, PageSpeed y Books |
|
||||
| Brandfetch | Activos de marca, logos, colores e información de empresas |
|
||||
|
||||
### Configuración
|
||||
|
||||
|
||||
@@ -105,9 +105,108 @@ El desglose del modelo muestra:
|
||||
Los precios mostrados reflejan las tarifas a partir del 10 de septiembre de 2025. Consulta la documentación del proveedor para conocer los precios actuales.
|
||||
</Callout>
|
||||
|
||||
## Precios de herramientas alojadas
|
||||
|
||||
Cuando los flujos de trabajo usan bloques de herramientas con las claves API alojadas de Sim, los costos se cobran por operación. Usa tus propias claves a través de BYOK para pagar directamente a los proveedores.
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - Web scraping, crawling, búsqueda y extracción
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - Búsqueda e investigación impulsada por IA
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - API de búsqueda de Google
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - Chat y búsqueda web impulsada por IA
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - Búsqueda web y recuperación de contenido
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - Búsqueda web, extracción e investigación profunda
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - Lectura y búsqueda web
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - APIs de Translate, Maps, PageSpeed y Books
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - Activos de marca, logos, colores e información de empresas
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Trae tu propia clave (BYOK)
|
||||
|
||||
Puedes usar tus propias claves API para modelos alojados (OpenAI, Anthropic, Google, Mistral) en **Configuración → BYOK** para pagar precios base. Las claves están encriptadas y se aplican a todo el espacio de trabajo.
|
||||
Puedes usar tus propias claves API para proveedores compatibles (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) en **Configuración → BYOK** para pagar precios base. Las claves están encriptadas y se aplican a todo el espacio de trabajo.
|
||||
|
||||
## Estrategias de optimización de costos
|
||||
|
||||
|
||||
@@ -21,7 +21,17 @@ Utilisez vos propres clés API pour les fournisseurs de modèles IA au lieu des
|
||||
| OpenAI | Embeddings de base de connaissances, bloc Agent |
|
||||
| Anthropic | Bloc Agent |
|
||||
| Google | Bloc Agent |
|
||||
| Mistral | OCR de base de connaissances |
|
||||
| Mistral | OCR de base de connaissances, bloc Agent |
|
||||
| Fireworks | Bloc Agent |
|
||||
| Firecrawl | Web scraping, crawling, recherche et extraction |
|
||||
| Exa | Recherche et investigation alimentées par l'IA |
|
||||
| Serper | API de recherche Google |
|
||||
| Linkup | Recherche web et récupération de contenu |
|
||||
| Parallel AI | Recherche web, extraction et recherche approfondie |
|
||||
| Perplexity | Chat et recherche web alimentés par l'IA |
|
||||
| Jina AI | Lecture et recherche web |
|
||||
| Google Cloud | APIs Translate, Maps, PageSpeed et Books |
|
||||
| Brandfetch | Ressources de marque, logos, couleurs et informations d'entreprise |
|
||||
|
||||
### Configuration
|
||||
|
||||
|
||||
@@ -105,9 +105,108 @@ La répartition des modèles montre :
|
||||
Les prix indiqués reflètent les tarifs en date du 10 septembre 2025. Consultez la documentation des fournisseurs pour les tarifs actuels.
|
||||
</Callout>
|
||||
|
||||
## Tarification des outils hébergés
|
||||
|
||||
Lorsque les workflows utilisent des blocs d'outils avec les clés API hébergées par Sim, les coûts sont facturés par opération. Utilisez vos propres clés via BYOK pour payer directement les fournisseurs.
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - Web scraping, crawling, recherche et extraction
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - Recherche et investigation alimentées par l'IA
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - API de recherche Google
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - Chat et recherche web alimentés par l'IA
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - Recherche web et récupération de contenu
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - Recherche web, extraction et recherche approfondie
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - Lecture et recherche web
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - APIs Translate, Maps, PageSpeed et Books
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - Ressources de marque, logos, couleurs et informations d'entreprise
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Apportez votre propre clé (BYOK)
|
||||
|
||||
Vous pouvez utiliser vos propres clés API pour les modèles hébergés (OpenAI, Anthropic, Google, Mistral) dans **Paramètres → BYOK** pour payer les prix de base. Les clés sont chiffrées et s'appliquent à l'ensemble de l'espace de travail.
|
||||
Vous pouvez utiliser vos propres clés API pour les fournisseurs pris en charge (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) dans **Paramètres → BYOK** pour payer les prix de base. Les clés sont chiffrées et s'appliquent à l'ensemble de l'espace de travail.
|
||||
|
||||
## Stratégies d'optimisation des coûts
|
||||
|
||||
|
||||
@@ -20,7 +20,17 @@ Simのホストキーの代わりに、AIモデルプロバイダー用の独自
|
||||
| OpenAI | ナレッジベースの埋め込み、エージェントブロック |
|
||||
| Anthropic | エージェントブロック |
|
||||
| Google | エージェントブロック |
|
||||
| Mistral | ナレッジベースOCR |
|
||||
| Mistral | ナレッジベースOCR、エージェントブロック |
|
||||
| Fireworks | エージェントブロック |
|
||||
| Firecrawl | Webスクレイピング、クローリング、検索、抽出 |
|
||||
| Exa | AI搭載の検索とリサーチ |
|
||||
| Serper | Google検索API |
|
||||
| Linkup | Web検索とコンテンツ取得 |
|
||||
| Parallel AI | Web検索、抽出、ディープリサーチ |
|
||||
| Perplexity | AI搭載のチャットとWeb検索 |
|
||||
| Jina AI | Web閲覧と検索 |
|
||||
| Google Cloud | Translate、Maps、PageSpeed、Books API |
|
||||
| Brandfetch | ブランドアセット、ロゴ、カラー、企業情報 |
|
||||
|
||||
### セットアップ
|
||||
|
||||
|
||||
@@ -105,9 +105,108 @@ AIブロックを使用するワークフローでは、ログで詳細なコス
|
||||
表示価格は2025年9月10日時点のレートを反映しています。最新の価格については各プロバイダーのドキュメントをご確認ください。
|
||||
</Callout>
|
||||
|
||||
## ホスティングツールの料金
|
||||
|
||||
ワークフローがSimのホスティングAPIキーを使用するツールブロックを利用する場合、操作ごとに料金が発生します。BYOKで独自のキーを使用すると、プロバイダーに直接支払うことができます。
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - Webスクレイピング、クローリング、検索、抽出
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - AI搭載の検索とリサーチ
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - Google検索API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - AI搭載のチャットとWeb検索
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - Web検索とコンテンツ取得
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - Web検索、抽出、ディープリサーチ
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - Web閲覧と検索
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - Translate、Maps、PageSpeed、Books API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - ブランドアセット、ロゴ、カラー、企業情報
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## Bring Your Own Key (BYOK)
|
||||
|
||||
ホストされたモデル(OpenAI、Anthropic、Google、Mistral)に対して、**設定 → BYOK**で独自のAPIキーを使用し、基本価格で支払うことができます。キーは暗号化され、ワークスペース全体に適用されます。
|
||||
対応プロバイダー(OpenAI、Anthropic、Google、Mistral、Fireworks、Firecrawl、Exa、Serper、Linkup、Parallel AI、Perplexity、Jina AI、Google Cloud、Brandfetch)に対して、**設定 → BYOK**で独自のAPIキーを使用し、基本価格で支払うことができます。キーは暗号化され、ワークスペース全体に適用されます。
|
||||
|
||||
## コスト最適化戦略
|
||||
|
||||
|
||||
@@ -20,7 +20,17 @@ Sim 企业版为需要更高安全性、合规性和管理能力的组织提供
|
||||
| OpenAI | 知识库嵌入、Agent 模块 |
|
||||
| Anthropic | Agent 模块 |
|
||||
| Google | Agent 模块 |
|
||||
| Mistral | 知识库 OCR |
|
||||
| Mistral | 知识库 OCR、Agent 模块 |
|
||||
| Fireworks | Agent 模块 |
|
||||
| Firecrawl | 网页抓取、爬取、搜索和提取 |
|
||||
| Exa | AI 驱动的搜索和研究 |
|
||||
| Serper | Google 搜索 API |
|
||||
| Linkup | 网络搜索和内容检索 |
|
||||
| Parallel AI | 网络搜索、提取和深度研究 |
|
||||
| Perplexity | AI 驱动的聊天和网络搜索 |
|
||||
| Jina AI | 网页阅读和搜索 |
|
||||
| Google Cloud | Translate、Maps、PageSpeed 和 Books API |
|
||||
| Brandfetch | 品牌资产、标志、颜色和公司信息 |
|
||||
|
||||
### 配置方法
|
||||
|
||||
|
||||
@@ -105,9 +105,108 @@ totalCost = baseExecutionCharge + modelCost
|
||||
显示的价格为截至 2025 年 9 月 10 日的费率。请查看提供商文档以获取最新价格。
|
||||
</Callout>
|
||||
|
||||
## 托管工具定价
|
||||
|
||||
当工作流使用 Sim 托管 API 密钥的工具模块时,费用按操作收取。通过 BYOK 使用你自己的密钥可直接向服务商付费。
|
||||
|
||||
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
|
||||
<Tab>
|
||||
**Firecrawl** - 网页抓取、爬取、搜索和提取
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Scrape | $0.001 per credit used |
|
||||
| Crawl | $0.001 per credit used |
|
||||
| Search | $0.001 per credit used |
|
||||
| Extract | $0.001 per credit used |
|
||||
| Map | $0.001 per credit used |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Exa** - AI 驱动的搜索和研究
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Dynamic (returned by API) |
|
||||
| Get Contents | Dynamic (returned by API) |
|
||||
| Find Similar Links | Dynamic (returned by API) |
|
||||
| Answer | Dynamic (returned by API) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Serper** - Google 搜索 API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.001 |
|
||||
| Search (>10 results) | $0.002 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Perplexity** - AI 驱动的聊天和网络搜索
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | $0.005 per request |
|
||||
| Chat | Token-based (varies by model) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Linkup** - 网络搜索和内容检索
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Standard search | ~$0.006 |
|
||||
| Deep search | ~$0.055 |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Parallel AI** - 网络搜索、提取和深度研究
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search (≤10 results) | $0.005 |
|
||||
| Search (>10 results) | $0.005 + $0.001 per additional result |
|
||||
| Extract | $0.001 per URL |
|
||||
| Deep Research | $0.005–$2.40 (varies by processor tier) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Jina AI** - 网页阅读和搜索
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Read URL | $0.20 per 1M tokens |
|
||||
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Google Cloud** - Translate、Maps、PageSpeed 和 Books API
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Translate / Detect | $0.00002 per character |
|
||||
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
|
||||
| Maps (Snap to Roads) | $0.01 per request |
|
||||
| Maps (Place Details) | $0.017 per request |
|
||||
| Maps (Places Search) | $0.032 per request |
|
||||
| PageSpeed | Free |
|
||||
| Books (Search, Details) | Free |
|
||||
</Tab>
|
||||
|
||||
<Tab>
|
||||
**Brandfetch** - 品牌资产、标志、颜色和公司信息
|
||||
|
||||
| Operation | Cost |
|
||||
|-----------|------|
|
||||
| Search | Free |
|
||||
| Get Brand | $0.04 per request |
|
||||
</Tab>
|
||||
</Tabs>
|
||||
|
||||
## 自带密钥(BYOK)
|
||||
|
||||
你可以在 **设置 → BYOK** 中为托管模型(OpenAI、Anthropic、Google、Mistral)使用你自己的 API 密钥,以按基础价格计费。密钥会被加密,并在整个工作区范围内生效。
|
||||
你可以在 **设置 → BYOK** 中为支持的服务商(OpenAI、Anthropic、Google、Mistral、Fireworks、Firecrawl、Exa、Serper、Linkup、Parallel AI、Perplexity、Jina AI、Google Cloud、Brandfetch)使用你自己的 API 密钥,以按基础价格计费。密钥会被加密,并在整个工作区范围内生效。
|
||||
|
||||
## 成本优化策略
|
||||
|
||||
|
||||
@@ -161,7 +161,7 @@ export default async function Page({ params }: { params: Promise<{ slug: string
|
||||
<h3 className='font-[430] font-season text-lg text-white leading-tight tracking-[-0.01em]'>
|
||||
{p.title}
|
||||
</h3>
|
||||
<p className='line-clamp-2 text-[#F6F6F0]/50 text-sm leading-[150%]'>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{p.description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -110,7 +110,7 @@ export default async function BlogIndex({
|
||||
<h1 className='text-balance font-[430] font-season text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'>
|
||||
Latest from Sim
|
||||
</h1>
|
||||
<p className='max-w-[360px] font-[430] font-season text-[#F6F6F0]/50 text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
|
||||
<p className='max-w-[540px] font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
|
||||
Announcements, insights, and guides for building AI agent workflows.
|
||||
</p>
|
||||
</div>
|
||||
@@ -152,7 +152,7 @@ export default async function BlogIndex({
|
||||
<h3 className='font-[430] font-season text-lg text-white leading-tight tracking-[-0.01em]'>
|
||||
{p.title}
|
||||
</h3>
|
||||
<p className='line-clamp-2 text-[#F6F6F0]/50 text-sm leading-[150%]'>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{p.description}
|
||||
</p>
|
||||
</div>
|
||||
@@ -191,7 +191,7 @@ export default async function BlogIndex({
|
||||
<h3 className='font-[430] font-season text-base text-white leading-tight tracking-[-0.01em] lg:text-lg'>
|
||||
{p.title}
|
||||
</h3>
|
||||
<p className='line-clamp-2 text-[#F6F6F0]/40 text-sm leading-[150%]'>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{p.description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -63,10 +63,8 @@ const INTEGRATION_LINKS: FooterItem[] = [
|
||||
{ label: 'Linear', href: 'https://docs.sim.ai/tools/linear', external: true },
|
||||
{ label: 'Airtable', href: 'https://docs.sim.ai/tools/airtable', external: true },
|
||||
{ label: 'Firecrawl', href: 'https://docs.sim.ai/tools/firecrawl', external: true },
|
||||
{ label: 'Pinecone', href: 'https://docs.sim.ai/tools/pinecone', external: true },
|
||||
{ label: 'Discord', href: 'https://docs.sim.ai/tools/discord', external: true },
|
||||
{ label: 'Microsoft Teams', href: 'https://docs.sim.ai/tools/microsoft_teams', external: true },
|
||||
{ label: 'Outlook', href: 'https://docs.sim.ai/tools/outlook', external: true },
|
||||
{ label: 'Telegram', href: 'https://docs.sim.ai/tools/telegram', external: true },
|
||||
]
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import { AnimatePresence, motion } from 'framer-motion'
|
||||
import { ChevronDown } from '@/components/emcn'
|
||||
import { cn } from '@/lib/core/utils/cn'
|
||||
|
||||
@@ -15,46 +16,67 @@ interface LandingFAQProps {
|
||||
|
||||
export function LandingFAQ({ faqs }: LandingFAQProps) {
|
||||
const [openIndex, setOpenIndex] = useState<number | null>(0)
|
||||
const [hoveredIndex, setHoveredIndex] = useState<number | null>(null)
|
||||
|
||||
return (
|
||||
<div className='divide-y divide-[var(--landing-border)]'>
|
||||
<div>
|
||||
{faqs.map(({ question, answer }, index) => {
|
||||
const isOpen = openIndex === index
|
||||
const isHovered = hoveredIndex === index
|
||||
const showDivider = index > 0 && hoveredIndex !== index && hoveredIndex !== index - 1
|
||||
|
||||
return (
|
||||
<div key={question}>
|
||||
<div
|
||||
className={cn(
|
||||
'h-px w-full bg-[var(--landing-bg-elevated)]',
|
||||
index === 0 || !showDivider ? 'invisible' : 'visible'
|
||||
)}
|
||||
/>
|
||||
<button
|
||||
type='button'
|
||||
onClick={() => setOpenIndex(isOpen ? null : index)}
|
||||
className='flex w-full items-start justify-between gap-4 py-5 text-left'
|
||||
onMouseEnter={() => setHoveredIndex(index)}
|
||||
onMouseLeave={() => setHoveredIndex(null)}
|
||||
className='-mx-6 flex w-[calc(100%+3rem)] items-center justify-between gap-4 px-6 py-4 text-left transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
aria-expanded={isOpen}
|
||||
>
|
||||
<span
|
||||
className={cn(
|
||||
'font-[500] text-[15px] leading-snug transition-colors',
|
||||
'text-[15px] leading-snug tracking-[-0.02em] transition-colors',
|
||||
isOpen
|
||||
? 'text-[var(--landing-text)]'
|
||||
: 'text-[var(--landing-text-muted)] hover:text-[var(--landing-text)]'
|
||||
: 'text-[var(--landing-text-body)] hover:text-[var(--landing-text)]'
|
||||
)}
|
||||
>
|
||||
{question}
|
||||
</span>
|
||||
<ChevronDown
|
||||
className={cn(
|
||||
'mt-0.5 h-4 w-4 shrink-0 text-[#555] transition-transform duration-200',
|
||||
'h-3 w-3 shrink-0 text-[var(--landing-text-subtle)] transition-transform duration-200',
|
||||
isOpen ? 'rotate-180' : 'rotate-0'
|
||||
)}
|
||||
aria-hidden='true'
|
||||
/>
|
||||
</button>
|
||||
|
||||
{isOpen && (
|
||||
<div className='pb-5'>
|
||||
<p className='text-[14px] text-[var(--landing-text-muted)] leading-[1.75]'>
|
||||
{answer}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
<AnimatePresence initial={false}>
|
||||
{isOpen && (
|
||||
<motion.div
|
||||
initial={{ height: 0, opacity: 0 }}
|
||||
animate={{ height: 'auto', opacity: 1 }}
|
||||
exit={{ height: 0, opacity: 0 }}
|
||||
transition={{ duration: 0.25, ease: [0.4, 0, 0.2, 1] }}
|
||||
className='overflow-hidden'
|
||||
>
|
||||
<div className='pt-2 pb-4'>
|
||||
<p className='text-[14px] text-[var(--landing-text-body)] leading-[1.75]'>
|
||||
{answer}
|
||||
</p>
|
||||
</div>
|
||||
</motion.div>
|
||||
)}
|
||||
</AnimatePresence>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
import type { ComponentType, SVGProps } from 'react'
|
||||
import Link from 'next/link'
|
||||
import {
|
||||
AgentIcon,
|
||||
ApiIcon,
|
||||
McpIcon,
|
||||
PackageSearchIcon,
|
||||
TableIcon,
|
||||
WorkflowIcon,
|
||||
} from '@/components/icons'
|
||||
|
||||
interface ProductLink {
|
||||
label: string
|
||||
description: string
|
||||
href: string
|
||||
external?: boolean
|
||||
icon: ComponentType<SVGProps<SVGSVGElement>>
|
||||
}
|
||||
|
||||
interface SidebarLink {
|
||||
label: string
|
||||
href: string
|
||||
external?: boolean
|
||||
}
|
||||
|
||||
const PLATFORM: ProductLink[] = [
|
||||
{
|
||||
label: 'Workflows',
|
||||
description: 'Visual AI automation builder',
|
||||
href: 'https://docs.sim.ai/getting-started',
|
||||
external: true,
|
||||
icon: WorkflowIcon,
|
||||
},
|
||||
{
|
||||
label: 'Agent',
|
||||
description: 'Build autonomous AI agents',
|
||||
href: 'https://docs.sim.ai/blocks/agent',
|
||||
external: true,
|
||||
icon: AgentIcon,
|
||||
},
|
||||
{
|
||||
label: 'MCP',
|
||||
description: 'Connect external tools',
|
||||
href: 'https://docs.sim.ai/mcp',
|
||||
external: true,
|
||||
icon: McpIcon,
|
||||
},
|
||||
{
|
||||
label: 'Knowledge Base',
|
||||
description: 'Retrieval-augmented context',
|
||||
href: 'https://docs.sim.ai/knowledgebase',
|
||||
external: true,
|
||||
icon: PackageSearchIcon,
|
||||
},
|
||||
{
|
||||
label: 'Tables',
|
||||
description: 'Structured data storage',
|
||||
href: 'https://docs.sim.ai/tables',
|
||||
external: true,
|
||||
icon: TableIcon,
|
||||
},
|
||||
{
|
||||
label: 'API',
|
||||
description: 'Deploy workflows as endpoints',
|
||||
href: 'https://docs.sim.ai/api-reference/getting-started',
|
||||
external: true,
|
||||
icon: ApiIcon,
|
||||
},
|
||||
]
|
||||
|
||||
const EXPLORE: SidebarLink[] = [
|
||||
{ label: 'Models', href: '/models' },
|
||||
{ label: 'Integrations', href: '/integrations' },
|
||||
{ label: 'Changelog', href: '/changelog' },
|
||||
{ label: 'Self-hosting', href: 'https://docs.sim.ai/self-hosting', external: true },
|
||||
]
|
||||
|
||||
function DropdownLink({ link }: { link: ProductLink }) {
|
||||
const Icon = link.icon
|
||||
const Tag = link.external ? 'a' : Link
|
||||
const props = link.external
|
||||
? { href: link.href, target: '_blank' as const, rel: 'noopener noreferrer' }
|
||||
: { href: link.href }
|
||||
|
||||
return (
|
||||
<Tag
|
||||
{...props}
|
||||
className='group/item flex items-start gap-2.5 rounded-[5px] px-2.5 py-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<Icon className='mt-0.5 h-[15px] w-[15px] shrink-0 text-[var(--landing-text-icon)]' />
|
||||
<div className='flex flex-col'>
|
||||
<span className='font-[430] font-season text-[13px] text-white leading-tight'>
|
||||
{link.label}
|
||||
</span>
|
||||
<span className='font-season text-[12px] text-[var(--landing-text-subtle)] leading-[150%]'>
|
||||
{link.description}
|
||||
</span>
|
||||
</div>
|
||||
</Tag>
|
||||
)
|
||||
}
|
||||
|
||||
export function ProductDropdown() {
|
||||
return (
|
||||
<div className='flex w-[560px] rounded-[5px] border border-[var(--landing-bg-elevated)] bg-[var(--landing-bg)] shadow-overlay'>
|
||||
<div className='flex-1 p-2'>
|
||||
<div className='mb-1 px-2.5 pt-1'>
|
||||
<span className='font-[430] font-season text-[11px] text-[var(--landing-text-subtle)] uppercase tracking-[0.08em]'>
|
||||
Platform
|
||||
</span>
|
||||
<div className='mt-1.5 h-px bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-2'>
|
||||
{PLATFORM.map((link) => (
|
||||
<DropdownLink key={link.label} link={link} />
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='w-px self-stretch bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='w-[160px] p-2'>
|
||||
<div className='mb-1 px-2.5 pt-1'>
|
||||
<span className='font-[430] font-season text-[11px] text-[var(--landing-text-subtle)] uppercase tracking-[0.08em]'>
|
||||
Explore
|
||||
</span>
|
||||
<div className='mt-1.5 h-px bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
|
||||
{EXPLORE.map((link) => {
|
||||
const Tag = link.external ? 'a' : Link
|
||||
const props = link.external
|
||||
? { href: link.href, target: '_blank' as const, rel: 'noopener noreferrer' }
|
||||
: { href: link.href }
|
||||
return (
|
||||
<Tag
|
||||
key={link.label}
|
||||
{...props}
|
||||
className='block rounded-[5px] px-2.5 py-1.5 font-[430] font-season text-[13px] text-white transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
{link.label}
|
||||
</Tag>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
@@ -2,13 +2,15 @@
|
||||
|
||||
import { useRouter } from 'next/navigation'
|
||||
import { LandingPromptStorage } from '@/lib/core/utils/browser-storage'
|
||||
import { cn } from '@/lib/core/utils/cn'
|
||||
|
||||
interface TemplateCardButtonProps {
|
||||
prompt: string
|
||||
className?: string
|
||||
children: React.ReactNode
|
||||
}
|
||||
|
||||
export function TemplateCardButton({ prompt, children }: TemplateCardButtonProps) {
|
||||
export function TemplateCardButton({ prompt, className, children }: TemplateCardButtonProps) {
|
||||
const router = useRouter()
|
||||
|
||||
function handleClick() {
|
||||
@@ -17,11 +19,7 @@ export function TemplateCardButton({ prompt, children }: TemplateCardButtonProps
|
||||
}
|
||||
|
||||
return (
|
||||
<button
|
||||
type='button'
|
||||
onClick={handleClick}
|
||||
className='group flex w-full flex-col items-start rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5 text-left transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<button type='button' onClick={handleClick} className={cn('w-full text-left', className)}>
|
||||
{children}
|
||||
</button>
|
||||
)
|
||||
|
||||
@@ -283,7 +283,7 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<section className='bg-[var(--landing-bg)]'>
|
||||
<script
|
||||
type='application/ld+json'
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(breadcrumbJsonLd) }}
|
||||
@@ -301,440 +301,434 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
|
||||
/>
|
||||
|
||||
<div className='mx-auto max-w-[1200px] px-6 py-12 sm:px-8 md:px-12'>
|
||||
{/* Breadcrumb */}
|
||||
<nav
|
||||
aria-label='Breadcrumb'
|
||||
className='mb-10 flex items-center gap-2 text-[#555] text-[13px]'
|
||||
>
|
||||
<Link href='/' className='transition-colors hover:text-[var(--landing-text-muted)]'>
|
||||
Home
|
||||
</Link>
|
||||
<span aria-hidden='true'>/</span>
|
||||
{/* Hero */}
|
||||
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
|
||||
<div className='mb-6'>
|
||||
<Link
|
||||
href='/integrations'
|
||||
className='transition-colors hover:text-[var(--landing-text-muted)]'
|
||||
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
Integrations
|
||||
</Link>
|
||||
<span aria-hidden='true'>/</span>
|
||||
<span className='text-[var(--landing-text-muted)]'>{name}</span>
|
||||
</nav>
|
||||
|
||||
{/* Hero */}
|
||||
<section aria-labelledby='integration-heading' className='mb-16'>
|
||||
<div className='mb-6 flex items-center gap-5'>
|
||||
<IntegrationIcon
|
||||
bgColor={bgColor}
|
||||
name={name}
|
||||
Icon={IconComponent}
|
||||
className='h-16 w-16 rounded-xl'
|
||||
iconClassName='h-8 w-8'
|
||||
fallbackClassName='text-[26px]'
|
||||
<svg
|
||||
className='h-3 w-3 shrink-0'
|
||||
viewBox='0 0 10 10'
|
||||
fill='none'
|
||||
xmlns='http://www.w3.org/2000/svg'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
<div>
|
||||
<p className='mb-0.5 text-[#555] text-[12px]'>Integration</p>
|
||||
<h1
|
||||
id='integration-heading'
|
||||
className='font-[500] text-[36px] text-[var(--landing-text)] leading-tight sm:text-[44px]'
|
||||
>
|
||||
{name}
|
||||
</h1>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='mb-8 max-w-[700px] text-[17px] text-[var(--landing-text-muted)] leading-[1.7]'>
|
||||
{description}
|
||||
</p>
|
||||
|
||||
{/* CTAs */}
|
||||
<div className='flex flex-wrap gap-2'>
|
||||
<a
|
||||
href='https://sim.ai'
|
||||
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Start building free
|
||||
</a>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target='_blank'
|
||||
rel='noopener noreferrer'
|
||||
className='inline-flex h-[32px] items-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
View docs
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
className='h-3 w-3'
|
||||
fill='none'
|
||||
<line
|
||||
x1='1'
|
||||
y1='5'
|
||||
x2='10'
|
||||
y2='5'
|
||||
stroke='currentColor'
|
||||
strokeWidth={2}
|
||||
viewBox='0 0 24 24'
|
||||
>
|
||||
<path d='M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6' />
|
||||
<polyline points='15 3 21 3 21 9' />
|
||||
<line x1='10' x2='21' y1='14' y2='3' />
|
||||
</svg>
|
||||
</a>
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
|
||||
/>
|
||||
<path
|
||||
d='M6.5 2L3.5 5L6.5 8'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
strokeLinejoin='miter'
|
||||
fill='none'
|
||||
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
|
||||
/>
|
||||
</svg>
|
||||
Back to Integrations
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
{/* Hero content */}
|
||||
<div className='mb-6 flex items-center gap-5'>
|
||||
<IntegrationIcon
|
||||
bgColor={bgColor}
|
||||
name={name}
|
||||
Icon={IconComponent}
|
||||
className='h-12 w-12 rounded-[5px]'
|
||||
iconClassName='h-6 w-6'
|
||||
fallbackClassName='text-[20px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
<div>
|
||||
<h1
|
||||
id='integration-heading'
|
||||
className='text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[36px] lg:text-[44px]'
|
||||
>
|
||||
{name}
|
||||
</h1>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='mb-8 max-w-[700px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
|
||||
{description}
|
||||
</p>
|
||||
|
||||
{/* CTAs */}
|
||||
<div className='flex flex-wrap gap-2'>
|
||||
<Link
|
||||
href='/signup'
|
||||
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Start building free
|
||||
</Link>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target='_blank'
|
||||
rel='noopener noreferrer'
|
||||
className='group/link inline-flex h-[32px] items-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-season text-[var(--landing-text)] text-sm transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
View docs
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
className='-rotate-45 h-3 w-3 shrink-0'
|
||||
viewBox='0 0 10 10'
|
||||
fill='none'
|
||||
>
|
||||
<line
|
||||
x1='0'
|
||||
y1='5'
|
||||
x2='9'
|
||||
y2='5'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
className='origin-left scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
|
||||
/>
|
||||
<path
|
||||
d='M3.5 2L6.5 5L3.5 8'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
strokeLinejoin='miter'
|
||||
fill='none'
|
||||
className='transition-transform duration-200 ease-out group-hover/link:translate-x-[30%]'
|
||||
/>
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Full-width divider */}
|
||||
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{/* Border-railed content */}
|
||||
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
|
||||
{/* Overview */}
|
||||
{longDescription && (
|
||||
<>
|
||||
<section aria-labelledby='overview-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='overview-heading'
|
||||
className='mb-4 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
Overview
|
||||
</h2>
|
||||
<p className='text-[15px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
|
||||
{longDescription}
|
||||
</p>
|
||||
</section>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* How to automate */}
|
||||
<section aria-labelledby='how-it-works-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='how-it-works-heading'
|
||||
className='mb-6 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
How to automate {name} with Sim
|
||||
</h2>
|
||||
<ol className='space-y-4' aria-label='Steps to set up automation'>
|
||||
{[
|
||||
{
|
||||
step: '01',
|
||||
title: 'Create a free account',
|
||||
body: 'Sign up at sim.ai in seconds. No credit card required. Your workspace is ready immediately.',
|
||||
},
|
||||
{
|
||||
step: '02',
|
||||
title: `Add a ${name} block`,
|
||||
body:
|
||||
authType === 'oauth'
|
||||
? `Open a workflow, drag a ${name} block onto the canvas, and connect your account with one-click OAuth.`
|
||||
: authType === 'api-key'
|
||||
? `Open a workflow, drag a ${name} block onto the canvas, and paste in your ${name} API key.`
|
||||
: `Open a workflow, drag a ${name} block onto the canvas, and authenticate your account.`,
|
||||
},
|
||||
{
|
||||
step: '03',
|
||||
title: 'Configure, connect, and run',
|
||||
body: `Pick the tool you need, wire in an AI agent for reasoning or data transformation, and run. Your ${name} automation is live.`,
|
||||
},
|
||||
].map(({ step, title, body }) => (
|
||||
<li key={step} className='flex gap-4'>
|
||||
<span
|
||||
className='mt-0.5 flex h-7 w-7 shrink-0 items-center justify-center rounded-full border border-[var(--landing-border-strong)] font-martian-mono text-[11px] text-[var(--landing-text-subtle)]'
|
||||
aria-hidden='true'
|
||||
>
|
||||
{step}
|
||||
</span>
|
||||
<div>
|
||||
<h3 className='mb-1 text-[15px] text-white tracking-[-0.02em]'>{title}</h3>
|
||||
<p className='text-[14px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
|
||||
{body}
|
||||
</p>
|
||||
</div>
|
||||
</li>
|
||||
))}
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
{/* Two-column layout */}
|
||||
<div className='grid grid-cols-1 gap-16 lg:grid-cols-[1fr_300px]'>
|
||||
{/* Main column */}
|
||||
<div className='min-w-0 space-y-16'>
|
||||
{/* Overview */}
|
||||
{longDescription && (
|
||||
<section aria-labelledby='overview-heading'>
|
||||
<h2
|
||||
id='overview-heading'
|
||||
className='mb-4 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
>
|
||||
Overview
|
||||
</h2>
|
||||
<p className='text-[15px] text-[var(--landing-text-muted)] leading-[1.8]'>
|
||||
{longDescription}
|
||||
</p>
|
||||
</section>
|
||||
)}
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{/* How to automate — targets "how to connect X" queries */}
|
||||
<section aria-labelledby='how-it-works-heading'>
|
||||
<h2
|
||||
id='how-it-works-heading'
|
||||
className='mb-6 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
>
|
||||
How to automate {name} with Sim
|
||||
</h2>
|
||||
<ol className='space-y-4' aria-label='Steps to set up automation'>
|
||||
{[
|
||||
{
|
||||
step: '01',
|
||||
title: 'Create a free account',
|
||||
body: 'Sign up at sim.ai in seconds. No credit card required. Your workspace is ready immediately.',
|
||||
},
|
||||
{
|
||||
step: '02',
|
||||
title: `Add a ${name} block`,
|
||||
body:
|
||||
authType === 'oauth'
|
||||
? `Open a workflow, drag a ${name} block onto the canvas, and connect your account with one-click OAuth.`
|
||||
: authType === 'api-key'
|
||||
? `Open a workflow, drag a ${name} block onto the canvas, and paste in your ${name} API key.`
|
||||
: `Open a workflow, drag a ${name} block onto the canvas, and authenticate your account.`,
|
||||
},
|
||||
{
|
||||
step: '03',
|
||||
title: 'Configure, connect, and run',
|
||||
body: `Pick the tool you need, wire in an AI agent for reasoning or data transformation, and run. Your ${name} automation is live.`,
|
||||
},
|
||||
].map(({ step, title, body }) => (
|
||||
<li key={step} className='flex gap-4'>
|
||||
<span
|
||||
className='mt-0.5 flex h-7 w-7 shrink-0 items-center justify-center rounded-full border border-[var(--landing-border-strong)] font-[500] text-[#555] text-[11px]'
|
||||
aria-hidden='true'
|
||||
>
|
||||
{step}
|
||||
</span>
|
||||
<div>
|
||||
<h3 className='mb-1 font-[500] text-[15px] text-[var(--landing-text)]'>
|
||||
{title}
|
||||
</h3>
|
||||
<p className='text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{body}
|
||||
</p>
|
||||
</div>
|
||||
</li>
|
||||
))}
|
||||
</ol>
|
||||
</section>
|
||||
|
||||
{/* Triggers */}
|
||||
{triggers.length > 0 && (
|
||||
<section aria-labelledby='triggers-heading'>
|
||||
{/* Triggers — rows */}
|
||||
{triggers.length > 0 && (
|
||||
<section aria-labelledby='triggers-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<div className='mb-2 flex items-center gap-2.5'>
|
||||
<span className='relative flex h-2 w-2' aria-hidden='true'>
|
||||
<span className='absolute inline-flex h-full w-full animate-ping rounded-full bg-emerald-400 opacity-75' />
|
||||
<span className='relative inline-flex h-2 w-2 rounded-full bg-emerald-500' />
|
||||
</span>
|
||||
<h2
|
||||
id='triggers-heading'
|
||||
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
className='text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
Real-time triggers
|
||||
</h2>
|
||||
<p className='mb-4 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Connect a {name} webhook to Sim and your workflow fires the instant an event
|
||||
happens — no polling, no delay. Sim receives the full event payload and makes
|
||||
every field available as a variable inside your workflow.
|
||||
</p>
|
||||
|
||||
{/* Event cards */}
|
||||
<ul
|
||||
className='grid grid-cols-1 gap-3 sm:grid-cols-2'
|
||||
aria-label={`${name} trigger events`}
|
||||
>
|
||||
{triggers.map((trigger) => (
|
||||
<li
|
||||
key={trigger.id}
|
||||
className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4'
|
||||
>
|
||||
<div className='mb-2 flex items-center gap-2'>
|
||||
<span className='inline-flex items-center gap-1 rounded-[4px] bg-[var(--landing-bg-elevated)] px-1.5 py-0.5 font-[500] text-[11px] text-[var(--landing-text)]'>
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
className='h-2.5 w-2.5'
|
||||
fill='none'
|
||||
stroke='currentColor'
|
||||
strokeWidth={2.5}
|
||||
viewBox='0 0 24 24'
|
||||
>
|
||||
<polygon points='13 2 3 14 12 14 11 22 21 10 12 10 13 2' />
|
||||
</svg>
|
||||
Event
|
||||
</span>
|
||||
</div>
|
||||
<p className='font-[500] text-[13px] text-[var(--landing-text)]'>
|
||||
{trigger.name}
|
||||
</div>
|
||||
<p className='text-[14px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
|
||||
Connect a {name} webhook to Sim and your workflow fires the instant an event happens
|
||||
— no polling, no delay.
|
||||
</p>
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
{triggers.map((trigger) => (
|
||||
<div key={trigger.id}>
|
||||
<div className='flex items-start gap-4 px-6 py-4'>
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<p className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{trigger.name}
|
||||
</p>
|
||||
{trigger.description && (
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
|
||||
{trigger.description}
|
||||
</p>
|
||||
{trigger.description && (
|
||||
<p className='mt-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{trigger.description}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
))}
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Workflow templates — horizontal cards */}
|
||||
{matchingTemplates.length > 0 && (
|
||||
<section aria-labelledby='templates-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='templates-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
Workflow templates
|
||||
</h2>
|
||||
<p className='text-[14px] text-[var(--landing-text-body)] tracking-[0.02em]'>
|
||||
Ready-to-use workflows featuring {name}. Click any to build it instantly.
|
||||
</p>
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
{(() => {
|
||||
const isOdd = matchingTemplates.length % 2 === 1
|
||||
const pairedTemplates = isOdd ? matchingTemplates.slice(0, -1) : matchingTemplates
|
||||
const lastTemplate = isOdd ? matchingTemplates[matchingTemplates.length - 1] : null
|
||||
|
||||
const resolveTypes = (template: (typeof matchingTemplates)[number]) => [
|
||||
integration.type,
|
||||
...template.integrationBlockTypes.filter((bt) => bt !== integration.type),
|
||||
]
|
||||
|
||||
const renderIcons = (allTypes: string[]) =>
|
||||
allTypes.map((bt, idx) => {
|
||||
const resolvedBt = byType.get(bt)
|
||||
? bt
|
||||
: byType.get(`${bt}_v2`)
|
||||
? `${bt}_v2`
|
||||
: byType.get(`${bt}_v3`)
|
||||
? `${bt}_v3`
|
||||
: bt
|
||||
const int = byType.get(resolvedBt)
|
||||
const ToolIcon = blockTypeToIconMap[resolvedBt]
|
||||
return (
|
||||
<span key={bt} className='inline-flex items-center gap-1.5'>
|
||||
{idx > 0 && (
|
||||
<span className='text-[#555] text-[11px]' aria-hidden='true'>
|
||||
→
|
||||
</span>
|
||||
)}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Workflow templates */}
|
||||
{matchingTemplates.length > 0 && (
|
||||
<section aria-labelledby='templates-heading'>
|
||||
<h2
|
||||
id='templates-heading'
|
||||
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
>
|
||||
Workflow templates
|
||||
</h2>
|
||||
<p className='mb-6 text-[14px] text-[var(--landing-text-muted)]'>
|
||||
Ready-to-use workflows featuring {name}. Click any to build it instantly.
|
||||
</p>
|
||||
<ul
|
||||
className='grid grid-cols-1 gap-4 sm:grid-cols-2'
|
||||
aria-label='Workflow templates'
|
||||
>
|
||||
{matchingTemplates.map((template) => {
|
||||
const allTypes = [
|
||||
integration.type,
|
||||
...template.integrationBlockTypes.filter((bt) => bt !== integration.type),
|
||||
]
|
||||
<IntegrationIcon
|
||||
bgColor={int?.bgColor ?? '#333'}
|
||||
name={int?.name ?? bt}
|
||||
Icon={ToolIcon}
|
||||
as='span'
|
||||
className='h-6 w-6 rounded-[4px]'
|
||||
iconClassName='h-3.5 w-3.5'
|
||||
fallbackClassName='text-[10px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
</span>
|
||||
)
|
||||
})
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* Paired rows of 2 */}
|
||||
{Array.from({ length: Math.ceil(pairedTemplates.length / 2) }, (_, rowIdx) => {
|
||||
const row = pairedTemplates.slice(rowIdx * 2, rowIdx * 2 + 2)
|
||||
return (
|
||||
<li key={template.title}>
|
||||
<TemplateCardButton prompt={template.prompt}>
|
||||
{/* Integration pills row */}
|
||||
<div className='mb-3 flex flex-wrap items-center gap-1.5 text-[12px]'>
|
||||
{allTypes.map((bt, idx) => {
|
||||
// Templates may use unversioned keys (e.g. "notion") while the
|
||||
// icon map has versioned keys ("notion_v2") — fall back to _v2.
|
||||
const resolvedBt = byType.get(bt)
|
||||
? bt
|
||||
: byType.get(`${bt}_v2`)
|
||||
? `${bt}_v2`
|
||||
: bt
|
||||
const int = byType.get(resolvedBt)
|
||||
const intName = int?.name ?? bt
|
||||
return (
|
||||
<span key={bt} className='inline-flex items-center gap-1.5'>
|
||||
{idx > 0 && (
|
||||
<span className='text-[#555]' aria-hidden='true'>
|
||||
→
|
||||
</span>
|
||||
)}
|
||||
<span className='inline-flex items-center gap-1 rounded-[3px] bg-[var(--landing-bg-elevated)] px-1.5 py-0.5 font-[500] text-[var(--landing-text)]'>
|
||||
<IntegrationIcon
|
||||
bgColor={int?.bgColor ?? '#6B7280'}
|
||||
name={intName}
|
||||
Icon={blockTypeToIconMap[resolvedBt]}
|
||||
as='span'
|
||||
className='h-3.5 w-3.5 rounded-[2px]'
|
||||
iconClassName='h-2.5 w-2.5'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
{intName}
|
||||
</span>
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
|
||||
<p className='mb-1 font-[500] text-[14px] text-[var(--landing-text)]'>
|
||||
{template.title}
|
||||
</p>
|
||||
|
||||
<p className='mt-3 text-[#555] text-[13px] transition-colors group-hover:text-[var(--landing-text-muted)]'>
|
||||
Try this workflow →
|
||||
</p>
|
||||
</TemplateCardButton>
|
||||
</li>
|
||||
<div key={rowIdx}>
|
||||
<nav
|
||||
aria-label={`Template row ${rowIdx + 1}`}
|
||||
className='flex flex-col sm:flex-row'
|
||||
>
|
||||
{row.map((template) => (
|
||||
<TemplateCardButton
|
||||
key={template.title}
|
||||
prompt={template.prompt}
|
||||
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
|
||||
>
|
||||
<div className='flex items-center gap-1.5'>
|
||||
{renderIcons(resolveTypes(template))}
|
||||
</div>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{template.title}
|
||||
</h3>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{template.prompt}
|
||||
</p>
|
||||
</div>
|
||||
</TemplateCardButton>
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</ul>
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Tools */}
|
||||
{operations.length > 0 && (
|
||||
<section aria-labelledby='tools-heading'>
|
||||
<h2
|
||||
id='tools-heading'
|
||||
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
>
|
||||
Supported tools
|
||||
</h2>
|
||||
<p className='mb-6 text-[14px] text-[var(--landing-text-muted)]'>
|
||||
{operations.length} {name} tool{operations.length === 1 ? '' : 's'} available in
|
||||
Sim
|
||||
</p>
|
||||
<ul
|
||||
className='grid grid-cols-1 gap-2 sm:grid-cols-2'
|
||||
aria-label={`${name} supported tools`}
|
||||
>
|
||||
{operations.map((op) => (
|
||||
<li
|
||||
key={op.name}
|
||||
className='rounded-[6px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3.5 py-3'
|
||||
>
|
||||
<p className='font-[500] text-[13px] text-[var(--landing-text)]'>{op.name}</p>
|
||||
{op.description && (
|
||||
<p className='mt-0.5 text-[#555] text-[12px] leading-relaxed'>
|
||||
{op.description}
|
||||
</p>
|
||||
)}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* FAQ */}
|
||||
<section aria-labelledby='faq-heading'>
|
||||
<h2
|
||||
id='faq-heading'
|
||||
className='mb-8 font-[500] text-[20px] text-[var(--landing-text)]'
|
||||
>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<IntegrationFAQ faqs={faqs} />
|
||||
</section>
|
||||
</div>
|
||||
|
||||
{/* Sidebar */}
|
||||
<aside className='space-y-5' aria-label='Integration details'>
|
||||
{/* Quick details */}
|
||||
<div className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
|
||||
<h3 className='mb-4 font-[500] text-[14px] text-[var(--landing-text)]'>Details</h3>
|
||||
<dl className='space-y-3 text-[13px]'>
|
||||
{operations.length > 0 && (
|
||||
<div>
|
||||
<dt className='text-[#555]'>Tools</dt>
|
||||
<dd className='text-[var(--landing-text)]'>{operations.length} supported</dd>
|
||||
</div>
|
||||
)}
|
||||
{triggers.length > 0 && (
|
||||
<div>
|
||||
<dt className='text-[#555]'>Triggers</dt>
|
||||
<dd className='text-[var(--landing-text)]'>{triggers.length} available</dd>
|
||||
</div>
|
||||
)}
|
||||
<div>
|
||||
<dt className='text-[#555]'>Auth</dt>
|
||||
<dd className='text-[var(--landing-text)]'>
|
||||
{authType === 'oauth'
|
||||
? 'One-click OAuth'
|
||||
: authType === 'api-key'
|
||||
? 'API key'
|
||||
: 'None required'}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt className='text-[#555]'>Pricing</dt>
|
||||
<dd className='text-[var(--landing-text)]'>Free to start</dd>
|
||||
</div>
|
||||
</dl>
|
||||
<div className='mt-5 flex flex-col gap-2'>
|
||||
<a
|
||||
href='https://sim.ai'
|
||||
className='flex h-[32px] w-full items-center justify-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] font-[430] font-season text-[13px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Get started free
|
||||
</a>
|
||||
<a
|
||||
href={docsUrl}
|
||||
target='_blank'
|
||||
rel='noopener noreferrer'
|
||||
className='flex h-[32px] w-full items-center justify-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] font-[430] font-season text-[13px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
View docs
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
className='h-3 w-3'
|
||||
fill='none'
|
||||
stroke='currentColor'
|
||||
strokeWidth={2}
|
||||
viewBox='0 0 24 24'
|
||||
>
|
||||
<path d='M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6' />
|
||||
<polyline points='15 3 21 3 21 9' />
|
||||
<line x1='10' x2='21' y1='14' y2='3' />
|
||||
</svg>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Related integrations — internal linking for SEO */}
|
||||
<div className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
|
||||
{relatedIntegrations.length > 0 && (
|
||||
<>
|
||||
<h3 className='mb-4 font-[500] text-[14px] text-[var(--landing-text)]'>
|
||||
Related integrations
|
||||
</h3>
|
||||
<ul className='space-y-2'>
|
||||
{relatedIntegrations.map((rel) => (
|
||||
<li key={rel.slug}>
|
||||
<Link
|
||||
href={`/integrations/${rel.slug}`}
|
||||
className='flex items-center gap-2.5 rounded-[6px] p-1.5 text-[13px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
<IntegrationIcon
|
||||
bgColor={rel.bgColor}
|
||||
name={rel.name}
|
||||
Icon={blockTypeToIconMap[rel.type]}
|
||||
as='span'
|
||||
className='h-6 w-6 rounded-[4px]'
|
||||
iconClassName='h-3.5 w-3.5'
|
||||
fallbackClassName='text-[10px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
{rel.name}
|
||||
</Link>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
{/* Last template as a full-width row when odd */}
|
||||
{lastTemplate && (
|
||||
<>
|
||||
<TemplateCardButton
|
||||
prompt={lastTemplate.prompt}
|
||||
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<div className='flex items-center gap-1.5'>
|
||||
{renderIcons(resolveTypes(lastTemplate))}
|
||||
</div>
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{lastTemplate.title}
|
||||
</h3>
|
||||
<p className='line-clamp-1 text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
|
||||
{lastTemplate.prompt}
|
||||
</p>
|
||||
</div>
|
||||
</TemplateCardButton>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
<Link
|
||||
href='/integrations'
|
||||
className={`block text-[#555] text-[12px] transition-colors hover:text-[var(--landing-text-muted)]${relatedIntegrations.length > 0 ? ' mt-4' : ''}`}
|
||||
)
|
||||
})()}
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* Supported tools — rows */}
|
||||
{operations.length > 0 && (
|
||||
<section aria-labelledby='tools-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='tools-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
All integrations →
|
||||
</Link>
|
||||
Supported tools
|
||||
</h2>
|
||||
<p className='text-[14px] text-[var(--landing-text-body)] tracking-[0.02em]'>
|
||||
{operations.length} {name} tool{operations.length === 1 ? '' : 's'} available in Sim
|
||||
</p>
|
||||
</div>
|
||||
</aside>
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
{operations.map((op) => (
|
||||
<div key={op.name}>
|
||||
<div className='flex items-start gap-4 px-6 py-4'>
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<p className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{op.name}
|
||||
</p>
|
||||
{op.description && (
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
|
||||
{op.description}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
))}
|
||||
</section>
|
||||
)}
|
||||
|
||||
{/* FAQ — full width */}
|
||||
<section aria-labelledby='faq-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='faq-heading'
|
||||
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
|
||||
>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<IntegrationFAQ faqs={faqs} />
|
||||
</section>
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{/* Related integrations — horizontal cards with vertical dividers (blog featured pattern) */}
|
||||
{relatedIntegrations.length > 0 && (
|
||||
<>
|
||||
<nav aria-label='Related integrations' className='flex flex-col sm:flex-row'>
|
||||
{relatedIntegrations.slice(0, 4).map((rel) => (
|
||||
<Link
|
||||
key={rel.slug}
|
||||
href={`/integrations/${rel.slug}`}
|
||||
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
|
||||
>
|
||||
<IntegrationIcon
|
||||
bgColor={rel.bgColor}
|
||||
name={rel.name}
|
||||
Icon={blockTypeToIconMap[rel.type]}
|
||||
as='span'
|
||||
className='h-10 w-10 rounded-[5px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>
|
||||
{rel.name}
|
||||
</h3>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{rel.description}
|
||||
</p>
|
||||
</div>
|
||||
</Link>
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* Bottom CTA */}
|
||||
<section
|
||||
aria-labelledby='cta-heading'
|
||||
className='mt-20 rounded-xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-8 text-center sm:p-12'
|
||||
>
|
||||
{/* Logo pair: Sim × Integration */}
|
||||
<section aria-labelledby='cta-heading' className='px-6 py-16 text-center'>
|
||||
<div className='mx-auto mb-6 flex items-center justify-center gap-3'>
|
||||
<Image
|
||||
src='/brandbook/logo/small.png'
|
||||
@@ -776,22 +770,25 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
|
||||
</div>
|
||||
<h2
|
||||
id='cta-heading'
|
||||
className='mb-3 font-[500] text-[28px] text-[var(--landing-text)] sm:text-[34px]'
|
||||
className='mb-3 text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[34px]'
|
||||
>
|
||||
Start automating {name} today
|
||||
</h2>
|
||||
<p className='mx-auto mb-8 max-w-[480px] text-[16px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<p className='mx-auto mb-8 max-w-[480px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
|
||||
Build your first AI workflow with {name} in minutes. Connect to every tool your team
|
||||
uses. Free to start — no credit card required.
|
||||
</p>
|
||||
<a
|
||||
href='https://sim.ai'
|
||||
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
<Link
|
||||
href='/signup'
|
||||
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Build for free →
|
||||
</a>
|
||||
Build for free
|
||||
</Link>
|
||||
</section>
|
||||
</div>
|
||||
</>
|
||||
|
||||
{/* Closing full-width divider */}
|
||||
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</section>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { ComponentType, SVGProps } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { Badge } from '@/components/emcn'
|
||||
import type { Integration } from '@/app/(landing)/integrations/data/types'
|
||||
import { ChevronArrow } from '@/app/(landing)/models/components/model-primitives'
|
||||
import { IntegrationIcon } from './integration-icon'
|
||||
|
||||
interface IntegrationCardProps {
|
||||
@@ -9,49 +9,76 @@ interface IntegrationCardProps {
|
||||
IconComponent?: ComponentType<SVGProps<SVGSVGElement>>
|
||||
}
|
||||
|
||||
/**
|
||||
* Featured integration card — matches blog featured post pattern.
|
||||
* Used in flex rows separated by border-l dividers.
|
||||
*/
|
||||
export function IntegrationCard({ integration, IconComponent }: IntegrationCardProps) {
|
||||
const { slug, name, description, bgColor, operationCount, triggerCount } = integration
|
||||
const { slug, name, description, bgColor } = integration
|
||||
|
||||
return (
|
||||
<Link
|
||||
href={`/integrations/${slug}`}
|
||||
className='group flex flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
aria-label={`${name} integration`}
|
||||
className='group/link flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
|
||||
>
|
||||
<IntegrationIcon
|
||||
bgColor={bgColor}
|
||||
name={name}
|
||||
Icon={IconComponent}
|
||||
className='mb-3 h-10 w-10 rounded-lg'
|
||||
className='h-10 w-10 rounded-[5px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
|
||||
{/* Name */}
|
||||
<h3 className='mb-1 font-[500] text-[14px] text-[var(--landing-text)] leading-snug'>
|
||||
{name}
|
||||
</h3>
|
||||
|
||||
{/* Description — clamped to 2 lines */}
|
||||
<p className='mb-3 line-clamp-2 flex-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{description}
|
||||
</p>
|
||||
|
||||
{/* Footer row */}
|
||||
<div className='flex flex-wrap items-center gap-1.5'>
|
||||
{operationCount > 0 && (
|
||||
<Badge className='border-0 bg-[#333] text-[11px] text-[var(--landing-text-muted)]'>
|
||||
{operationCount} {operationCount === 1 ? 'tool' : 'tools'}
|
||||
</Badge>
|
||||
)}
|
||||
{triggerCount > 0 && (
|
||||
<Badge className='border-0 bg-[#333] text-[11px] text-[var(--landing-text-muted)]'>
|
||||
{triggerCount} {triggerCount === 1 ? 'trigger' : 'triggers'}
|
||||
</Badge>
|
||||
)}
|
||||
<span className='ml-auto text-[#555] text-[12px] transition-colors group-hover:text-[var(--landing-text-muted)]'>
|
||||
Learn more →
|
||||
</span>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{name}</h3>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{description}
|
||||
</p>
|
||||
</div>
|
||||
</Link>
|
||||
)
|
||||
}
|
||||
|
||||
interface IntegrationRowProps {
|
||||
integration: Integration
|
||||
IconComponent?: ComponentType<SVGProps<SVGSVGElement>>
|
||||
}
|
||||
|
||||
/**
|
||||
* Integration list row — matches blog remaining post pattern.
|
||||
* Each row followed by an h-px divider.
|
||||
*/
|
||||
export function IntegrationRow({ integration, IconComponent }: IntegrationRowProps) {
|
||||
const { slug, name, description, bgColor } = integration
|
||||
|
||||
return (
|
||||
<>
|
||||
<Link
|
||||
href={`/integrations/${slug}`}
|
||||
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
aria-label={`${name} integration`}
|
||||
>
|
||||
<IntegrationIcon
|
||||
bgColor={bgColor}
|
||||
name={name}
|
||||
Icon={IconComponent}
|
||||
className='h-8 w-8 shrink-0 rounded-[5px]'
|
||||
iconClassName='h-4 w-4'
|
||||
fallbackClassName='text-[13px]'
|
||||
aria-hidden='true'
|
||||
/>
|
||||
|
||||
{/* Name + description */}
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>{name}</h3>
|
||||
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
|
||||
{description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Animated arrow */}
|
||||
<ChevronArrow />
|
||||
</Link>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4,12 +4,11 @@ import { useMemo, useState } from 'react'
|
||||
import { Input } from '@/components/emcn'
|
||||
import { blockTypeToIconMap } from '@/app/(landing)/integrations/data/icon-mapping'
|
||||
import type { Integration } from '@/app/(landing)/integrations/data/types'
|
||||
import { IntegrationCard } from './integration-card'
|
||||
import { IntegrationRow } from './integration-card'
|
||||
|
||||
const CATEGORY_LABELS: Record<string, string> = {
|
||||
ai: 'AI',
|
||||
analytics: 'Analytics',
|
||||
automation: 'Automation',
|
||||
communication: 'Communication',
|
||||
crm: 'CRM',
|
||||
'customer-support': 'Customer Support',
|
||||
@@ -21,12 +20,10 @@ const CATEGORY_LABELS: Record<string, string> = {
|
||||
email: 'Email',
|
||||
'file-storage': 'File Storage',
|
||||
hr: 'HR',
|
||||
media: 'Media',
|
||||
productivity: 'Productivity',
|
||||
'sales-intelligence': 'Sales Intelligence',
|
||||
sales: 'Sales',
|
||||
search: 'Search',
|
||||
security: 'Security',
|
||||
social: 'Social',
|
||||
other: 'Other',
|
||||
} as const
|
||||
|
||||
@@ -41,8 +38,10 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
const availableCategories = useMemo(() => {
|
||||
const counts = new Map<string, number>()
|
||||
for (const i of integrations) {
|
||||
if (i.integrationType) {
|
||||
counts.set(i.integrationType, (counts.get(i.integrationType) || 0) + 1)
|
||||
if (i.integrationTypes) {
|
||||
for (const t of i.integrationTypes) {
|
||||
counts.set(t, (counts.get(t) || 0) + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return Array.from(counts.entries())
|
||||
@@ -54,7 +53,7 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
let results = integrations
|
||||
|
||||
if (activeCategory) {
|
||||
results = results.filter((i) => i.integrationType === activeCategory)
|
||||
results = results.filter((i) => i.integrationTypes?.includes(activeCategory))
|
||||
}
|
||||
|
||||
const q = query.trim().toLowerCase()
|
||||
@@ -75,7 +74,7 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div className='mb-6 flex flex-col gap-4 sm:flex-row sm:items-center'>
|
||||
<div className='mb-6 flex flex-col gap-4 px-6 sm:flex-row sm:items-center'>
|
||||
<div className='relative max-w-[480px] flex-1'>
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
@@ -99,14 +98,14 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='mb-8 flex flex-wrap gap-2'>
|
||||
<div className='mb-6 flex flex-wrap gap-2 px-6'>
|
||||
<button
|
||||
type='button'
|
||||
onClick={() => setActiveCategory(null)}
|
||||
className={`rounded-md border px-3 py-1 text-[12px] transition-colors ${
|
||||
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
|
||||
activeCategory === null
|
||||
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
|
||||
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
}`}
|
||||
>
|
||||
All
|
||||
@@ -116,10 +115,10 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
key={cat}
|
||||
type='button'
|
||||
onClick={() => setActiveCategory(activeCategory === cat ? null : cat)}
|
||||
className={`rounded-md border px-3 py-1 text-[12px] transition-colors ${
|
||||
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
|
||||
activeCategory === cat
|
||||
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
|
||||
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
}`}
|
||||
>
|
||||
{CATEGORY_LABELS[cat] || cat}
|
||||
@@ -127,16 +126,18 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{filtered.length === 0 ? (
|
||||
<p className='py-12 text-center text-[#555] text-[15px]'>
|
||||
<p className='py-12 text-center text-[15px] text-[var(--landing-text-subtle)]'>
|
||||
No integrations found
|
||||
{query ? <> for “{query}”</> : null}
|
||||
{activeCategory ? <> in {CATEGORY_LABELS[activeCategory] || activeCategory}</> : null}
|
||||
</p>
|
||||
) : (
|
||||
<div className='grid grid-cols-1 gap-4 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4'>
|
||||
<div>
|
||||
{filtered.map((integration) => (
|
||||
<IntegrationCard
|
||||
<IntegrationRow
|
||||
key={integration.type}
|
||||
integration={integration}
|
||||
IconComponent={blockTypeToIconMap[integration.type]}
|
||||
|
||||
@@ -41,9 +41,7 @@ export function IntegrationIcon({
|
||||
{Icon ? (
|
||||
<Icon className={cn(iconClassName, 'text-white')} />
|
||||
) : (
|
||||
<span className={cn('font-[500] text-white leading-none', fallbackClassName)}>
|
||||
{name.charAt(0)}
|
||||
</span>
|
||||
<span className={cn('text-white leading-none', fallbackClassName)}>{name.charAt(0)}</span>
|
||||
)}
|
||||
</Tag>
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -34,6 +34,6 @@ export interface Integration {
|
||||
triggerCount: number
|
||||
authType: AuthType
|
||||
category: string
|
||||
integrationType?: string
|
||||
integrationTypes?: string[]
|
||||
tags?: string[]
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import type { Metadata } from 'next'
|
||||
import { Badge } from '@/components/emcn'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { IntegrationCard } from './components/integration-card'
|
||||
import { IntegrationGrid } from './components/integration-grid'
|
||||
import { RequestIntegrationModal } from './components/request-integration-modal'
|
||||
import { blockTypeToIconMap } from './data/icon-mapping'
|
||||
@@ -18,6 +20,14 @@ const TOP_NAMES = [...new Set(POPULAR_WORKFLOWS.flatMap((p) => [p.from, p.to]))]
|
||||
|
||||
const baseUrl = getBaseUrl()
|
||||
|
||||
/** Curated featured integrations — high-recognition services shown as cards. */
|
||||
const FEATURED_SLUGS = ['slack', 'notion', 'github', 'gmail'] as const
|
||||
|
||||
const bySlug = new Map(allIntegrations.map((i) => [i.slug, i]))
|
||||
const featured = FEATURED_SLUGS.map((s) => bySlug.get(s)).filter(
|
||||
(i): i is Integration => i !== undefined
|
||||
)
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: 'Integrations',
|
||||
description: `Connect ${INTEGRATION_COUNT}+ apps and services with Sim's AI workflow automation. Build intelligent pipelines with ${TOP_NAMES.join(', ')}, and more.`,
|
||||
@@ -90,7 +100,7 @@ export default function IntegrationsPage() {
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<section className='bg-[var(--landing-bg)]'>
|
||||
<script
|
||||
type='application/ld+json'
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(breadcrumbJsonLd) }}
|
||||
@@ -100,64 +110,81 @@ export default function IntegrationsPage() {
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(itemListJsonLd) }}
|
||||
/>
|
||||
|
||||
<div className='mx-auto max-w-[1200px] px-6 py-16 sm:px-8 md:px-12'>
|
||||
{/* Hero */}
|
||||
<section aria-labelledby='integrations-heading' className='mb-16'>
|
||||
{/* Hero */}
|
||||
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
|
||||
<Badge
|
||||
variant='blue'
|
||||
size='md'
|
||||
dot
|
||||
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
|
||||
>
|
||||
Integrations
|
||||
</Badge>
|
||||
|
||||
<div className='flex flex-col gap-4 xl:flex-row xl:items-end xl:justify-between'>
|
||||
<h1
|
||||
id='integrations-heading'
|
||||
className='mb-4 text-balance font-[500] text-[40px] text-[var(--landing-text)] leading-tight sm:text-[56px]'
|
||||
className='text-balance text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
|
||||
>
|
||||
Integrations
|
||||
</h1>
|
||||
<p className='max-w-[640px] text-[18px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
|
||||
Connect every tool your team uses. Build AI-powered workflows that automate tasks across{' '}
|
||||
{TOP_NAMES.slice(0, 4).map((name, i, arr) => {
|
||||
const integration = allIntegrations.find((int) => int.name === name)
|
||||
const Icon = integration ? blockTypeToIconMap[integration.type] : undefined
|
||||
return (
|
||||
<span key={name} className='inline-flex items-center gap-[5px]'>
|
||||
{Icon && (
|
||||
<span
|
||||
aria-hidden='true'
|
||||
className='inline-flex shrink-0'
|
||||
style={{ opacity: 0.65 }}
|
||||
>
|
||||
<Icon className='h-[0.85em] w-[0.85em]' />
|
||||
</span>
|
||||
)}
|
||||
{name}
|
||||
{i < arr.length - 1 ? ', ' : ''}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
{' and more.'}
|
||||
{INTEGRATION_COUNT} apps and services.
|
||||
</p>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Searchable grid — client component */}
|
||||
{/* Full-width divider */}
|
||||
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{/* Border-railed content */}
|
||||
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
|
||||
{/* Featured integrations — top */}
|
||||
{featured.length > 0 && (
|
||||
<>
|
||||
<nav aria-label='Featured integrations' className='flex flex-col sm:flex-row'>
|
||||
{featured.map((integration) => (
|
||||
<IntegrationCard
|
||||
key={integration.type}
|
||||
integration={integration}
|
||||
IconComponent={blockTypeToIconMap[integration.type]}
|
||||
/>
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
{/* All Integrations — search, filters, rows */}
|
||||
<section aria-labelledby='all-integrations-heading'>
|
||||
<h2
|
||||
id='all-integrations-heading'
|
||||
className='mb-8 font-[500] text-[24px] text-[var(--landing-text)]'
|
||||
>
|
||||
All Integrations
|
||||
</h2>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='all-integrations-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
All Integrations
|
||||
</h2>
|
||||
</div>
|
||||
<IntegrationGrid integrations={allIntegrations} />
|
||||
</section>
|
||||
|
||||
{/* Integration request */}
|
||||
<div className='mt-16 flex flex-col items-start gap-3 border-[var(--landing-border)] border-t pt-10 sm:flex-row sm:items-center sm:justify-between'>
|
||||
<div className='flex flex-col items-start gap-3 px-6 py-6 sm:flex-row sm:items-center sm:justify-between'>
|
||||
<div>
|
||||
<p className='font-[500] text-[15px] text-[var(--landing-text)]'>
|
||||
<p className='text-[15px] text-white tracking-[-0.02em]'>
|
||||
Don't see the integration you need?
|
||||
</p>
|
||||
<p className='mt-0.5 text-[#555] text-[13px]'>
|
||||
<p className='mt-0.5 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
Let us know and we'll prioritize it.
|
||||
</p>
|
||||
</div>
|
||||
<RequestIntegrationModal />
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
|
||||
{/* Closing full-width divider */}
|
||||
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</section>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -3,14 +3,7 @@ import Link from 'next/link'
|
||||
import { notFound } from 'next/navigation'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
|
||||
import {
|
||||
Breadcrumbs,
|
||||
CapabilityTags,
|
||||
DetailItem,
|
||||
ModelCard,
|
||||
ProviderIcon,
|
||||
StatCard,
|
||||
} from '@/app/(landing)/models/components/model-primitives'
|
||||
import { FeaturedModelCard, ProviderIcon } from '@/app/(landing)/models/components/model-primitives'
|
||||
import {
|
||||
ALL_CATALOG_MODELS,
|
||||
buildModelCapabilityFacts,
|
||||
@@ -165,66 +158,88 @@ export default async function ModelPage({
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
|
||||
/>
|
||||
|
||||
<div className='mx-auto max-w-[1280px] px-6 py-12 sm:px-8 md:px-12'>
|
||||
<Breadcrumbs
|
||||
items={[
|
||||
{ label: 'Home', href: '/' },
|
||||
{ label: 'Models', href: '/models' },
|
||||
{ label: provider.name, href: provider.href },
|
||||
{ label: model.displayName },
|
||||
]}
|
||||
/>
|
||||
<section className='bg-[var(--landing-bg)]'>
|
||||
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
|
||||
<div className='mb-6'>
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
<svg
|
||||
className='h-3 w-3 shrink-0'
|
||||
viewBox='0 0 10 10'
|
||||
fill='none'
|
||||
xmlns='http://www.w3.org/2000/svg'
|
||||
>
|
||||
<line
|
||||
x1='1'
|
||||
y1='5'
|
||||
x2='10'
|
||||
y2='5'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
|
||||
/>
|
||||
<path
|
||||
d='M6.5 2L3.5 5L6.5 8'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
strokeLinejoin='miter'
|
||||
fill='none'
|
||||
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
|
||||
/>
|
||||
</svg>
|
||||
Back to {provider.name}
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
<section aria-labelledby='model-heading' className='mb-14'>
|
||||
<div className='mb-6 flex items-start gap-4'>
|
||||
<div className='mb-6 flex items-center gap-5'>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-16 w-16 rounded-3xl'
|
||||
className='h-16 w-16 rounded-[5px]'
|
||||
iconClassName='h-8 w-8'
|
||||
/>
|
||||
<div className='min-w-0'>
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.12em]'>
|
||||
<div>
|
||||
<p className='mb-0.5 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{provider.name} model
|
||||
</p>
|
||||
<h1
|
||||
id='model-heading'
|
||||
className='font-[500] text-[38px] text-[var(--landing-text)] leading-tight sm:text-[48px]'
|
||||
className='text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[36px] lg:text-[44px]'
|
||||
>
|
||||
{model.displayName}
|
||||
</h1>
|
||||
<p className='mt-2 break-all text-[13px] text-[var(--landing-text-muted)]'>
|
||||
Model ID: {model.id}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='max-w-[820px] text-[17px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<p className='mb-8 max-w-[700px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
|
||||
{model.summary}
|
||||
{model.bestFor ? ` ${model.bestFor}` : ''}
|
||||
</p>
|
||||
|
||||
<div className='mt-8 flex flex-wrap gap-3'>
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--landing-border-strong)] px-3 font-[430] text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
Explore {provider.name} models
|
||||
</Link>
|
||||
<div className='flex flex-wrap gap-2'>
|
||||
<a
|
||||
href='https://sim.ai'
|
||||
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--white)] bg-[var(--white)] px-3 font-[430] text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Build with this model
|
||||
</a>
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-season text-[var(--landing-text)] text-sm transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
All {provider.name} models
|
||||
</Link>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<section
|
||||
aria-label='Model stats'
|
||||
className='mb-16 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'
|
||||
>
|
||||
<StatCard label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
|
||||
<StatCard
|
||||
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
|
||||
<InfoRow label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
|
||||
<InfoRow
|
||||
label='Cached input'
|
||||
value={
|
||||
model.pricing.cachedInput !== undefined
|
||||
@@ -232,158 +247,72 @@ export default async function ModelPage({
|
||||
: 'N/A'
|
||||
}
|
||||
/>
|
||||
<StatCard label='Output price' value={`${formatPrice(model.pricing.output)}/1M`} />
|
||||
<StatCard
|
||||
<InfoRow label='Output price' value={`${formatPrice(model.pricing.output)}/1M`} />
|
||||
<InfoRow
|
||||
label='Context window'
|
||||
value={model.contextWindow ? formatTokenCount(model.contextWindow) : 'Unknown'}
|
||||
/>
|
||||
</section>
|
||||
<InfoRow
|
||||
label='Max output'
|
||||
value={
|
||||
model.capabilities.maxOutputTokens
|
||||
? `${formatTokenCount(getEffectiveMaxOutputTokens(model.capabilities))} tokens`
|
||||
: 'Not published'
|
||||
}
|
||||
/>
|
||||
<InfoRow label='Provider' value={provider.name} />
|
||||
<InfoRow label='Updated' value={formatUpdatedAt(model.pricing.updatedAt)} />
|
||||
{model.bestFor ? <InfoRow label='Best for' value={model.bestFor} /> : null}
|
||||
|
||||
<div className='grid grid-cols-1 gap-16 lg:grid-cols-[1fr_320px]'>
|
||||
<div className='min-w-0 space-y-16'>
|
||||
<section aria-labelledby='pricing-heading'>
|
||||
<h2
|
||||
id='pricing-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Pricing and limits
|
||||
</h2>
|
||||
<p className='mb-6 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Pricing below is generated directly from the provider registry in Sim. All amounts
|
||||
are listed per one million tokens.
|
||||
</p>
|
||||
{capabilityFacts.length > 0 && (
|
||||
<>
|
||||
{capabilityFacts.map((item) => (
|
||||
<InfoRow key={item.label} label={item.label} value={item.value} />
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
|
||||
<DetailItem label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
|
||||
<DetailItem
|
||||
label='Cached input'
|
||||
value={
|
||||
model.pricing.cachedInput !== undefined
|
||||
? `${formatPrice(model.pricing.cachedInput)}/1M`
|
||||
: 'N/A'
|
||||
}
|
||||
/>
|
||||
<DetailItem
|
||||
label='Output price'
|
||||
value={`${formatPrice(model.pricing.output)}/1M`}
|
||||
/>
|
||||
<DetailItem label='Updated' value={formatUpdatedAt(model.pricing.updatedAt)} />
|
||||
<DetailItem
|
||||
label='Context window'
|
||||
value={
|
||||
model.contextWindow
|
||||
? `${formatTokenCount(model.contextWindow)} tokens`
|
||||
: 'Unknown'
|
||||
}
|
||||
/>
|
||||
<DetailItem
|
||||
label='Max output'
|
||||
value={
|
||||
model.capabilities.maxOutputTokens
|
||||
? `${formatTokenCount(getEffectiveMaxOutputTokens(model.capabilities))} tokens`
|
||||
: 'Not published'
|
||||
}
|
||||
/>
|
||||
<DetailItem label='Provider' value={provider.name} />
|
||||
{model.bestFor ? <DetailItem label='Best for' value={model.bestFor} /> : null}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section aria-labelledby='capabilities-heading'>
|
||||
<h2
|
||||
id='capabilities-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Capabilities
|
||||
</h2>
|
||||
<p className='mb-6 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
These capability flags are generated from the provider and model definitions tracked
|
||||
in Sim.
|
||||
</p>
|
||||
<CapabilityTags tags={model.capabilityTags} />
|
||||
<div className='mt-8 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
|
||||
{capabilityFacts.map((item) => (
|
||||
<DetailItem key={item.label} label={item.label} value={item.value} />
|
||||
{relatedModels.length > 0 && (
|
||||
<>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
<nav aria-label='Related models' className='flex flex-col sm:flex-row'>
|
||||
{relatedModels.slice(0, 3).map((entry) => (
|
||||
<FeaturedModelCard key={entry.id} provider={provider} model={entry} />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
</nav>
|
||||
</>
|
||||
)}
|
||||
|
||||
{relatedModels.length > 0 && (
|
||||
<section aria-labelledby='related-models-heading'>
|
||||
<h2
|
||||
id='related-models-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Related {provider.name} models
|
||||
</h2>
|
||||
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Browse comparable models from the same provider to compare pricing, context
|
||||
window, and capability coverage.
|
||||
</p>
|
||||
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
|
||||
{relatedModels.map((entry) => (
|
||||
<ModelCard key={entry.id} provider={provider} model={entry} />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
)}
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<section
|
||||
aria-labelledby='model-faq-heading'
|
||||
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
<section aria-labelledby='model-faq-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='model-faq-heading'
|
||||
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
<h2
|
||||
id='model-faq-heading'
|
||||
className='font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<div className='mt-3'>
|
||||
<LandingFAQ faqs={faqs} />
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<aside className='space-y-5' aria-label='Model details'>
|
||||
<div className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
|
||||
<h2 className='mb-4 font-[500] text-[16px] text-[var(--landing-text)]'>
|
||||
Quick details
|
||||
</h2>
|
||||
<div className='space-y-3'>
|
||||
<DetailItem label='Display name' value={model.displayName} />
|
||||
<DetailItem label='Provider' value={provider.name} />
|
||||
<DetailItem
|
||||
label='Context tracked'
|
||||
value={model.contextWindow ? 'Yes' : 'Partial'}
|
||||
/>
|
||||
<DetailItem
|
||||
label='Pricing updated'
|
||||
value={formatUpdatedAt(model.pricing.updatedAt)}
|
||||
/>
|
||||
</div>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<div>
|
||||
<LandingFAQ faqs={faqs} />
|
||||
</div>
|
||||
|
||||
<div className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
|
||||
<h2 className='mb-4 font-[500] text-[16px] text-[var(--landing-text)]'>
|
||||
Browse more
|
||||
</h2>
|
||||
<div className='space-y-2'>
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='block rounded-xl px-3 py-2 text-[14px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
All {provider.name} models
|
||||
</Link>
|
||||
<Link
|
||||
href='/models'
|
||||
className='block rounded-xl px-3 py-2 text-[14px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
Full models directory
|
||||
</Link>
|
||||
</div>
|
||||
</div>
|
||||
</aside>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</section>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
function InfoRow({ label, value }: { label: string; value: string }) {
|
||||
return (
|
||||
<>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
<div className='flex items-baseline justify-between gap-4 px-6 py-4'>
|
||||
<span className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{label}
|
||||
</span>
|
||||
<span className='text-right text-[14px] text-white leading-snug'>{value}</span>
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
|
||||
@@ -1,19 +1,21 @@
|
||||
import type { Metadata } from 'next'
|
||||
import Link from 'next/link'
|
||||
import { notFound } from 'next/navigation'
|
||||
import { Badge } from '@/components/emcn'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
|
||||
import {
|
||||
Breadcrumbs,
|
||||
CapabilityTags,
|
||||
ModelCard,
|
||||
ProviderCard,
|
||||
ChevronArrow,
|
||||
FeaturedModelCard,
|
||||
FeaturedProviderCard,
|
||||
ProviderIcon,
|
||||
StatCard,
|
||||
} from '@/app/(landing)/models/components/model-primitives'
|
||||
import { ModelTimelineChart } from '@/app/(landing)/models/components/model-timeline-chart'
|
||||
import {
|
||||
buildProviderFaqs,
|
||||
formatPrice,
|
||||
formatTokenCount,
|
||||
getProviderBySlug,
|
||||
getProviderCapabilitySummary,
|
||||
MODEL_PROVIDERS_WITH_CATALOGS,
|
||||
TOP_MODEL_PROVIDERS,
|
||||
} from '@/app/(landing)/models/utils'
|
||||
@@ -95,7 +97,6 @@ export default async function ProviderModelsPage({
|
||||
}
|
||||
|
||||
const faqs = buildProviderFaqs(provider)
|
||||
const capabilitySummary = getProviderCapabilitySummary(provider)
|
||||
const relatedProviders = MODEL_PROVIDERS_WITH_CATALOGS.filter(
|
||||
(entry) => entry.id !== provider.id && TOP_MODEL_PROVIDERS.includes(entry.name)
|
||||
).slice(0, 4)
|
||||
@@ -153,142 +154,149 @@ export default async function ProviderModelsPage({
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
|
||||
/>
|
||||
|
||||
<div className='mx-auto max-w-[1280px] px-6 py-12 sm:px-8 md:px-12'>
|
||||
<Breadcrumbs
|
||||
items={[
|
||||
{ label: 'Home', href: '/' },
|
||||
{ label: 'Models', href: '/models' },
|
||||
{ label: provider.name },
|
||||
]}
|
||||
/>
|
||||
<section className='bg-[var(--landing-bg)]'>
|
||||
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
|
||||
<div className='mb-6'>
|
||||
<Link
|
||||
href='/models'
|
||||
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
|
||||
>
|
||||
<svg
|
||||
className='h-3 w-3 shrink-0'
|
||||
viewBox='0 0 10 10'
|
||||
fill='none'
|
||||
xmlns='http://www.w3.org/2000/svg'
|
||||
>
|
||||
<line
|
||||
x1='1'
|
||||
y1='5'
|
||||
x2='10'
|
||||
y2='5'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
|
||||
/>
|
||||
<path
|
||||
d='M6.5 2L3.5 5L6.5 8'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
strokeLinejoin='miter'
|
||||
fill='none'
|
||||
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
|
||||
/>
|
||||
</svg>
|
||||
Back to Models
|
||||
</Link>
|
||||
</div>
|
||||
|
||||
<section aria-labelledby='provider-heading' className='mb-14'>
|
||||
<div className='mb-6 flex items-center gap-4'>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-16 w-16 rounded-3xl'
|
||||
iconClassName='h-8 w-8'
|
||||
/>
|
||||
<div>
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.12em]'>
|
||||
Provider
|
||||
</p>
|
||||
<Badge
|
||||
variant='blue'
|
||||
size='md'
|
||||
dot
|
||||
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
|
||||
>
|
||||
Provider
|
||||
</Badge>
|
||||
|
||||
<div className='flex flex-col gap-4 lg:flex-row lg:items-end lg:justify-between'>
|
||||
<div className='flex items-center gap-4'>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-12 w-12 rounded-[5px]'
|
||||
iconClassName='h-6 w-6'
|
||||
/>
|
||||
<h1
|
||||
id='provider-heading'
|
||||
className='font-[500] text-[38px] text-[var(--landing-text)] leading-tight sm:text-[48px]'
|
||||
className='font-[430] font-season text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
|
||||
>
|
||||
{provider.name} models
|
||||
</h1>
|
||||
</div>
|
||||
<span className='shrink-0 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{provider.modelCount} models
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='max-w-[820px] text-[17px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{provider.summary} Browse every {provider.name} model page generated from Sim's
|
||||
provider registry with human-readable names, pricing, context windows, and capability
|
||||
metadata.
|
||||
</p>
|
||||
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='mt-8 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
|
||||
<StatCard label='Models tracked' value={provider.modelCount.toString()} />
|
||||
<StatCard
|
||||
label='Default model'
|
||||
value={provider.defaultModelDisplayName || 'Dynamic'}
|
||||
compact
|
||||
/>
|
||||
<StatCard
|
||||
label='Metadata coverage'
|
||||
value={provider.contextInformationAvailable ? 'Tracked' : 'Partial'}
|
||||
compact
|
||||
/>
|
||||
<StatCard
|
||||
label='Featured models'
|
||||
value={provider.featuredModels.length.toString()}
|
||||
compact
|
||||
/>
|
||||
</div>
|
||||
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
|
||||
{provider.featuredModels.length > 0 && (
|
||||
<>
|
||||
<nav aria-label='Featured models' className='flex flex-col sm:flex-row'>
|
||||
{provider.featuredModels.slice(0, 3).map((model) => (
|
||||
<FeaturedModelCard key={model.id} provider={provider} model={model} />
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
<div className='mt-6'>
|
||||
<CapabilityTags tags={provider.providerCapabilityTags} />
|
||||
</div>
|
||||
</section>
|
||||
<ModelTimelineChart models={provider.models} providerId={provider.id} />
|
||||
|
||||
<section aria-labelledby='provider-models-heading' className='mb-16'>
|
||||
<h2
|
||||
id='provider-models-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
All {provider.name} models
|
||||
</h2>
|
||||
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Every model below links to a dedicated SEO page with exact pricing, context window,
|
||||
capability support, and related model recommendations.
|
||||
</p>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
|
||||
{provider.models.map((model) => (
|
||||
<ModelCard key={model.id} provider={provider} model={model} />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
{provider.models.map((model) => (
|
||||
<div key={model.id}>
|
||||
<Link
|
||||
href={model.href}
|
||||
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{model.displayName}
|
||||
</h3>
|
||||
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
|
||||
{model.id}
|
||||
</p>
|
||||
</div>
|
||||
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] md:block'>
|
||||
{formatPrice(model.pricing.input)}/1M in
|
||||
</span>
|
||||
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] md:block'>
|
||||
{formatPrice(model.pricing.output)}/1M out
|
||||
</span>
|
||||
{model.contextWindow ? (
|
||||
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] lg:block'>
|
||||
{formatTokenCount(model.contextWindow)} ctx
|
||||
</span>
|
||||
) : null}
|
||||
<ChevronArrow />
|
||||
</Link>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</div>
|
||||
))}
|
||||
|
||||
<section
|
||||
aria-labelledby='lineup-snapshot-heading'
|
||||
className='mb-16 rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
>
|
||||
<h2
|
||||
id='lineup-snapshot-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Lineup snapshot
|
||||
</h2>
|
||||
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
A quick view of the strongest differentiators in the {provider.name} model lineup based
|
||||
on the metadata currently tracked in Sim.
|
||||
</p>
|
||||
{relatedProviders.length > 0 && (
|
||||
<>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
<nav aria-label='Related providers' className='flex flex-col sm:flex-row'>
|
||||
{relatedProviders.map((entry) => (
|
||||
<FeaturedProviderCard key={entry.id} provider={entry} />
|
||||
))}
|
||||
</nav>
|
||||
</>
|
||||
)}
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
|
||||
{capabilitySummary.map((item) => (
|
||||
<StatCard key={item.label} label={item.label} value={item.value} compact />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{relatedProviders.length > 0 && (
|
||||
<section aria-labelledby='related-providers-heading' className='mb-16'>
|
||||
<section aria-labelledby='provider-faq-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='related-providers-heading'
|
||||
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
id='provider-faq-heading'
|
||||
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
Compare with other providers
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Explore similar provider hubs to compare model lineups, pricing surfaces, and
|
||||
long-context coverage across the broader AI ecosystem.
|
||||
</p>
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
|
||||
{relatedProviders.map((entry) => (
|
||||
<ProviderCard key={entry.id} provider={entry} />
|
||||
))}
|
||||
<div>
|
||||
<LandingFAQ faqs={faqs} />
|
||||
</div>
|
||||
</section>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<section
|
||||
aria-labelledby='provider-faq-heading'
|
||||
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
>
|
||||
<h2
|
||||
id='provider-faq-heading'
|
||||
className='font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<div className='mt-3'>
|
||||
<LandingFAQ faqs={faqs} />
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</section>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
9
apps/sim/app/(landing)/models/components/consts.ts
Normal file
9
apps/sim/app/(landing)/models/components/consts.ts
Normal file
@@ -0,0 +1,9 @@
|
||||
import { MODEL_CATALOG_PROVIDERS } from '@/app/(landing)/models/utils'
|
||||
|
||||
const colorMap = new Map(
|
||||
MODEL_CATALOG_PROVIDERS.filter((p) => p.color).map((p) => [p.id, p.color as string])
|
||||
)
|
||||
|
||||
export function getProviderColor(providerId: string): string {
|
||||
return colorMap.get(providerId) ?? '#888888'
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
'use client'
|
||||
|
||||
import type { ComponentType } from 'react'
|
||||
import { useMemo } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { getProviderColor } from '@/app/(landing)/models/components/consts'
|
||||
import type { CatalogModel } from '@/app/(landing)/models/utils'
|
||||
import {
|
||||
formatPrice,
|
||||
formatTokenCount,
|
||||
MODEL_CATALOG_PROVIDERS,
|
||||
} from '@/app/(landing)/models/utils'
|
||||
|
||||
/** Providers that host other providers' models — deprioritized to avoid duplicates. */
|
||||
const RESELLER_PROVIDERS = new Set(
|
||||
MODEL_CATALOG_PROVIDERS.filter((p) => p.isReseller).map((p) => p.id)
|
||||
)
|
||||
|
||||
const PROVIDER_ICON_MAP: Record<string, ComponentType<{ className?: string }>> = (() => {
|
||||
const map: Record<string, ComponentType<{ className?: string }>> = {}
|
||||
for (const provider of MODEL_CATALOG_PROVIDERS) {
|
||||
if (provider.icon) {
|
||||
map[provider.id] = provider.icon
|
||||
}
|
||||
}
|
||||
return map
|
||||
})()
|
||||
|
||||
function selectComparisonModels(models: CatalogModel[]): CatalogModel[] {
|
||||
const seen = new Set<string>()
|
||||
const result: CatalogModel[] = []
|
||||
|
||||
const sorted = [...models].sort((a, b) => {
|
||||
const score = (m: CatalogModel) => {
|
||||
const reseller = RESELLER_PROVIDERS.has(m.providerId) ? -50 : 0
|
||||
const reasoning = m.capabilities.reasoningEffort || m.capabilities.thinking ? 10 : 0
|
||||
const context = (m.contextWindow ?? 0) / 100000
|
||||
return reseller + reasoning + context
|
||||
}
|
||||
return score(b) - score(a)
|
||||
})
|
||||
|
||||
for (const model of sorted) {
|
||||
if (result.length >= 10) break
|
||||
const nameKey = model.displayName.toLowerCase()
|
||||
if (seen.has(nameKey)) continue
|
||||
seen.add(nameKey)
|
||||
result.push(model)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
interface ModelLabelProps {
|
||||
model: CatalogModel
|
||||
}
|
||||
|
||||
function ModelLabel({ model }: ModelLabelProps) {
|
||||
const Icon = PROVIDER_ICON_MAP[model.providerId]
|
||||
|
||||
return (
|
||||
<div className='flex w-[140px] shrink-0 items-center justify-end gap-1.5 sm:w-[180px]'>
|
||||
{Icon && <Icon className='h-3.5 w-3.5 shrink-0' />}
|
||||
<span className='truncate font-medium text-[13px] text-[var(--landing-text)] leading-none tracking-[-0.01em]'>
|
||||
{model.displayName}
|
||||
</span>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
interface ChartProps {
|
||||
models: CatalogModel[]
|
||||
}
|
||||
|
||||
function StackedCostChart({ models }: ChartProps) {
|
||||
const data = useMemo(() => {
|
||||
const entries = models
|
||||
.map((model) => ({
|
||||
model,
|
||||
input: model.pricing.input,
|
||||
output: model.pricing.output,
|
||||
total: model.pricing.input + model.pricing.output,
|
||||
}))
|
||||
.filter((e) => e.total > 0)
|
||||
.sort((a, b) => a.total - b.total)
|
||||
|
||||
const maxTotal = entries.length > 0 ? Math.max(...entries.map((e) => e.total)) : 0
|
||||
return { entries, maxTotal }
|
||||
}, [models])
|
||||
|
||||
if (data.entries.length === 0) return null
|
||||
|
||||
return (
|
||||
<div className='flex flex-col gap-3'>
|
||||
<div className='flex flex-col gap-1'>
|
||||
<h3 className='text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'>
|
||||
Cost
|
||||
</h3>
|
||||
<span className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
|
||||
Per 1M tokens
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className='flex flex-col gap-1.5'>
|
||||
{data.entries.map(({ model, input, output, total }) => {
|
||||
const totalPct = data.maxTotal > 0 ? (total / data.maxTotal) * 100 : 0
|
||||
const inputPct = total > 0 ? (input / total) * 100 : 0
|
||||
const color = getProviderColor(model.providerId)
|
||||
|
||||
return (
|
||||
<Link
|
||||
key={model.id}
|
||||
href={model.href}
|
||||
className='-mx-2 flex items-center gap-3 rounded-md px-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<ModelLabel model={model} />
|
||||
<div className='relative flex h-7 min-w-0 flex-1 items-center'>
|
||||
<div
|
||||
className='flex h-full overflow-hidden rounded-r-[3px]'
|
||||
style={{ width: `${Math.max(totalPct, 3)}%` }}
|
||||
>
|
||||
<div
|
||||
className='h-full'
|
||||
style={{
|
||||
width: `${inputPct}%`,
|
||||
backgroundColor: color,
|
||||
opacity: 0.8,
|
||||
}}
|
||||
/>
|
||||
<div
|
||||
className='h-full'
|
||||
style={{
|
||||
width: `${100 - inputPct}%`,
|
||||
backgroundColor: color,
|
||||
opacity: 0.35,
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<span className='ml-2.5 shrink-0 font-mono text-[var(--landing-text-muted)] text-xs'>
|
||||
{formatPrice(input)} input / {formatPrice(output)} output
|
||||
</span>
|
||||
</div>
|
||||
</Link>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
function ContextWindowChart({ models }: ChartProps) {
|
||||
const data = useMemo(() => {
|
||||
const entries = models
|
||||
.map((model) => ({
|
||||
model,
|
||||
value: model.contextWindow,
|
||||
}))
|
||||
.filter((e): e is { model: CatalogModel; value: number } => e.value !== null && e.value > 0)
|
||||
.sort((a, b) => a.value - b.value)
|
||||
|
||||
const maxValue = entries.length > 0 ? Math.max(...entries.map((e) => e.value)) : 0
|
||||
return { entries, maxValue }
|
||||
}, [models])
|
||||
|
||||
if (data.entries.length === 0) return null
|
||||
|
||||
return (
|
||||
<div className='flex flex-col gap-3'>
|
||||
<div className='flex flex-col gap-1'>
|
||||
<h3 className='text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'>
|
||||
Context window
|
||||
</h3>
|
||||
<span className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
|
||||
Max tokens
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div className='flex flex-col gap-1.5'>
|
||||
{data.entries.map(({ model, value }) => {
|
||||
const pct = data.maxValue > 0 ? (value / data.maxValue) * 100 : 0
|
||||
const color = getProviderColor(model.providerId)
|
||||
|
||||
return (
|
||||
<Link
|
||||
key={model.id}
|
||||
href={model.href}
|
||||
className='-mx-2 flex items-center gap-3 rounded-md px-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<ModelLabel model={model} />
|
||||
<div className='relative flex h-7 min-w-0 flex-1 items-center'>
|
||||
<div
|
||||
className='h-full rounded-r-[3px]'
|
||||
style={{
|
||||
width: `${Math.max(pct, 3)}%`,
|
||||
backgroundColor: color,
|
||||
opacity: 0.8,
|
||||
}}
|
||||
/>
|
||||
<span className='ml-2.5 shrink-0 font-mono text-[var(--landing-text-muted)] text-xs'>
|
||||
{formatTokenCount(value)}
|
||||
</span>
|
||||
</div>
|
||||
</Link>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
interface ModelComparisonChartsProps {
|
||||
models: CatalogModel[]
|
||||
}
|
||||
|
||||
export function ModelComparisonCharts({ models }: ModelComparisonChartsProps) {
|
||||
const comparisonModels = useMemo(() => selectComparisonModels(models), [models])
|
||||
|
||||
return (
|
||||
<section aria-labelledby='comparison-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='comparison-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
Compare models
|
||||
</h2>
|
||||
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
|
||||
Side-by-side comparison of top models across key metrics.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='flex flex-col sm:flex-row'>
|
||||
<div className='flex-1 p-6'>
|
||||
<StackedCostChart models={comparisonModels} />
|
||||
</div>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)] sm:h-auto sm:w-px' />
|
||||
<div className='flex-1 p-6'>
|
||||
<ContextWindowChart models={comparisonModels} />
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
)
|
||||
}
|
||||
@@ -3,20 +3,14 @@
|
||||
import { useMemo, useState } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { Input } from '@/components/emcn'
|
||||
import { SearchIcon } from '@/components/icons'
|
||||
import { cn } from '@/lib/core/utils/cn'
|
||||
import {
|
||||
CapabilityTags,
|
||||
DetailItem,
|
||||
ModelCard,
|
||||
ProviderIcon,
|
||||
StatCard,
|
||||
} from '@/app/(landing)/models/components/model-primitives'
|
||||
import { ChevronArrow, ProviderIcon } from '@/app/(landing)/models/components/model-primitives'
|
||||
import {
|
||||
type CatalogModel,
|
||||
type CatalogProvider,
|
||||
formatPrice,
|
||||
formatTokenCount,
|
||||
MODEL_PROVIDERS_WITH_CATALOGS,
|
||||
MODEL_PROVIDERS_WITH_DYNAMIC_CATALOGS,
|
||||
TOTAL_MODELS,
|
||||
} from '@/app/(landing)/models/utils'
|
||||
|
||||
export function ModelDirectory() {
|
||||
@@ -35,7 +29,7 @@ export function ModelDirectory() {
|
||||
|
||||
const normalizedQuery = query.trim().toLowerCase()
|
||||
|
||||
const { filteredProviders, filteredDynamicProviders, visibleModelCount } = useMemo(() => {
|
||||
const { filteredProviders, filteredDynamicProviders } = useMemo(() => {
|
||||
const filteredProviders = MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => {
|
||||
const providerMatchesSearch =
|
||||
normalizedQuery.length > 0 && provider.searchText.includes(normalizedQuery)
|
||||
@@ -77,15 +71,9 @@ export function ModelDirectory() {
|
||||
return provider.searchText.includes(normalizedQuery)
|
||||
})
|
||||
|
||||
const visibleModelCount = filteredProviders.reduce(
|
||||
(count, provider) => count + provider.models.length,
|
||||
0
|
||||
)
|
||||
|
||||
return {
|
||||
filteredProviders,
|
||||
filteredDynamicProviders,
|
||||
visibleModelCount,
|
||||
}
|
||||
}, [activeProviderId, normalizedQuery])
|
||||
|
||||
@@ -93,170 +81,143 @@ export function ModelDirectory() {
|
||||
|
||||
return (
|
||||
<div>
|
||||
<div className='mb-8 flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between'>
|
||||
<div className='relative max-w-[560px] flex-1'>
|
||||
<SearchIcon
|
||||
<div className='mb-6 flex flex-col gap-4 px-6 sm:flex-row sm:items-center'>
|
||||
<div className='relative max-w-[480px] flex-1'>
|
||||
<svg
|
||||
aria-hidden='true'
|
||||
className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-4 w-4 text-[var(--landing-text-muted)]'
|
||||
/>
|
||||
className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-4 w-4 text-[#555]'
|
||||
fill='none'
|
||||
stroke='currentColor'
|
||||
strokeWidth={2}
|
||||
viewBox='0 0 24 24'
|
||||
>
|
||||
<circle cx={11} cy={11} r={8} />
|
||||
<path d='m21 21-4.35-4.35' />
|
||||
</svg>
|
||||
<Input
|
||||
type='search'
|
||||
placeholder='Search models, providers, capabilities, or pricing details'
|
||||
placeholder='Search models, providers, or capabilities…'
|
||||
value={query}
|
||||
onChange={(event) => setQuery(event.target.value)}
|
||||
className='h-11 border-[var(--landing-border)] bg-[var(--landing-bg-card)] pl-10 text-[var(--landing-text)] placeholder:text-[var(--landing-text-muted)]'
|
||||
className='pl-9'
|
||||
aria-label='Search AI models'
|
||||
/>
|
||||
</div>
|
||||
|
||||
<p className='text-[13px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Showing {visibleModelCount.toLocaleString('en-US')} of{' '}
|
||||
{TOTAL_MODELS.toLocaleString('en-US')} models
|
||||
{activeProviderId ? ' in one provider' : ''}.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='mb-10 flex flex-wrap gap-2'>
|
||||
<FilterButton
|
||||
isActive={activeProviderId === null}
|
||||
<div className='mb-6 flex flex-wrap gap-2 px-6'>
|
||||
<button
|
||||
type='button'
|
||||
onClick={() => setActiveProviderId(null)}
|
||||
label={`All providers (${MODEL_PROVIDERS_WITH_CATALOGS.length})`}
|
||||
/>
|
||||
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
|
||||
activeProviderId === null
|
||||
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
}`}
|
||||
>
|
||||
All
|
||||
</button>
|
||||
{providerOptions.map((provider) => (
|
||||
<FilterButton
|
||||
<button
|
||||
key={provider.id}
|
||||
isActive={activeProviderId === provider.id}
|
||||
type='button'
|
||||
onClick={() =>
|
||||
setActiveProviderId(activeProviderId === provider.id ? null : provider.id)
|
||||
}
|
||||
label={`${provider.name} (${provider.count})`}
|
||||
/>
|
||||
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
|
||||
activeProviderId === provider.id
|
||||
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
}`}
|
||||
>
|
||||
{provider.name}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
{!hasResults ? (
|
||||
<div className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-6 py-12 text-center'>
|
||||
<h3 className='font-[500] text-[18px] text-[var(--landing-text)]'>No matches found</h3>
|
||||
<p className='mt-2 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<div className='px-6 py-12 text-center'>
|
||||
<h3 className='text-[18px] text-white'>No matches found</h3>
|
||||
<p className='mt-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
Try a provider name like OpenAI or Anthropic, or search for capabilities like
|
||||
structured outputs, reasoning, or deep research.
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className='space-y-10'>
|
||||
{filteredProviders.map((provider) => (
|
||||
<section
|
||||
key={provider.id}
|
||||
aria-labelledby={`${provider.id}-heading`}
|
||||
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
>
|
||||
<div className='mb-6 flex flex-col gap-5 border-[var(--landing-border)] border-b pb-6 lg:flex-row lg:items-start lg:justify-between'>
|
||||
<div className='min-w-0'>
|
||||
<div className='mb-3 flex items-center gap-3'>
|
||||
<ProviderIcon provider={provider} />
|
||||
<div>
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)]'>Provider</p>
|
||||
<h2
|
||||
id={`${provider.id}-heading`}
|
||||
className='font-[500] text-[24px] text-[var(--landing-text)]'
|
||||
>
|
||||
{provider.name}
|
||||
</h2>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
{filteredProviders.map((provider, index) => (
|
||||
<section key={provider.id} aria-labelledby={`${provider.id}-heading`}>
|
||||
{index > 0 && <div className='h-px w-full bg-[var(--landing-bg-elevated)]' />}
|
||||
|
||||
<p className='max-w-[720px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{provider.description}
|
||||
</p>
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='mt-3 inline-flex text-[#555] text-[13px] transition-colors hover:text-[var(--landing-text-muted)]'
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='group/link flex items-center gap-3 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-8 w-8 rounded-[5px]'
|
||||
iconClassName='h-4 w-4'
|
||||
/>
|
||||
<div className='min-w-0 flex-1'>
|
||||
<h2
|
||||
id={`${provider.id}-heading`}
|
||||
className='text-[14px] text-white leading-snug tracking-[-0.02em]'
|
||||
>
|
||||
View provider page →
|
||||
</Link>
|
||||
{provider.name}
|
||||
</h2>
|
||||
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
|
||||
{provider.modelCount} models · {provider.description}
|
||||
</p>
|
||||
</div>
|
||||
<ChevronArrow />
|
||||
</Link>
|
||||
|
||||
<div className='grid shrink-0 grid-cols-2 gap-3 sm:grid-cols-3'>
|
||||
<StatCard label='Models' value={provider.models.length.toString()} />
|
||||
<StatCard
|
||||
label='Default'
|
||||
value={provider.defaultModelDisplayName || 'Dynamic'}
|
||||
compact
|
||||
/>
|
||||
<StatCard
|
||||
label='Context info'
|
||||
value={provider.contextInformationAvailable ? 'Tracked' : 'Limited'}
|
||||
compact
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='mb-6'>
|
||||
<CapabilityTags tags={provider.providerCapabilityTags} />
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
|
||||
{provider.models.map((model) => (
|
||||
<ModelCard key={model.id} provider={provider} model={model} />
|
||||
))}
|
||||
</div>
|
||||
{provider.models.map((model) => (
|
||||
<ModelRow key={model.id} provider={provider} model={model} />
|
||||
))}
|
||||
</section>
|
||||
))}
|
||||
|
||||
{filteredDynamicProviders.length > 0 && (
|
||||
<section
|
||||
aria-labelledby='dynamic-catalogs-heading'
|
||||
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
>
|
||||
<div className='mb-6'>
|
||||
<section aria-labelledby='dynamic-catalogs-heading'>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<div className='px-6 pt-8 pb-6'>
|
||||
<h2
|
||||
id='dynamic-catalogs-heading'
|
||||
className='font-[500] text-[24px] text-[var(--landing-text)]'
|
||||
className='text-[18px] text-white leading-[100%] tracking-[-0.02em] lg:text-[20px]'
|
||||
>
|
||||
Dynamic model catalogs
|
||||
</h2>
|
||||
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
These providers are supported by Sim, but their model lists are loaded dynamically
|
||||
at runtime rather than hard-coded into the public catalog.
|
||||
<p className='mt-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
These providers load their model lists dynamically at runtime.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<nav aria-label='Dynamic catalog providers' className='flex flex-col lg:flex-row'>
|
||||
{filteredDynamicProviders.map((provider) => (
|
||||
<article
|
||||
<div
|
||||
key={provider.id}
|
||||
className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] p-5'
|
||||
className='flex flex-1 items-center gap-3 border-[var(--landing-bg-elevated)] border-t px-6 py-4 first:border-t-0 lg:border-t-0 lg:border-l lg:first:border-l-0'
|
||||
>
|
||||
<div className='mb-4 flex items-center gap-3'>
|
||||
<ProviderIcon provider={provider} />
|
||||
<div className='min-w-0'>
|
||||
<h3 className='font-[500] text-[16px] text-[var(--landing-text)]'>
|
||||
{provider.name}
|
||||
</h3>
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)]'>
|
||||
{provider.id}
|
||||
</p>
|
||||
</div>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-8 w-8 rounded-[5px]'
|
||||
iconClassName='h-4 w-4'
|
||||
/>
|
||||
<div className='min-w-0 flex-1'>
|
||||
<h3 className='text-[14px] text-white leading-snug'>{provider.name}</h3>
|
||||
<p className='line-clamp-1 text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
|
||||
{provider.description}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<p className='text-[13px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
{provider.description}
|
||||
</p>
|
||||
|
||||
<div className='mt-4 space-y-3 text-[13px]'>
|
||||
<DetailItem
|
||||
label='Default'
|
||||
value={provider.defaultModelDisplayName || 'Selected at runtime'}
|
||||
/>
|
||||
<DetailItem label='Catalog source' value='Loaded dynamically inside Sim' />
|
||||
</div>
|
||||
|
||||
<div className='mt-4'>
|
||||
<CapabilityTags tags={provider.providerCapabilityTags} />
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</nav>
|
||||
</section>
|
||||
)}
|
||||
</div>
|
||||
@@ -265,27 +226,33 @@ export function ModelDirectory() {
|
||||
)
|
||||
}
|
||||
|
||||
function FilterButton({
|
||||
isActive,
|
||||
onClick,
|
||||
label,
|
||||
}: {
|
||||
isActive: boolean
|
||||
onClick: () => void
|
||||
label: string
|
||||
}) {
|
||||
function ModelRow({ provider, model }: { provider: CatalogProvider; model: CatalogModel }) {
|
||||
return (
|
||||
<button
|
||||
type='button'
|
||||
onClick={onClick}
|
||||
className={cn(
|
||||
'rounded-full border px-3 py-1.5 text-[12px] transition-colors',
|
||||
isActive
|
||||
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
|
||||
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
|
||||
)}
|
||||
>
|
||||
{label}
|
||||
</button>
|
||||
<>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
<Link
|
||||
href={model.href}
|
||||
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-8 w-8 shrink-0 rounded-[5px]'
|
||||
iconClassName='h-4 w-4'
|
||||
/>
|
||||
|
||||
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
|
||||
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
|
||||
{model.displayName}
|
||||
</h3>
|
||||
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
|
||||
{model.id} · Input {formatPrice(model.pricing.input)}/1M · Output{' '}
|
||||
{formatPrice(model.pricing.output)}/1M
|
||||
{model.contextWindow ? ` · ${formatTokenCount(model.contextWindow)} context` : ''}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<ChevronArrow />
|
||||
</Link>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ export function Breadcrumbs({ items }: { items: Array<{ label: string; href?: st
|
||||
return (
|
||||
<nav
|
||||
aria-label='Breadcrumb'
|
||||
className='mb-10 flex flex-wrap items-center gap-2 text-[#555] text-[13px]'
|
||||
className='mb-10 flex flex-wrap items-center gap-2 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'
|
||||
>
|
||||
{items.map((item, index) => (
|
||||
<span key={`${item.label}-${index}`} className='inline-flex items-center gap-2'>
|
||||
@@ -35,7 +35,7 @@ export function Breadcrumbs({ items }: { items: Array<{ label: string; href?: st
|
||||
|
||||
export function ProviderIcon({
|
||||
provider,
|
||||
className = 'h-12 w-12 rounded-2xl',
|
||||
className = 'h-12 w-12 rounded-[5px]',
|
||||
iconClassName = 'h-6 w-6',
|
||||
}: {
|
||||
provider: Pick<CatalogProvider, 'icon' | 'name'>
|
||||
@@ -51,7 +51,7 @@ export function ProviderIcon({
|
||||
{Icon ? (
|
||||
<Icon className={iconClassName} />
|
||||
) : (
|
||||
<span className='font-[500] text-[14px] text-[var(--landing-text)]'>
|
||||
<span className='font-[430] text-[14px] text-[var(--landing-text)]'>
|
||||
{provider.name.slice(0, 2).toUpperCase()}
|
||||
</span>
|
||||
)}
|
||||
@@ -69,12 +69,12 @@ export function StatCard({
|
||||
compact?: boolean
|
||||
}) {
|
||||
return (
|
||||
<div className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] px-4 py-3'>
|
||||
<p className='text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.08em]'>
|
||||
<div className='rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] px-4 py-3'>
|
||||
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{label}
|
||||
</p>
|
||||
<p
|
||||
className={`mt-1 font-[500] text-[var(--landing-text)] ${
|
||||
className={`mt-1 font-[430] text-[var(--landing-text)] ${
|
||||
compact ? 'break-all text-[12px] leading-snug' : 'text-[18px]'
|
||||
}`}
|
||||
>
|
||||
@@ -86,17 +86,49 @@ export function StatCard({
|
||||
|
||||
export function DetailItem({ label, value }: { label: string; value: string }) {
|
||||
return (
|
||||
<div className='rounded-xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3 py-2'>
|
||||
<p className='text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.08em]'>
|
||||
<div className='rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3 py-2'>
|
||||
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{label}
|
||||
</p>
|
||||
<p className='mt-1 break-words font-[500] text-[12px] text-[var(--landing-text)] leading-snug'>
|
||||
<p className='mt-1 break-words font-[430] text-[12px] text-[var(--landing-text)] leading-snug'>
|
||||
{value}
|
||||
</p>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export function ChevronArrow() {
|
||||
return (
|
||||
<svg
|
||||
className='h-3 w-3 shrink-0 text-[var(--landing-text-subtle)]'
|
||||
viewBox='0 0 10 10'
|
||||
fill='none'
|
||||
xmlns='http://www.w3.org/2000/svg'
|
||||
aria-hidden='true'
|
||||
>
|
||||
<line
|
||||
x1='0'
|
||||
y1='5'
|
||||
x2='9'
|
||||
y2='5'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
className='origin-left scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
|
||||
/>
|
||||
<path
|
||||
d='M3.5 2L6.5 5L3.5 8'
|
||||
stroke='currentColor'
|
||||
strokeWidth='1.33'
|
||||
strokeLinecap='square'
|
||||
strokeLinejoin='miter'
|
||||
fill='none'
|
||||
className='transition-transform duration-200 ease-out group-hover/link:translate-x-[30%]'
|
||||
/>
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
|
||||
export function CapabilityTags({ tags }: { tags: string[] }) {
|
||||
if (tags.length === 0) {
|
||||
return null
|
||||
@@ -116,23 +148,76 @@ export function CapabilityTags({ tags }: { tags: string[] }) {
|
||||
)
|
||||
}
|
||||
|
||||
export function FeaturedProviderCard({ provider }: { provider: CatalogProvider }) {
|
||||
return (
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
|
||||
>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-10 w-10 rounded-[5px]'
|
||||
iconClassName='h-5 w-5'
|
||||
/>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{provider.name}</h3>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{provider.description}
|
||||
</p>
|
||||
</div>
|
||||
</Link>
|
||||
)
|
||||
}
|
||||
|
||||
export function FeaturedModelCard({
|
||||
provider,
|
||||
model,
|
||||
}: {
|
||||
provider: CatalogProvider
|
||||
model: CatalogModel
|
||||
}) {
|
||||
return (
|
||||
<Link
|
||||
href={model.href}
|
||||
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
|
||||
>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-10 w-10 rounded-[5px]'
|
||||
iconClassName='h-5 w-5'
|
||||
/>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<span className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{provider.name}
|
||||
</span>
|
||||
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{model.displayName}</h3>
|
||||
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{model.summary}
|
||||
</p>
|
||||
</div>
|
||||
</Link>
|
||||
)
|
||||
}
|
||||
|
||||
export function ProviderCard({ provider }: { provider: CatalogProvider }) {
|
||||
return (
|
||||
<Link
|
||||
href={provider.href}
|
||||
className='group flex h-full flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
className='group flex h-full flex-col rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<div className='mb-4 flex items-center gap-3'>
|
||||
<ProviderIcon provider={provider} />
|
||||
<div className='min-w-0'>
|
||||
<h3 className='font-[500] text-[18px] text-[var(--landing-text)]'>{provider.name}</h3>
|
||||
<p className='text-[12px] text-[var(--landing-text-muted)]'>
|
||||
<h3 className='font-[430] font-season text-base text-white tracking-[-0.01em]'>
|
||||
{provider.name}
|
||||
</h3>
|
||||
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{provider.modelCount} models tracked
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='mb-4 flex-1 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<p className='mb-4 flex-1 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{provider.description}
|
||||
</p>
|
||||
|
||||
@@ -165,26 +250,30 @@ export function ModelCard({
|
||||
return (
|
||||
<Link
|
||||
href={model.href}
|
||||
className='group flex h-full flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
className='group flex h-full flex-col rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
<div className='mb-4 flex items-start gap-3'>
|
||||
<ProviderIcon
|
||||
provider={provider}
|
||||
className='h-10 w-10 rounded-xl'
|
||||
className='h-10 w-10 rounded-[5px]'
|
||||
iconClassName='h-5 w-5'
|
||||
/>
|
||||
<div className='min-w-0 flex-1'>
|
||||
{showProvider ? (
|
||||
<p className='mb-1 text-[12px] text-[var(--landing-text-muted)]'>{provider.name}</p>
|
||||
<p className='mb-1 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
|
||||
{provider.name}
|
||||
</p>
|
||||
) : null}
|
||||
<h3 className='break-all font-[500] text-[16px] text-[var(--landing-text)] leading-snug'>
|
||||
<h3 className='break-all font-[430] font-season text-base text-white leading-snug tracking-[-0.01em]'>
|
||||
{model.displayName}
|
||||
</h3>
|
||||
<p className='mt-1 break-all text-[12px] text-[var(--landing-text-muted)]'>{model.id}</p>
|
||||
<p className='mt-1 break-all font-martian-mono text-[var(--landing-text-subtle)] text-xs tracking-[0.1em]'>
|
||||
{model.id}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className='mb-3 line-clamp-3 flex-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
<p className='mb-3 line-clamp-3 flex-1 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
|
||||
{model.summary}
|
||||
</p>
|
||||
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
'use client'
|
||||
|
||||
import { useMemo } from 'react'
|
||||
import Link from 'next/link'
|
||||
import { getProviderColor } from '@/app/(landing)/models/components/consts'
|
||||
import type { CatalogModel } from '@/app/(landing)/models/utils'
|
||||
|
||||
function formatShortDate(date: string): string {
|
||||
try {
|
||||
return new Intl.DateTimeFormat('en-US', {
|
||||
month: 'short',
|
||||
day: 'numeric',
|
||||
year: 'numeric',
|
||||
timeZone: 'UTC',
|
||||
}).format(new Date(date))
|
||||
} catch {
|
||||
return date
|
||||
}
|
||||
}
|
||||
|
||||
interface ModelTimelineChartProps {
|
||||
models: CatalogModel[]
|
||||
providerId: string
|
||||
}
|
||||
|
||||
const ITEM_WIDTH = 150
|
||||
|
||||
export function ModelTimelineChart({ models, providerId }: ModelTimelineChartProps) {
|
||||
const entries = useMemo(() => {
|
||||
return models
|
||||
.filter((m) => m.releaseDate !== null)
|
||||
.map((m) => ({
|
||||
model: m,
|
||||
date: new Date(m.releaseDate as string),
|
||||
dateStr: m.releaseDate as string,
|
||||
}))
|
||||
.sort((a, b) => a.date.getTime() - b.date.getTime())
|
||||
}, [models])
|
||||
|
||||
if (entries.length === 0) return null
|
||||
|
||||
const color = getProviderColor(providerId)
|
||||
|
||||
return (
|
||||
<section aria-labelledby='timeline-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='timeline-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
Release timeline
|
||||
</h2>
|
||||
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
|
||||
When each model was first publicly available.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='overflow-x-auto px-6 pb-8'>
|
||||
{/* Fixed height: top labels + line + bottom labels */}
|
||||
<div
|
||||
className='relative h-[140px]'
|
||||
style={{ minWidth: `${entries.length * ITEM_WIDTH}px` }}
|
||||
>
|
||||
{/* Horizontal line — vertically centered */}
|
||||
<div className='absolute top-[70px] right-0 left-0 h-px bg-[var(--landing-border-strong)]' />
|
||||
|
||||
{entries.map(({ model, dateStr }, i) => {
|
||||
const left = i * ITEM_WIDTH + ITEM_WIDTH / 2
|
||||
const isAbove = i % 2 === 0
|
||||
|
||||
return (
|
||||
<Link
|
||||
key={model.id}
|
||||
href={model.href}
|
||||
className='group absolute flex flex-col items-center'
|
||||
style={{
|
||||
left: `${left}px`,
|
||||
width: `${ITEM_WIDTH}px`,
|
||||
marginLeft: `${-ITEM_WIDTH / 2}px`,
|
||||
top: 0,
|
||||
height: '100%',
|
||||
}}
|
||||
>
|
||||
{/* Dot — centered exactly on the line (70px - 4.5px) */}
|
||||
<div
|
||||
className='-translate-x-1/2 absolute top-[66px] left-1/2 h-[9px] w-[9px] rounded-full transition-[filter,transform] duration-150 group-hover:scale-150 group-hover:brightness-150'
|
||||
style={{ backgroundColor: color, opacity: 0.85 }}
|
||||
/>
|
||||
|
||||
{/* Stem + label above */}
|
||||
{isAbove && (
|
||||
<div className='-translate-x-1/2 absolute bottom-[74px] left-1/2 flex flex-col items-center'>
|
||||
<div className='flex flex-col items-center gap-0.5 pb-1.5'>
|
||||
<span className='whitespace-nowrap font-medium text-[12px] text-[var(--landing-text)] leading-none tracking-[-0.01em] transition-colors group-hover:text-white'>
|
||||
{model.displayName}
|
||||
</span>
|
||||
<span className='whitespace-nowrap font-mono text-[10px] text-[var(--landing-text-muted)] leading-none'>
|
||||
{formatShortDate(dateStr)}
|
||||
</span>
|
||||
</div>
|
||||
<div
|
||||
className='w-px'
|
||||
style={{ height: '10px', backgroundColor: color, opacity: 0.2 }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Stem + label below */}
|
||||
{!isAbove && (
|
||||
<div className='-translate-x-1/2 absolute top-[75px] left-1/2 flex flex-col items-center'>
|
||||
<div
|
||||
className='w-px'
|
||||
style={{ height: '10px', backgroundColor: color, opacity: 0.2 }}
|
||||
/>
|
||||
<div className='flex flex-col items-center gap-0.5 pt-1.5'>
|
||||
<span className='whitespace-nowrap font-medium text-[12px] text-[var(--landing-text)] leading-none tracking-[-0.01em] transition-colors group-hover:text-white'>
|
||||
{model.displayName}
|
||||
</span>
|
||||
<span className='whitespace-nowrap font-mono text-[10px] text-[var(--landing-text-muted)] leading-none'>
|
||||
{formatShortDate(dateStr)}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</Link>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
)
|
||||
}
|
||||
@@ -1,10 +1,15 @@
|
||||
import type { Metadata } from 'next'
|
||||
import Link from 'next/link'
|
||||
import { Badge } from '@/components/emcn'
|
||||
import { getBaseUrl } from '@/lib/core/utils/urls'
|
||||
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
|
||||
import { ModelComparisonCharts } from '@/app/(landing)/models/components/model-comparison-charts'
|
||||
import { ModelDirectory } from '@/app/(landing)/models/components/model-directory'
|
||||
import { ModelCard, ProviderCard } from '@/app/(landing)/models/components/model-primitives'
|
||||
import {
|
||||
FeaturedModelCard,
|
||||
FeaturedProviderCard,
|
||||
} from '@/app/(landing)/models/components/model-primitives'
|
||||
import {
|
||||
ALL_CATALOG_MODELS,
|
||||
getPricingBounds,
|
||||
MODEL_CATALOG_PROVIDERS,
|
||||
MODEL_PROVIDERS_WITH_CATALOGS,
|
||||
@@ -17,24 +22,29 @@ const baseUrl = getBaseUrl()
|
||||
|
||||
const faqItems = [
|
||||
{
|
||||
question: 'What is the Sim AI models directory?',
|
||||
question: 'Which AI models are best for building agents and automated workflows?',
|
||||
answer:
|
||||
'The Sim AI models directory is a public catalog of the language models and providers tracked inside Sim. It shows provider coverage, model IDs, pricing per one million tokens, context windows, and supported capabilities such as reasoning controls, structured outputs, and deep research.',
|
||||
'The most important factors for agent tasks are reliable tool use (function calling), a large enough context window to track conversation history and tool outputs, and consistent instruction following. In Sim, OpenAI GPT-4.1, Anthropic Claude Sonnet, and Google Gemini 2.5 Pro are popular choices — each supports tool use, structured outputs, and context windows of 128K tokens or more. For cost-sensitive or high-throughput agents, Groq and Cerebras offer significantly faster inference at lower cost.',
|
||||
},
|
||||
{
|
||||
question: 'Can I compare models from multiple providers in one place?',
|
||||
question: 'What does context window size mean when running an AI agent?',
|
||||
answer:
|
||||
'Yes. This page organizes every tracked model by provider and lets you search across providers, model names, and capabilities. You can quickly compare OpenAI, Anthropic, Google, xAI, Mistral, Groq, Cerebras, Fireworks, Bedrock, and more from a single directory.',
|
||||
'The context window is the total number of tokens a model can process in a single call, including your system prompt, conversation history, tool call results, and any documents you pass in. For agents running multi-step tasks, context fills up quickly — each tool result and each retrieved document adds tokens. A 128K-token context window fits roughly 300 pages of text; models like Gemini 2.5 Pro support up to 1M tokens, enough to hold an entire codebase in a single pass.',
|
||||
},
|
||||
{
|
||||
question: 'Are these model prices shown per million tokens?',
|
||||
question: 'Are model prices shown per million tokens?',
|
||||
answer:
|
||||
'Yes. Input, cached input, and output prices on this page are shown per one million tokens based on the provider metadata tracked in Sim.',
|
||||
'Yes. Input, cached input, and output prices are all listed per one million tokens, matching how providers bill through their APIs. For agents that chain multiple calls, costs compound quickly — an agent completing 100 turns at 10K tokens each consumes roughly 1M tokens per session. Cached input pricing applies when a provider supports prompt caching, where a repeated prefix like a system prompt is billed at a reduced rate.',
|
||||
},
|
||||
{
|
||||
question: 'Does Sim support providers with dynamic model catalogs too?',
|
||||
question: 'Which AI models support tool use and function calling?',
|
||||
answer:
|
||||
'Yes. Some providers such as OpenRouter, Fireworks, Ollama, and vLLM load their model lists dynamically at runtime. Those providers are still shown here even when their full public model list is not hard-coded into the catalog.',
|
||||
'Tool use — also called function calling — lets an agent invoke external APIs, query databases, run code, or take any action you define. In Sim, all first-party models from OpenAI, Anthropic, Google, Mistral, Groq, Cerebras, and xAI support tool use. Look for the Tool Use capability tag on any model card in this directory to confirm support.',
|
||||
},
|
||||
{
|
||||
question: 'How do I add a model to a Sim agent workflow?',
|
||||
answer:
|
||||
'Open any workflow in Sim, add an Agent block, and select your provider and model from the model picker inside that block. Every model listed in this directory is available in the Agent block. Swapping models takes one click and does not affect the rest of your workflow, making it straightforward to test different models on the same task without rebuilding anything.',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -82,15 +92,15 @@ export default function ModelsPage() {
|
||||
const flatModels = MODEL_CATALOG_PROVIDERS.flatMap((provider) =>
|
||||
provider.models.map((model) => ({ provider, model }))
|
||||
)
|
||||
const featuredProviders = MODEL_PROVIDERS_WITH_CATALOGS.slice(0, 6)
|
||||
const featuredModels = MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) =>
|
||||
provider.featuredModels[0] ? [{ provider, model: provider.featuredModels[0] }] : []
|
||||
).slice(0, 6)
|
||||
const heroProviders = ['openai', 'anthropic', 'azure-openai', 'google', 'bedrock']
|
||||
.map((providerId) => MODEL_CATALOG_PROVIDERS.find((provider) => provider.id === providerId))
|
||||
.filter(
|
||||
(provider): provider is (typeof MODEL_CATALOG_PROVIDERS)[number] => provider !== undefined
|
||||
const featuredProviderOrder = ['anthropic', 'openai', 'google']
|
||||
const featuredProviders = featuredProviderOrder
|
||||
.map((id) => MODEL_PROVIDERS_WITH_CATALOGS.find((p) => p.id === id))
|
||||
.filter((p): p is (typeof MODEL_PROVIDERS_WITH_CATALOGS)[number] => p !== undefined)
|
||||
const featuredModels = featuredProviders
|
||||
.map((provider) =>
|
||||
provider.featuredModels[0] ? { provider, model: provider.featuredModels[0] } : null
|
||||
)
|
||||
.filter((entry): entry is NonNullable<typeof entry> => entry !== null)
|
||||
|
||||
const breadcrumbJsonLd = {
|
||||
'@context': 'https://schema.org',
|
||||
@@ -159,135 +169,89 @@ export default function ModelsPage() {
|
||||
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
|
||||
/>
|
||||
|
||||
<div className='mx-auto max-w-[1280px] px-6 py-16 sm:px-8 md:px-12'>
|
||||
<section aria-labelledby='models-heading' className='mb-14'>
|
||||
<div className='max-w-[840px]'>
|
||||
<p className='mb-3 text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.16em]'>
|
||||
Public model directory
|
||||
</p>
|
||||
<section className='bg-[var(--landing-bg)]'>
|
||||
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
|
||||
<Badge
|
||||
variant='blue'
|
||||
size='md'
|
||||
dot
|
||||
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
|
||||
>
|
||||
Models
|
||||
</Badge>
|
||||
|
||||
<div className='flex flex-col gap-4 xl:flex-row xl:items-end xl:justify-between'>
|
||||
<h1
|
||||
id='models-heading'
|
||||
className='text-balance font-[500] text-[40px] text-[var(--landing-text)] leading-tight sm:text-[56px]'
|
||||
className='text-balance text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
|
||||
>
|
||||
Browse AI models by provider, pricing, and capabilities
|
||||
Models
|
||||
</h1>
|
||||
<p className='mt-5 max-w-[760px] text-[18px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Explore every model tracked in Sim across providers like{' '}
|
||||
{heroProviders.map((provider, index, allProviders) => {
|
||||
const Icon = provider.icon
|
||||
|
||||
return (
|
||||
<span key={provider.id}>
|
||||
<span className='inline-flex items-center gap-1 whitespace-nowrap align-[0.02em]'>
|
||||
{Icon ? (
|
||||
<span
|
||||
aria-hidden='true'
|
||||
className='relative top-[0.02em] inline-flex shrink-0 text-[var(--landing-text)]'
|
||||
>
|
||||
<Icon className='h-[0.82em] w-[0.82em]' />
|
||||
</span>
|
||||
) : null}
|
||||
<span>{provider.name}</span>
|
||||
</span>
|
||||
{index < allProviders.length - 1 ? ', ' : ''}
|
||||
</span>
|
||||
)
|
||||
})}
|
||||
{
|
||||
' and more. Compare model IDs, token pricing, context windows, and features such as reasoning, structured outputs, and deep research from one clean catalog.'
|
||||
}
|
||||
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
|
||||
Browse {TOTAL_MODELS} AI models across {TOTAL_MODEL_PROVIDERS} providers. Compare
|
||||
pricing, context windows, and capabilities.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className='mt-8 flex flex-wrap gap-3'>
|
||||
<a
|
||||
href='https://sim.ai'
|
||||
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--white)] bg-[var(--white)] px-3 font-[430] text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
|
||||
>
|
||||
Start building free
|
||||
</a>
|
||||
<Link
|
||||
href='/integrations'
|
||||
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--landing-border-strong)] px-3 font-[430] text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
|
||||
>
|
||||
Explore integrations
|
||||
</Link>
|
||||
</div>
|
||||
</section>
|
||||
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<section aria-labelledby='providers-heading' className='mb-16'>
|
||||
<div className='mb-6'>
|
||||
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
|
||||
{featuredProviders.length > 0 && (
|
||||
<>
|
||||
<nav aria-label='Featured providers' className='flex flex-col sm:flex-row'>
|
||||
{featuredProviders.map((provider) => (
|
||||
<FeaturedProviderCard key={provider.id} provider={provider} />
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
{featuredModels.length > 0 && (
|
||||
<>
|
||||
<nav aria-label='Featured models' className='flex flex-col sm:flex-row'>
|
||||
{featuredModels.map(({ provider, model }) => (
|
||||
<FeaturedModelCard key={model.id} provider={provider} model={model} />
|
||||
))}
|
||||
</nav>
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</>
|
||||
)}
|
||||
|
||||
<ModelComparisonCharts models={ALL_CATALOG_MODELS} />
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<section aria-labelledby='all-models-heading'>
|
||||
<div className='px-6 pt-10 pb-4'>
|
||||
<h2
|
||||
id='all-models-heading'
|
||||
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
All models
|
||||
</h2>
|
||||
</div>
|
||||
<ModelDirectory />
|
||||
</section>
|
||||
|
||||
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
|
||||
<section aria-labelledby='faq-heading' className='px-6 py-10'>
|
||||
<h2
|
||||
id='providers-heading'
|
||||
className='font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
id='faq-heading'
|
||||
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
|
||||
>
|
||||
Browse by provider
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Each provider has its own generated SEO page with model lineup details, featured
|
||||
models, provider FAQs, and internal links to individual model pages.
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<LandingFAQ faqs={faqItems} />
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
|
||||
{featuredProviders.map((provider) => (
|
||||
<ProviderCard key={provider.id} provider={provider} />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section aria-labelledby='featured-models-heading' className='mb-16'>
|
||||
<div className='mb-6'>
|
||||
<h2
|
||||
id='featured-models-heading'
|
||||
className='font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
Featured model pages
|
||||
</h2>
|
||||
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
These pages are generated directly from the model registry and target high-intent
|
||||
search queries around pricing, context windows, and model capabilities.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
|
||||
{featuredModels.map(({ provider, model }) => (
|
||||
<ModelCard key={model.id} provider={provider} model={model} showProvider />
|
||||
))}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section aria-labelledby='all-models-heading'>
|
||||
<div className='mb-6'>
|
||||
<h2
|
||||
id='all-models-heading'
|
||||
className='font-[500] text-[28px] text-[var(--landing-text)]'
|
||||
>
|
||||
All models
|
||||
</h2>
|
||||
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
|
||||
Search the full catalog by provider, model ID, or capability. Use it to compare
|
||||
providers, sanity-check pricing, and quickly understand which models fit the workflow
|
||||
you're building. All pricing is shown per one million tokens using the metadata
|
||||
currently tracked in Sim.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<ModelDirectory />
|
||||
</section>
|
||||
|
||||
<section
|
||||
aria-labelledby='faq-heading'
|
||||
className='mt-16 rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
|
||||
>
|
||||
<h2 id='faq-heading' className='font-[500] text-[28px] text-[var(--landing-text)]'>
|
||||
Frequently asked questions
|
||||
</h2>
|
||||
<div className='mt-3'>
|
||||
<LandingFAQ faqs={faqItems} />
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
|
||||
</section>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -13,12 +13,6 @@ const PROVIDER_PREFIXES: Record<string, string[]> = {
|
||||
vllm: ['vllm/'],
|
||||
}
|
||||
|
||||
const PROVIDER_NAME_OVERRIDES: Record<string, string> = {
|
||||
deepseek: 'DeepSeek',
|
||||
vllm: 'vLLM',
|
||||
xai: 'xAI',
|
||||
}
|
||||
|
||||
const TOKEN_REPLACEMENTS: Record<string, string> = {
|
||||
ai: 'AI',
|
||||
aws: 'AWS',
|
||||
@@ -108,6 +102,7 @@ export interface CatalogModel {
|
||||
providerName: string
|
||||
providerSlug: string
|
||||
contextWindow: number | null
|
||||
releaseDate: string | null
|
||||
pricing: PricingInfo
|
||||
capabilities: ModelCapabilities
|
||||
capabilityTags: string[]
|
||||
@@ -126,6 +121,8 @@ export interface CatalogProvider {
|
||||
defaultModel: string
|
||||
defaultModelDisplayName: string
|
||||
icon?: ComponentType<{ className?: string }>
|
||||
color?: string
|
||||
isReseller: boolean
|
||||
contextInformationAvailable: boolean
|
||||
providerCapabilityTags: string[]
|
||||
modelCount: number
|
||||
@@ -418,10 +415,6 @@ function buildModelSummary(
|
||||
return parts.filter(Boolean).join(' ')
|
||||
}
|
||||
|
||||
function getProviderDisplayName(providerId: string, providerName: string): string {
|
||||
return PROVIDER_NAME_OVERRIDES[providerId] ?? providerName
|
||||
}
|
||||
|
||||
function computeModelRelevanceScore(model: CatalogModel): number {
|
||||
return (
|
||||
(model.capabilities.reasoningEffort ? 10 : 0) +
|
||||
@@ -438,7 +431,7 @@ function compareModelsByRelevance(a: CatalogModel, b: CatalogModel): number {
|
||||
|
||||
const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
|
||||
const providerSlug = slugify(provider.id)
|
||||
const providerDisplayName = getProviderDisplayName(provider.id, provider.name)
|
||||
const providerDisplayName = provider.name
|
||||
const providerCapabilityTags = buildCapabilityTags(provider.capabilities ?? {})
|
||||
|
||||
const models: CatalogModel[] = provider.models.map((model) => {
|
||||
@@ -464,6 +457,7 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
|
||||
providerName: providerDisplayName,
|
||||
providerSlug,
|
||||
contextWindow: model.contextWindow ?? null,
|
||||
releaseDate: model.releaseDate ?? null,
|
||||
pricing: model.pricing,
|
||||
capabilities: mergedCapabilities,
|
||||
capabilityTags,
|
||||
@@ -507,6 +501,8 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
|
||||
defaultModel: provider.defaultModel,
|
||||
defaultModelDisplayName,
|
||||
icon: provider.icon,
|
||||
color: provider.color,
|
||||
isReseller: provider.isReseller ?? false,
|
||||
contextInformationAvailable: provider.contextInformationAvailable !== false,
|
||||
providerCapabilityTags,
|
||||
modelCount: models.length,
|
||||
@@ -514,7 +510,6 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
|
||||
featuredModels,
|
||||
searchText: [
|
||||
provider.name,
|
||||
providerDisplayName,
|
||||
provider.id,
|
||||
provider.description,
|
||||
provider.defaultModel,
|
||||
@@ -631,7 +626,13 @@ export function buildProviderFaqs(provider: CatalogProvider): CatalogFaq[] {
|
||||
const cheapestModel = getCheapestProviderModel(provider)
|
||||
const largestContextModel = getLargestContextProviderModel(provider)
|
||||
|
||||
return [
|
||||
const toolUseModels = provider.models.filter(
|
||||
(m) =>
|
||||
m.capabilities.toolUsageControl !== undefined ||
|
||||
provider.providerCapabilityTags.includes('Tool Use')
|
||||
)
|
||||
|
||||
const faqs: CatalogFaq[] = [
|
||||
{
|
||||
question: `What ${provider.name} models are available in Sim?`,
|
||||
answer: `Sim currently tracks ${provider.modelCount} ${provider.name} model${provider.modelCount === 1 ? '' : 's'} including ${provider.models
|
||||
@@ -662,10 +663,27 @@ export function buildProviderFaqs(provider: CatalogProvider): CatalogFaq[] {
|
||||
: `Context window details are not fully available for every ${provider.name} model in the public catalog.`,
|
||||
},
|
||||
]
|
||||
|
||||
if (toolUseModels.length > 0) {
|
||||
faqs.push({
|
||||
question: `Which ${provider.name} models support tool use and function calling in Sim?`,
|
||||
answer:
|
||||
toolUseModels.length === provider.modelCount
|
||||
? `All ${provider.name} models in Sim support tool use and function calling, allowing agents to invoke external APIs, query databases, and run custom actions.`
|
||||
: `${toolUseModels
|
||||
.slice(0, 5)
|
||||
.map((m) => m.displayName)
|
||||
.join(
|
||||
', '
|
||||
)}${toolUseModels.length > 5 ? ', and others' : ''} support tool use and function calling in Sim, enabling agents to invoke external APIs and run custom actions.`,
|
||||
})
|
||||
}
|
||||
|
||||
return faqs
|
||||
}
|
||||
|
||||
export function buildModelFaqs(provider: CatalogProvider, model: CatalogModel): CatalogFaq[] {
|
||||
return [
|
||||
const faqs: CatalogFaq[] = [
|
||||
{
|
||||
question: `What is ${model.displayName}?`,
|
||||
answer: `${model.displayName} is a ${provider.name} model available in Sim. ${model.summary}`,
|
||||
@@ -677,17 +695,26 @@ export function buildModelFaqs(provider: CatalogProvider, model: CatalogModel):
|
||||
{
|
||||
question: `What is the context window for ${model.displayName}?`,
|
||||
answer: model.contextWindow
|
||||
? `${model.displayName} supports a listed context window of ${formatTokenCount(model.contextWindow)} tokens in Sim.`
|
||||
? `${model.displayName} supports a context window of ${formatTokenCount(model.contextWindow)} tokens in Sim. In an agent workflow, this determines how much conversation history, tool outputs, and retrieved documents the model can hold in a single call.`
|
||||
: `A public context window value is not currently tracked for ${model.displayName}.`,
|
||||
},
|
||||
{
|
||||
question: `What capabilities does ${model.displayName} support?`,
|
||||
answer:
|
||||
model.capabilityTags.length > 0
|
||||
? `${model.displayName} supports ${model.capabilityTags.join(', ')}.`
|
||||
: `${model.displayName} is available in Sim, but no extra public capability flags are currently tracked for this model.`,
|
||||
? `${model.displayName} supports the following capabilities in Sim: ${model.capabilityTags.join(', ')}.`
|
||||
: `${model.displayName} supports standard text generation in Sim. No additional capability flags such as tool use or structured outputs are currently tracked for this model.`,
|
||||
},
|
||||
]
|
||||
|
||||
if (model.bestFor) {
|
||||
faqs.push({
|
||||
question: `What is ${model.displayName} best used for?`,
|
||||
answer: `${model.bestFor} When used in a Sim workflow, it can be selected in any Agent block from the model picker.`,
|
||||
})
|
||||
}
|
||||
|
||||
return faqs
|
||||
}
|
||||
|
||||
export function buildModelCapabilityFacts(model: CatalogModel): CapabilityFact[] {
|
||||
|
||||
@@ -15,14 +15,6 @@ import { captureServerEvent } from '@/lib/posthog/server'
|
||||
|
||||
const logger = createLogger('KnowledgeBaseAPI')
|
||||
|
||||
/**
|
||||
* Schema for creating a knowledge base
|
||||
*
|
||||
* Chunking config units:
|
||||
* - maxSize: tokens (1 token ≈ 4 characters)
|
||||
* - minSize: characters
|
||||
* - overlap: tokens (1 token ≈ 4 characters)
|
||||
*/
|
||||
const CreateKnowledgeBaseSchema = z.object({
|
||||
name: z.string().min(1, 'Name is required'),
|
||||
description: z.string().optional(),
|
||||
@@ -31,12 +23,20 @@ const CreateKnowledgeBaseSchema = z.object({
|
||||
embeddingDimension: z.literal(1536).default(1536),
|
||||
chunkingConfig: z
|
||||
.object({
|
||||
/** Maximum chunk size in tokens (1 token ≈ 4 characters) */
|
||||
maxSize: z.number().min(100).max(4000).default(1024),
|
||||
/** Minimum chunk size in characters */
|
||||
minSize: z.number().min(1).max(2000).default(100),
|
||||
/** Overlap between chunks in tokens (1 token ≈ 4 characters) */
|
||||
overlap: z.number().min(0).max(500).default(200),
|
||||
strategy: z
|
||||
.enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token'])
|
||||
.default('auto')
|
||||
.optional(),
|
||||
strategyOptions: z
|
||||
.object({
|
||||
pattern: z.string().max(500).optional(),
|
||||
separators: z.array(z.string()).optional(),
|
||||
recipe: z.enum(['plain', 'markdown', 'code']).optional(),
|
||||
})
|
||||
.optional(),
|
||||
})
|
||||
.default({
|
||||
maxSize: 1024,
|
||||
@@ -45,13 +45,31 @@ const CreateKnowledgeBaseSchema = z.object({
|
||||
})
|
||||
.refine(
|
||||
(data) => {
|
||||
// Convert maxSize from tokens to characters for comparison (1 token ≈ 4 chars)
|
||||
const maxSizeInChars = data.maxSize * 4
|
||||
return data.minSize < maxSizeInChars
|
||||
},
|
||||
{
|
||||
message: 'Min chunk size (characters) must be less than max chunk size (tokens × 4)',
|
||||
}
|
||||
)
|
||||
.refine(
|
||||
(data) => {
|
||||
return data.overlap < data.maxSize
|
||||
},
|
||||
{
|
||||
message: 'Overlap must be less than max chunk size',
|
||||
}
|
||||
)
|
||||
.refine(
|
||||
(data) => {
|
||||
if (data.strategy === 'regex' && !data.strategyOptions?.pattern) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
{
|
||||
message: 'Regex pattern is required when using the regex chunking strategy',
|
||||
}
|
||||
),
|
||||
})
|
||||
|
||||
|
||||
@@ -36,6 +36,8 @@ export interface AddResourceDropdownProps {
|
||||
existingKeys: Set<string>
|
||||
onAdd: (resource: MothershipResource) => void
|
||||
onSwitch?: (resourceId: string) => void
|
||||
/** Resource types to hide from the dropdown (e.g. `['folder', 'task']`). */
|
||||
excludeTypes?: readonly MothershipResourceType[]
|
||||
}
|
||||
|
||||
export type AvailableItem = { id: string; name: string; isOpen?: boolean; [key: string]: unknown }
|
||||
@@ -47,7 +49,8 @@ interface AvailableItemsByType {
|
||||
|
||||
export function useAvailableResources(
|
||||
workspaceId: string,
|
||||
existingKeys: Set<string>
|
||||
existingKeys: Set<string>,
|
||||
excludeTypes?: readonly MothershipResourceType[]
|
||||
): AvailableItemsByType[] {
|
||||
const { data: workflows = [] } = useWorkflows(workspaceId)
|
||||
const { data: tables = [] } = useTablesList(workspaceId)
|
||||
@@ -56,8 +59,9 @@ export function useAvailableResources(
|
||||
const { data: folders = [] } = useFolders(workspaceId)
|
||||
const { data: tasks = [] } = useTasks(workspaceId)
|
||||
|
||||
return useMemo(
|
||||
() => [
|
||||
return useMemo(() => {
|
||||
const excluded = new Set<MothershipResourceType>(excludeTypes ?? [])
|
||||
const groups: AvailableItemsByType[] = [
|
||||
{
|
||||
type: 'workflow' as const,
|
||||
items: workflows.map((w) => ({
|
||||
@@ -107,9 +111,9 @@ export function useAvailableResources(
|
||||
isOpen: existingKeys.has(`task:${t.id}`),
|
||||
})),
|
||||
},
|
||||
],
|
||||
[workflows, folders, tables, files, knowledgeBases, tasks, existingKeys]
|
||||
)
|
||||
]
|
||||
return groups.filter((g) => !excluded.has(g.type))
|
||||
}, [workflows, folders, tables, files, knowledgeBases, tasks, existingKeys, excludeTypes])
|
||||
}
|
||||
|
||||
export function AddResourceDropdown({
|
||||
@@ -117,11 +121,12 @@ export function AddResourceDropdown({
|
||||
existingKeys,
|
||||
onAdd,
|
||||
onSwitch,
|
||||
excludeTypes,
|
||||
}: AddResourceDropdownProps) {
|
||||
const [open, setOpen] = useState(false)
|
||||
const [search, setSearch] = useState('')
|
||||
const [activeIndex, setActiveIndex] = useState(0)
|
||||
const available = useAvailableResources(workspaceId, existingKeys)
|
||||
const available = useAvailableResources(workspaceId, existingKeys, excludeTypes)
|
||||
|
||||
const handleOpenChange = useCallback((next: boolean) => {
|
||||
setOpen(next)
|
||||
@@ -162,9 +167,9 @@ export function AddResourceDropdown({
|
||||
} else if (e.key === 'ArrowUp') {
|
||||
e.preventDefault()
|
||||
setActiveIndex((prev) => Math.max(prev - 1, 0))
|
||||
} else if (e.key === 'Enter') {
|
||||
e.preventDefault()
|
||||
} else if (e.key === 'Enter' || (e.key === 'Tab' && !e.shiftKey)) {
|
||||
if (filtered.length > 0 && filtered[activeIndex]) {
|
||||
e.preventDefault()
|
||||
const { type, item } = filtered[activeIndex]
|
||||
select({ type, id: item.id, title: item.name }, item.isOpen)
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ import {
|
||||
import { Button, Tooltip } from '@/components/emcn'
|
||||
import { Columns3, Eye, PanelLeft, Pencil } from '@/components/emcn/icons'
|
||||
import { isEphemeralResource } from '@/lib/copilot/resource-extraction'
|
||||
import { SIM_RESOURCE_DRAG_TYPE } from '@/lib/copilot/resource-types'
|
||||
import { SIM_RESOURCE_DRAG_TYPE, SIM_RESOURCES_DRAG_TYPE } from '@/lib/copilot/resource-types'
|
||||
import { cn } from '@/lib/core/utils/cn'
|
||||
import type { PreviewMode } from '@/app/workspace/[workspaceId]/files/components/file-viewer'
|
||||
import { AddResourceDropdown } from '@/app/workspace/[workspaceId]/home/components/mothership-view/components/add-resource-dropdown'
|
||||
@@ -38,6 +38,62 @@ import { useWorkspaceFiles } from '@/hooks/queries/workspace-files'
|
||||
const EDGE_ZONE = 40
|
||||
const SCROLL_SPEED = 8
|
||||
|
||||
const ADD_RESOURCE_EXCLUDED_TYPES: readonly MothershipResourceType[] = ['folder', 'task'] as const
|
||||
|
||||
/**
|
||||
* Returns the id of the nearest resource to `idx` that is in `filter`
|
||||
* (or any resource if `filter` is null). Returns undefined if nothing qualifies.
|
||||
*/
|
||||
function findNearestId(
|
||||
resources: MothershipResource[],
|
||||
idx: number,
|
||||
filter: Set<string> | null
|
||||
): string | undefined {
|
||||
for (let offset = 1; offset < resources.length; offset++) {
|
||||
for (const candidate of [idx + offset, idx - offset]) {
|
||||
const r = resources[candidate]
|
||||
if (r && (!filter || filter.has(r.id))) return r.id
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an offscreen drag image showing all selected tabs side-by-side, so the
|
||||
* cursor visibly carries every tab in the multi-selection. The element is
|
||||
* appended to the document and removed on the next tick after the browser has
|
||||
* snapshotted it.
|
||||
*/
|
||||
function buildMultiDragImage(
|
||||
scrollNode: HTMLElement | null,
|
||||
selected: MothershipResource[]
|
||||
): HTMLElement | null {
|
||||
if (!scrollNode || selected.length === 0) return null
|
||||
const container = document.createElement('div')
|
||||
container.style.position = 'fixed'
|
||||
container.style.top = '-10000px'
|
||||
container.style.left = '-10000px'
|
||||
container.style.display = 'flex'
|
||||
container.style.alignItems = 'center'
|
||||
container.style.gap = '6px'
|
||||
container.style.padding = '4px'
|
||||
container.style.pointerEvents = 'none'
|
||||
let appendedAny = false
|
||||
for (const r of selected) {
|
||||
const original = scrollNode.querySelector<HTMLElement>(
|
||||
`[data-resource-tab-id="${CSS.escape(r.id)}"]`
|
||||
)
|
||||
if (!original) continue
|
||||
const clone = original.cloneNode(true) as HTMLElement
|
||||
clone.style.opacity = '0.95'
|
||||
container.appendChild(clone)
|
||||
appendedAny = true
|
||||
}
|
||||
if (!appendedAny) return null
|
||||
document.body.appendChild(container)
|
||||
return container
|
||||
}
|
||||
|
||||
const PREVIEW_MODE_ICONS = {
|
||||
editor: Columns3,
|
||||
split: Eye,
|
||||
@@ -125,8 +181,19 @@ export function ResourceTabs({
|
||||
const [hoveredTabId, setHoveredTabId] = useState<string | null>(null)
|
||||
const [draggedIdx, setDraggedIdx] = useState<number | null>(null)
|
||||
const [dropGapIdx, setDropGapIdx] = useState<number | null>(null)
|
||||
const [selectedIds, setSelectedIds] = useState<Set<string>>(new Set())
|
||||
const dragStartIdx = useRef<number | null>(null)
|
||||
const autoScrollRaf = useRef<number | null>(null)
|
||||
const anchorIdRef = useRef<string | null>(null)
|
||||
const prevChatIdRef = useRef(chatId)
|
||||
|
||||
// Reset selection when switching chats — component instance persists across
|
||||
// chat switches so stale IDs would otherwise carry over.
|
||||
if (prevChatIdRef.current !== chatId) {
|
||||
prevChatIdRef.current = chatId
|
||||
setSelectedIds(new Set())
|
||||
anchorIdRef.current = null
|
||||
}
|
||||
|
||||
const existingKeys = useMemo(
|
||||
() => new Set(resources.map((r) => `${r.type}:${r.id}`)),
|
||||
@@ -143,34 +210,129 @@ export function ResourceTabs({
|
||||
[chatId, onAddResource]
|
||||
)
|
||||
|
||||
const handleTabClick = useCallback(
|
||||
(e: React.MouseEvent, idx: number) => {
|
||||
const resource = resources[idx]
|
||||
if (!resource) return
|
||||
|
||||
// Shift+click: contiguous range from anchor
|
||||
if (e.shiftKey) {
|
||||
// Fall back to activeId when no explicit anchor exists (e.g. tab opened via sidebar)
|
||||
const anchorId = anchorIdRef.current ?? activeId
|
||||
const anchorIdx = anchorId ? resources.findIndex((r) => r.id === anchorId) : -1
|
||||
if (anchorIdx !== -1) {
|
||||
const start = Math.min(anchorIdx, idx)
|
||||
const end = Math.max(anchorIdx, idx)
|
||||
const next = new Set<string>()
|
||||
for (let i = start; i <= end; i++) next.add(resources[i].id)
|
||||
setSelectedIds(next)
|
||||
onSelect(resource.id)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Cmd/Ctrl+click: toggle individual tab in/out of selection
|
||||
if (e.metaKey || e.ctrlKey) {
|
||||
const wasSelected = selectedIds.has(resource.id)
|
||||
if (wasSelected) {
|
||||
const next = new Set(selectedIds)
|
||||
next.delete(resource.id)
|
||||
setSelectedIds(next)
|
||||
// Only switch active if we just deselected the currently-active tab
|
||||
if (activeId === resource.id) {
|
||||
const fallback =
|
||||
findNearestId(resources, idx, next) ?? findNearestId(resources, idx, null)
|
||||
if (fallback) onSelect(fallback)
|
||||
}
|
||||
} else {
|
||||
setSelectedIds((prev) => new Set(prev).add(resource.id))
|
||||
onSelect(resource.id)
|
||||
}
|
||||
if (!anchorIdRef.current) anchorIdRef.current = resource.id
|
||||
return
|
||||
}
|
||||
|
||||
// Plain click: single-select
|
||||
anchorIdRef.current = resource.id
|
||||
setSelectedIds(new Set([resource.id]))
|
||||
onSelect(resource.id)
|
||||
},
|
||||
[resources, onSelect, selectedIds, activeId]
|
||||
)
|
||||
|
||||
const handleRemove = useCallback(
|
||||
(e: React.MouseEvent, resource: MothershipResource) => {
|
||||
e.stopPropagation()
|
||||
if (!chatId) return
|
||||
if (!isEphemeralResource(resource)) {
|
||||
removeResource.mutate({ chatId, resourceType: resource.type, resourceId: resource.id })
|
||||
const isMulti = selectedIds.has(resource.id) && selectedIds.size > 1
|
||||
const targets = isMulti ? resources.filter((r) => selectedIds.has(r.id)) : [resource]
|
||||
// Update parent state immediately for all targets
|
||||
for (const r of targets) {
|
||||
onRemoveResource(r.type, r.id)
|
||||
}
|
||||
// Clear stale selection and anchor for all removed targets
|
||||
const removedIds = new Set(targets.map((r) => r.id))
|
||||
setSelectedIds((prev) => {
|
||||
const next = new Set(prev)
|
||||
for (const id of removedIds) next.delete(id)
|
||||
return next
|
||||
})
|
||||
if (anchorIdRef.current && removedIds.has(anchorIdRef.current)) {
|
||||
anchorIdRef.current = null
|
||||
}
|
||||
// Serialize mutations so each onMutate sees the cache updated by the prior
|
||||
// one. Continue on individual failures so remaining removals still fire.
|
||||
const persistable = targets.filter((r) => !isEphemeralResource(r))
|
||||
if (persistable.length > 0) {
|
||||
void (async () => {
|
||||
for (const r of persistable) {
|
||||
try {
|
||||
await removeResource.mutateAsync({
|
||||
chatId,
|
||||
resourceType: r.type,
|
||||
resourceId: r.id,
|
||||
})
|
||||
} catch {
|
||||
// Individual failure — the mutation's onError already rolled back
|
||||
// this resource in cache. Remaining removals continue.
|
||||
}
|
||||
}
|
||||
})()
|
||||
}
|
||||
onRemoveResource(resource.type, resource.id)
|
||||
},
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
[chatId, onRemoveResource]
|
||||
[chatId, onRemoveResource, resources, selectedIds]
|
||||
)
|
||||
|
||||
const handleDragStart = useCallback(
|
||||
(e: React.DragEvent, idx: number) => {
|
||||
const resource = resources[idx]
|
||||
if (!resource) return
|
||||
const selected = resources.filter((r) => selectedIds.has(r.id))
|
||||
const isMultiDrag = selected.length > 1 && selectedIds.has(resource.id)
|
||||
if (isMultiDrag) {
|
||||
e.dataTransfer.effectAllowed = 'copy'
|
||||
e.dataTransfer.setData(SIM_RESOURCES_DRAG_TYPE, JSON.stringify(selected))
|
||||
const dragImage = buildMultiDragImage(scrollNodeRef.current, selected)
|
||||
if (dragImage) {
|
||||
e.dataTransfer.setDragImage(dragImage, 16, 16)
|
||||
setTimeout(() => dragImage.remove(), 0)
|
||||
}
|
||||
// Skip dragStartIdx so internal reorder is disabled for multi-select drags
|
||||
dragStartIdx.current = null
|
||||
setDraggedIdx(null)
|
||||
return
|
||||
}
|
||||
dragStartIdx.current = idx
|
||||
setDraggedIdx(idx)
|
||||
e.dataTransfer.effectAllowed = 'copyMove'
|
||||
e.dataTransfer.setData('text/plain', String(idx))
|
||||
const resource = resources[idx]
|
||||
if (resource) {
|
||||
e.dataTransfer.setData(
|
||||
SIM_RESOURCE_DRAG_TYPE,
|
||||
JSON.stringify({ type: resource.type, id: resource.id, title: resource.title })
|
||||
)
|
||||
}
|
||||
e.dataTransfer.setData(
|
||||
SIM_RESOURCE_DRAG_TYPE,
|
||||
JSON.stringify({ type: resource.type, id: resource.id, title: resource.title })
|
||||
)
|
||||
},
|
||||
[resources]
|
||||
[resources, selectedIds]
|
||||
)
|
||||
|
||||
const stopAutoScroll = useCallback(() => {
|
||||
@@ -308,6 +470,7 @@ export function ResourceTabs({
|
||||
const isActive = activeId === resource.id
|
||||
const isHovered = hoveredTabId === resource.id
|
||||
const isDragging = draggedIdx === idx
|
||||
const isSelected = selectedIds.has(resource.id) && selectedIds.size > 1
|
||||
const showGapBefore =
|
||||
dropGapIdx === idx &&
|
||||
draggedIdx !== null &&
|
||||
@@ -329,22 +492,24 @@ export function ResourceTabs({
|
||||
<Button
|
||||
variant='subtle'
|
||||
draggable
|
||||
data-resource-tab-id={resource.id}
|
||||
onDragStart={(e) => handleDragStart(e, idx)}
|
||||
onDragOver={(e) => handleDragOver(e, idx)}
|
||||
onDragLeave={handleDragLeave}
|
||||
onDragEnd={handleDragEnd}
|
||||
onMouseDown={(e) => {
|
||||
if (e.button === 1 && chatId) {
|
||||
if (e.button === 1) {
|
||||
e.preventDefault()
|
||||
handleRemove(e, resource)
|
||||
if (chatId) handleRemove(e, resource)
|
||||
}
|
||||
}}
|
||||
onClick={() => onSelect(resource.id)}
|
||||
onClick={(e) => handleTabClick(e, idx)}
|
||||
onMouseEnter={() => setHoveredTabId(resource.id)}
|
||||
onMouseLeave={() => setHoveredTabId(null)}
|
||||
className={cn(
|
||||
'group relative shrink-0 bg-transparent px-2 py-1 pr-[22px] text-caption transition-opacity duration-150',
|
||||
isActive && 'bg-[var(--surface-4)]',
|
||||
isSelected && !isActive && 'bg-[var(--surface-3)]',
|
||||
isDragging && 'opacity-30'
|
||||
)}
|
||||
>
|
||||
@@ -394,6 +559,7 @@ export function ResourceTabs({
|
||||
existingKeys={existingKeys}
|
||||
onAdd={handleAdd}
|
||||
onSwitch={onSelect}
|
||||
excludeTypes={ADD_RESOURCE_EXCLUDED_TYPES}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -263,7 +263,8 @@ export function AddDocumentsModal({
|
||||
{isDragging ? 'Drop files here' : 'Drop files here or click to browse'}
|
||||
</span>
|
||||
<span className='text-[var(--text-tertiary)] text-xs'>
|
||||
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB each)
|
||||
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSONL (max 100MB
|
||||
each)
|
||||
</span>
|
||||
</div>
|
||||
</Button>
|
||||
|
||||
@@ -9,6 +9,8 @@ import { useForm } from 'react-hook-form'
|
||||
import { z } from 'zod'
|
||||
import {
|
||||
Button,
|
||||
Combobox,
|
||||
type ComboboxOption,
|
||||
Input,
|
||||
Label,
|
||||
Modal,
|
||||
@@ -18,6 +20,7 @@ import {
|
||||
ModalHeader,
|
||||
Textarea,
|
||||
} from '@/components/emcn'
|
||||
import type { StrategyOptions } from '@/lib/chunkers/types'
|
||||
import { cn } from '@/lib/core/utils/cn'
|
||||
import { formatFileSize, validateKnowledgeBaseFile } from '@/lib/uploads/utils/file-utils'
|
||||
import { ACCEPT_ATTRIBUTE } from '@/lib/uploads/utils/validation'
|
||||
@@ -35,6 +38,20 @@ interface CreateBaseModalProps {
|
||||
onOpenChange: (open: boolean) => void
|
||||
}
|
||||
|
||||
const STRATEGY_OPTIONS = [
|
||||
{ value: 'auto', label: 'Auto (detect from content)' },
|
||||
{ value: 'text', label: 'Text (word boundary splitting)' },
|
||||
{ value: 'recursive', label: 'Recursive (configurable separators)' },
|
||||
{ value: 'sentence', label: 'Sentence' },
|
||||
{ value: 'token', label: 'Token (fixed-size)' },
|
||||
{ value: 'regex', label: 'Regex (custom pattern)' },
|
||||
] as const
|
||||
|
||||
const STRATEGY_COMBOBOX_OPTIONS: ComboboxOption[] = STRATEGY_OPTIONS.map((o) => ({
|
||||
label: o.label,
|
||||
value: o.value,
|
||||
}))
|
||||
|
||||
const FormSchema = z
|
||||
.object({
|
||||
name: z
|
||||
@@ -43,25 +60,24 @@ const FormSchema = z
|
||||
.max(100, 'Name must be less than 100 characters')
|
||||
.refine((value) => value.trim().length > 0, 'Name cannot be empty'),
|
||||
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
|
||||
/** Minimum chunk size in characters */
|
||||
minChunkSize: z
|
||||
.number()
|
||||
.min(1, 'Min chunk size must be at least 1 character')
|
||||
.max(2000, 'Min chunk size must be less than 2000 characters'),
|
||||
/** Maximum chunk size in tokens (1 token ≈ 4 characters) */
|
||||
maxChunkSize: z
|
||||
.number()
|
||||
.min(100, 'Max chunk size must be at least 100 tokens')
|
||||
.max(4000, 'Max chunk size must be less than 4000 tokens'),
|
||||
/** Overlap between chunks in tokens */
|
||||
overlapSize: z
|
||||
.number()
|
||||
.min(0, 'Overlap must be non-negative')
|
||||
.max(500, 'Overlap must be less than 500 tokens'),
|
||||
strategy: z.enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token']).default('auto'),
|
||||
regexPattern: z.string().optional(),
|
||||
customSeparators: z.string().optional(),
|
||||
})
|
||||
.refine(
|
||||
(data) => {
|
||||
// Convert maxChunkSize from tokens to characters for comparison (1 token ≈ 4 chars)
|
||||
const maxChunkSizeInChars = data.maxChunkSize * 4
|
||||
return data.minChunkSize < maxChunkSizeInChars
|
||||
},
|
||||
@@ -70,6 +86,27 @@ const FormSchema = z
|
||||
path: ['minChunkSize'],
|
||||
}
|
||||
)
|
||||
.refine(
|
||||
(data) => {
|
||||
return data.overlapSize < data.maxChunkSize
|
||||
},
|
||||
{
|
||||
message: 'Overlap must be less than max chunk size',
|
||||
path: ['overlapSize'],
|
||||
}
|
||||
)
|
||||
.refine(
|
||||
(data) => {
|
||||
if (data.strategy === 'regex' && !data.regexPattern?.trim()) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
{
|
||||
message: 'Regex pattern is required when using the regex strategy',
|
||||
path: ['regexPattern'],
|
||||
}
|
||||
)
|
||||
|
||||
type FormValues = z.infer<typeof FormSchema>
|
||||
|
||||
@@ -124,6 +161,7 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
handleSubmit,
|
||||
reset,
|
||||
watch,
|
||||
setValue,
|
||||
formState: { errors },
|
||||
} = useForm<FormValues>({
|
||||
resolver: zodResolver(FormSchema),
|
||||
@@ -133,11 +171,15 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
minChunkSize: 100,
|
||||
maxChunkSize: 1024,
|
||||
overlapSize: 200,
|
||||
strategy: 'auto',
|
||||
regexPattern: '',
|
||||
customSeparators: '',
|
||||
},
|
||||
mode: 'onSubmit',
|
||||
})
|
||||
|
||||
const nameValue = watch('name')
|
||||
const strategyValue = watch('strategy')
|
||||
|
||||
useEffect(() => {
|
||||
if (open) {
|
||||
@@ -153,6 +195,9 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
minChunkSize: 100,
|
||||
maxChunkSize: 1024,
|
||||
overlapSize: 200,
|
||||
strategy: 'auto',
|
||||
regexPattern: '',
|
||||
customSeparators: '',
|
||||
})
|
||||
}
|
||||
}, [open, reset])
|
||||
@@ -255,6 +300,17 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
setSubmitStatus(null)
|
||||
|
||||
try {
|
||||
const strategyOptions: StrategyOptions | undefined =
|
||||
data.strategy === 'regex' && data.regexPattern
|
||||
? { pattern: data.regexPattern }
|
||||
: data.strategy === 'recursive' && data.customSeparators?.trim()
|
||||
? {
|
||||
separators: data.customSeparators
|
||||
.split(',')
|
||||
.map((s) => s.trim().replace(/\\n/g, '\n').replace(/\\t/g, '\t')),
|
||||
}
|
||||
: undefined
|
||||
|
||||
const newKnowledgeBase = await createKnowledgeBaseMutation.mutateAsync({
|
||||
name: data.name,
|
||||
description: data.description || undefined,
|
||||
@@ -263,6 +319,8 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
maxSize: data.maxChunkSize,
|
||||
minSize: data.minChunkSize,
|
||||
overlap: data.overlapSize,
|
||||
...(data.strategy !== 'auto' && { strategy: data.strategy }),
|
||||
...(strategyOptions && { strategyOptions }),
|
||||
},
|
||||
})
|
||||
|
||||
@@ -312,7 +370,6 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
<div className='space-y-3'>
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Label htmlFor='kb-name'>Name</Label>
|
||||
{/* Hidden decoy fields to prevent browser autofill */}
|
||||
<input
|
||||
type='text'
|
||||
name='fakeusernameremembered'
|
||||
@@ -403,6 +460,59 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Label>Chunking Strategy</Label>
|
||||
<Combobox
|
||||
options={STRATEGY_COMBOBOX_OPTIONS}
|
||||
value={strategyValue}
|
||||
onChange={(value) => setValue('strategy', value as FormValues['strategy'])}
|
||||
dropdownWidth='trigger'
|
||||
align='start'
|
||||
/>
|
||||
<p className='text-[var(--text-muted)] text-xs'>
|
||||
Auto detects the best strategy based on file content type.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{strategyValue === 'regex' && (
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Label htmlFor='regexPattern'>Regex Pattern</Label>
|
||||
<Input
|
||||
id='regexPattern'
|
||||
placeholder='e.g. \\n\\n or (?<=\\})\\s*(?=\\{)'
|
||||
{...register('regexPattern')}
|
||||
className={cn(errors.regexPattern && 'border-[var(--text-error)]')}
|
||||
autoComplete='off'
|
||||
data-form-type='other'
|
||||
/>
|
||||
{errors.regexPattern && (
|
||||
<p className='text-[var(--text-error)] text-xs'>
|
||||
{errors.regexPattern.message}
|
||||
</p>
|
||||
)}
|
||||
<p className='text-[var(--text-muted)] text-xs'>
|
||||
Text will be split at each match of this regex pattern.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{strategyValue === 'recursive' && (
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Label htmlFor='customSeparators'>Custom Separators (optional)</Label>
|
||||
<Input
|
||||
id='customSeparators'
|
||||
placeholder='e.g. \n\n, \n, . , '
|
||||
{...register('customSeparators')}
|
||||
autoComplete='off'
|
||||
data-form-type='other'
|
||||
/>
|
||||
<p className='text-[var(--text-muted)] text-xs'>
|
||||
Comma-separated list of delimiters in priority order. Leave empty for default
|
||||
separators.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className='flex flex-col gap-2'>
|
||||
<Label>Upload Documents</Label>
|
||||
<Button
|
||||
@@ -431,7 +541,8 @@ export const CreateBaseModal = memo(function CreateBaseModal({
|
||||
{isDragging ? 'Drop files here' : 'Drop files here or click to browse'}
|
||||
</span>
|
||||
<span className='text-[var(--text-tertiary)] text-xs'>
|
||||
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB each)
|
||||
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSONL (max 100MB
|
||||
each)
|
||||
</span>
|
||||
</div>
|
||||
</Button>
|
||||
|
||||
@@ -59,40 +59,61 @@ export function WorkspacePermissionsProvider({ children }: WorkspacePermissionsP
|
||||
const hasOperationError = useOperationQueueStore((state) => state.hasOperationError)
|
||||
const addNotification = useNotificationStore((state) => state.addNotification)
|
||||
const removeNotification = useNotificationStore((state) => state.removeNotification)
|
||||
const { isReconnecting } = useSocket()
|
||||
const reconnectingNotificationIdRef = useRef<string | null>(null)
|
||||
const { isReconnecting, isRetryingWorkflowJoin } = useSocket()
|
||||
const realtimeStatusNotificationIdRef = useRef<string | null>(null)
|
||||
const realtimeStatusNotificationMessageRef = useRef<string | null>(null)
|
||||
|
||||
const isOfflineMode = hasOperationError
|
||||
const realtimeStatusMessage = isReconnecting
|
||||
? 'Reconnecting...'
|
||||
: isRetryingWorkflowJoin
|
||||
? 'Joining workflow...'
|
||||
: null
|
||||
|
||||
const clearRealtimeStatusNotification = useCallback(() => {
|
||||
if (!realtimeStatusNotificationIdRef.current) {
|
||||
return
|
||||
}
|
||||
|
||||
removeNotification(realtimeStatusNotificationIdRef.current)
|
||||
realtimeStatusNotificationIdRef.current = null
|
||||
realtimeStatusNotificationMessageRef.current = null
|
||||
}, [removeNotification])
|
||||
|
||||
useEffect(() => {
|
||||
if (isReconnecting && !reconnectingNotificationIdRef.current && !isOfflineMode) {
|
||||
const id = addNotification({
|
||||
level: 'error',
|
||||
message: 'Reconnecting...',
|
||||
})
|
||||
reconnectingNotificationIdRef.current = id
|
||||
} else if (!isReconnecting && reconnectingNotificationIdRef.current) {
|
||||
removeNotification(reconnectingNotificationIdRef.current)
|
||||
reconnectingNotificationIdRef.current = null
|
||||
if (isOfflineMode || !realtimeStatusMessage) {
|
||||
clearRealtimeStatusNotification()
|
||||
return
|
||||
}
|
||||
|
||||
return () => {
|
||||
if (reconnectingNotificationIdRef.current) {
|
||||
removeNotification(reconnectingNotificationIdRef.current)
|
||||
reconnectingNotificationIdRef.current = null
|
||||
}
|
||||
if (
|
||||
realtimeStatusNotificationIdRef.current &&
|
||||
realtimeStatusNotificationMessageRef.current === realtimeStatusMessage
|
||||
) {
|
||||
return
|
||||
}
|
||||
}, [isReconnecting, isOfflineMode, addNotification, removeNotification])
|
||||
|
||||
clearRealtimeStatusNotification()
|
||||
|
||||
const id = addNotification({
|
||||
level: 'error',
|
||||
message: realtimeStatusMessage,
|
||||
})
|
||||
|
||||
realtimeStatusNotificationIdRef.current = id
|
||||
realtimeStatusNotificationMessageRef.current = realtimeStatusMessage
|
||||
}, [addNotification, clearRealtimeStatusNotification, isOfflineMode, realtimeStatusMessage])
|
||||
|
||||
useEffect(() => {
|
||||
return clearRealtimeStatusNotification
|
||||
}, [clearRealtimeStatusNotification])
|
||||
|
||||
useEffect(() => {
|
||||
if (!isOfflineMode || hasShownOfflineNotification) {
|
||||
return
|
||||
}
|
||||
|
||||
if (reconnectingNotificationIdRef.current) {
|
||||
removeNotification(reconnectingNotificationIdRef.current)
|
||||
reconnectingNotificationIdRef.current = null
|
||||
}
|
||||
clearRealtimeStatusNotification()
|
||||
|
||||
try {
|
||||
addNotification({
|
||||
@@ -107,7 +128,7 @@ export function WorkspacePermissionsProvider({ children }: WorkspacePermissionsP
|
||||
} catch (error) {
|
||||
logger.error('Failed to add offline notification', { error })
|
||||
}
|
||||
}, [addNotification, removeNotification, hasShownOfflineNotification, isOfflineMode])
|
||||
}, [addNotification, clearRealtimeStatusNotification, hasShownOfflineNotification, isOfflineMode])
|
||||
|
||||
const {
|
||||
data: workspacePermissions,
|
||||
|
||||
@@ -5,6 +5,7 @@ import { useViewport } from 'reactflow'
|
||||
import { getUserColor } from '@/lib/workspaces/colors'
|
||||
import { usePreventZoom } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks'
|
||||
import { useSocket } from '@/app/workspace/providers/socket-provider'
|
||||
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
|
||||
|
||||
interface CursorPoint {
|
||||
x: number
|
||||
@@ -19,11 +20,16 @@ interface CursorRenderData {
|
||||
}
|
||||
|
||||
const CursorsComponent = () => {
|
||||
const { presenceUsers, currentSocketId } = useSocket()
|
||||
const activeWorkflowId = useWorkflowRegistry((state) => state.activeWorkflowId)
|
||||
const { currentWorkflowId, presenceUsers, currentSocketId } = useSocket()
|
||||
const viewport = useViewport()
|
||||
const preventZoomRef = usePreventZoom()
|
||||
|
||||
const cursors = useMemo<CursorRenderData[]>(() => {
|
||||
if (!activeWorkflowId || currentWorkflowId !== activeWorkflowId) {
|
||||
return []
|
||||
}
|
||||
|
||||
return presenceUsers
|
||||
.filter((user): user is typeof user & { cursor: CursorPoint } => Boolean(user.cursor))
|
||||
.filter((user) => user.socketId !== currentSocketId)
|
||||
@@ -33,7 +39,7 @@ const CursorsComponent = () => {
|
||||
cursor: user.cursor,
|
||||
color: getUserColor(user.userId),
|
||||
}))
|
||||
}, [currentSocketId, presenceUsers])
|
||||
}, [activeWorkflowId, currentSocketId, currentWorkflowId, presenceUsers])
|
||||
|
||||
if (!cursors.length) {
|
||||
return null
|
||||
|
||||
@@ -98,7 +98,7 @@ export function CredentialSelector({
|
||||
)
|
||||
const provider = effectiveProviderId
|
||||
|
||||
const isTriggerMode = subBlock.mode === 'trigger'
|
||||
const isTriggerMode = subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced'
|
||||
|
||||
const {
|
||||
data: rawCredentials = [],
|
||||
|
||||
@@ -242,9 +242,13 @@ export const EnvVarDropdown: React.FC<EnvVarDropdownProps> = ({
|
||||
})
|
||||
break
|
||||
case 'Enter':
|
||||
e.preventDefault()
|
||||
e.stopPropagation()
|
||||
handleEnvVarSelect(filteredEnvVars[selectedIndex])
|
||||
case 'Tab':
|
||||
if (e.key === 'Tab' && e.shiftKey) break
|
||||
if (filteredEnvVars[selectedIndex]) {
|
||||
e.preventDefault()
|
||||
e.stopPropagation()
|
||||
handleEnvVarSelect(filteredEnvVars[selectedIndex])
|
||||
}
|
||||
break
|
||||
case 'Escape':
|
||||
e.preventDefault()
|
||||
|
||||
@@ -279,9 +279,11 @@ export const KeyboardNavigationHandler: React.FC<KeyboardNavigationHandlerProps>
|
||||
}
|
||||
break
|
||||
case 'Enter':
|
||||
e.preventDefault()
|
||||
e.stopPropagation()
|
||||
case 'Tab':
|
||||
if (e.key === 'Tab' && e.shiftKey) break
|
||||
if (selected && selectedIndex >= 0 && selectedIndex < flatTagList.length) {
|
||||
e.preventDefault()
|
||||
e.stopPropagation()
|
||||
handleTagSelect(selected.tag, selected.group)
|
||||
}
|
||||
break
|
||||
|
||||
@@ -145,7 +145,9 @@ export function Editor() {
|
||||
if (!triggerMode) return subBlocks
|
||||
return subBlocks.filter(
|
||||
(subBlock) =>
|
||||
subBlock.mode === 'trigger' || subBlock.type === ('trigger-config' as SubBlockType)
|
||||
subBlock.mode === 'trigger' ||
|
||||
subBlock.mode === 'trigger-advanced' ||
|
||||
subBlock.type === ('trigger-config' as SubBlockType)
|
||||
)
|
||||
}, [blockConfig?.subBlocks, triggerMode])
|
||||
|
||||
|
||||
@@ -102,7 +102,9 @@ export function useEditorSubblockLayout(
|
||||
const subBlocksForCanonical = displayTriggerMode
|
||||
? (config.subBlocks || []).filter(
|
||||
(subBlock) =>
|
||||
subBlock.mode === 'trigger' || subBlock.type === ('trigger-config' as SubBlockType)
|
||||
subBlock.mode === 'trigger' ||
|
||||
subBlock.mode === 'trigger-advanced' ||
|
||||
subBlock.type === ('trigger-config' as SubBlockType)
|
||||
)
|
||||
: config.subBlocks || []
|
||||
const canonicalIndex = buildCanonicalIndex(subBlocksForCanonical)
|
||||
@@ -137,12 +139,12 @@ export function useEditorSubblockLayout(
|
||||
}
|
||||
|
||||
// Filter by mode if specified
|
||||
if (block.mode === 'trigger') {
|
||||
if (block.mode === 'trigger' || block.mode === 'trigger-advanced') {
|
||||
if (!displayTriggerMode) return false
|
||||
}
|
||||
|
||||
// When in trigger mode, hide blocks that don't have mode: 'trigger'
|
||||
if (displayTriggerMode && block.mode !== 'trigger') {
|
||||
// When in trigger mode, hide blocks that don't have mode: 'trigger' or 'trigger-advanced'
|
||||
if (displayTriggerMode && block.mode !== 'trigger' && block.mode !== 'trigger-advanced') {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@@ -534,7 +534,6 @@ const SubBlockRow = memo(function SubBlockRow({
|
||||
workspaceId
|
||||
)
|
||||
|
||||
const credentialId = dependencyValues.credential
|
||||
const knowledgeBaseId = dependencyValues.knowledgeBaseId
|
||||
|
||||
const dropdownLabel = useMemo(() => {
|
||||
@@ -576,6 +575,7 @@ const SubBlockRow = memo(function SubBlockRow({
|
||||
const collectionIdValue = resolveContextValue('collectionId')
|
||||
const spreadsheetIdValue = resolveContextValue('spreadsheetId')
|
||||
const fileIdValue = resolveContextValue('fileId')
|
||||
const credentialId = dependencyValues.credential ?? resolveContextValue('oauthCredential')
|
||||
|
||||
const { displayName: selectorDisplayName } = useSelectorDisplayName({
|
||||
subBlock,
|
||||
|
||||
@@ -1153,8 +1153,10 @@ function PreviewEditorContent({
|
||||
if (subBlock.type === ('trigger-config' as SubBlockType)) {
|
||||
return effectiveTrigger || isPureTriggerBlock
|
||||
}
|
||||
if (subBlock.mode === 'trigger' && !effectiveTrigger) return false
|
||||
if (effectiveTrigger && subBlock.mode !== 'trigger') return false
|
||||
if ((subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') && !effectiveTrigger)
|
||||
return false
|
||||
if (effectiveTrigger && subBlock.mode !== 'trigger' && subBlock.mode !== 'trigger-advanced')
|
||||
return false
|
||||
if (!isSubBlockFeatureEnabled(subBlock)) return false
|
||||
if (
|
||||
!isSubBlockVisibleForMode(
|
||||
|
||||
@@ -319,11 +319,11 @@ function WorkflowPreviewBlockInner({ data }: NodeProps<WorkflowPreviewBlockData>
|
||||
|
||||
if (effectiveTrigger) {
|
||||
const isValidTriggerSubblock = isPureTriggerBlock
|
||||
? subBlock.mode === 'trigger' || !subBlock.mode
|
||||
: subBlock.mode === 'trigger'
|
||||
? subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced' || !subBlock.mode
|
||||
: subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced'
|
||||
if (!isValidTriggerSubblock) return false
|
||||
} else {
|
||||
if (subBlock.mode === 'trigger') return false
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') return false
|
||||
}
|
||||
|
||||
/** Skip value-dependent visibility checks in lightweight mode */
|
||||
|
||||
246
apps/sim/app/workspace/providers/socket-join-controller.test.ts
Normal file
246
apps/sim/app/workspace/providers/socket-join-controller.test.ts
Normal file
@@ -0,0 +1,246 @@
|
||||
/**
 * Unit tests for SocketJoinController — the pure state machine that decides
 * which workflow room the realtime socket should join, leave, or retry.
 * Each test drives the controller through a scenario and asserts on the
 * command lists it emits; the socket provider is what actually executes
 * those commands (emit join/leave, schedule/cancel retry timers).
 */
import { describe, expect, it } from 'vitest'
import {
  SOCKET_JOIN_RETRY_BASE_DELAY_MS,
  SOCKET_JOIN_RETRY_MAX_DELAY_MS,
  SocketJoinController,
} from '@/app/workspace/providers/socket-join-controller'

describe('SocketJoinController', () => {
  it('blocks rejoining a deleted workflow until the desired workflow changes', () => {
    const controller = new SocketJoinController()

    // Connecting alone emits nothing; the first request triggers the join.
    expect(controller.setConnected(true)).toEqual([])
    expect(controller.requestWorkflow('workflow-a')).toEqual([
      { type: 'join', workflowId: 'workflow-a' },
    ])
    expect(controller.handleJoinSuccess('workflow-a')).toMatchObject({
      apply: true,
      ignored: false,
      commands: [],
      workflowId: 'workflow-a',
    })

    expect(controller.handleWorkflowDeleted('workflow-a')).toEqual({
      shouldClearCurrent: true,
      commands: [],
    })
    // The deleted workflow stays blocked; requesting a different one unblocks joining.
    expect(controller.requestWorkflow('workflow-a')).toEqual([])
    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])
  })

  it('joins only the latest desired workflow after rapid A to B to C switching', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')
    controller.handleJoinSuccess('workflow-a')

    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])
    // While a join is pending, a newer request queues instead of joining immediately.
    expect(controller.requestWorkflow('workflow-c')).toEqual([])

    // The stale success for B is ignored and the queued join for C is emitted.
    expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
      apply: false,
      ignored: true,
      workflowId: 'workflow-b',
      commands: [{ type: 'join', workflowId: 'workflow-c' }],
    })
    expect(controller.handleJoinSuccess('workflow-c')).toMatchObject({
      apply: true,
      ignored: false,
      workflowId: 'workflow-c',
      commands: [],
    })
  })

  it('rejoins the original workflow when a stale success lands after switching back', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')
    controller.handleJoinSuccess('workflow-a')

    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])
    // Switching back to A while B's join is in flight emits nothing yet.
    expect(controller.requestWorkflow('workflow-a')).toEqual([])

    // B's success is stale, so the controller immediately re-joins A.
    expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
      apply: false,
      ignored: true,
      workflowId: 'workflow-b',
      commands: [{ type: 'join', workflowId: 'workflow-a' }],
    })
    expect(controller.handleJoinSuccess('workflow-a')).toMatchObject({
      apply: true,
      ignored: false,
      workflowId: 'workflow-a',
      commands: [],
    })
  })

  it('leaves the room when a late join succeeds after navigating away', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')
    controller.handleJoinSuccess('workflow-a')

    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])
    // Navigating away (null target) while B's join is in flight.
    expect(controller.requestWorkflow(null)).toEqual([])

    // The late success for B is ignored and the room is left.
    expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
      apply: false,
      ignored: true,
      workflowId: 'workflow-b',
      commands: [{ type: 'leave' }],
    })
  })

  it('preserves the last joined workflow during retryable switch failures', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    expect(controller.requestWorkflow('workflow-a')).toEqual([
      { type: 'join', workflowId: 'workflow-a' },
    ])
    controller.handleJoinSuccess('workflow-a')

    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])

    const errorResult = controller.handleJoinError({
      workflowId: 'workflow-b',
      retryable: true,
    })

    // A retryable failure schedules a retry but does not clobber the
    // still-joined workflow-a room.
    expect(errorResult.apply).toBe(false)
    expect(errorResult.retryScheduled).toBe(true)
    expect(errorResult.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-b',
        attempt: 1,
        delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
      },
    ])
    expect(controller.getJoinedWorkflowId()).toBe('workflow-a')
    expect(controller.retryJoin('workflow-b')).toEqual([{ type: 'join', workflowId: 'workflow-b' }])
  })

  it('uses capped exponential backoff for retryable join failures', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')

    const first = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
    expect(first.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-a',
        attempt: 1,
        delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
      },
    ])

    controller.retryJoin('workflow-a')
    const second = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
    expect(second.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-a',
        attempt: 2,
        delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS * 2,
      },
    ])

    controller.retryJoin('workflow-a')
    controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
    controller.retryJoin('workflow-a')
    const fourth = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
    expect(fourth.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-a',
        attempt: 4,
        delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS * 8,
      },
    ])

    // Attempt 5 would be base * 16 = 16s; the delay is capped at the max.
    controller.retryJoin('workflow-a')
    const fifth = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
    expect(fifth.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-a',
        attempt: 5,
        delayMs: SOCKET_JOIN_RETRY_MAX_DELAY_MS,
      },
    ])
  })

  it('blocks a permanently failed workflow and leaves the fallback room cleanly', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')
    controller.handleJoinSuccess('workflow-a')

    expect(controller.requestWorkflow('workflow-b')).toEqual([
      { type: 'join', workflowId: 'workflow-b' },
    ])

    const errorResult = controller.handleJoinError({
      workflowId: 'workflow-b',
      retryable: false,
    })

    // A non-retryable failure applies immediately: leave the old room and
    // block further attempts on the failed workflow.
    expect(errorResult.apply).toBe(true)
    expect(errorResult.commands).toEqual([{ type: 'leave' }])
    expect(controller.getJoinedWorkflowId()).toBeNull()
    expect(controller.requestWorkflow('workflow-b')).toEqual([])
    expect(controller.requestWorkflow('workflow-c')).toEqual([
      { type: 'join', workflowId: 'workflow-c' },
    ])
  })

  it('rejoins the desired workflow when the server session is lost', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')
    controller.handleJoinSuccess('workflow-a')

    // forceRejoinWorkflow re-issues the join even though we believe we are
    // already in the room (the server forgot the session).
    expect(controller.forceRejoinWorkflow('workflow-a')).toEqual([
      { type: 'join', workflowId: 'workflow-a' },
    ])
    expect(controller.getJoinedWorkflowId()).toBeNull()
  })

  it('resolves retryable errors without workflowId against the pending join', () => {
    const controller = new SocketJoinController()

    controller.setConnected(true)
    controller.requestWorkflow('workflow-a')

    // The server error carried no workflowId; it is attributed to the join
    // that is currently pending.
    const errorResult = controller.handleJoinError({ retryable: true })

    expect(errorResult.workflowId).toBe('workflow-a')
    expect(errorResult.retryScheduled).toBe(true)
    expect(errorResult.commands).toEqual([
      {
        type: 'schedule-retry',
        workflowId: 'workflow-a',
        attempt: 1,
        delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
      },
    ])
  })
})
|
||||
294
apps/sim/app/workspace/providers/socket-join-controller.ts
Normal file
294
apps/sim/app/workspace/providers/socket-join-controller.ts
Normal file
@@ -0,0 +1,294 @@
|
||||
/** Base delay before retrying a failed workflow join; doubles per attempt. */
export const SOCKET_JOIN_RETRY_BASE_DELAY_MS = 1000
/** Upper bound for the exponential join-retry backoff. */
export const SOCKET_JOIN_RETRY_MAX_DELAY_MS = 10000

/**
 * Side-effect commands the controller asks its caller to execute.
 * The controller itself holds pure state; the socket provider performs the
 * actual socket emits and timer management described by these commands.
 */
export type SocketJoinCommand =
  | { type: 'cancel-retry' }
  | { type: 'join'; workflowId: string }
  | { type: 'leave' }
  | {
      type: 'schedule-retry'
      workflowId: string
      attempt: number
      delayMs: number
    }

/** Outcome of a join acknowledgement from the server. */
interface SocketJoinSuccessResult {
  // True when the success is for the currently desired workflow and the
  // caller should apply the joined state (presence, room id, etc.).
  apply: boolean
  commands: SocketJoinCommand[]
  // Inverse of `apply`: the success was stale and must be ignored.
  ignored: boolean
  workflowId: string
}

/** Outcome of a join error from the server. */
interface SocketJoinErrorResult {
  apply: boolean
  commands: SocketJoinCommand[]
  ignored: boolean
  // True when a 'schedule-retry' command was emitted for this failure.
  retryScheduled: boolean
  // Workflow the error was attributed to; null when it could not be resolved.
  workflowId: string | null
}

/** Outcome of a workflow deletion notification. */
interface SocketJoinDeleteResult {
  commands: SocketJoinCommand[]
  // True when the deleted workflow was the one currently joined, so the
  // caller should clear its visible workflow state.
  shouldClearCurrent: boolean
}

/**
 * Coordinates desired workflow room membership with async socket join results.
 *
 * State tracked:
 * - desiredWorkflowId: what the UI currently wants to be joined to.
 * - joinedWorkflowId: what the server last acknowledged as joined.
 * - pendingJoinWorkflowId: a join emitted but not yet acknowledged.
 * - blockedWorkflowId: a workflow we must not rejoin (deleted or
 *   permanently failed) until a different workflow is requested.
 * - retryWorkflowId/retryAttempt: exponential-backoff bookkeeping.
 *
 * Every public method returns the commands the caller must execute; the
 * emission order within a returned array is significant.
 */
export class SocketJoinController {
  private desiredWorkflowId: string | null = null
  private joinedWorkflowId: string | null = null
  private pendingJoinWorkflowId: string | null = null
  private blockedWorkflowId: string | null = null
  private retryWorkflowId: string | null = null
  private retryAttempt = 0
  private isConnected = false

  /** Workflow the server last acknowledged as joined, or null. */
  getJoinedWorkflowId(): string | null {
    return this.joinedWorkflowId
  }

  /**
   * Records socket connectivity. Disconnecting drops pending/joined state
   * (the server forgets room membership) and cancels any retry; connecting
   * flushes a join for the desired workflow, if any.
   */
  setConnected(connected: boolean): SocketJoinCommand[] {
    this.isConnected = connected
    if (!connected) {
      this.pendingJoinWorkflowId = null
      this.joinedWorkflowId = null
      return this.clearRetryCommands()
    }

    return this.flush()
  }

  /**
   * Sets the desired workflow (null to leave). Cancels a retry aimed at a
   * different workflow and clears the block when targeting a new workflow.
   */
  requestWorkflow(workflowId: string | null): SocketJoinCommand[] {
    const commands = this.takeRetryResetCommands(workflowId)
    this.desiredWorkflowId = workflowId

    if (workflowId !== this.blockedWorkflowId) {
      this.blockedWorkflowId = null
    }

    return [...commands, ...this.flush()]
  }

  /**
   * Like requestWorkflow, but re-issues the join even if we believe we are
   * already in the room — used when the server session was lost while the
   * client state still says "joined".
   */
  forceRejoinWorkflow(workflowId: string | null): SocketJoinCommand[] {
    const commands = this.requestWorkflow(workflowId)
    const alreadyChangingRooms = commands.some(
      (command) => command.type === 'join' || command.type === 'leave'
    )

    // Only force when the normal request produced no room change and a
    // join is actually possible (connected, desired, not pending/blocked).
    if (
      alreadyChangingRooms ||
      !this.isConnected ||
      !this.desiredWorkflowId ||
      this.pendingJoinWorkflowId === this.desiredWorkflowId ||
      this.blockedWorkflowId === this.desiredWorkflowId
    ) {
      return commands
    }

    // Forget the acknowledged join so flush() emits a fresh one.
    this.joinedWorkflowId = null

    return [...commands, ...this.flush()]
  }

  /**
   * Handles a workflow being deleted. If it was the desired workflow it
   * becomes blocked (no rejoin until the desired workflow changes); any
   * pending join or joined state for it is discarded.
   */
  handleWorkflowDeleted(workflowId: string): SocketJoinDeleteResult {
    // Cancel an in-flight retry only when it targets the deleted workflow.
    const commands = this.takeRetryResetCommands(
      this.retryWorkflowId === workflowId ? null : this.retryWorkflowId
    )

    if (this.desiredWorkflowId === workflowId) {
      this.blockedWorkflowId = workflowId
    }

    if (this.pendingJoinWorkflowId === workflowId) {
      this.pendingJoinWorkflowId = null
    }

    const shouldClearCurrent = this.joinedWorkflowId === workflowId
    if (shouldClearCurrent) {
      this.joinedWorkflowId = null
    }

    return {
      commands: [...commands, ...this.flush()],
      shouldClearCurrent,
    }
  }

  /**
   * Handles a join acknowledgement. A success for the current desired
   * workflow applies; a stale one is ignored and flush() emits the
   * follow-up join/leave needed to reach the desired workflow.
   */
  handleJoinSuccess(workflowId: string): SocketJoinSuccessResult {
    const commands = this.clearRetryCommands(workflowId)
    this.pendingJoinWorkflowId = null
    this.joinedWorkflowId = workflowId

    const apply = this.desiredWorkflowId === workflowId && this.blockedWorkflowId !== workflowId

    return {
      apply,
      commands: [...commands, ...this.flush()],
      ignored: !apply,
      workflowId,
    }
  }

  /**
   * Handles a join error. Errors without a workflowId are attributed to the
   * pending join. Stale errors are ignored; retryable ones for the desired
   * workflow schedule a backoff retry; permanent ones block the workflow
   * and leave the currently joined room.
   */
  handleJoinError({
    workflowId,
    retryable,
  }: {
    workflowId?: string | null
    retryable?: boolean
  }): SocketJoinErrorResult {
    const resolvedWorkflowId = workflowId ?? this.pendingJoinWorkflowId

    if (resolvedWorkflowId && this.pendingJoinWorkflowId === resolvedWorkflowId) {
      this.pendingJoinWorkflowId = null
      if (this.joinedWorkflowId === resolvedWorkflowId) {
        this.joinedWorkflowId = null
      }
    }

    const isCurrentDesired =
      Boolean(resolvedWorkflowId) &&
      this.desiredWorkflowId === resolvedWorkflowId &&
      this.blockedWorkflowId !== resolvedWorkflowId

    // Cancel retries aimed at other workflows (or all, if unattributable).
    const baseCommands =
      resolvedWorkflowId !== null
        ? this.takeRetryResetCommands(resolvedWorkflowId)
        : this.clearRetryCommands()

    if (!isCurrentDesired) {
      return {
        apply: false,
        commands: [...baseCommands, ...this.flush()],
        ignored: true,
        retryScheduled: false,
        workflowId: resolvedWorkflowId,
      }
    }

    if (retryable && resolvedWorkflowId) {
      const commands = this.scheduleRetry(resolvedWorkflowId)

      return {
        apply: false,
        commands: [...baseCommands, ...commands],
        ignored: false,
        retryScheduled: true,
        workflowId: resolvedWorkflowId,
      }
    }

    // Permanent failure: block this workflow and leave any joined room.
    const leaveCommands = this.blockWorkflow(resolvedWorkflowId)

    return {
      apply: true,
      commands: [...this.clearRetryCommands(), ...leaveCommands, ...this.flush()],
      ignored: false,
      retryScheduled: false,
      workflowId: resolvedWorkflowId,
    }
  }

  /**
   * Fired by the caller's retry timer. Emits the join only if this workflow
   * is still the scheduled, desired, and unblocked target.
   */
  retryJoin(workflowId: string): SocketJoinCommand[] {
    if (
      this.retryWorkflowId !== workflowId ||
      this.desiredWorkflowId !== workflowId ||
      this.blockedWorkflowId === workflowId
    ) {
      return []
    }

    return this.flush()
  }

  /**
   * Emits the single command (if any) that moves actual membership toward
   * the desired state: a leave when nothing is desired, or a join when the
   * desired workflow is not joined, not pending, and not blocked.
   */
  private flush(): SocketJoinCommand[] {
    // Never act while disconnected or while a join is already in flight.
    if (!this.isConnected || this.pendingJoinWorkflowId) {
      return []
    }

    if (!this.desiredWorkflowId) {
      if (!this.joinedWorkflowId) {
        return []
      }

      this.joinedWorkflowId = null
      return [{ type: 'leave' }]
    }

    if (this.blockedWorkflowId === this.desiredWorkflowId) {
      return []
    }

    if (this.joinedWorkflowId === this.desiredWorkflowId) {
      return []
    }

    this.pendingJoinWorkflowId = this.desiredWorkflowId

    return [{ type: 'join', workflowId: this.desiredWorkflowId }]
  }

  /**
   * Schedules the next retry for `workflowId` with capped exponential
   * backoff (base * 2^(attempt-1), bounded by the max delay).
   */
  private scheduleRetry(workflowId: string): SocketJoinCommand[] {
    // Attempts continue counting only for the same workflow.
    const nextAttempt = this.retryWorkflowId === workflowId ? this.retryAttempt + 1 : 1
    const delayMs = Math.min(
      SOCKET_JOIN_RETRY_BASE_DELAY_MS * 2 ** Math.max(0, nextAttempt - 1),
      SOCKET_JOIN_RETRY_MAX_DELAY_MS
    )

    this.retryWorkflowId = workflowId
    this.retryAttempt = nextAttempt

    return [
      {
        type: 'schedule-retry',
        workflowId,
        attempt: nextAttempt,
        delayMs,
      },
    ]
  }

  /**
   * Cancels the pending retry when the next target differs from the retried
   * workflow (undefined means "cancel unconditionally").
   */
  private takeRetryResetCommands(nextWorkflowId?: string | null): SocketJoinCommand[] {
    const shouldClearRetry =
      this.retryWorkflowId !== null &&
      (nextWorkflowId === undefined || this.retryWorkflowId !== nextWorkflowId)

    if (!shouldClearRetry) {
      return []
    }

    this.retryWorkflowId = null
    this.retryAttempt = 0

    return [{ type: 'cancel-retry' }]
  }

  /**
   * Cancels the pending retry when it matches `workflowId` (or any retry
   * when `workflowId` is omitted). Note the matching is the inverse of
   * takeRetryResetCommands.
   */
  private clearRetryCommands(workflowId?: string): SocketJoinCommand[] {
    const shouldClear =
      this.retryWorkflowId !== null &&
      (workflowId === undefined || this.retryWorkflowId === workflowId)

    if (!shouldClear) {
      return []
    }

    this.retryWorkflowId = null
    this.retryAttempt = 0

    return [{ type: 'cancel-retry' }]
  }

  /**
   * Marks `workflowId` as blocked and emits a leave if any room is
   * currently joined.
   */
  private blockWorkflow(workflowId: string | null): SocketJoinCommand[] {
    if (workflowId) {
      this.blockedWorkflowId = workflowId
    }

    if (!this.joinedWorkflowId) {
      return []
    }

    this.joinedWorkflowId = null

    return [{ type: 'leave' }]
  }
}
|
||||
54
apps/sim/app/workspace/providers/socket-join-target.test.ts
Normal file
54
apps/sim/app/workspace/providers/socket-join-target.test.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
 * Unit tests for the pure helpers that decide which workflow the socket
 * provider should target: the route's workflow param vs an explicitly
 * requested (embedded) workflow.
 */
import { describe, expect, it } from 'vitest'
import {
  isSocketWorkflowVisible,
  resolveSocketWorkflowTarget,
} from '@/app/workspace/providers/socket-join-target'

describe('socket join target helpers', () => {
  it('uses the route workflow when there is no explicit workflow target', () => {
    expect(
      resolveSocketWorkflowTarget({
        routeWorkflowId: 'workflow-route',
        explicitWorkflowId: null,
      })
    ).toBe('workflow-route')
  })

  it('prefers the explicit workflow target for embedded workflows', () => {
    expect(
      resolveSocketWorkflowTarget({
        routeWorkflowId: null,
        explicitWorkflowId: 'workflow-embedded',
      })
    ).toBe('workflow-embedded')
  })

  it('lets an explicit workflow override the route workflow', () => {
    // Both are set: the explicit (embedded) workflow wins.
    expect(
      resolveSocketWorkflowTarget({
        routeWorkflowId: 'workflow-route',
        explicitWorkflowId: 'workflow-embedded',
      })
    ).toBe('workflow-embedded')
  })

  it('treats the explicit embedded workflow as visible', () => {
    expect(
      isSocketWorkflowVisible({
        workflowId: 'workflow-embedded',
        routeWorkflowId: null,
        explicitWorkflowId: 'workflow-embedded',
      })
    ).toBe(true)
  })

  it('rejects mismatched workflow visibility', () => {
    expect(
      isSocketWorkflowVisible({
        workflowId: 'workflow-other',
        routeWorkflowId: 'workflow-route',
        explicitWorkflowId: null,
      })
    ).toBe(false)
  })
})
|
||||
28
apps/sim/app/workspace/providers/socket-join-target.ts
Normal file
28
apps/sim/app/workspace/providers/socket-join-target.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
interface ResolveSocketWorkflowTargetArgs {
|
||||
routeWorkflowId?: string | null
|
||||
explicitWorkflowId?: string | null
|
||||
}
|
||||
|
||||
export function resolveSocketWorkflowTarget({
|
||||
routeWorkflowId,
|
||||
explicitWorkflowId,
|
||||
}: ResolveSocketWorkflowTargetArgs): string | null {
|
||||
return explicitWorkflowId ?? routeWorkflowId ?? null
|
||||
}
|
||||
|
||||
interface IsSocketWorkflowVisibleArgs extends ResolveSocketWorkflowTargetArgs {
|
||||
workflowId?: string | null
|
||||
}
|
||||
|
||||
export function isSocketWorkflowVisible({
|
||||
workflowId,
|
||||
routeWorkflowId,
|
||||
explicitWorkflowId,
|
||||
}: IsSocketWorkflowVisibleArgs): boolean {
|
||||
const targetWorkflowId = workflowId ?? null
|
||||
if (!targetWorkflowId) {
|
||||
return false
|
||||
}
|
||||
|
||||
return targetWorkflowId === resolveSocketWorkflowTarget({ routeWorkflowId, explicitWorkflowId })
|
||||
}
|
||||
@@ -15,6 +15,14 @@ import { useParams } from 'next/navigation'
|
||||
import type { Socket } from 'socket.io-client'
|
||||
import { getEnv } from '@/lib/core/config/env'
|
||||
import { generateId } from '@/lib/core/utils/uuid'
|
||||
import {
|
||||
type SocketJoinCommand,
|
||||
SocketJoinController,
|
||||
} from '@/app/workspace/providers/socket-join-controller'
|
||||
import {
|
||||
isSocketWorkflowVisible,
|
||||
resolveSocketWorkflowTarget,
|
||||
} from '@/app/workspace/providers/socket-join-target'
|
||||
import { useOperationQueueStore } from '@/stores/operation-queue/store'
|
||||
import { useWorkflowRegistry as useWorkflowRegistryStore } from '@/stores/workflows/registry/store'
|
||||
|
||||
@@ -53,6 +61,7 @@ interface SocketContextType {
|
||||
isConnected: boolean
|
||||
isConnecting: boolean
|
||||
isReconnecting: boolean
|
||||
isRetryingWorkflowJoin: boolean
|
||||
authFailed: boolean
|
||||
currentWorkflowId: string | null
|
||||
currentSocketId: string | null
|
||||
@@ -61,6 +70,7 @@ interface SocketContextType {
|
||||
leaveWorkflow: () => void
|
||||
retryConnection: () => void
|
||||
emitWorkflowOperation: (
|
||||
workflowId: string,
|
||||
operation: string,
|
||||
target: string,
|
||||
payload: any,
|
||||
@@ -101,6 +111,7 @@ const SocketContext = createContext<SocketContextType>({
|
||||
isConnected: false,
|
||||
isConnecting: false,
|
||||
isReconnecting: false,
|
||||
isRetryingWorkflowJoin: false,
|
||||
authFailed: false,
|
||||
currentWorkflowId: null,
|
||||
currentSocketId: null,
|
||||
@@ -137,18 +148,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
const [isConnected, setIsConnected] = useState(false)
|
||||
const [isConnecting, setIsConnecting] = useState(false)
|
||||
const [isReconnecting, setIsReconnecting] = useState(false)
|
||||
const [isRetryingWorkflowJoin, setIsRetryingWorkflowJoin] = useState(false)
|
||||
const [currentWorkflowId, setCurrentWorkflowId] = useState<string | null>(null)
|
||||
const [currentSocketId, setCurrentSocketId] = useState<string | null>(null)
|
||||
const [presenceUsers, setPresenceUsers] = useState<PresenceUser[]>([])
|
||||
const [authFailed, setAuthFailed] = useState(false)
|
||||
const [explicitWorkflowId, setExplicitWorkflowId] = useState<string | null>(null)
|
||||
const initializedRef = useRef(false)
|
||||
const socketRef = useRef<Socket | null>(null)
|
||||
const triggerOfflineMode = useOperationQueueStore((state) => state.triggerOfflineMode)
|
||||
const currentWorkflowIdRef = useRef<string | null>(null)
|
||||
const explicitWorkflowIdRef = useRef<string | null>(explicitWorkflowId)
|
||||
const joinControllerRef = useRef(new SocketJoinController())
|
||||
const joinRetryTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
|
||||
|
||||
const params = useParams()
|
||||
const urlWorkflowId = params?.workflowId as string | undefined
|
||||
const urlWorkflowIdRef = useRef(urlWorkflowId)
|
||||
urlWorkflowIdRef.current = urlWorkflowId
|
||||
explicitWorkflowIdRef.current = explicitWorkflowId
|
||||
|
||||
const eventHandlers = useRef<{
|
||||
workflowOperation?: (data: any) => void
|
||||
@@ -164,9 +181,124 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
}>({})
|
||||
|
||||
const positionUpdateTimeouts = useRef<Map<string, number>>(new Map())
|
||||
const isRejoiningRef = useRef<boolean>(false)
|
||||
const pendingPositionUpdates = useRef<Map<string, any>>(new Map())
|
||||
|
||||
const setVisibleWorkflowId = useCallback((workflowId: string | null) => {
|
||||
currentWorkflowIdRef.current = workflowId
|
||||
setCurrentWorkflowId(workflowId)
|
||||
}, [])
|
||||
|
||||
const getRequestedWorkflowId = useCallback(() => {
|
||||
return resolveSocketWorkflowTarget({
|
||||
routeWorkflowId: urlWorkflowIdRef.current ?? null,
|
||||
explicitWorkflowId: explicitWorkflowIdRef.current,
|
||||
})
|
||||
}, [])
|
||||
|
||||
const isWorkflowVisible = useCallback((workflowId?: string | null) => {
|
||||
return isSocketWorkflowVisible({
|
||||
workflowId: workflowId ?? currentWorkflowIdRef.current,
|
||||
routeWorkflowId: urlWorkflowIdRef.current ?? null,
|
||||
explicitWorkflowId: explicitWorkflowIdRef.current,
|
||||
})
|
||||
}, [])
|
||||
|
||||
const clearJoinRetryTimeout = useCallback(() => {
|
||||
if (joinRetryTimeoutRef.current !== null) {
|
||||
clearTimeout(joinRetryTimeoutRef.current)
|
||||
joinRetryTimeoutRef.current = null
|
||||
}
|
||||
}, [])
|
||||
|
||||
const resetVisibleWorkflowState = useCallback((workflowId?: string | null) => {
|
||||
if (workflowId) {
|
||||
useOperationQueueStore.getState().cancelOperationsForWorkflow(workflowId)
|
||||
}
|
||||
|
||||
positionUpdateTimeouts.current.forEach((timeoutId) => {
|
||||
clearTimeout(timeoutId)
|
||||
})
|
||||
positionUpdateTimeouts.current.clear()
|
||||
pendingPositionUpdates.current.clear()
|
||||
}, [])
|
||||
|
||||
const clearJoinedWorkflowState = useCallback(
|
||||
(cancelOperations = false) => {
|
||||
const previousWorkflowId = currentWorkflowIdRef.current
|
||||
resetVisibleWorkflowState(cancelOperations ? previousWorkflowId : null)
|
||||
setPresenceUsers([])
|
||||
setVisibleWorkflowId(null)
|
||||
},
|
||||
[resetVisibleWorkflowState, setVisibleWorkflowId]
|
||||
)
|
||||
|
||||
const executeJoinCommands = useCallback(
|
||||
(commands: SocketJoinCommand[]) => {
|
||||
const socketInstance = socketRef.current
|
||||
|
||||
commands.forEach((command) => {
|
||||
if (command.type === 'cancel-retry') {
|
||||
clearJoinRetryTimeout()
|
||||
setIsRetryingWorkflowJoin(false)
|
||||
return
|
||||
}
|
||||
|
||||
if (command.type === 'leave') {
|
||||
setIsRetryingWorkflowJoin(false)
|
||||
clearJoinedWorkflowState(true)
|
||||
|
||||
if (!socketInstance) {
|
||||
return
|
||||
}
|
||||
|
||||
logger.info('Leaving current workflow room')
|
||||
socketInstance.emit('leave-workflow')
|
||||
return
|
||||
}
|
||||
|
||||
if (command.type === 'join') {
|
||||
const isWorkflowSwitch =
|
||||
currentWorkflowIdRef.current !== null &&
|
||||
currentWorkflowIdRef.current !== command.workflowId
|
||||
|
||||
if (isWorkflowSwitch) {
|
||||
resetVisibleWorkflowState(currentWorkflowIdRef.current)
|
||||
} else {
|
||||
resetVisibleWorkflowState()
|
||||
}
|
||||
|
||||
if (!socketInstance) {
|
||||
logger.warn('Cannot join workflow room: socket not available', {
|
||||
workflowId: command.workflowId,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
logger.info(`Joining workflow room: ${command.workflowId}`)
|
||||
socketInstance.emit('join-workflow', {
|
||||
workflowId: command.workflowId,
|
||||
tabSessionId: getTabSessionId(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
clearJoinRetryTimeout()
|
||||
setIsRetryingWorkflowJoin(true)
|
||||
joinRetryTimeoutRef.current = setTimeout(() => {
|
||||
joinRetryTimeoutRef.current = null
|
||||
executeJoinCommands(joinControllerRef.current.retryJoin(command.workflowId))
|
||||
}, command.delayMs)
|
||||
|
||||
logger.warn('Realtime unavailable while joining workflow, scheduling retry', {
|
||||
workflowId: command.workflowId,
|
||||
attempt: command.attempt,
|
||||
delayMs: command.delayMs,
|
||||
})
|
||||
})
|
||||
},
|
||||
[clearJoinRetryTimeout, clearJoinedWorkflowState, resetVisibleWorkflowState]
|
||||
)
|
||||
|
||||
const generateSocketToken = async (): Promise<string> => {
|
||||
const res = await fetch('/api/auth/socket-token', {
|
||||
method: 'POST',
|
||||
@@ -244,17 +376,17 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
connected: socketInstance.connected,
|
||||
transport: socketInstance.io.engine?.transport?.name,
|
||||
})
|
||||
// Note: join-workflow is handled by the useEffect watching isConnected
|
||||
executeJoinCommands(joinControllerRef.current.setConnected(true))
|
||||
})
|
||||
|
||||
socketInstance.on('disconnect', (reason) => {
|
||||
setIsConnected(false)
|
||||
setIsConnecting(false)
|
||||
setIsRetryingWorkflowJoin(false)
|
||||
setCurrentSocketId(null)
|
||||
setCurrentWorkflowId(null)
|
||||
setPresenceUsers([])
|
||||
executeJoinCommands(joinControllerRef.current.setConnected(false))
|
||||
clearJoinedWorkflowState(false)
|
||||
|
||||
// socket.active indicates if auto-reconnect will happen
|
||||
if (socketInstance.active) {
|
||||
setIsReconnecting(true)
|
||||
logger.info('Socket disconnected, will auto-reconnect', { reason })
|
||||
@@ -317,6 +449,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
})
|
||||
|
||||
socketInstance.on('presence-update', (users: PresenceUser[]) => {
|
||||
if (!isWorkflowVisible()) {
|
||||
return
|
||||
}
|
||||
|
||||
setPresenceUsers((prev) => {
|
||||
const prevMap = new Map(prev.map((u) => [u.socketId, u]))
|
||||
|
||||
@@ -334,27 +470,52 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
})
|
||||
})
|
||||
|
||||
// Handle join workflow success - confirms room membership with presence list
|
||||
socketInstance.on('join-workflow-success', ({ workflowId, presenceUsers }) => {
|
||||
isRejoiningRef.current = false
|
||||
// Ignore stale success responses from previous navigation
|
||||
if (urlWorkflowIdRef.current && workflowId !== urlWorkflowIdRef.current) {
|
||||
const result = joinControllerRef.current.handleJoinSuccess(workflowId)
|
||||
|
||||
if (result.ignored) {
|
||||
logger.debug(`Ignoring stale join-workflow-success for ${workflowId}`)
|
||||
return
|
||||
} else {
|
||||
setIsRetryingWorkflowJoin(false)
|
||||
setVisibleWorkflowId(workflowId)
|
||||
setPresenceUsers(presenceUsers || [])
|
||||
logger.info(`Successfully joined workflow room: ${workflowId}`, {
|
||||
presenceCount: presenceUsers?.length || 0,
|
||||
})
|
||||
}
|
||||
setCurrentWorkflowId(workflowId)
|
||||
setPresenceUsers(presenceUsers || [])
|
||||
logger.info(`Successfully joined workflow room: ${workflowId}`, {
|
||||
presenceCount: presenceUsers?.length || 0,
|
||||
})
|
||||
|
||||
executeJoinCommands(result.commands)
|
||||
})
|
||||
|
||||
socketInstance.on('join-workflow-error', ({ error, code }) => {
|
||||
isRejoiningRef.current = false
|
||||
logger.error('Failed to join workflow:', { error, code })
|
||||
if (code === 'ROOM_MANAGER_UNAVAILABLE') {
|
||||
triggerOfflineMode()
|
||||
socketInstance.on('join-workflow-error', ({ workflowId, error, code, retryable }) => {
|
||||
const result = joinControllerRef.current.handleJoinError({ workflowId, retryable })
|
||||
|
||||
if (result.ignored) {
|
||||
logger.debug('Ignoring stale join-workflow-error', {
|
||||
workflowId: result.workflowId,
|
||||
error,
|
||||
code,
|
||||
})
|
||||
} else if (result.retryScheduled) {
|
||||
logger.warn('Retryable workflow join failure, waiting to retry', {
|
||||
workflowId: result.workflowId,
|
||||
error,
|
||||
code,
|
||||
})
|
||||
} else if (result.apply) {
|
||||
setIsRetryingWorkflowJoin(false)
|
||||
if (result.workflowId) {
|
||||
useOperationQueueStore.getState().cancelOperationsForWorkflow(result.workflowId)
|
||||
}
|
||||
|
||||
logger.error('Failed to join workflow:', {
|
||||
workflowId: result.workflowId,
|
||||
error,
|
||||
code,
|
||||
})
|
||||
}
|
||||
|
||||
executeJoinCommands(result.commands)
|
||||
})
|
||||
|
||||
socketInstance.on('workflow-operation', (data) => {
|
||||
@@ -371,13 +532,11 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
|
||||
socketInstance.on('workflow-deleted', (data) => {
|
||||
logger.warn(`Workflow ${data.workflowId} has been deleted`)
|
||||
setCurrentWorkflowId((current) => {
|
||||
if (current === data.workflowId) {
|
||||
setPresenceUsers([])
|
||||
return null
|
||||
}
|
||||
return current
|
||||
})
|
||||
const result = joinControllerRef.current.handleWorkflowDeleted(data.workflowId)
|
||||
if (result.shouldClearCurrent) {
|
||||
clearJoinedWorkflowState(true)
|
||||
}
|
||||
executeJoinCommands(result.commands)
|
||||
eventHandlers.current.workflowDeleted?.(data)
|
||||
})
|
||||
|
||||
@@ -457,6 +616,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
})
|
||||
|
||||
socketInstance.on('cursor-update', (data) => {
|
||||
if (!isWorkflowVisible()) {
|
||||
return
|
||||
}
|
||||
|
||||
setPresenceUsers((prev) => {
|
||||
const existingIndex = prev.findIndex((user) => user.socketId === data.socketId)
|
||||
if (existingIndex === -1) {
|
||||
@@ -471,6 +634,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
})
|
||||
|
||||
socketInstance.on('selection-update', (data) => {
|
||||
if (!isWorkflowVisible()) {
|
||||
return
|
||||
}
|
||||
|
||||
setPresenceUsers((prev) => {
|
||||
const existingIndex = prev.findIndex((user) => user.socketId === data.socketId)
|
||||
if (existingIndex === -1) {
|
||||
@@ -498,15 +665,11 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
logger.warn('Operation forbidden:', error)
|
||||
|
||||
if (error?.type === 'SESSION_ERROR') {
|
||||
const workflowId = urlWorkflowIdRef.current
|
||||
const workflowId = getRequestedWorkflowId()
|
||||
|
||||
if (workflowId && !isRejoiningRef.current) {
|
||||
isRejoiningRef.current = true
|
||||
if (workflowId) {
|
||||
logger.info(`Session expired, rejoining workflow: ${workflowId}`)
|
||||
socketInstance.emit('join-workflow', {
|
||||
workflowId,
|
||||
tabSessionId: getTabSessionId(),
|
||||
})
|
||||
executeJoinCommands(joinControllerRef.current.forceRejoinWorkflow(workflowId))
|
||||
}
|
||||
}
|
||||
})
|
||||
@@ -514,6 +677,19 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
socketInstance.on('workflow-state', async (workflowData) => {
|
||||
logger.info('Received workflow state from server')
|
||||
|
||||
if (
|
||||
!workflowData?.id ||
|
||||
currentWorkflowIdRef.current !== workflowData.id ||
|
||||
!isWorkflowVisible()
|
||||
) {
|
||||
logger.info('Ignoring workflow state for inactive room', {
|
||||
workflowId: workflowData?.id,
|
||||
currentWorkflowId: currentWorkflowIdRef.current,
|
||||
desiredWorkflowId: urlWorkflowIdRef.current,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
if (workflowData?.state) {
|
||||
try {
|
||||
await rehydrateWorkflowStores(workflowData.id, workflowData.state)
|
||||
@@ -534,6 +710,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
initializeSocket()
|
||||
|
||||
return () => {
|
||||
clearJoinRetryTimeout()
|
||||
positionUpdateTimeouts.current.forEach((timeoutId) => {
|
||||
clearTimeout(timeoutId)
|
||||
})
|
||||
@@ -552,77 +729,34 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
const hydrationPhase = useWorkflowRegistryStore((s) => s.hydration.phase)
|
||||
|
||||
useEffect(() => {
|
||||
if (!socket || !isConnected || !urlWorkflowId) return
|
||||
|
||||
if (hydrationPhase === 'creating') return
|
||||
|
||||
// Skip if already in the correct room
|
||||
if (currentWorkflowId === urlWorkflowId) return
|
||||
|
||||
logger.info(
|
||||
`URL workflow changed from ${currentWorkflowId} to ${urlWorkflowId}, switching rooms`
|
||||
)
|
||||
|
||||
if (currentWorkflowId) {
|
||||
logger.info(`Leaving current workflow ${currentWorkflowId} before joining ${urlWorkflowId}`)
|
||||
socket.emit('leave-workflow')
|
||||
if (hydrationPhase === 'creating') {
|
||||
return
|
||||
}
|
||||
|
||||
logger.info(`Joining workflow room: ${urlWorkflowId}`)
|
||||
socket.emit('join-workflow', {
|
||||
workflowId: urlWorkflowId,
|
||||
tabSessionId: getTabSessionId(),
|
||||
})
|
||||
}, [socket, isConnected, urlWorkflowId, currentWorkflowId, hydrationPhase])
|
||||
executeJoinCommands(joinControllerRef.current.requestWorkflow(getRequestedWorkflowId()))
|
||||
}, [
|
||||
explicitWorkflowId,
|
||||
getRequestedWorkflowId,
|
||||
hydrationPhase,
|
||||
urlWorkflowId,
|
||||
executeJoinCommands,
|
||||
])
|
||||
|
||||
const joinWorkflow = useCallback(
|
||||
(workflowId: string) => {
|
||||
if (!socket || !user?.id) {
|
||||
logger.warn('Cannot join workflow: socket or user not available')
|
||||
if (!user?.id) {
|
||||
logger.warn('Cannot join workflow: user not available')
|
||||
return
|
||||
}
|
||||
|
||||
if (currentWorkflowId === workflowId) {
|
||||
logger.info(`Already in workflow ${workflowId}, skipping join`)
|
||||
return
|
||||
}
|
||||
|
||||
if (currentWorkflowId) {
|
||||
logger.info(`Leaving current workflow ${currentWorkflowId} before joining ${workflowId}`)
|
||||
socket.emit('leave-workflow')
|
||||
}
|
||||
|
||||
logger.info(`Joining workflow: ${workflowId}`)
|
||||
socket.emit('join-workflow', {
|
||||
workflowId,
|
||||
tabSessionId: getTabSessionId(),
|
||||
})
|
||||
// currentWorkflowId will be set by join-workflow-success handler
|
||||
setExplicitWorkflowId(workflowId)
|
||||
},
|
||||
[socket, user, currentWorkflowId]
|
||||
[user]
|
||||
)
|
||||
|
||||
const leaveWorkflow = useCallback(() => {
|
||||
if (socket && currentWorkflowId) {
|
||||
logger.info(`Leaving workflow: ${currentWorkflowId}`)
|
||||
import('@/stores/operation-queue/store')
|
||||
.then(({ useOperationQueueStore }) => {
|
||||
useOperationQueueStore.getState().cancelOperationsForWorkflow(currentWorkflowId)
|
||||
})
|
||||
.catch((error) => {
|
||||
logger.warn('Failed to cancel operations for workflow:', error)
|
||||
})
|
||||
socket.emit('leave-workflow')
|
||||
setCurrentWorkflowId(null)
|
||||
setPresenceUsers([])
|
||||
|
||||
positionUpdateTimeouts.current.forEach((timeoutId) => {
|
||||
clearTimeout(timeoutId)
|
||||
})
|
||||
positionUpdateTimeouts.current.clear()
|
||||
pendingPositionUpdates.current.clear()
|
||||
}
|
||||
}, [socket, currentWorkflowId])
|
||||
setExplicitWorkflowId(null)
|
||||
}, [])
|
||||
|
||||
/**
|
||||
* Retry socket connection after auth failure.
|
||||
@@ -640,8 +774,20 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
}, [authFailed])
|
||||
|
||||
const emitWorkflowOperation = useCallback(
|
||||
(operation: string, target: string, payload: any, operationId?: string) => {
|
||||
if (!socket || !currentWorkflowId) {
|
||||
(workflowId: string, operation: string, target: string, payload: any, operationId?: string) => {
|
||||
if (
|
||||
!socket ||
|
||||
!currentWorkflowId ||
|
||||
workflowId !== currentWorkflowId ||
|
||||
!isWorkflowVisible(workflowId)
|
||||
) {
|
||||
logger.debug('Skipping workflow operation emit for inactive room', {
|
||||
workflowId,
|
||||
currentWorkflowId,
|
||||
desiredWorkflowId: urlWorkflowIdRef.current,
|
||||
operation,
|
||||
target,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -653,7 +799,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
|
||||
if (commit) {
|
||||
socket.emit('workflow-operation', {
|
||||
workflowId: currentWorkflowId,
|
||||
workflowId,
|
||||
operation,
|
||||
target,
|
||||
payload,
|
||||
@@ -670,7 +816,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
}
|
||||
|
||||
pendingPositionUpdates.current.set(blockId, {
|
||||
workflowId: currentWorkflowId,
|
||||
workflowId,
|
||||
operation,
|
||||
target,
|
||||
payload,
|
||||
@@ -692,7 +838,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
}
|
||||
} else {
|
||||
socket.emit('workflow-operation', {
|
||||
workflowId: currentWorkflowId,
|
||||
workflowId,
|
||||
operation,
|
||||
target,
|
||||
payload,
|
||||
@@ -701,7 +847,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
})
|
||||
}
|
||||
},
|
||||
[socket, currentWorkflowId]
|
||||
[socket, currentWorkflowId, isWorkflowVisible]
|
||||
)
|
||||
|
||||
const emitSubblockUpdate = useCallback(
|
||||
@@ -712,8 +858,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
operationId: string | undefined,
|
||||
workflowId: string
|
||||
) => {
|
||||
if (!socket) {
|
||||
logger.warn('Cannot emit subblock update: no socket connection', { workflowId, blockId })
|
||||
if (
|
||||
!socket ||
|
||||
workflowId !== currentWorkflowIdRef.current ||
|
||||
!isWorkflowVisible(workflowId)
|
||||
) {
|
||||
const reason = !socket
|
||||
? 'socket_unavailable'
|
||||
: workflowId !== currentWorkflowIdRef.current
|
||||
? 'joined_workflow_mismatch'
|
||||
: 'workflow_not_visible'
|
||||
|
||||
logger.debug('Skipping subblock update emit', {
|
||||
workflowId,
|
||||
blockId,
|
||||
subblockId,
|
||||
reason,
|
||||
currentWorkflowId: currentWorkflowIdRef.current,
|
||||
})
|
||||
return
|
||||
}
|
||||
socket.emit('subblock-update', {
|
||||
@@ -736,8 +898,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
operationId: string | undefined,
|
||||
workflowId: string
|
||||
) => {
|
||||
if (!socket) {
|
||||
logger.warn('Cannot emit variable update: no socket connection', { workflowId, variableId })
|
||||
if (
|
||||
!socket ||
|
||||
workflowId !== currentWorkflowIdRef.current ||
|
||||
!isWorkflowVisible(workflowId)
|
||||
) {
|
||||
const reason = !socket
|
||||
? 'socket_unavailable'
|
||||
: workflowId !== currentWorkflowIdRef.current
|
||||
? 'joined_workflow_mismatch'
|
||||
: 'workflow_not_visible'
|
||||
|
||||
logger.debug('Skipping variable update emit', {
|
||||
workflowId,
|
||||
variableId,
|
||||
field,
|
||||
reason,
|
||||
currentWorkflowId: currentWorkflowIdRef.current,
|
||||
})
|
||||
return
|
||||
}
|
||||
socket.emit('variable-update', {
|
||||
@@ -755,7 +933,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
const lastCursorEmit = useRef(0)
|
||||
const emitCursorUpdate = useCallback(
|
||||
(cursor: { x: number; y: number } | null) => {
|
||||
if (!socket || !currentWorkflowId) {
|
||||
if (!socket || !currentWorkflowId || !isWorkflowVisible(currentWorkflowId)) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -772,16 +950,16 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
lastCursorEmit.current = now
|
||||
}
|
||||
},
|
||||
[socket, currentWorkflowId]
|
||||
[socket, currentWorkflowId, isWorkflowVisible]
|
||||
)
|
||||
|
||||
const emitSelectionUpdate = useCallback(
|
||||
(selection: { type: 'block' | 'edge' | 'none'; id?: string }) => {
|
||||
if (socket && currentWorkflowId) {
|
||||
if (socket && currentWorkflowId && isWorkflowVisible(currentWorkflowId)) {
|
||||
socket.emit('selection-update', { selection })
|
||||
}
|
||||
},
|
||||
[socket, currentWorkflowId]
|
||||
[socket, currentWorkflowId, isWorkflowVisible]
|
||||
)
|
||||
|
||||
const onWorkflowOperation = useCallback((handler: (data: any) => void) => {
|
||||
@@ -830,6 +1008,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
isConnected,
|
||||
isConnecting,
|
||||
isReconnecting,
|
||||
isRetryingWorkflowJoin,
|
||||
authFailed,
|
||||
currentWorkflowId,
|
||||
currentSocketId,
|
||||
@@ -858,6 +1037,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
|
||||
isConnected,
|
||||
isConnecting,
|
||||
isReconnecting,
|
||||
isRetryingWorkflowJoin,
|
||||
authFailed,
|
||||
currentWorkflowId,
|
||||
currentSocketId,
|
||||
|
||||
@@ -423,7 +423,7 @@ describe.concurrent('Blocks Module', () => {
|
||||
})
|
||||
|
||||
it('should have valid mode values for subBlocks', () => {
|
||||
const validModes = ['basic', 'advanced', 'both', 'trigger', undefined]
|
||||
const validModes = ['basic', 'advanced', 'both', 'trigger', 'trigger-advanced', undefined]
|
||||
const blocks = getAllBlocks()
|
||||
for (const block of blocks) {
|
||||
for (const subBlock of block.subBlocks) {
|
||||
@@ -669,7 +669,9 @@ describe.concurrent('Blocks Module', () => {
|
||||
for (const block of blocks) {
|
||||
// Exclude trigger-mode subBlocks — they operate in a separate rendering context
|
||||
// and their IDs don't participate in canonical param resolution
|
||||
const nonTriggerSubBlocks = block.subBlocks.filter((sb) => sb.mode !== 'trigger')
|
||||
const nonTriggerSubBlocks = block.subBlocks.filter(
|
||||
(sb) => sb.mode !== 'trigger' && sb.mode !== 'trigger-advanced'
|
||||
)
|
||||
const allSubBlockIds = new Set(nonTriggerSubBlocks.map((sb) => sb.id))
|
||||
const canonicalParamIds = new Set(
|
||||
nonTriggerSubBlocks.filter((sb) => sb.canonicalParamId).map((sb) => sb.canonicalParamId)
|
||||
@@ -795,6 +797,8 @@ describe.concurrent('Blocks Module', () => {
|
||||
>()
|
||||
|
||||
for (const subBlock of block.subBlocks) {
|
||||
// Skip trigger-mode subBlocks — they operate in a separate rendering context
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
|
||||
if (subBlock.canonicalParamId) {
|
||||
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
|
||||
canonicalGroups.set(subBlock.canonicalParamId, [])
|
||||
@@ -861,7 +865,7 @@ describe.concurrent('Blocks Module', () => {
|
||||
continue
|
||||
}
|
||||
// Skip trigger-mode subBlocks — they operate in a separate rendering context
|
||||
if (subBlock.mode === 'trigger') {
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') {
|
||||
continue
|
||||
}
|
||||
const conditionKey = serializeCondition(subBlock.condition)
|
||||
@@ -895,8 +899,11 @@ describe.concurrent('Blocks Module', () => {
|
||||
if (!block.inputs) continue
|
||||
|
||||
// Find all canonical groups (subBlocks with canonicalParamId)
|
||||
// Skip trigger-mode subBlocks — they operate in a separate rendering context
|
||||
// and are not wired to the block's inputs section
|
||||
const canonicalGroups = new Map<string, string[]>()
|
||||
for (const subBlock of block.subBlocks) {
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
|
||||
if (subBlock.canonicalParamId) {
|
||||
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
|
||||
canonicalGroups.set(subBlock.canonicalParamId, [])
|
||||
@@ -948,8 +955,10 @@ describe.concurrent('Blocks Module', () => {
|
||||
.replace(/\/\*[\s\S]*?\*\//g, '') // Remove multi-line comments
|
||||
|
||||
// Find all canonical groups (subBlocks with canonicalParamId)
|
||||
// Skip trigger-mode subBlocks — they are not passed through params function
|
||||
const canonicalGroups = new Map<string, string[]>()
|
||||
for (const subBlock of block.subBlocks) {
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
|
||||
if (subBlock.canonicalParamId) {
|
||||
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
|
||||
canonicalGroups.set(subBlock.canonicalParamId, [])
|
||||
@@ -995,8 +1004,11 @@ describe.concurrent('Blocks Module', () => {
|
||||
|
||||
for (const block of blocks) {
|
||||
// Find all canonical groups (subBlocks with canonicalParamId)
|
||||
// Skip trigger-mode subBlocks — they operate in a separate rendering context
|
||||
// and may have different required semantics from their block counterparts
|
||||
const canonicalGroups = new Map<string, typeof block.subBlocks>()
|
||||
for (const subBlock of block.subBlocks) {
|
||||
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
|
||||
if (subBlock.canonicalParamId) {
|
||||
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
|
||||
canonicalGroups.set(subBlock.canonicalParamId, [])
|
||||
|
||||
@@ -11,7 +11,7 @@ export const ApifyBlock: BlockConfig<RunActorResult> = {
|
||||
'Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.',
|
||||
docsLink: 'https://docs.sim.ai/tools/apify',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Automation,
|
||||
integrationType: IntegrationType.Search,
|
||||
tags: ['web-scraping', 'automation', 'data-analytics'],
|
||||
bgColor: '#E0E0E0',
|
||||
icon: ApifyIcon,
|
||||
|
||||
@@ -12,7 +12,7 @@ export const ApolloBlock: BlockConfig<ApolloResponse> = {
|
||||
'Integrates Apollo.io into the workflow. Search for people and companies, enrich contact data, manage your CRM contacts and accounts, add contacts to sequences, and create tasks.',
|
||||
docsLink: 'https://docs.sim.ai/tools/apollo',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'sales-engagement'],
|
||||
bgColor: '#EBF212',
|
||||
icon: ApolloIcon,
|
||||
|
||||
@@ -11,7 +11,7 @@ export const BrandfetchBlock: BlockConfig<BrandfetchGetBrandResponse | Brandfetc
|
||||
'Integrate Brandfetch into your workflow. Retrieve brand logos, colors, fonts, and company data by domain, ticker, or name search.',
|
||||
docsLink: 'https://docs.sim.ai/tools/brandfetch',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'marketing'],
|
||||
bgColor: '#000000',
|
||||
icon: BrandfetchIcon,
|
||||
|
||||
@@ -11,7 +11,7 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
|
||||
'Integrate Browser Use into the workflow. Can navigate the web and perform actions as if a real user was interacting with the browser.',
|
||||
docsLink: 'https://docs.sim.ai/tools/browser_use',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Automation,
|
||||
integrationType: IntegrationType.AI,
|
||||
tags: ['web-scraping', 'automation', 'agentic'],
|
||||
bgColor: '#181C1E',
|
||||
icon: BrowserUseIcon,
|
||||
|
||||
@@ -10,7 +10,7 @@ export const ClayBlock: BlockConfig<ClayPopulateResponse> = {
|
||||
longDescription: 'Integrate Clay into the workflow. Can populate a table with data.',
|
||||
docsLink: 'https://docs.sim.ai/tools/clay',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'sales-engagement', 'data-analytics'],
|
||||
bgColor: '#E0E0E0',
|
||||
icon: ClayIcon,
|
||||
|
||||
@@ -11,7 +11,7 @@ export const DagsterBlock: BlockConfig<DagsterResponse> = {
|
||||
'Connect to a Dagster instance to launch job runs, monitor run status, list available jobs across repositories, terminate or delete runs, reexecute failed runs, fetch run logs, and manage schedules and sensors. API token only required for Dagster+.',
|
||||
docsLink: 'https://docs.sim.ai/tools/dagster',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Automation,
|
||||
integrationType: IntegrationType.Analytics,
|
||||
tags: ['data-analytics', 'automation'],
|
||||
bgColor: '#ffffff',
|
||||
icon: DagsterIcon,
|
||||
|
||||
@@ -10,7 +10,7 @@ export const ElevenLabsBlock: BlockConfig<ElevenLabsBlockResponse> = {
|
||||
longDescription: 'Integrate ElevenLabs into the workflow. Can convert text to speech.',
|
||||
docsLink: 'https://docs.sim.ai/tools/elevenlabs',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.AI,
|
||||
tags: ['text-to-speech'],
|
||||
bgColor: '#181C1E',
|
||||
icon: ElevenLabsIcon,
|
||||
|
||||
@@ -11,7 +11,7 @@ export const EnrichBlock: BlockConfig = {
|
||||
'Access real-time B2B data intelligence with Enrich.so. Enrich profiles from email addresses, find work emails from LinkedIn, verify email deliverability, search for people and companies, and analyze LinkedIn post engagement.',
|
||||
docsLink: 'https://docs.enrich.so/',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'data-analytics'],
|
||||
bgColor: '#E5E5E6',
|
||||
icon: EnrichSoIcon,
|
||||
|
||||
@@ -12,7 +12,7 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
|
||||
'Integrate Firecrawl into the workflow. Scrape pages, search the web, crawl entire sites, map URL structures, and extract structured data with AI.',
|
||||
docsLink: 'https://docs.sim.ai/tools/firecrawl',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Automation,
|
||||
integrationType: IntegrationType.Search,
|
||||
tags: ['web-scraping', 'automation'],
|
||||
bgColor: '#181C1E',
|
||||
icon: FirecrawlIcon,
|
||||
|
||||
@@ -17,7 +17,7 @@ export const FirefliesBlock: BlockConfig<FirefliesResponse> = {
|
||||
'Integrate Fireflies.ai into the workflow. Manage meeting transcripts, add bot to live meetings, create soundbites, and more. Can also trigger workflows when transcriptions complete.',
|
||||
docsLink: 'https://docs.sim.ai/tools/fireflies',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.Productivity,
|
||||
tags: ['meeting', 'speech-to-text', 'note-taking'],
|
||||
icon: FirefliesIcon,
|
||||
bgColor: '#100730',
|
||||
@@ -615,7 +615,7 @@ export const FirefliesV2Block: BlockConfig<FirefliesResponse> = {
|
||||
name: 'Fireflies',
|
||||
description: 'Interact with Fireflies.ai meeting transcripts and recordings',
|
||||
hideFromToolbar: false,
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.Productivity,
|
||||
tags: ['meeting', 'speech-to-text', 'note-taking'],
|
||||
subBlocks: firefliesV2SubBlocks,
|
||||
tools: {
|
||||
|
||||
@@ -12,7 +12,7 @@ export const GongBlock: BlockConfig<GongResponse> = {
|
||||
'Integrate Gong into your workflow. Access call recordings, transcripts, user data, activity stats, scorecards, trackers, library content, coaching metrics, and more via the Gong API.',
|
||||
docsLink: 'https://docs.sim.ai/tools/gong',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['meeting', 'sales-engagement', 'speech-to-text'],
|
||||
bgColor: '#8039DF',
|
||||
icon: GongIcon,
|
||||
|
||||
@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
|
||||
import { AuthMode, IntegrationType } from '@/blocks/types'
|
||||
import { createVersionedToolSelector, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
|
||||
import type { GoogleCalendarResponse } from '@/tools/google_calendar/types'
|
||||
import { getTrigger } from '@/triggers'
|
||||
|
||||
export const GoogleCalendarBlock: BlockConfig<GoogleCalendarResponse> = {
|
||||
type: 'google_calendar',
|
||||
@@ -488,6 +489,7 @@ Return ONLY the natural language event text - no explanations.`,
|
||||
{ label: 'None (no emails sent)', id: 'none' },
|
||||
],
|
||||
},
|
||||
...getTrigger('google_calendar_poller').subBlocks,
|
||||
],
|
||||
tools: {
|
||||
access: [
|
||||
@@ -644,6 +646,10 @@ Return ONLY the natural language event text - no explanations.`,
|
||||
content: { type: 'string', description: 'Operation response content' },
|
||||
metadata: { type: 'json', description: 'Event or calendar metadata' },
|
||||
},
|
||||
triggers: {
|
||||
enabled: true,
|
||||
available: ['google_calendar_poller'],
|
||||
},
|
||||
}
|
||||
|
||||
export const GoogleCalendarV2Block: BlockConfig<GoogleCalendarResponse> = {
|
||||
|
||||
@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
|
||||
import { AuthMode, IntegrationType } from '@/blocks/types'
|
||||
import { normalizeFileInput, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
|
||||
import type { GoogleDriveResponse } from '@/tools/google_drive/types'
|
||||
import { getTrigger } from '@/triggers'
|
||||
|
||||
export const GoogleDriveBlock: BlockConfig<GoogleDriveResponse> = {
|
||||
type: 'google_drive',
|
||||
@@ -719,6 +720,7 @@ Return ONLY the message text - no subject line, no greetings/signatures, no extr
|
||||
required: true,
|
||||
},
|
||||
// Get Drive Info has no additional fields (just needs credential)
|
||||
...getTrigger('google_drive_poller').subBlocks,
|
||||
],
|
||||
tools: {
|
||||
access: [
|
||||
@@ -939,4 +941,8 @@ Return ONLY the message text - no subject line, no greetings/signatures, no extr
|
||||
deleted: { type: 'boolean', description: 'Whether file was deleted' },
|
||||
removed: { type: 'boolean', description: 'Whether permission was removed' },
|
||||
},
|
||||
triggers: {
|
||||
enabled: true,
|
||||
available: ['google_drive_poller'],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
|
||||
import { AuthMode, IntegrationType } from '@/blocks/types'
|
||||
import { createVersionedToolSelector, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
|
||||
import type { GoogleSheetsResponse, GoogleSheetsV2Response } from '@/tools/google_sheets/types'
|
||||
import { getTrigger } from '@/triggers'
|
||||
|
||||
// Legacy block - hidden from toolbar
|
||||
export const GoogleSheetsBlock: BlockConfig<GoogleSheetsResponse> = {
|
||||
@@ -716,6 +717,7 @@ Return ONLY the JSON array - no explanations, no markdown, no extra text.`,
|
||||
condition: { field: 'operation', value: 'copy_sheet' },
|
||||
required: true,
|
||||
},
|
||||
...getTrigger('google_sheets_poller').subBlocks,
|
||||
],
|
||||
tools: {
|
||||
access: [
|
||||
@@ -1068,4 +1070,8 @@ Return ONLY the JSON array - no explanations, no markdown, no extra text.`,
|
||||
},
|
||||
},
|
||||
},
|
||||
triggers: {
|
||||
enabled: true,
|
||||
available: ['google_sheets_poller'],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ export const GrainBlock: BlockConfig = {
|
||||
longDescription:
|
||||
'Integrate Grain into your workflow. Access meeting recordings, transcripts, highlights, and AI-generated summaries. Can also trigger workflows based on Grain webhook events.',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.Productivity,
|
||||
tags: ['meeting', 'note-taking'],
|
||||
docsLink: 'https://docs.sim.ai/tools/grain',
|
||||
icon: GrainIcon,
|
||||
|
||||
@@ -11,7 +11,7 @@ export const HunterBlock: BlockConfig<HunterResponse> = {
|
||||
'Integrate Hunter into the workflow. Can search domains, find email addresses, verify email addresses, discover companies, find companies, and count email addresses.',
|
||||
docsLink: 'https://docs.sim.ai/tools/hunter',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'sales-engagement'],
|
||||
bgColor: '#E0E0E0',
|
||||
icon: HunterIOIcon,
|
||||
|
||||
@@ -13,7 +13,7 @@ export const LinkedInBlock: BlockConfig<LinkedInResponse> = {
|
||||
'Integrate LinkedIn into workflows. Share posts to your personal feed and access your LinkedIn profile information.',
|
||||
docsLink: 'https://docs.sim.ai/tools/linkedin',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Social,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['marketing', 'sales-engagement', 'enrichment'],
|
||||
bgColor: '#0072B1',
|
||||
icon: LinkedInIcon,
|
||||
|
||||
@@ -13,7 +13,7 @@ export const RedditBlock: BlockConfig<RedditResponse> = {
|
||||
'Integrate Reddit into workflows. Read posts, comments, and search content. Submit posts, vote, reply, edit, manage messages, and access user and subreddit info.',
|
||||
docsLink: 'https://docs.sim.ai/tools/reddit',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Social,
|
||||
integrationType: IntegrationType.Communication,
|
||||
tags: ['content-management', 'web-scraping'],
|
||||
bgColor: '#FF5700',
|
||||
icon: RedditIcon,
|
||||
|
||||
@@ -9,7 +9,7 @@ export const SixtyfourBlock: BlockConfig = {
|
||||
'Find emails, phone numbers, and enrich lead or company data with contact information, social profiles, and detailed research using Sixtyfour AI.',
|
||||
docsLink: 'https://docs.sim.ai/tools/sixtyfour',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.SalesIntelligence,
|
||||
integrationType: IntegrationType.Sales,
|
||||
tags: ['enrichment', 'sales-engagement'],
|
||||
bgColor: '#000000',
|
||||
icon: SixtyfourIcon,
|
||||
|
||||
@@ -13,7 +13,7 @@ export const SpotifyBlock: BlockConfig<ToolResponse> = {
|
||||
'Integrate Spotify into your workflow. Search for tracks, albums, artists, and playlists. Manage playlists, access your library, control playback, browse podcasts and audiobooks.',
|
||||
docsLink: 'https://docs.sim.ai/tools/spotify',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.Communication,
|
||||
tags: ['content-management', 'automation'],
|
||||
hideFromToolbar: true,
|
||||
bgColor: '#000000',
|
||||
|
||||
@@ -35,7 +35,7 @@ export const StagehandBlock: BlockConfig<StagehandResponse> = {
|
||||
'Integrate Stagehand into the workflow. Can extract structured data from webpages or run an autonomous agent to perform tasks.',
|
||||
docsLink: 'https://docs.sim.ai/tools/stagehand',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Automation,
|
||||
integrationType: IntegrationType.AI,
|
||||
tags: ['web-scraping', 'automation', 'agentic'],
|
||||
bgColor: '#FFC83C',
|
||||
icon: StagehandIcon,
|
||||
|
||||
@@ -12,7 +12,7 @@ export const XBlock: BlockConfig = {
|
||||
'Integrate X into the workflow. Search tweets, manage bookmarks, follow/block/mute users, like and retweet, view trends, and more.',
|
||||
docsLink: 'https://docs.sim.ai/tools/x',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Social,
|
||||
integrationType: IntegrationType.Communication,
|
||||
tags: ['marketing', 'messaging'],
|
||||
bgColor: '#000000',
|
||||
icon: xIcon,
|
||||
|
||||
@@ -12,7 +12,7 @@ export const YouTubeBlock: BlockConfig<YouTubeResponse> = {
|
||||
'Integrate YouTube into the workflow. Can search for videos, get trending videos, get video details, get video categories, get channel information, get all videos from a channel, get channel playlists, get playlist items, and get video comments.',
|
||||
docsLink: 'https://docs.sim.ai/tools/youtube',
|
||||
category: 'tools',
|
||||
integrationType: IntegrationType.Media,
|
||||
integrationType: IntegrationType.Communication,
|
||||
tags: ['google-workspace', 'marketing', 'content-management'],
|
||||
bgColor: '#FF0000',
|
||||
icon: YouTubeIcon,
|
||||
|
||||
@@ -19,7 +19,6 @@ export type BlockCategory = 'blocks' | 'tools' | 'triggers'
|
||||
export enum IntegrationType {
|
||||
AI = 'ai',
|
||||
Analytics = 'analytics',
|
||||
Automation = 'automation',
|
||||
Communication = 'communication',
|
||||
CRM = 'crm',
|
||||
CustomerSupport = 'customer-support',
|
||||
@@ -31,13 +30,11 @@ export enum IntegrationType {
|
||||
Email = 'email',
|
||||
FileStorage = 'file-storage',
|
||||
HR = 'hr',
|
||||
Media = 'media',
|
||||
Other = 'other',
|
||||
Productivity = 'productivity',
|
||||
SalesIntelligence = 'sales-intelligence',
|
||||
Sales = 'sales',
|
||||
Search = 'search',
|
||||
Security = 'security',
|
||||
Social = 'social',
|
||||
}
|
||||
|
||||
export type IntegrationTag =
|
||||
@@ -275,7 +272,7 @@ export interface SubBlockConfig {
|
||||
id: string
|
||||
title?: string
|
||||
type: SubBlockType
|
||||
mode?: 'basic' | 'advanced' | 'both' | 'trigger' // Default is 'both' if not specified. 'trigger' means only shown in trigger mode
|
||||
mode?: 'basic' | 'advanced' | 'both' | 'trigger' | 'trigger-advanced' // Default is 'both' if not specified. 'trigger' means only shown in trigger mode. 'trigger-advanced' is for advanced canonical pair members shown in trigger mode
|
||||
canonicalParamId?: string
|
||||
/** Controls parameter visibility in agent/tool-input context */
|
||||
paramVisibility?: 'user-or-llm' | 'user-only' | 'llm-only' | 'hidden'
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { keepPreviousData, useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
|
||||
import { toast } from '@/components/emcn'
|
||||
import type { ChunkingStrategy, StrategyOptions } from '@/lib/chunkers/types'
|
||||
import type {
|
||||
ChunkData,
|
||||
ChunksPagination,
|
||||
@@ -338,10 +339,7 @@ export interface DocumentChunkSearchParams {
|
||||
search: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches all chunks matching a search query by paginating through results.
|
||||
* This is used for search functionality where we need all matching chunks.
|
||||
*/
|
||||
/** Paginates through all matching chunks rather than returning a single page. */
|
||||
export async function fetchAllDocumentChunks(
|
||||
{ knowledgeBaseId, documentId, search }: DocumentChunkSearchParams,
|
||||
signal?: AbortSignal
|
||||
@@ -376,10 +374,6 @@ export const serializeSearchParams = (params: DocumentChunkSearchParams) =>
|
||||
search: params.search,
|
||||
})
|
||||
|
||||
/**
|
||||
* Hook to search for chunks in a document.
|
||||
* Fetches all matching chunks and returns them for client-side pagination.
|
||||
*/
|
||||
export function useDocumentChunkSearchQuery(
|
||||
params: DocumentChunkSearchParams,
|
||||
options?: {
|
||||
@@ -707,6 +701,8 @@ export interface CreateKnowledgeBaseParams {
|
||||
maxSize: number
|
||||
minSize: number
|
||||
overlap: number
|
||||
strategy?: ChunkingStrategy
|
||||
strategyOptions?: StrategyOptions
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -152,13 +152,23 @@ export function useCollaborativeWorkflow() {
|
||||
|
||||
// Register emit functions with operation queue store
|
||||
useEffect(() => {
|
||||
const registeredWorkflowId =
|
||||
isConnected && currentWorkflowId === activeWorkflowId ? currentWorkflowId : null
|
||||
|
||||
registerEmitFunctions(
|
||||
emitWorkflowOperation,
|
||||
emitSubblockUpdate,
|
||||
emitVariableUpdate,
|
||||
currentWorkflowId
|
||||
registeredWorkflowId
|
||||
)
|
||||
}, [emitWorkflowOperation, emitSubblockUpdate, emitVariableUpdate, currentWorkflowId])
|
||||
}, [
|
||||
activeWorkflowId,
|
||||
currentWorkflowId,
|
||||
emitWorkflowOperation,
|
||||
emitSubblockUpdate,
|
||||
emitVariableUpdate,
|
||||
isConnected,
|
||||
])
|
||||
|
||||
useEffect(() => {
|
||||
const handleWorkflowOperation = (data: any) => {
|
||||
|
||||
@@ -55,14 +55,17 @@ export function useTriggerConfigAggregation(
|
||||
let hasAnyValue = false
|
||||
|
||||
triggerDef.subBlocks
|
||||
.filter((sb) => sb.mode === 'trigger' && !SYSTEM_SUBBLOCK_IDS.includes(sb.id))
|
||||
.filter(
|
||||
(sb) =>
|
||||
(sb.mode === 'trigger' || sb.mode === 'trigger-advanced') &&
|
||||
!SYSTEM_SUBBLOCK_IDS.includes(sb.id)
|
||||
)
|
||||
.forEach((subBlock) => {
|
||||
const fieldValue = subBlockStore.getValue(blockId, subBlock.id)
|
||||
|
||||
let valueToUse = fieldValue
|
||||
if (
|
||||
(fieldValue === null || fieldValue === undefined || fieldValue === '') &&
|
||||
subBlock.required &&
|
||||
subBlock.defaultValue !== undefined
|
||||
) {
|
||||
valueToUse = subBlock.defaultValue
|
||||
@@ -117,7 +120,11 @@ export function populateTriggerFieldsFromConfig(
|
||||
const subBlockStore = useSubBlockStore.getState()
|
||||
|
||||
triggerDef.subBlocks
|
||||
.filter((sb) => sb.mode === 'trigger' && !SYSTEM_SUBBLOCK_IDS.includes(sb.id))
|
||||
.filter(
|
||||
(sb) =>
|
||||
(sb.mode === 'trigger' || sb.mode === 'trigger-advanced') &&
|
||||
!SYSTEM_SUBBLOCK_IDS.includes(sb.id)
|
||||
)
|
||||
.forEach((subBlock) => {
|
||||
let configValue: any
|
||||
|
||||
|
||||
@@ -3,12 +3,12 @@ import path from 'path'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { TextChunker } from '@/lib/chunkers/text-chunker'
|
||||
import type { DocChunk, DocsChunkerOptions } from '@/lib/chunkers/types'
|
||||
import { estimateTokens } from '@/lib/chunkers/utils'
|
||||
import { generateEmbeddings } from '@/lib/knowledge/embeddings'
|
||||
|
||||
interface HeaderInfo {
|
||||
level: number
|
||||
text: string
|
||||
slug?: string
|
||||
anchor?: string
|
||||
position?: number
|
||||
}
|
||||
@@ -21,25 +21,21 @@ interface Frontmatter {
|
||||
|
||||
const logger = createLogger('DocsChunker')
|
||||
|
||||
/**
|
||||
* Docs-specific chunker that processes .mdx files and tracks header context
|
||||
*/
|
||||
export class DocsChunker {
|
||||
private readonly textChunker: TextChunker
|
||||
private readonly baseUrl: string
|
||||
private readonly chunkSize: number
|
||||
|
||||
constructor(options: DocsChunkerOptions = {}) {
|
||||
this.chunkSize = options.chunkSize ?? 300
|
||||
this.textChunker = new TextChunker({
|
||||
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
|
||||
chunkSize: this.chunkSize,
|
||||
minCharactersPerChunk: options.minCharactersPerChunk ?? 1,
|
||||
chunkOverlap: options.chunkOverlap ?? 50,
|
||||
})
|
||||
this.baseUrl = options.baseUrl ?? 'https://docs.sim.ai'
|
||||
}
|
||||
|
||||
/**
|
||||
* Process all .mdx files in the docs directory
|
||||
*/
|
||||
async chunkAllDocs(docsPath: string): Promise<DocChunk[]> {
|
||||
const allChunks: DocChunk[] = []
|
||||
|
||||
@@ -65,20 +61,17 @@ export class DocsChunker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single .mdx file
|
||||
*/
|
||||
async chunkMdxFile(filePath: string, basePath: string): Promise<DocChunk[]> {
|
||||
const content = await fs.readFile(filePath, 'utf-8')
|
||||
const relativePath = path.relative(basePath, filePath)
|
||||
|
||||
const { data: frontmatter, content: markdownContent } = this.parseFrontmatter(content)
|
||||
|
||||
const headers = this.extractHeaders(markdownContent)
|
||||
|
||||
const documentUrl = this.generateDocumentUrl(relativePath)
|
||||
|
||||
const textChunks = await this.splitContent(markdownContent)
|
||||
const { chunks: textChunks, cleanedContent } = await this.splitContent(markdownContent)
|
||||
|
||||
const headers = this.extractHeaders(cleanedContent)
|
||||
|
||||
logger.info(`Generating embeddings for ${textChunks.length} chunks in ${relativePath}`)
|
||||
const embeddings: number[][] =
|
||||
@@ -97,7 +90,7 @@ export class DocsChunker {
|
||||
|
||||
const chunk: DocChunk = {
|
||||
text: chunkText,
|
||||
tokenCount: Math.ceil(chunkText.length / 4), // Simple token estimation
|
||||
tokenCount: estimateTokens(chunkText),
|
||||
sourceDocument: relativePath,
|
||||
headerLink: relevantHeader ? `${documentUrl}#${relevantHeader.anchor}` : documentUrl,
|
||||
headerText: relevantHeader?.text || frontmatter.title || 'Document Root',
|
||||
@@ -118,9 +111,6 @@ export class DocsChunker {
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Find all .mdx files recursively
|
||||
*/
|
||||
private async findMdxFiles(dirPath: string): Promise<string[]> {
|
||||
const files: string[] = []
|
||||
|
||||
@@ -140,9 +130,6 @@ export class DocsChunker {
|
||||
return files
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract headers and their positions from markdown content
|
||||
*/
|
||||
private extractHeaders(content: string): HeaderInfo[] {
|
||||
const headers: HeaderInfo[] = []
|
||||
const headerRegex = /^(#{1,6})\s+(.+)$/gm
|
||||
@@ -164,42 +151,28 @@ export class DocsChunker {
|
||||
return headers
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate URL-safe anchor from header text
|
||||
*/
|
||||
private generateAnchor(headerText: string): string {
|
||||
return headerText
|
||||
.toLowerCase()
|
||||
.replace(/[^\w\s-]/g, '') // Remove special characters except hyphens
|
||||
.replace(/\s+/g, '-') // Replace spaces with hyphens
|
||||
.replace(/-+/g, '-') // Replace multiple hyphens with single
|
||||
.replace(/^-|-$/g, '') // Remove leading/trailing hyphens
|
||||
.replace(/[^\w\s-]/g, '')
|
||||
.replace(/\s+/g, '-')
|
||||
.replace(/-+/g, '-')
|
||||
.replace(/^-|-$/g, '')
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate document URL from relative path
|
||||
* Handles index.mdx files specially - they are served at the parent directory path
|
||||
*/
|
||||
/** index.mdx files are served at the parent directory path */
|
||||
private generateDocumentUrl(relativePath: string): string {
|
||||
// Convert file path to URL path
|
||||
// e.g., "tools/knowledge.mdx" -> "/tools/knowledge"
|
||||
// e.g., "triggers/index.mdx" -> "/triggers" (NOT "/triggers/index")
|
||||
let urlPath = relativePath.replace(/\.mdx$/, '').replace(/\\/g, '/') // Handle Windows paths
|
||||
let urlPath = relativePath.replace(/\.mdx$/, '').replace(/\\/g, '/')
|
||||
|
||||
// In fumadocs, index.mdx files are served at the parent directory path
|
||||
// e.g., "triggers/index" -> "triggers"
|
||||
if (urlPath.endsWith('/index')) {
|
||||
urlPath = urlPath.slice(0, -6) // Remove "/index"
|
||||
urlPath = urlPath.slice(0, -6)
|
||||
} else if (urlPath === 'index') {
|
||||
urlPath = '' // Root index.mdx
|
||||
urlPath = ''
|
||||
}
|
||||
|
||||
return `${this.baseUrl}/${urlPath}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the most relevant header for a given position
|
||||
*/
|
||||
private findRelevantHeader(headers: HeaderInfo[], position: number): HeaderInfo | null {
|
||||
if (headers.length === 0) return null
|
||||
|
||||
@@ -216,10 +189,10 @@ export class DocsChunker {
|
||||
return relevantHeader
|
||||
}
|
||||
|
||||
/**
|
||||
* Split content into chunks using the existing TextChunker with table awareness
|
||||
*/
|
||||
private async splitContent(content: string): Promise<string[]> {
|
||||
/** Returns both chunks and cleaned content so header extraction uses aligned positions. */
|
||||
private async splitContent(
|
||||
content: string
|
||||
): Promise<{ chunks: string[]; cleanedContent: string }> {
|
||||
const cleanedContent = this.cleanContent(content)
|
||||
|
||||
const tableBoundaries = this.detectTableBoundaries(cleanedContent)
|
||||
@@ -234,30 +207,23 @@ export class DocsChunker {
|
||||
|
||||
const finalChunks = this.enforceSizeLimit(processedChunks)
|
||||
|
||||
return finalChunks
|
||||
return { chunks: finalChunks, cleanedContent }
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean content by removing MDX-specific elements and excessive whitespace
|
||||
*/
|
||||
private cleanContent(content: string): string {
|
||||
return (
|
||||
content
|
||||
// Remove import statements
|
||||
.replace(/^import\s+.*$/gm, '')
|
||||
// Remove JSX components and React-style comments
|
||||
.replace(/<[^>]+>/g, ' ')
|
||||
.replace(/\{\/\*[\s\S]*?\*\/\}/g, ' ')
|
||||
// Remove excessive whitespace
|
||||
.replace(/\n{3,}/g, '\n\n')
|
||||
.replace(/[ \t]{2,}/g, ' ')
|
||||
.trim()
|
||||
)
|
||||
return content
|
||||
.replace(/\r\n/g, '\n')
|
||||
.replace(/\r/g, '\n')
|
||||
.replace(/^import\s+.*$/gm, '')
|
||||
.replace(/^export\s+.*$/gm, '')
|
||||
.replace(/<\/?[a-zA-Z][^>]*>/g, ' ')
|
||||
.replace(/\{\/\*[\s\S]*?\*\/\}/g, ' ')
|
||||
.replace(/\{[^{}]*\}/g, ' ')
|
||||
.replace(/\n{3,}/g, '\n\n')
|
||||
.replace(/[ \t]{2,}/g, ' ')
|
||||
.trim()
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse frontmatter from MDX content
|
||||
*/
|
||||
private parseFrontmatter(content: string): { data: Frontmatter; content: string } {
|
||||
const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/
|
||||
const match = content.match(frontmatterRegex)
|
||||
@@ -285,26 +251,25 @@ export class DocsChunker {
|
||||
return { data, content: markdownContent }
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate token count (rough approximation)
|
||||
*/
|
||||
private estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4)
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect table boundaries in markdown content to avoid splitting them
|
||||
*/
|
||||
/** Detects table boundaries to avoid splitting tables across chunks. */
|
||||
private detectTableBoundaries(content: string): { start: number; end: number }[] {
|
||||
const tables: { start: number; end: number }[] = []
|
||||
const lines = content.split('\n')
|
||||
|
||||
let inTable = false
|
||||
let inCodeBlock = false
|
||||
let tableStart = -1
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i].trim()
|
||||
|
||||
if (line.startsWith('```')) {
|
||||
inCodeBlock = !inCodeBlock
|
||||
continue
|
||||
}
|
||||
|
||||
if (inCodeBlock) continue
|
||||
|
||||
if (line.includes('|') && line.split('|').length >= 3 && !inTable) {
|
||||
const nextLine = lines[i + 1]?.trim()
|
||||
if (nextLine?.includes('|') && nextLine.includes('-')) {
|
||||
@@ -314,7 +279,7 @@ export class DocsChunker {
|
||||
} else if (inTable && (!line.includes('|') || line === '' || line.startsWith('#'))) {
|
||||
tables.push({
|
||||
start: this.getCharacterPosition(lines, tableStart),
|
||||
end: this.getCharacterPosition(lines, i - 1) + lines[i - 1]?.length || 0,
|
||||
end: this.getCharacterPosition(lines, i - 1) + (lines[i - 1]?.length ?? 0),
|
||||
})
|
||||
inTable = false
|
||||
}
|
||||
@@ -330,16 +295,10 @@ export class DocsChunker {
|
||||
return tables
|
||||
}
|
||||
|
||||
/**
|
||||
* Get character position from line number
|
||||
*/
|
||||
private getCharacterPosition(lines: string[], lineIndex: number): number {
|
||||
return lines.slice(0, lineIndex).reduce((acc, line) => acc + line.length + 1, 0)
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge chunks that would split tables
|
||||
*/
|
||||
private mergeTableChunks(
|
||||
chunks: string[],
|
||||
tableBoundaries: { start: number; end: number }[],
|
||||
@@ -354,6 +313,10 @@ export class DocsChunker {
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const chunkStart = originalContent.indexOf(chunk, currentPosition)
|
||||
if (chunkStart === -1) {
|
||||
mergedChunks.push(chunk)
|
||||
continue
|
||||
}
|
||||
const chunkEnd = chunkStart + chunk.length
|
||||
|
||||
const intersectsTable = tableBoundaries.some(
|
||||
@@ -373,10 +336,10 @@ export class DocsChunker {
|
||||
|
||||
const minStart = Math.min(chunkStart, ...affectedTables.map((t) => t.start))
|
||||
const maxEnd = Math.max(chunkEnd, ...affectedTables.map((t) => t.end))
|
||||
const completeChunk = originalContent.slice(minStart, maxEnd)
|
||||
const completeChunk = originalContent.slice(minStart, maxEnd).trim()
|
||||
|
||||
if (!mergedChunks.some((existing) => existing.includes(completeChunk.trim()))) {
|
||||
mergedChunks.push(completeChunk.trim())
|
||||
if (completeChunk && !mergedChunks.some((existing) => existing === completeChunk)) {
|
||||
mergedChunks.push(completeChunk)
|
||||
}
|
||||
} else {
|
||||
mergedChunks.push(chunk)
|
||||
@@ -388,16 +351,13 @@ export class DocsChunker {
|
||||
return mergedChunks.filter((chunk) => chunk.length > 50)
|
||||
}
|
||||
|
||||
/**
|
||||
* Enforce 300 token size limit on chunks
|
||||
*/
|
||||
private enforceSizeLimit(chunks: string[]): string[] {
|
||||
const finalChunks: string[] = []
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const tokens = this.estimateTokens(chunk)
|
||||
const tokens = estimateTokens(chunk)
|
||||
|
||||
if (tokens <= 300) {
|
||||
if (tokens <= this.chunkSize) {
|
||||
finalChunks.push(chunk)
|
||||
} else {
|
||||
const lines = chunk.split('\n')
|
||||
@@ -406,7 +366,7 @@ export class DocsChunker {
|
||||
for (const line of lines) {
|
||||
const testChunk = currentChunk ? `${currentChunk}\n${line}` : line
|
||||
|
||||
if (this.estimateTokens(testChunk) <= 300) {
|
||||
if (estimateTokens(testChunk) <= this.chunkSize) {
|
||||
currentChunk = testChunk
|
||||
} else {
|
||||
if (currentChunk.trim()) {
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
export { DocsChunker } from './docs-chunker'
|
||||
export { JsonYamlChunker } from './json-yaml-chunker'
|
||||
export { RecursiveChunker } from './recursive-chunker'
|
||||
export { RegexChunker } from './regex-chunker'
|
||||
export { SentenceChunker } from './sentence-chunker'
|
||||
export { StructuredDataChunker } from './structured-data-chunker'
|
||||
export { TextChunker } from './text-chunker'
|
||||
export { TokenChunker } from './token-chunker'
|
||||
export * from './types'
|
||||
|
||||
@@ -30,14 +30,11 @@ describe('JsonYamlChunker', () => {
|
||||
expect(JsonYamlChunker.isStructuredData('key: value\nother: data')).toBe(true)
|
||||
})
|
||||
|
||||
it('should return true for YAML-like plain text', () => {
|
||||
// Note: js-yaml is permissive and parses plain text as valid YAML (scalar value)
|
||||
// This is expected behavior of the YAML parser
|
||||
expect(JsonYamlChunker.isStructuredData('Hello, this is plain text.')).toBe(true)
|
||||
it('should return false for plain text parsed as YAML scalar', () => {
|
||||
expect(JsonYamlChunker.isStructuredData('Hello, this is plain text.')).toBe(false)
|
||||
})
|
||||
|
||||
it('should return false for invalid JSON/YAML with unbalanced braces', () => {
|
||||
// Only truly malformed content that fails YAML parsing returns false
|
||||
expect(JsonYamlChunker.isStructuredData('{invalid: json: content: {{')).toBe(false)
|
||||
})
|
||||
|
||||
@@ -61,7 +58,6 @@ describe('JsonYamlChunker', () => {
|
||||
const json = '{}'
|
||||
const chunks = await chunker.chunk(json)
|
||||
|
||||
// Empty object is valid JSON, should return at least metadata
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(0)
|
||||
})
|
||||
|
||||
@@ -204,7 +200,6 @@ server:
|
||||
const json = '[]'
|
||||
const chunks = await chunker.chunk(json)
|
||||
|
||||
// Empty array should not produce chunks with meaningful content
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(0)
|
||||
})
|
||||
|
||||
@@ -272,7 +267,6 @@ server:
|
||||
|
||||
it.concurrent('should fall back to text chunking for invalid JSON', async () => {
|
||||
const chunker = new JsonYamlChunker({ chunkSize: 100, minCharactersPerChunk: 10 })
|
||||
// Create content that fails YAML parsing and is long enough to produce chunks
|
||||
const invalidJson = `{this is not valid json: content: {{${' more content here '.repeat(10)}`
|
||||
const chunks = await chunker.chunk(invalidJson)
|
||||
|
||||
@@ -377,9 +371,7 @@ server:
|
||||
const json = JSON.stringify({ a: 1, b: 2, c: 3 })
|
||||
const chunks = await chunker.chunk(json)
|
||||
|
||||
// Should produce chunks that are valid
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
// The entire small object fits in one chunk
|
||||
expect(chunks[0].text.length).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import * as yaml from 'js-yaml'
|
||||
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
|
||||
import { getAccurateTokenCount } from '@/lib/tokenization'
|
||||
import { estimateTokenCount } from '@/lib/tokenization/estimators'
|
||||
import { estimateTokens } from '@/lib/chunkers/utils'
|
||||
|
||||
const logger = createLogger('JsonYamlChunker')
|
||||
|
||||
@@ -11,57 +10,31 @@ type JsonValue = JsonPrimitive | JsonObject | JsonArray
|
||||
type JsonObject = { [key: string]: JsonValue }
|
||||
type JsonArray = JsonValue[]
|
||||
|
||||
function getTokenCount(text: string): number {
|
||||
try {
|
||||
return getAccurateTokenCount(text, 'text-embedding-3-small')
|
||||
} catch (error) {
|
||||
logger.warn('Tiktoken failed, falling back to estimation')
|
||||
const estimate = estimateTokenCount(text)
|
||||
return estimate.count
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration for JSON/YAML chunking
|
||||
* Reduced limits to ensure we stay well under OpenAI's 8,191 token limit per embedding request
|
||||
*/
|
||||
const JSON_YAML_CHUNKING_CONFIG = {
|
||||
TARGET_CHUNK_SIZE: 1024, // Target tokens per chunk
|
||||
MIN_CHARACTERS_PER_CHUNK: 100, // Minimum characters per chunk to filter tiny fragments
|
||||
MAX_CHUNK_SIZE: 1500, // Maximum tokens per chunk
|
||||
MAX_DEPTH_FOR_SPLITTING: 5, // Maximum depth to traverse for splitting
|
||||
}
|
||||
const MAX_DEPTH = 5
|
||||
|
||||
export class JsonYamlChunker {
|
||||
private chunkSize: number // in tokens
|
||||
private minCharactersPerChunk: number // in characters
|
||||
private chunkSize: number
|
||||
private minCharactersPerChunk: number
|
||||
|
||||
constructor(options: ChunkerOptions = {}) {
|
||||
this.chunkSize = options.chunkSize ?? JSON_YAML_CHUNKING_CONFIG.TARGET_CHUNK_SIZE
|
||||
this.minCharactersPerChunk =
|
||||
options.minCharactersPerChunk ?? JSON_YAML_CHUNKING_CONFIG.MIN_CHARACTERS_PER_CHUNK
|
||||
this.chunkSize = options.chunkSize ?? 1024
|
||||
this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if content is structured JSON/YAML data
|
||||
*/
|
||||
static isStructuredData(content: string): boolean {
|
||||
try {
|
||||
JSON.parse(content)
|
||||
return true
|
||||
const parsed = JSON.parse(content)
|
||||
return typeof parsed === 'object' && parsed !== null
|
||||
} catch {
|
||||
try {
|
||||
yaml.load(content)
|
||||
return true
|
||||
const parsed = yaml.load(content)
|
||||
return typeof parsed === 'object' && parsed !== null
|
||||
} catch {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk JSON/YAML content intelligently based on structure
|
||||
*/
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
try {
|
||||
let data: JsonValue
|
||||
@@ -70,16 +43,10 @@ export class JsonYamlChunker {
|
||||
} catch {
|
||||
data = yaml.load(content) as JsonValue
|
||||
}
|
||||
const chunks = this.chunkStructuredData(data)
|
||||
const chunks = this.chunkStructuredData(data, [], 0)
|
||||
|
||||
const tokenCounts = chunks.map((c) => c.tokenCount)
|
||||
const totalTokens = tokenCounts.reduce((a, b) => a + b, 0)
|
||||
const maxTokens = Math.max(...tokenCounts)
|
||||
const avgTokens = Math.round(totalTokens / chunks.length)
|
||||
|
||||
logger.info(
|
||||
`JSON chunking complete: ${chunks.length} chunks, ${totalTokens} total tokens (avg: ${avgTokens}, max: ${maxTokens})`
|
||||
)
|
||||
const totalTokens = chunks.reduce((sum, c) => sum + c.tokenCount, 0)
|
||||
logger.info(`JSON chunking complete: ${chunks.length} chunks, ${totalTokens} total tokens`)
|
||||
|
||||
return chunks
|
||||
} catch (error) {
|
||||
@@ -88,42 +55,38 @@ export class JsonYamlChunker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk structured data based on its structure
|
||||
*/
|
||||
private chunkStructuredData(data: JsonValue, path: string[] = []): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
|
||||
private chunkStructuredData(data: JsonValue, path: string[], depth: number): Chunk[] {
|
||||
if (Array.isArray(data)) {
|
||||
return this.chunkArray(data, path)
|
||||
return this.chunkArray(data, path, depth)
|
||||
}
|
||||
|
||||
if (typeof data === 'object' && data !== null) {
|
||||
return this.chunkObject(data as JsonObject, path)
|
||||
return this.chunkObject(data as JsonObject, path, depth)
|
||||
}
|
||||
|
||||
const content = JSON.stringify(data, null, 2)
|
||||
const tokenCount = getTokenCount(content)
|
||||
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
|
||||
const contentTokens = estimateTokens(content)
|
||||
|
||||
// Filter tiny fragments using character count
|
||||
if (content.length >= this.minCharactersPerChunk) {
|
||||
chunks.push({
|
||||
text: content,
|
||||
tokenCount,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: content.length,
|
||||
},
|
||||
})
|
||||
if (contentTokens > this.chunkSize) {
|
||||
return this.chunkAsText(contextHeader + content)
|
||||
}
|
||||
|
||||
return chunks
|
||||
if (content.length < this.minCharactersPerChunk) {
|
||||
return []
|
||||
}
|
||||
|
||||
const text = contextHeader + content
|
||||
return [
|
||||
{
|
||||
text,
|
||||
tokenCount: estimateTokens(text),
|
||||
metadata: { startIndex: 0, endIndex: text.length },
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk an array intelligently
|
||||
*/
|
||||
private chunkArray(arr: JsonArray, path: string[]): Chunk[] {
|
||||
private chunkArray(arr: JsonArray, path: string[], depth: number): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
let currentBatch: JsonValue[] = []
|
||||
let currentTokens = 0
|
||||
@@ -133,46 +96,30 @@ export class JsonYamlChunker {
|
||||
for (let i = 0; i < arr.length; i++) {
|
||||
const item = arr[i]
|
||||
const itemStr = JSON.stringify(item, null, 2)
|
||||
const itemTokens = getTokenCount(itemStr)
|
||||
const itemTokens = estimateTokens(itemStr)
|
||||
|
||||
if (itemTokens > this.chunkSize) {
|
||||
if (currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: getTokenCount(batchContent),
|
||||
metadata: {
|
||||
startIndex: i - currentBatch.length,
|
||||
endIndex: i - 1,
|
||||
},
|
||||
})
|
||||
chunks.push(
|
||||
this.buildBatchChunk(contextHeader, currentBatch, i - currentBatch.length, i - 1)
|
||||
)
|
||||
currentBatch = []
|
||||
currentTokens = 0
|
||||
}
|
||||
|
||||
if (typeof item === 'object' && item !== null) {
|
||||
const subChunks = this.chunkStructuredData(item, [...path, `[${i}]`])
|
||||
chunks.push(...subChunks)
|
||||
if (depth < MAX_DEPTH && typeof item === 'object' && item !== null) {
|
||||
chunks.push(...this.chunkStructuredData(item, [...path, `[${i}]`], depth + 1))
|
||||
} else {
|
||||
chunks.push({
|
||||
text: contextHeader + itemStr,
|
||||
tokenCount: itemTokens,
|
||||
metadata: {
|
||||
startIndex: i,
|
||||
endIndex: i,
|
||||
},
|
||||
metadata: { startIndex: i, endIndex: i },
|
||||
})
|
||||
}
|
||||
} else if (currentTokens + itemTokens > this.chunkSize && currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: getTokenCount(batchContent),
|
||||
metadata: {
|
||||
startIndex: i - currentBatch.length,
|
||||
endIndex: i - 1,
|
||||
},
|
||||
})
|
||||
chunks.push(
|
||||
this.buildBatchChunk(contextHeader, currentBatch, i - currentBatch.length, i - 1)
|
||||
)
|
||||
currentBatch = [item]
|
||||
currentTokens = itemTokens
|
||||
} else {
|
||||
@@ -182,121 +129,112 @@ export class JsonYamlChunker {
|
||||
}
|
||||
|
||||
if (currentBatch.length > 0) {
|
||||
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
|
||||
chunks.push({
|
||||
text: batchContent,
|
||||
tokenCount: getTokenCount(batchContent),
|
||||
metadata: {
|
||||
startIndex: arr.length - currentBatch.length,
|
||||
endIndex: arr.length - 1,
|
||||
},
|
||||
})
|
||||
chunks.push(
|
||||
this.buildBatchChunk(
|
||||
contextHeader,
|
||||
currentBatch,
|
||||
arr.length - currentBatch.length,
|
||||
arr.length - 1
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk an object intelligently
|
||||
*/
|
||||
private chunkObject(obj: JsonObject, path: string[]): Chunk[] {
|
||||
private chunkObject(obj: JsonObject, path: string[], depth: number): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
const entries = Object.entries(obj)
|
||||
|
||||
const fullContent = JSON.stringify(obj, null, 2)
|
||||
const fullTokens = getTokenCount(fullContent)
|
||||
const fullTokens = estimateTokens(fullContent)
|
||||
|
||||
if (fullTokens <= this.chunkSize) {
|
||||
chunks.push({
|
||||
text: fullContent,
|
||||
tokenCount: fullTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: fullContent.length,
|
||||
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
|
||||
const text = contextHeader + fullContent
|
||||
return [
|
||||
{
|
||||
text,
|
||||
tokenCount: estimateTokens(text),
|
||||
metadata: { startIndex: 0, endIndex: text.length },
|
||||
},
|
||||
})
|
||||
return chunks
|
||||
]
|
||||
}
|
||||
|
||||
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
|
||||
let currentObj: JsonObject = {}
|
||||
let currentTokens = 0
|
||||
let currentKeys: string[] = []
|
||||
|
||||
for (const [key, value] of entries) {
|
||||
const valueStr = JSON.stringify({ [key]: value }, null, 2)
|
||||
const valueTokens = getTokenCount(valueStr)
|
||||
const valueTokens = estimateTokens(valueStr)
|
||||
|
||||
if (valueTokens > this.chunkSize) {
|
||||
if (Object.keys(currentObj).length > 0) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: getTokenCount(objContent),
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
tokenCount: estimateTokens(objContent),
|
||||
metadata: { startIndex: 0, endIndex: objContent.length },
|
||||
})
|
||||
currentObj = {}
|
||||
currentTokens = 0
|
||||
currentKeys = []
|
||||
}
|
||||
|
||||
if (typeof value === 'object' && value !== null) {
|
||||
const subChunks = this.chunkStructuredData(value, [...path, key])
|
||||
chunks.push(...subChunks)
|
||||
if (depth < MAX_DEPTH && typeof value === 'object' && value !== null) {
|
||||
chunks.push(...this.chunkStructuredData(value, [...path, key], depth + 1))
|
||||
} else {
|
||||
chunks.push({
|
||||
text: valueStr,
|
||||
text: contextHeader + valueStr,
|
||||
tokenCount: valueTokens,
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: valueStr.length,
|
||||
},
|
||||
metadata: { startIndex: 0, endIndex: valueStr.length },
|
||||
})
|
||||
}
|
||||
} else if (
|
||||
currentTokens + valueTokens > this.chunkSize &&
|
||||
Object.keys(currentObj).length > 0
|
||||
) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: getTokenCount(objContent),
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
tokenCount: estimateTokens(objContent),
|
||||
metadata: { startIndex: 0, endIndex: objContent.length },
|
||||
})
|
||||
currentObj = { [key]: value }
|
||||
currentTokens = valueTokens
|
||||
currentKeys = [key]
|
||||
} else {
|
||||
currentObj[key] = value
|
||||
currentTokens += valueTokens
|
||||
currentKeys.push(key)
|
||||
}
|
||||
}
|
||||
|
||||
if (Object.keys(currentObj).length > 0) {
|
||||
const objContent = JSON.stringify(currentObj, null, 2)
|
||||
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
|
||||
chunks.push({
|
||||
text: objContent,
|
||||
tokenCount: getTokenCount(objContent),
|
||||
metadata: {
|
||||
startIndex: 0,
|
||||
endIndex: objContent.length,
|
||||
},
|
||||
tokenCount: estimateTokens(objContent),
|
||||
metadata: { startIndex: 0, endIndex: objContent.length },
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Fall back to text chunking if JSON parsing fails
|
||||
*/
|
||||
private async chunkAsText(content: string): Promise<Chunk[]> {
|
||||
private buildBatchChunk(
|
||||
contextHeader: string,
|
||||
batch: JsonValue[],
|
||||
startIdx: number,
|
||||
endIdx: number
|
||||
): Chunk {
|
||||
const batchContent = contextHeader + JSON.stringify(batch, null, 2)
|
||||
return {
|
||||
text: batchContent,
|
||||
tokenCount: estimateTokens(batchContent),
|
||||
metadata: { startIndex: startIdx, endIndex: endIdx },
|
||||
}
|
||||
}
|
||||
|
||||
private chunkAsText(content: string): Chunk[] {
|
||||
const chunks: Chunk[] = []
|
||||
const lines = content.split('\n')
|
||||
let currentChunk = ''
|
||||
@@ -304,16 +242,13 @@ export class JsonYamlChunker {
|
||||
let startIndex = 0
|
||||
|
||||
for (const line of lines) {
|
||||
const lineTokens = getTokenCount(line)
|
||||
const lineTokens = estimateTokens(line)
|
||||
|
||||
if (currentTokens + lineTokens > this.chunkSize && currentChunk) {
|
||||
chunks.push({
|
||||
text: currentChunk,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex,
|
||||
endIndex: startIndex + currentChunk.length,
|
||||
},
|
||||
metadata: { startIndex, endIndex: startIndex + currentChunk.length },
|
||||
})
|
||||
|
||||
startIndex += currentChunk.length + 1
|
||||
@@ -325,24 +260,17 @@ export class JsonYamlChunker {
|
||||
}
|
||||
}
|
||||
|
||||
// Filter tiny fragments using character count
|
||||
if (currentChunk && currentChunk.length >= this.minCharactersPerChunk) {
|
||||
chunks.push({
|
||||
text: currentChunk,
|
||||
tokenCount: currentTokens,
|
||||
metadata: {
|
||||
startIndex,
|
||||
endIndex: startIndex + currentChunk.length,
|
||||
},
|
||||
metadata: { startIndex, endIndex: startIndex + currentChunk.length },
|
||||
})
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Static method for chunking JSON/YAML data with default options
|
||||
*/
|
||||
static async chunkJsonYaml(content: string, options: ChunkerOptions = {}): Promise<Chunk[]> {
|
||||
const chunker = new JsonYamlChunker(options)
|
||||
return chunker.chunk(content)
|
||||
|
||||
275
apps/sim/lib/chunkers/recursive-chunker.test.ts
Normal file
275
apps/sim/lib/chunkers/recursive-chunker.test.ts
Normal file
@@ -0,0 +1,275 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { loggerMock } from '@sim/testing'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { RecursiveChunker } from './recursive-chunker'
|
||||
|
||||
vi.mock('@sim/logger', () => loggerMock)
|
||||
|
||||
describe('RecursiveChunker', () => {
|
||||
describe('empty and whitespace input', () => {
|
||||
it.concurrent('should return empty array for empty string', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk('')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
|
||||
it.concurrent('should return empty array for whitespace-only input', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk(' \n\n\t ')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('small content', () => {
|
||||
it.concurrent('should return single chunk when content fits in one chunk', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const text = 'This is a short text.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
})
|
||||
})
|
||||
|
||||
describe('paragraph splitting', () => {
|
||||
it.concurrent('should split at paragraph boundaries first', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20 })
|
||||
const text =
|
||||
'First paragraph with enough content to matter.\n\nSecond paragraph with enough content to matter.\n\nThird paragraph with enough content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('line splitting fallback', () => {
|
||||
it.concurrent('should split at newlines when paragraphs are too large', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 15 })
|
||||
const text =
|
||||
'Line one with content here.\nLine two with content here.\nLine three with content here.\nLine four with content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('sentence splitting fallback', () => {
|
||||
it.concurrent('should split at sentence boundaries when lines are too large', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 10 })
|
||||
const text =
|
||||
'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('word splitting fallback', () => {
|
||||
it.concurrent('should split at spaces when sentences are too large', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 5 })
|
||||
const text = 'word1 word2 word3 word4 word5 word6 word7 word8 word9 word10'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('keep_separator behavior', () => {
|
||||
it.concurrent('should prepend separator to subsequent chunks', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 15 })
|
||||
const text =
|
||||
'First paragraph content here.\n\nSecond paragraph content here.\n\nThird paragraph content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
expect(chunks[1].text.startsWith('\n\n') || chunks[1].text.length > 0).toBe(true)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('custom separators', () => {
|
||||
it.concurrent('should use custom separators instead of default recipe', async () => {
|
||||
const chunker = new RecursiveChunker({
|
||||
chunkSize: 15,
|
||||
separators: ['---', '\n'],
|
||||
})
|
||||
const text =
|
||||
'Section one content here with words.---Section two content here with words.---Section three content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('recipe: plain', () => {
|
||||
it.concurrent('should use plain recipe by default', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20 })
|
||||
const text =
|
||||
'First paragraph with enough words to exceed the chunk size limit.\n\nSecond paragraph with enough words to exceed the chunk size limit.\n\nThird paragraph with enough words to exceed the chunk size limit.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('recipe: markdown', () => {
|
||||
it.concurrent('should split at heading boundaries for markdown content', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, recipe: 'markdown' })
|
||||
const text =
|
||||
'\n# Title\n\nParagraph content under the title goes here.\n\n## Subtitle\n\nMore text content under the subtitle goes here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
|
||||
it.concurrent('should handle markdown horizontal rules', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, recipe: 'markdown' })
|
||||
const text =
|
||||
'Section one content here.\n---\nSection two content here.\n---\nSection three content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe('recipe: code', () => {
|
||||
it.concurrent('should split on function and class boundaries', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, recipe: 'code' })
|
||||
const text = [
|
||||
'const x = 1;',
|
||||
'function hello() {',
|
||||
' return "hello";',
|
||||
'}',
|
||||
'function world() {',
|
||||
' return "world";',
|
||||
'}',
|
||||
'class MyClass {',
|
||||
' constructor() {}',
|
||||
' method() { return true; }',
|
||||
'}',
|
||||
].join('\n')
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunk size respected', () => {
|
||||
it.concurrent('should not exceed chunk size in tokens', async () => {
|
||||
const chunkSize = 30
|
||||
const chunker = new RecursiveChunker({ chunkSize })
|
||||
const text = 'This is a test sentence with content. '.repeat(30)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize + 5)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('overlap', () => {
|
||||
it.concurrent('should share text between consecutive chunks when overlap is set', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, chunkOverlap: 5 })
|
||||
const text =
|
||||
'First paragraph with some content here.\n\nSecond paragraph with different content here.\n\nThird paragraph with more content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
expect(chunks[1].text.length).toBeGreaterThan(0)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should not add overlap when overlap is 0', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, chunkOverlap: 0 })
|
||||
const text =
|
||||
'First sentence content here. Second sentence content here. Third sentence content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
const firstChunkEnd = chunks[0].text.slice(-10)
|
||||
expect(chunks[1].text.startsWith(firstChunkEnd)).toBe(false)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunk metadata', () => {
|
||||
it.concurrent('should include text, tokenCount, and metadata fields', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const text = 'This is test content for metadata.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
|
||||
expect(chunks[0].metadata.startIndex).toBeDefined()
|
||||
expect(chunks[0].metadata.endIndex).toBeDefined()
|
||||
})
|
||||
|
||||
it.concurrent('should have startIndex of 0 for the first chunk', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const text = 'Some content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].metadata.startIndex).toBe(0)
|
||||
})
|
||||
|
||||
it.concurrent('should have non-negative indices for all chunks', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20, chunkOverlap: 5 })
|
||||
const text = 'First part. Second part. Third part. Fourth part. Fifth part.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
|
||||
expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should have endIndex greater than startIndex for non-empty chunks', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 20 })
|
||||
const text = 'Multiple sentences here. Another one here. And another. And more content.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.metadata.endIndex).toBeGreaterThan(chunk.metadata.startIndex)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('edge cases', () => {
|
||||
it.concurrent('should handle very long text', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const text = 'This is a sentence. '.repeat(1000)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
|
||||
it.concurrent('should handle text with no natural separators', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 5 })
|
||||
const text = 'abcdefghijklmnopqrstuvwxyz'.repeat(5)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
|
||||
it.concurrent('should handle unicode text', async () => {
|
||||
const chunker = new RecursiveChunker({ chunkSize: 100 })
|
||||
const text = '这是中文测试。日本語テスト。한국어 테스트.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
expect(chunks[0].text).toContain('中文')
|
||||
})
|
||||
|
||||
it.concurrent('should use default chunkSize of 1024 tokens', async () => {
|
||||
const chunker = new RecursiveChunker({})
|
||||
const text = 'Word '.repeat(400)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
})
|
||||
})
|
||||
})
|
||||
145
apps/sim/lib/chunkers/recursive-chunker.ts
Normal file
145
apps/sim/lib/chunkers/recursive-chunker.ts
Normal file
@@ -0,0 +1,145 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { Chunk, RecursiveChunkerOptions } from '@/lib/chunkers/types'
|
||||
import {
|
||||
addOverlap,
|
||||
buildChunks,
|
||||
cleanText,
|
||||
estimateTokens,
|
||||
resolveChunkerOptions,
|
||||
splitAtWordBoundaries,
|
||||
tokensToChars,
|
||||
} from '@/lib/chunkers/utils'
|
||||
|
||||
const logger = createLogger('RecursiveChunker')
|
||||
|
||||
const RECIPES = {
|
||||
plain: ['\n\n', '\n', '. ', ' ', ''],
|
||||
markdown: [
|
||||
'\n---\n',
|
||||
'\n***\n',
|
||||
'\n___\n',
|
||||
'\n# ',
|
||||
'\n## ',
|
||||
'\n### ',
|
||||
'\n#### ',
|
||||
'\n##### ',
|
||||
'\n###### ',
|
||||
'\n```\n',
|
||||
'\n> ',
|
||||
'\n\n',
|
||||
'\n',
|
||||
'. ',
|
||||
' ',
|
||||
'',
|
||||
],
|
||||
code: [
|
||||
'\nfunction ',
|
||||
'\nclass ',
|
||||
'\nexport ',
|
||||
'\nconst ',
|
||||
'\nlet ',
|
||||
'\nvar ',
|
||||
'\nif ',
|
||||
'\nfor ',
|
||||
'\nwhile ',
|
||||
'\nswitch ',
|
||||
'\nreturn ',
|
||||
'\n\n',
|
||||
'\n',
|
||||
'; ',
|
||||
' ',
|
||||
'',
|
||||
],
|
||||
} as const
|
||||
|
||||
export class RecursiveChunker {
|
||||
private readonly chunkSize: number
|
||||
private readonly chunkOverlap: number
|
||||
private readonly separators: string[]
|
||||
|
||||
constructor(options: RecursiveChunkerOptions = {}) {
|
||||
const resolved = resolveChunkerOptions(options)
|
||||
this.chunkSize = resolved.chunkSize
|
||||
this.chunkOverlap = resolved.chunkOverlap
|
||||
|
||||
if (options.separators && options.separators.length > 0) {
|
||||
this.separators = options.separators
|
||||
} else {
|
||||
const recipe = options.recipe ?? 'plain'
|
||||
this.separators = [...RECIPES[recipe]]
|
||||
}
|
||||
}
|
||||
|
||||
private splitRecursively(text: string, separatorIndex = 0): string[] {
|
||||
const tokenCount = estimateTokens(text)
|
||||
|
||||
if (tokenCount <= this.chunkSize) {
|
||||
return text.trim() ? [text] : []
|
||||
}
|
||||
|
||||
if (separatorIndex >= this.separators.length) {
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
return splitAtWordBoundaries(text, chunkSizeChars)
|
||||
}
|
||||
|
||||
const separator = this.separators[separatorIndex]
|
||||
|
||||
if (separator === '') {
|
||||
return this.splitRecursively(text, this.separators.length)
|
||||
}
|
||||
|
||||
const parts = text.split(separator).filter((part) => part.trim())
|
||||
|
||||
if (parts.length <= 1) {
|
||||
return this.splitRecursively(text, separatorIndex + 1)
|
||||
}
|
||||
|
||||
const chunks: string[] = []
|
||||
let currentChunk = ''
|
||||
|
||||
for (const part of parts) {
|
||||
const testChunk = currentChunk + (currentChunk ? separator : '') + part
|
||||
|
||||
if (estimateTokens(testChunk) <= this.chunkSize) {
|
||||
currentChunk = testChunk
|
||||
} else {
|
||||
if (currentChunk.trim()) {
|
||||
chunks.push(currentChunk.trim())
|
||||
}
|
||||
|
||||
if (estimateTokens(part) > this.chunkSize) {
|
||||
const subChunks = this.splitRecursively(part, separatorIndex + 1)
|
||||
for (const subChunk of subChunks) {
|
||||
chunks.push(subChunk)
|
||||
}
|
||||
currentChunk = ''
|
||||
} else {
|
||||
currentChunk = part
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentChunk.trim()) {
|
||||
chunks.push(currentChunk.trim())
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
if (!content?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
const cleaned = cleanText(content)
|
||||
let chunks = this.splitRecursively(cleaned)
|
||||
|
||||
if (this.chunkOverlap > 0) {
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
chunks = addOverlap(chunks, overlapChars)
|
||||
}
|
||||
|
||||
logger.info(`Chunked into ${chunks.length} recursive chunks`)
|
||||
return buildChunks(chunks, this.chunkOverlap)
|
||||
}
|
||||
}
|
||||
189
apps/sim/lib/chunkers/regex-chunker.test.ts
Normal file
189
apps/sim/lib/chunkers/regex-chunker.test.ts
Normal file
@@ -0,0 +1,189 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { loggerMock } from '@sim/testing'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { RegexChunker } from './regex-chunker'
|
||||
|
||||
vi.mock('@sim/logger', () => loggerMock)
|
||||
|
||||
describe('RegexChunker', () => {
|
||||
describe('empty and whitespace input', () => {
|
||||
it.concurrent('should return empty array for empty string', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n' })
|
||||
const chunks = await chunker.chunk('')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
|
||||
it.concurrent('should return empty array for whitespace-only input', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n' })
|
||||
const chunks = await chunker.chunk(' \n\n ')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('small content', () => {
|
||||
it.concurrent('should return single chunk when content fits in chunkSize', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
|
||||
const text = 'This is a short text.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
})
|
||||
})
|
||||
|
||||
describe('basic regex splitting', () => {
|
||||
it.concurrent('should split on double newlines with pattern \\n\\n', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20 })
|
||||
const text =
|
||||
'First paragraph content here.\n\nSecond paragraph content here.\n\nThird paragraph content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('custom pattern splitting', () => {
|
||||
it.concurrent('should split text at --- delimiters', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '---', chunkSize: 20 })
|
||||
const text =
|
||||
'Section one has enough content to fill a chunk on its own here.---Section two also has enough content to fill another chunk here.---Section three needs content too for splitting.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('segment merging', () => {
|
||||
it.concurrent('should merge small adjacent segments up to chunkSize', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
|
||||
const text = 'Short.\n\nAlso short.\n\nTiny.\n\nSmall too.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toContain('Short.')
|
||||
expect(chunks[0].text).toContain('Also short.')
|
||||
})
|
||||
})
|
||||
|
||||
describe('oversized segment fallback', () => {
|
||||
it.concurrent(
|
||||
'should sub-chunk segments larger than chunkSize via word boundaries',
|
||||
async () => {
|
||||
const chunker = new RegexChunker({ pattern: '---', chunkSize: 10 })
|
||||
const longSegment =
|
||||
'This is a very long segment with many words that exceeds the chunk size limit significantly. '
|
||||
const text = `${longSegment}---${longSegment}`
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(2)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
describe('no-match fallback', () => {
|
||||
it.concurrent(
|
||||
'should fall back to word-boundary splitting when regex matches nothing',
|
||||
async () => {
|
||||
const chunker = new RegexChunker({ pattern: '###SPLIT###', chunkSize: 10 })
|
||||
const text = 'This is a text with no matching delimiter anywhere in the content at all.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
describe('chunk size respected', () => {
|
||||
it.concurrent('should not exceed chunkSize tokens approximately', async () => {
|
||||
const chunkSize = 30
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize })
|
||||
const text =
|
||||
'Paragraph one with some words. '.repeat(5) +
|
||||
'\n\n' +
|
||||
'Paragraph two with more words. '.repeat(5) +
|
||||
'\n\n' +
|
||||
'Paragraph three continues here. '.repeat(5)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize + 10)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('overlap', () => {
|
||||
it.concurrent('should share content between chunks when chunkOverlap > 0', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20, chunkOverlap: 5 })
|
||||
const text =
|
||||
'First paragraph with enough content.\n\nSecond paragraph with more content.\n\nThird paragraph with even more.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
const firstChunkEnd = chunks[0].text.slice(-10)
|
||||
const secondChunkStart = chunks[1].text.slice(0, 20)
|
||||
expect(secondChunkStart.length).toBeGreaterThan(0)
|
||||
expect(chunks[1].text.length).toBeGreaterThan(0)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunk metadata', () => {
|
||||
it.concurrent('should include text, tokenCount, and metadata with indices', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
|
||||
const text = 'Hello world test content.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
|
||||
expect(chunks[0].metadata.startIndex).toBeDefined()
|
||||
expect(chunks[0].metadata.endIndex).toBeDefined()
|
||||
expect(chunks[0].metadata.startIndex).toBe(0)
|
||||
})
|
||||
|
||||
it.concurrent('should have non-negative indices across multiple chunks', async () => {
|
||||
const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20, chunkOverlap: 0 })
|
||||
const text = 'First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
|
||||
expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('invalid regex', () => {
|
||||
it.concurrent('should throw error for invalid regex pattern', async () => {
|
||||
expect(() => new RegexChunker({ pattern: '[invalid' })).toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
describe('empty pattern', () => {
|
||||
it.concurrent('should throw error for empty pattern', async () => {
|
||||
expect(() => new RegexChunker({ pattern: '' })).toThrow('Regex pattern is required')
|
||||
})
|
||||
})
|
||||
|
||||
describe('pattern too long', () => {
|
||||
it.concurrent('should throw error for pattern exceeding 500 characters', async () => {
|
||||
const longPattern = 'a'.repeat(501)
|
||||
expect(() => new RegexChunker({ pattern: longPattern })).toThrow(
|
||||
'Regex pattern exceeds maximum length of 500 characters'
|
||||
)
|
||||
})
|
||||
})
|
||||
|
||||
describe('ReDoS protection', () => {
|
||||
it.concurrent('should accept safe pattern \\n+', async () => {
|
||||
expect(() => new RegexChunker({ pattern: '\\n+' })).not.toThrow()
|
||||
})
|
||||
|
||||
it.concurrent('should accept safe pattern [,;]', async () => {
|
||||
expect(() => new RegexChunker({ pattern: '[,;]' })).not.toThrow()
|
||||
})
|
||||
})
|
||||
})
|
||||
144
apps/sim/lib/chunkers/regex-chunker.ts
Normal file
144
apps/sim/lib/chunkers/regex-chunker.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { Chunk, RegexChunkerOptions } from '@/lib/chunkers/types'
|
||||
import {
|
||||
addOverlap,
|
||||
buildChunks,
|
||||
cleanText,
|
||||
estimateTokens,
|
||||
resolveChunkerOptions,
|
||||
splitAtWordBoundaries,
|
||||
tokensToChars,
|
||||
} from '@/lib/chunkers/utils'
|
||||
|
||||
const logger = createLogger('RegexChunker')
|
||||
|
||||
const MAX_PATTERN_LENGTH = 500
|
||||
|
||||
export class RegexChunker {
|
||||
private readonly chunkSize: number
|
||||
private readonly chunkOverlap: number
|
||||
private readonly regex: RegExp
|
||||
|
||||
constructor(options: RegexChunkerOptions) {
|
||||
const resolved = resolveChunkerOptions(options)
|
||||
this.chunkSize = resolved.chunkSize
|
||||
this.chunkOverlap = resolved.chunkOverlap
|
||||
this.regex = this.compilePattern(options.pattern)
|
||||
}
|
||||
|
||||
private compilePattern(pattern: string): RegExp {
|
||||
if (!pattern) {
|
||||
throw new Error('Regex pattern is required')
|
||||
}
|
||||
|
||||
if (pattern.length > MAX_PATTERN_LENGTH) {
|
||||
throw new Error(`Regex pattern exceeds maximum length of ${MAX_PATTERN_LENGTH} characters`)
|
||||
}
|
||||
|
||||
try {
|
||||
const regex = new RegExp(pattern, 'g')
|
||||
|
||||
const testStrings = [
|
||||
'a'.repeat(10000),
|
||||
' '.repeat(10000),
|
||||
'a '.repeat(5000),
|
||||
'aB1 xY2\n'.repeat(1250),
|
||||
`${'a'.repeat(30)}!`,
|
||||
`${'a b '.repeat(25)}!`,
|
||||
]
|
||||
for (const testStr of testStrings) {
|
||||
regex.lastIndex = 0
|
||||
const start = Date.now()
|
||||
regex.test(testStr)
|
||||
const elapsed = Date.now() - start
|
||||
if (elapsed > 50) {
|
||||
throw new Error('Regex pattern appears to have catastrophic backtracking')
|
||||
}
|
||||
}
|
||||
|
||||
regex.lastIndex = 0
|
||||
return regex
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.message.includes('catastrophic')) {
|
||||
throw error
|
||||
}
|
||||
throw new Error(
|
||||
`Invalid regex pattern "${pattern}": ${error instanceof Error ? error.message : String(error)}`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
if (!content?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
const cleaned = cleanText(content)
|
||||
|
||||
if (estimateTokens(cleaned) <= this.chunkSize) {
|
||||
logger.info('Content fits in single chunk')
|
||||
return buildChunks([cleaned], 0)
|
||||
}
|
||||
|
||||
this.regex.lastIndex = 0
|
||||
const segments = cleaned.split(this.regex).filter((s) => s.trim().length > 0)
|
||||
|
||||
if (segments.length <= 1) {
|
||||
logger.warn(
|
||||
'Regex pattern did not produce any splits, falling back to word-boundary splitting'
|
||||
)
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
let chunks = splitAtWordBoundaries(cleaned, chunkSizeChars)
|
||||
if (this.chunkOverlap > 0) {
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
chunks = addOverlap(chunks, overlapChars)
|
||||
}
|
||||
return buildChunks(chunks, this.chunkOverlap)
|
||||
}
|
||||
|
||||
const merged = this.mergeSegments(segments)
|
||||
|
||||
let chunks = merged
|
||||
if (this.chunkOverlap > 0) {
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
chunks = addOverlap(chunks, overlapChars)
|
||||
}
|
||||
|
||||
logger.info(`Chunked into ${chunks.length} regex-based chunks`)
|
||||
return buildChunks(chunks, this.chunkOverlap)
|
||||
}
|
||||
|
||||
private mergeSegments(segments: string[]): string[] {
|
||||
const chunks: string[] = []
|
||||
let current = ''
|
||||
|
||||
for (const segment of segments) {
|
||||
const test = current ? `${current}\n${segment}` : segment
|
||||
|
||||
if (estimateTokens(test) <= this.chunkSize) {
|
||||
current = test
|
||||
} else {
|
||||
if (current.trim()) {
|
||||
chunks.push(current.trim())
|
||||
}
|
||||
|
||||
if (estimateTokens(segment) > this.chunkSize) {
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
const subChunks = splitAtWordBoundaries(segment, chunkSizeChars)
|
||||
for (const sub of subChunks) {
|
||||
chunks.push(sub)
|
||||
}
|
||||
current = ''
|
||||
} else {
|
||||
current = segment
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (current.trim()) {
|
||||
chunks.push(current.trim())
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
}
|
||||
286
apps/sim/lib/chunkers/sentence-chunker.test.ts
Normal file
286
apps/sim/lib/chunkers/sentence-chunker.test.ts
Normal file
@@ -0,0 +1,286 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { loggerMock } from '@sim/testing'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { SentenceChunker } from './sentence-chunker'
|
||||
|
||||
vi.mock('@sim/logger', () => loggerMock)
|
||||
|
||||
describe('SentenceChunker', () => {
|
||||
describe('empty and whitespace input', () => {
|
||||
it.concurrent('should return empty array for empty string', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk('')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
|
||||
it.concurrent('should return empty array for whitespace-only input', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk(' \n\n\t ')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
|
||||
it.concurrent('should return empty array for null-ish content', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk(undefined as unknown as string)
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('small content (single chunk)', () => {
|
||||
it.concurrent('should return single chunk when content fits within chunk size', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const text = 'This is a short sentence. Another short one.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
|
||||
})
|
||||
})
|
||||
|
||||
describe('sentence boundary splitting', () => {
|
||||
it.concurrent('should split text at sentence boundaries', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 20 })
|
||||
const text =
|
||||
'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
for (let i = 0; i < chunks.length - 1; i++) {
|
||||
const trimmed = chunks[i].text.trim()
|
||||
const lastChar = trimmed[trimmed.length - 1]
|
||||
expect(['.', '!', '?']).toContain(lastChar)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('abbreviation handling', () => {
|
||||
it.concurrent('should not split at common abbreviations', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 200 })
|
||||
const text = 'Mr. Smith went to Washington. He arrived on Jan. 5th.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toContain('Mr. Smith')
|
||||
expect(chunks[0].text).toContain('Jan. 5th')
|
||||
})
|
||||
|
||||
it.concurrent('should not split at Dr., Mrs., Ms., Prof., Jr., Sr., St.', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 500 })
|
||||
const text =
|
||||
'Dr. Jones and Mrs. Brown met Prof. Davis at St. Mary hospital. Jr. members joined Sr. staff in Feb. for a review.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('single capital initial handling', () => {
|
||||
it.concurrent('should not split at single capital letter initials', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 200 })
|
||||
const text = 'J. K. Rowling wrote books. They are popular.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toContain('J. K. Rowling')
|
||||
})
|
||||
})
|
||||
|
||||
describe('decimal handling', () => {
|
||||
it.concurrent('should not split at decimal numbers', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 20 })
|
||||
const text = 'The value is 3.14. That is pi.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('3.14')
|
||||
|
||||
const largeChunker = new SentenceChunker({ chunkSize: 200 })
|
||||
const largeChunks = await largeChunker.chunk(text)
|
||||
expect(largeChunks).toHaveLength(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('ellipsis handling', () => {
|
||||
it.concurrent('should not split at ellipsis', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 200 })
|
||||
const text = 'Wait for it... The answer is here. Done.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toContain('Wait for it...')
|
||||
})
|
||||
})
|
||||
|
||||
describe('exclamation and question marks', () => {
|
||||
it.concurrent('should split at exclamation and question marks', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 10 })
|
||||
const text = 'What is this? It is great! I agree.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('What is this?')
|
||||
expect(allText).toContain('It is great!')
|
||||
expect(allText).toContain('I agree.')
|
||||
})
|
||||
|
||||
it.concurrent('should treat ? and ! as sentence boundaries', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 15 })
|
||||
const text = 'What is this thing? It is really great! I strongly agree.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(1)
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('?')
|
||||
expect(allText).toContain('!')
|
||||
})
|
||||
})
|
||||
|
||||
describe('minSentencesPerChunk', () => {
|
||||
it.concurrent('should group at least minSentencesPerChunk sentences per chunk', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100, minSentencesPerChunk: 2 })
|
||||
const text =
|
||||
'First sentence. Second sentence. Third sentence. Fourth sentence. Fifth sentence.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
expect(chunks).toHaveLength(1)
|
||||
})
|
||||
|
||||
it.concurrent('should enforce min sentences even when token limit is reached', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 6, minSentencesPerChunk: 2 })
|
||||
const text = 'Short one. Another one. Third one here. Fourth one here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
const firstChunkSentences = chunks[0].text
|
||||
.split(/(?<=[.!?])\s+/)
|
||||
.filter((s) => s.trim().length > 0)
|
||||
expect(firstChunkSentences.length).toBeGreaterThanOrEqual(2)
|
||||
})
|
||||
})
|
||||
|
||||
describe('oversized sentence fallback', () => {
|
||||
it.concurrent(
|
||||
'should chunk a single very long sentence via word-boundary splitting',
|
||||
async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 10 })
|
||||
const longSentence = `${'word '.repeat(50).trim()}.`
|
||||
const chunks = await chunker.chunk(longSentence)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('word')
|
||||
}
|
||||
)
|
||||
|
||||
it.concurrent('should handle oversized sentence mixed with normal sentences', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 10 })
|
||||
const longSentence = `${'word '.repeat(50).trim()}.`
|
||||
const text = `Short sentence. ${longSentence} Another short one.`
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(2)
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('Short sentence.')
|
||||
expect(allText).toContain('Another short one.')
|
||||
})
|
||||
})
|
||||
|
||||
describe('sentence-level overlap', () => {
|
||||
it.concurrent('should include overlap from previous chunk when chunkOverlap > 0', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 15, chunkOverlap: 10 })
|
||||
const text =
|
||||
'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
expect(chunks[1].text.length).toBeGreaterThan(0)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should not add overlap when chunkOverlap is 0', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 15, chunkOverlap: 0 })
|
||||
const text = 'First sentence here. Second sentence here. Third sentence here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
const chunk1End = chunks[0].text.slice(-20)
|
||||
expect(chunks[1].text.startsWith(chunk1End)).toBe(false)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunk metadata', () => {
|
||||
it.concurrent('should include text, tokenCount, and metadata in each chunk', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const text = 'This is a test sentence. Another sentence follows.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0]).toHaveProperty('text')
|
||||
expect(chunks[0]).toHaveProperty('tokenCount')
|
||||
expect(chunks[0]).toHaveProperty('metadata')
|
||||
expect(chunks[0].metadata).toHaveProperty('startIndex')
|
||||
expect(chunks[0].metadata).toHaveProperty('endIndex')
|
||||
})
|
||||
|
||||
it.concurrent('should have startIndex of 0 for the first chunk', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 10 })
|
||||
const text = 'First sentence. Second sentence. Third sentence.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].metadata.startIndex).toBe(0)
|
||||
})
|
||||
|
||||
it.concurrent('should have non-negative indices for all chunks', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 10, chunkOverlap: 5 })
|
||||
const text =
|
||||
'First sentence here. Second sentence here. Third sentence here. Fourth sentence.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
|
||||
expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should have correct tokenCount based on text length', async () => {
|
||||
const chunker = new SentenceChunker({ chunkSize: 100 })
|
||||
const text = 'Hello world test.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
|
||||
})
|
||||
})
|
||||
|
||||
describe('respects chunk size', () => {
|
||||
it.concurrent('should produce chunks within approximate token limit', async () => {
|
||||
const chunkSize = 20
|
||||
const chunker = new SentenceChunker({ chunkSize })
|
||||
const text =
|
||||
'This is the first sentence. Here is the second one. And the third sentence follows. Then comes the fourth. Finally the fifth sentence.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize * 2)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should create more chunks with smaller chunk size', async () => {
|
||||
const text =
|
||||
'Sentence number one. Sentence number two. Sentence number three. Sentence number four. Sentence number five. Sentence number six.'
|
||||
|
||||
const largeChunker = new SentenceChunker({ chunkSize: 200 })
|
||||
const smallChunker = new SentenceChunker({ chunkSize: 10 })
|
||||
|
||||
const largeChunks = await largeChunker.chunk(text)
|
||||
const smallChunks = await smallChunker.chunk(text)
|
||||
|
||||
expect(smallChunks.length).toBeGreaterThan(largeChunks.length)
|
||||
})
|
||||
})
|
||||
})
|
||||
141
apps/sim/lib/chunkers/sentence-chunker.ts
Normal file
141
apps/sim/lib/chunkers/sentence-chunker.ts
Normal file
@@ -0,0 +1,141 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { Chunk, SentenceChunkerOptions } from '@/lib/chunkers/types'
|
||||
import {
|
||||
buildChunks,
|
||||
cleanText,
|
||||
estimateTokens,
|
||||
resolveChunkerOptions,
|
||||
splitAtWordBoundaries,
|
||||
tokensToChars,
|
||||
} from '@/lib/chunkers/utils'
|
||||
|
||||
const logger = createLogger('SentenceChunker')
|
||||
|
||||
/** Never splits mid-sentence unless a single sentence exceeds the limit. */
|
||||
export class SentenceChunker {
|
||||
private readonly chunkSize: number
|
||||
private readonly chunkOverlap: number
|
||||
private readonly minSentencesPerChunk: number
|
||||
|
||||
constructor(options: SentenceChunkerOptions = {}) {
|
||||
const resolved = resolveChunkerOptions(options)
|
||||
this.chunkSize = resolved.chunkSize
|
||||
this.chunkOverlap = resolved.chunkOverlap
|
||||
this.minSentencesPerChunk = options.minSentencesPerChunk ?? 1
|
||||
}
|
||||
|
||||
/** Splits on sentence boundaries while avoiding abbreviations, decimals, and ellipses. */
|
||||
private splitSentences(text: string): string[] {
|
||||
return text
|
||||
.split(
|
||||
/(?<!\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|St|Rev|Gen|Sgt|Capt|Lt|Col|Maj|No|Fig|Vol|Ch|vs|etc|Inc|Ltd|Corp|Co|approx|dept|est|govt|Ave|Blvd|Rd|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec|i\.e|e\.g)\.)(?<![A-Z]\.)(?<!\.\.)(?<!\d\.)(?<=[.!?])\s+/
|
||||
)
|
||||
.filter((s) => s.trim().length > 0)
|
||||
}
|
||||
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
if (!content?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
const cleaned = cleanText(content)
|
||||
const sentences = this.splitSentences(cleaned)
|
||||
|
||||
if (sentences.length === 0) {
|
||||
return []
|
||||
}
|
||||
|
||||
if (estimateTokens(cleaned) <= this.chunkSize) {
|
||||
logger.info('Content fits in single chunk')
|
||||
return buildChunks([cleaned], 0)
|
||||
}
|
||||
|
||||
const chunkSentenceGroups: string[][] = []
|
||||
let currentGroup: string[] = []
|
||||
let currentTokens = 0
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
|
||||
for (const sentence of sentences) {
|
||||
const sentenceTokens = estimateTokens(sentence)
|
||||
|
||||
if (sentenceTokens > this.chunkSize) {
|
||||
if (currentGroup.length > 0) {
|
||||
chunkSentenceGroups.push(currentGroup)
|
||||
currentGroup = []
|
||||
currentTokens = 0
|
||||
}
|
||||
const parts = splitAtWordBoundaries(sentence, chunkSizeChars)
|
||||
for (const part of parts) {
|
||||
chunkSentenceGroups.push([part])
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
const wouldExceed = currentTokens + sentenceTokens > this.chunkSize
|
||||
const hasMinSentences = currentGroup.length >= this.minSentencesPerChunk
|
||||
|
||||
if (wouldExceed && hasMinSentences) {
|
||||
chunkSentenceGroups.push(currentGroup)
|
||||
currentGroup = [sentence]
|
||||
currentTokens = sentenceTokens
|
||||
} else {
|
||||
currentGroup.push(sentence)
|
||||
currentTokens += sentenceTokens
|
||||
}
|
||||
}
|
||||
|
||||
if (currentGroup.length > 0) {
|
||||
chunkSentenceGroups.push(currentGroup)
|
||||
}
|
||||
|
||||
const rawChunks = this.applyOverlapFromGroups(chunkSentenceGroups)
|
||||
|
||||
logger.info(`Chunked into ${rawChunks.length} sentence-based chunks`)
|
||||
return buildChunks(rawChunks, this.chunkOverlap)
|
||||
}
|
||||
|
||||
/** Applies overlap at the sentence level using original groups to avoid re-splitting. */
|
||||
private applyOverlapFromGroups(groups: string[][]): string[] {
|
||||
if (this.chunkOverlap <= 0 || groups.length <= 1) {
|
||||
return groups.map((g) => g.join(' '))
|
||||
}
|
||||
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
const result: string[] = []
|
||||
|
||||
for (let i = 0; i < groups.length; i++) {
|
||||
if (i === 0) {
|
||||
result.push(groups[i].join(' '))
|
||||
continue
|
||||
}
|
||||
|
||||
const prevGroup = groups[i - 1]
|
||||
const overlapSentences: string[] = []
|
||||
let overlapLen = 0
|
||||
|
||||
for (let j = prevGroup.length - 1; j >= 0; j--) {
|
||||
if (overlapLen + prevGroup[j].length > overlapChars) break
|
||||
overlapSentences.unshift(prevGroup[j])
|
||||
overlapLen += prevGroup[j].length
|
||||
}
|
||||
|
||||
const currentText = groups[i].join(' ')
|
||||
if (overlapSentences.length > 0) {
|
||||
result.push(`${overlapSentences.join(' ')} ${currentText}`)
|
||||
} else {
|
||||
// No complete sentence fits — fall back to character-level overlap
|
||||
const prevText = prevGroup.join(' ')
|
||||
const tail = prevText.slice(-overlapChars)
|
||||
const wordMatch = tail.match(/^\s*\S/)
|
||||
const cleanTail = wordMatch ? tail.slice(tail.indexOf(wordMatch[0].trim())) : tail
|
||||
if (cleanTail.trim()) {
|
||||
result.push(`${cleanTail.trim()} ${currentText}`)
|
||||
} else {
|
||||
result.push(currentText)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
@@ -11,19 +11,16 @@ vi.mock('@sim/logger', () => loggerMock)
|
||||
describe('StructuredDataChunker', () => {
|
||||
describe('isStructuredData', () => {
|
||||
it('should detect CSV content with many columns', () => {
|
||||
// Detection requires >2 delimiters per line on average
|
||||
const csv = 'name,age,city,country\nAlice,30,NYC,USA\nBob,25,LA,USA'
|
||||
expect(StructuredDataChunker.isStructuredData(csv)).toBe(true)
|
||||
})
|
||||
|
||||
it('should detect TSV content with many columns', () => {
|
||||
// Detection requires >2 delimiters per line on average
|
||||
const tsv = 'name\tage\tcity\tcountry\nAlice\t30\tNYC\tUSA\nBob\t25\tLA\tUSA'
|
||||
expect(StructuredDataChunker.isStructuredData(tsv)).toBe(true)
|
||||
})
|
||||
|
||||
it('should detect pipe-delimited content with many columns', () => {
|
||||
// Detection requires >2 delimiters per line on average
|
||||
const piped = 'name|age|city|country\nAlice|30|NYC|USA\nBob|25|LA|USA'
|
||||
expect(StructuredDataChunker.isStructuredData(piped)).toBe(true)
|
||||
})
|
||||
@@ -64,7 +61,6 @@ describe('StructuredDataChunker', () => {
|
||||
|
||||
it('should handle inconsistent delimiter counts', () => {
|
||||
const inconsistent = 'name,age\nAlice,30,extra\nBob'
|
||||
// May or may not detect as structured depending on variance threshold
|
||||
const result = StructuredDataChunker.isStructuredData(inconsistent)
|
||||
expect(typeof result).toBe('boolean')
|
||||
})
|
||||
@@ -100,7 +96,7 @@ Bob,25`
|
||||
const chunks = await StructuredDataChunker.chunkStructuredData(csv)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
expect(chunks[0].text).toContain('Rows')
|
||||
expect(chunks[0].text).toContain('rows of data')
|
||||
})
|
||||
|
||||
it.concurrent('should include sheet name when provided', async () => {
|
||||
@@ -184,7 +180,6 @@ Alice,30`
|
||||
const csv = 'name,age,city'
|
||||
const chunks = await StructuredDataChunker.chunkStructuredData(csv)
|
||||
|
||||
// Only header, no data rows
|
||||
expect(chunks.length).toBeGreaterThanOrEqual(0)
|
||||
})
|
||||
|
||||
@@ -271,9 +266,8 @@ Alice,30`
|
||||
const chunks = await StructuredDataChunker.chunkStructuredData(csv, { chunkSize: 500 })
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
// Verify total rows are distributed across chunks
|
||||
const totalRowCount = chunks.reduce((sum, chunk) => {
|
||||
const match = chunk.text.match(/\[Rows (\d+) of data\]/)
|
||||
const match = chunk.text.match(/\[(\d+) rows of data\]/)
|
||||
return sum + (match ? Number.parseInt(match[1]) : 0)
|
||||
}, 0)
|
||||
expect(totalRowCount).toBeGreaterThan(0)
|
||||
@@ -319,9 +313,7 @@ Alice,30`
|
||||
it.concurrent('should not detect with fewer than 3 delimiters per line', async () => {
|
||||
const sparse = `a,b
|
||||
1,2`
|
||||
// Only 1 comma per line, below threshold of >2
|
||||
const result = StructuredDataChunker.isStructuredData(sparse)
|
||||
// May or may not pass depending on implementation threshold
|
||||
expect(typeof result).toBe('boolean')
|
||||
})
|
||||
})
|
||||
@@ -337,7 +329,6 @@ Alice,30`
|
||||
const chunks = await StructuredDataChunker.chunkStructuredData(csv, { chunkSize: 200 })
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
// Each chunk should contain header info
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text).toContain('Headers:')
|
||||
}
|
||||
|
||||
@@ -1,37 +1,22 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { Chunk, StructuredDataOptions } from '@/lib/chunkers/types'
|
||||
|
||||
const logger = createLogger('StructuredDataChunker')
|
||||
|
||||
/**
|
||||
* Default configuration for structured data chunking (CSV, XLSX, etc.)
|
||||
* These are used when user doesn't provide preferences
|
||||
*/
|
||||
const DEFAULT_CONFIG = {
|
||||
// Target chunk size in tokens
|
||||
TARGET_CHUNK_SIZE: 1024,
|
||||
MIN_CHUNK_SIZE: 100,
|
||||
MAX_CHUNK_SIZE: 4000,
|
||||
|
||||
// For spreadsheets, group rows together
|
||||
ROWS_PER_CHUNK: 100,
|
||||
MIN_ROWS_PER_CHUNK: 20,
|
||||
MAX_ROWS_PER_CHUNK: 500,
|
||||
|
||||
// For better embeddings quality
|
||||
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
|
||||
MAX_HEADER_SIZE: 200, // tokens
|
||||
/** Structured data is denser in tokens (~3 chars/token vs ~4 for prose) */
|
||||
function estimateStructuredTokens(text: string): number {
|
||||
if (!text?.trim()) return 0
|
||||
return Math.ceil(text.length / 3)
|
||||
}
|
||||
|
||||
/**
|
||||
* Smart chunker for structured data (CSV, XLSX) that preserves semantic meaning
|
||||
* Preserves headers in each chunk for better semantic context
|
||||
*/
|
||||
const logger = createLogger('StructuredDataChunker')
|
||||
|
||||
const DEFAULT_CONFIG = {
|
||||
TARGET_CHUNK_SIZE: 1024,
|
||||
MIN_ROWS_PER_CHUNK: 5,
|
||||
MAX_ROWS_PER_CHUNK: 500,
|
||||
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
|
||||
} as const
|
||||
|
||||
export class StructuredDataChunker {
|
||||
/**
|
||||
* Chunk structured data intelligently based on rows and semantic boundaries
|
||||
* Respects user's chunkSize preference when provided
|
||||
*/
|
||||
static async chunkStructuredData(
|
||||
content: string,
|
||||
options: StructuredDataOptions = {}
|
||||
@@ -43,15 +28,12 @@ export class StructuredDataChunker {
|
||||
return chunks
|
||||
}
|
||||
|
||||
// Use user's chunk size or fall back to default
|
||||
const targetChunkSize = options.chunkSize ?? DEFAULT_CONFIG.TARGET_CHUNK_SIZE
|
||||
|
||||
// Detect headers (first line or provided)
|
||||
const headerLine = options.headers?.join('\t') || lines[0]
|
||||
const dataStartIndex = options.headers ? 0 : 1
|
||||
|
||||
// Calculate optimal rows per chunk based on content and user's target size
|
||||
const estimatedTokensPerRow = StructuredDataChunker.estimateTokensPerRow(
|
||||
const estimatedTokensPerRow = StructuredDataChunker.estimateStructuredTokensPerRow(
|
||||
lines.slice(dataStartIndex, Math.min(10, lines.length))
|
||||
)
|
||||
const optimalRowsPerChunk = StructuredDataChunker.calculateOptimalRowsPerChunk(
|
||||
@@ -65,14 +47,13 @@ export class StructuredDataChunker {
|
||||
|
||||
let currentChunkRows: string[] = []
|
||||
let currentTokenEstimate = 0
|
||||
const headerTokens = StructuredDataChunker.estimateTokens(headerLine)
|
||||
const headerTokens = estimateStructuredTokens(headerLine)
|
||||
let chunkStartRow = dataStartIndex
|
||||
|
||||
for (let i = dataStartIndex; i < lines.length; i++) {
|
||||
const row = lines[i]
|
||||
const rowTokens = StructuredDataChunker.estimateTokens(row)
|
||||
const rowTokens = estimateStructuredTokens(row)
|
||||
|
||||
// Check if adding this row would exceed our target
|
||||
const projectedTokens =
|
||||
currentTokenEstimate +
|
||||
rowTokens +
|
||||
@@ -84,7 +65,6 @@ export class StructuredDataChunker {
|
||||
currentChunkRows.length >= optimalRowsPerChunk
|
||||
|
||||
if (shouldCreateChunk && currentChunkRows.length > 0) {
|
||||
// Create chunk with current rows
|
||||
const chunkContent = StructuredDataChunker.formatChunk(
|
||||
headerLine,
|
||||
currentChunkRows,
|
||||
@@ -92,7 +72,6 @@ export class StructuredDataChunker {
|
||||
)
|
||||
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, i - 1))
|
||||
|
||||
// Reset for next chunk
|
||||
currentChunkRows = []
|
||||
currentTokenEstimate = 0
|
||||
chunkStartRow = i
|
||||
@@ -102,7 +81,6 @@ export class StructuredDataChunker {
|
||||
currentTokenEstimate += rowTokens
|
||||
}
|
||||
|
||||
// Add remaining rows as final chunk
|
||||
if (currentChunkRows.length > 0) {
|
||||
const chunkContent = StructuredDataChunker.formatChunk(
|
||||
headerLine,
|
||||
@@ -117,41 +95,28 @@ export class StructuredDataChunker {
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Format a chunk with headers and context
|
||||
*/
|
||||
private static formatChunk(headerLine: string, rows: string[], sheetName?: string): string {
|
||||
let content = ''
|
||||
|
||||
// Add sheet name context if available
|
||||
if (sheetName) {
|
||||
content += `=== ${sheetName} ===\n\n`
|
||||
}
|
||||
|
||||
// Add headers for context
|
||||
if (DEFAULT_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK) {
|
||||
content += `Headers: ${headerLine}\n`
|
||||
content += `${'-'.repeat(Math.min(80, headerLine.length))}\n`
|
||||
}
|
||||
|
||||
// Add data rows
|
||||
content += rows.join('\n')
|
||||
|
||||
// Add row count for context
|
||||
content += `\n\n[Rows ${rows.length} of data]`
|
||||
content += `\n\n[${rows.length} rows of data]`
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a chunk object with actual row indices
|
||||
*/
|
||||
private static createChunk(content: string, startRow: number, endRow: number): Chunk {
|
||||
const tokenCount = StructuredDataChunker.estimateTokens(content)
|
||||
|
||||
return {
|
||||
text: content,
|
||||
tokenCount,
|
||||
tokenCount: estimateStructuredTokens(content),
|
||||
metadata: {
|
||||
startIndex: startRow,
|
||||
endIndex: endRow,
|
||||
@@ -159,30 +124,13 @@ export class StructuredDataChunker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate tokens in text (rough approximation)
|
||||
* For structured data with numbers, uses 1 token per 3 characters
|
||||
*/
|
||||
private static estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 3)
|
||||
}
|
||||
private static estimateStructuredTokensPerRow(sampleRows: string[]): number {
|
||||
if (sampleRows.length === 0) return 50
|
||||
|
||||
/**
|
||||
* Estimate average tokens per row from sample
|
||||
*/
|
||||
private static estimateTokensPerRow(sampleRows: string[]): number {
|
||||
if (sampleRows.length === 0) return 50 // default estimate
|
||||
|
||||
const totalTokens = sampleRows.reduce(
|
||||
(sum, row) => sum + StructuredDataChunker.estimateTokens(row),
|
||||
0
|
||||
)
|
||||
const totalTokens = sampleRows.reduce((sum, row) => sum + estimateStructuredTokens(row), 0)
|
||||
return Math.ceil(totalTokens / sampleRows.length)
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate optimal rows per chunk based on token estimates and target size
|
||||
*/
|
||||
private static calculateOptimalRowsPerChunk(
|
||||
tokensPerRow: number,
|
||||
targetChunkSize: number
|
||||
@@ -195,11 +143,7 @@ export class StructuredDataChunker {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if content appears to be structured data
|
||||
*/
|
||||
static isStructuredData(content: string, mimeType?: string): boolean {
|
||||
// Check mime type first
|
||||
if (mimeType) {
|
||||
const structuredMimeTypes = [
|
||||
'text/csv',
|
||||
@@ -212,20 +156,17 @@ export class StructuredDataChunker {
|
||||
}
|
||||
}
|
||||
|
||||
// Check content structure
|
||||
const lines = content.split('\n').slice(0, 10) // Check first 10 lines
|
||||
const lines = content.split('\n').slice(0, 10)
|
||||
if (lines.length < 2) return false
|
||||
|
||||
// Check for consistent delimiters (comma, tab, pipe)
|
||||
const delimiters = [',', '\t', '|']
|
||||
for (const delimiter of delimiters) {
|
||||
const counts = lines.map(
|
||||
(line) => (line.match(new RegExp(`\\${delimiter}`, 'g')) || []).length
|
||||
)
|
||||
const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
|
||||
const counts = lines.map((line) => (line.match(new RegExp(escaped, 'g')) || []).length)
|
||||
const avgCount = counts.reduce((a, b) => a + b, 0) / counts.length
|
||||
|
||||
// If most lines have similar delimiter counts, it's likely structured
|
||||
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= 2)) {
|
||||
const tolerance = Math.max(1, Math.ceil(avgCount * 0.2))
|
||||
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= tolerance)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should include token count in chunk metadata', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 100 })
|
||||
const text = 'Hello world' // ~3 tokens (11 chars / 4)
|
||||
const text = 'Hello world'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].tokenCount).toBe(3)
|
||||
@@ -201,7 +201,6 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should use default minCharactersPerChunk of 100', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 10 })
|
||||
// Text with 150+ characters to ensure chunks pass the 100 character minimum
|
||||
const text = 'This is a longer sentence with more content. '.repeat(5)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -266,7 +265,6 @@ describe('TextChunker', () => {
|
||||
describe('boundary conditions', () => {
|
||||
it.concurrent('should handle text exactly at chunk size boundary', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 10 })
|
||||
// 40 characters = 10 tokens exactly
|
||||
const text = 'A'.repeat(40)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -276,7 +274,6 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should handle text one token over chunk size', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 10 })
|
||||
// 44 characters = 11 tokens, just over limit
|
||||
const text = 'A'.repeat(44)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -300,7 +297,6 @@ describe('TextChunker', () => {
|
||||
})
|
||||
|
||||
it.concurrent('should clamp overlap to max 50% of chunk size', async () => {
|
||||
// Overlap of 60 should be clamped to 10 (50% of chunkSize 20)
|
||||
const chunker = new TextChunker({ chunkSize: 20, chunkOverlap: 60 })
|
||||
const text = 'First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
@@ -359,7 +355,6 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should handle combining diacritics', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 100 })
|
||||
// e + combining acute accent
|
||||
const text = 'cafe\u0301 resume\u0301 naive\u0308'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -368,7 +363,6 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should handle zero-width characters', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 100 })
|
||||
// Zero-width space, zero-width non-joiner, zero-width joiner
|
||||
const text = 'Hello\u200B\u200C\u200DWorld'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -391,14 +385,12 @@ describe('TextChunker', () => {
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
// Verify all content is preserved
|
||||
const totalChars = chunks.reduce((sum, c) => sum + c.text.length, 0)
|
||||
expect(totalChars).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it.concurrent('should handle 1MB of text', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 500 })
|
||||
// 1MB of text
|
||||
const text = 'Lorem ipsum dolor sit amet. '.repeat(40000)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
@@ -407,7 +399,6 @@ describe('TextChunker', () => {
|
||||
|
||||
it.concurrent('should handle very long single line', async () => {
|
||||
const chunker = new TextChunker({ chunkSize: 50 })
|
||||
// Single line with no natural break points
|
||||
const text = 'Word'.repeat(10000)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
|
||||
@@ -1,99 +1,61 @@
|
||||
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
|
||||
import {
|
||||
addOverlap,
|
||||
buildChunks,
|
||||
cleanText,
|
||||
estimateTokens,
|
||||
resolveChunkerOptions,
|
||||
splitAtWordBoundaries,
|
||||
tokensToChars,
|
||||
} from '@/lib/chunkers/utils'
|
||||
|
||||
/**
|
||||
* Lightweight text chunker optimized for RAG applications
|
||||
* Uses hierarchical splitting with simple character-based token estimation
|
||||
*
|
||||
* Parameters:
|
||||
* - chunkSize: Maximum chunk size in TOKENS (default: 1024)
|
||||
* - chunkOverlap: Overlap between chunks in TOKENS (default: 0)
|
||||
* - minCharactersPerChunk: Minimum characters to keep a chunk (default: 100)
|
||||
*/
|
||||
export class TextChunker {
|
||||
private readonly chunkSize: number // Max chunk size in tokens
|
||||
private readonly chunkOverlap: number // Overlap in tokens
|
||||
private readonly minCharactersPerChunk: number // Min characters per chunk
|
||||
private readonly chunkSize: number
|
||||
private readonly chunkOverlap: number
|
||||
|
||||
// Hierarchical separators ordered from largest to smallest semantic units
|
||||
private readonly separators = [
|
||||
'\n\n\n', // Document sections
|
||||
'\n---\n', // Markdown horizontal rules
|
||||
'\n***\n', // Markdown horizontal rules (alternative)
|
||||
'\n___\n', // Markdown horizontal rules (alternative)
|
||||
'\n# ', // Markdown H1 headings
|
||||
'\n## ', // Markdown H2 headings
|
||||
'\n### ', // Markdown H3 headings
|
||||
'\n#### ', // Markdown H4 headings
|
||||
'\n##### ', // Markdown H5 headings
|
||||
'\n###### ', // Markdown H6 headings
|
||||
'\n\n', // Paragraphs
|
||||
'\n', // Lines
|
||||
'. ', // Sentences
|
||||
'! ', // Exclamations
|
||||
'? ', // Questions
|
||||
'; ', // Semicolons
|
||||
', ', // Commas
|
||||
' ', // Words
|
||||
'\n---\n',
|
||||
'\n***\n',
|
||||
'\n___\n',
|
||||
'\n# ',
|
||||
'\n## ',
|
||||
'\n### ',
|
||||
'\n#### ',
|
||||
'\n##### ',
|
||||
'\n###### ',
|
||||
'\n\n',
|
||||
'\n',
|
||||
'. ',
|
||||
'! ',
|
||||
'? ',
|
||||
'; ',
|
||||
', ',
|
||||
' ',
|
||||
]
|
||||
|
||||
constructor(options: ChunkerOptions = {}) {
|
||||
this.chunkSize = options.chunkSize ?? 1024
|
||||
// Clamp overlap to prevent exceeding chunk size (max 50% of chunk size)
|
||||
const maxOverlap = Math.floor(this.chunkSize * 0.5)
|
||||
this.chunkOverlap = Math.min(options.chunkOverlap ?? 0, maxOverlap)
|
||||
this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100
|
||||
const resolved = resolveChunkerOptions(options)
|
||||
this.chunkSize = resolved.chunkSize
|
||||
this.chunkOverlap = resolved.chunkOverlap
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple token estimation using character count
|
||||
* 1 token ≈ 4 characters for English text
|
||||
*/
|
||||
private estimateTokens(text: string): number {
|
||||
if (!text?.trim()) return 0
|
||||
return Math.ceil(text.length / 4)
|
||||
}
|
||||
private splitRecursively(text: string, separatorIndex = 0): string[] {
|
||||
const tokenCount = estimateTokens(text)
|
||||
|
||||
/**
|
||||
* Convert tokens to approximate character count
|
||||
*/
|
||||
private tokensToChars(tokens: number): number {
|
||||
return tokens * 4
|
||||
}
|
||||
|
||||
/**
|
||||
* Split text recursively using hierarchical separators
|
||||
*/
|
||||
private async splitRecursively(text: string, separatorIndex = 0): Promise<string[]> {
|
||||
const tokenCount = this.estimateTokens(text)
|
||||
|
||||
// If chunk is small enough (within max token limit), return it
|
||||
// Keep chunks even if below minCharactersPerChunk to avoid data loss
|
||||
if (tokenCount <= this.chunkSize) {
|
||||
// Only filter out empty/whitespace-only text, not small chunks
|
||||
return text.trim() ? [text] : []
|
||||
}
|
||||
|
||||
// If we've run out of separators, force split by character count
|
||||
if (separatorIndex >= this.separators.length) {
|
||||
const chunks: string[] = []
|
||||
const targetLength = Math.ceil((text.length * this.chunkSize) / tokenCount)
|
||||
|
||||
for (let i = 0; i < text.length; i += targetLength) {
|
||||
const chunk = text.slice(i, i + targetLength).trim()
|
||||
// Keep all non-empty chunks to avoid data loss
|
||||
if (chunk) {
|
||||
chunks.push(chunk)
|
||||
}
|
||||
}
|
||||
return chunks
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
return splitAtWordBoundaries(text, chunkSizeChars)
|
||||
}
|
||||
|
||||
const separator = this.separators[separatorIndex]
|
||||
const parts = text.split(separator).filter((part) => part.trim())
|
||||
|
||||
// If no split occurred, try next separator
|
||||
if (parts.length <= 1) {
|
||||
return await this.splitRecursively(text, separatorIndex + 1)
|
||||
return this.splitRecursively(text, separatorIndex + 1)
|
||||
}
|
||||
|
||||
const chunks: string[] = []
|
||||
@@ -102,17 +64,15 @@ export class TextChunker {
|
||||
for (const part of parts) {
|
||||
const testChunk = currentChunk + (currentChunk ? separator : '') + part
|
||||
|
||||
if (this.estimateTokens(testChunk) <= this.chunkSize) {
|
||||
if (estimateTokens(testChunk) <= this.chunkSize) {
|
||||
currentChunk = testChunk
|
||||
} else {
|
||||
// Save current chunk - keep even if below minCharactersPerChunk to avoid data loss
|
||||
if (currentChunk.trim()) {
|
||||
chunks.push(currentChunk.trim())
|
||||
}
|
||||
|
||||
// If part itself is too large, split it further
|
||||
if (this.estimateTokens(part) > this.chunkSize) {
|
||||
const subChunks = await this.splitRecursively(part, separatorIndex + 1)
|
||||
if (estimateTokens(part) > this.chunkSize) {
|
||||
const subChunks = this.splitRecursively(part, separatorIndex + 1)
|
||||
for (const subChunk of subChunks) {
|
||||
chunks.push(subChunk)
|
||||
}
|
||||
@@ -123,7 +83,6 @@ export class TextChunker {
|
||||
}
|
||||
}
|
||||
|
||||
// Add final chunk if it exists - keep even if below minCharactersPerChunk to avoid data loss
|
||||
if (currentChunk.trim()) {
|
||||
chunks.push(currentChunk.trim())
|
||||
}
|
||||
@@ -131,111 +90,19 @@ export class TextChunker {
|
||||
return chunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Add overlap between chunks (overlap is in tokens, converted to characters)
|
||||
*/
|
||||
private addOverlap(chunks: string[]): string[] {
|
||||
if (this.chunkOverlap <= 0 || chunks.length <= 1) {
|
||||
return chunks
|
||||
}
|
||||
|
||||
const overlappedChunks: string[] = []
|
||||
// Convert token overlap to character overlap
|
||||
const overlapChars = this.tokensToChars(this.chunkOverlap)
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
let chunk = chunks[i]
|
||||
|
||||
// Add overlap from previous chunk (converted from tokens to characters)
|
||||
if (i > 0) {
|
||||
const prevChunk = chunks[i - 1]
|
||||
// Take the last N characters from previous chunk (based on token overlap)
|
||||
const overlapLength = Math.min(overlapChars, prevChunk.length)
|
||||
const overlapText = prevChunk.slice(-overlapLength)
|
||||
|
||||
// Try to start overlap at a word boundary for cleaner text
|
||||
const wordBoundaryMatch = overlapText.match(/^\s*\S/)
|
||||
const cleanOverlap = wordBoundaryMatch
|
||||
? overlapText.slice(overlapText.indexOf(wordBoundaryMatch[0].trim()))
|
||||
: overlapText
|
||||
|
||||
if (cleanOverlap.trim()) {
|
||||
chunk = `${cleanOverlap.trim()} ${chunk}`
|
||||
}
|
||||
}
|
||||
|
||||
overlappedChunks.push(chunk)
|
||||
}
|
||||
|
||||
return overlappedChunks
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean and normalize text
|
||||
*/
|
||||
private cleanText(text: string): string {
|
||||
return text
|
||||
.replace(/\r\n/g, '\n') // Normalize Windows line endings
|
||||
.replace(/\r/g, '\n') // Normalize old Mac line endings
|
||||
.replace(/\n{3,}/g, '\n\n') // Limit consecutive newlines
|
||||
.replace(/\t/g, ' ') // Convert tabs to spaces
|
||||
.replace(/ {2,}/g, ' ') // Collapse multiple spaces
|
||||
.trim()
|
||||
}
|
||||
|
||||
/**
|
||||
* Main chunking method
|
||||
*/
|
||||
async chunk(text: string): Promise<Chunk[]> {
|
||||
if (!text?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
// Clean the text
|
||||
const cleanedText = this.cleanText(text)
|
||||
const cleaned = cleanText(text)
|
||||
let chunks = this.splitRecursively(cleaned)
|
||||
|
||||
// Split into chunks
|
||||
let chunks = await this.splitRecursively(cleanedText)
|
||||
if (this.chunkOverlap > 0) {
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
chunks = addOverlap(chunks, overlapChars)
|
||||
}
|
||||
|
||||
// Add overlap if configured
|
||||
chunks = this.addOverlap(chunks)
|
||||
|
||||
// Convert to Chunk objects with metadata
|
||||
let previousEndIndex = 0
|
||||
const chunkPromises = chunks.map(async (chunkText, index) => {
|
||||
let startIndex: number
|
||||
let actualContentLength: number
|
||||
|
||||
if (index === 0 || this.chunkOverlap <= 0) {
|
||||
// First chunk or no overlap - start from previous end
|
||||
startIndex = previousEndIndex
|
||||
actualContentLength = chunkText.length
|
||||
} else {
|
||||
// Calculate overlap length in characters (converted from tokens)
|
||||
const prevChunk = chunks[index - 1]
|
||||
const overlapChars = this.tokensToChars(this.chunkOverlap)
|
||||
const overlapLength = Math.min(overlapChars, prevChunk.length, chunkText.length)
|
||||
|
||||
startIndex = previousEndIndex - overlapLength
|
||||
actualContentLength = chunkText.length - overlapLength
|
||||
}
|
||||
|
||||
const safeStart = Math.max(0, startIndex)
|
||||
const endIndexSafe = safeStart + Math.max(0, actualContentLength)
|
||||
|
||||
const chunk: Chunk = {
|
||||
text: chunkText,
|
||||
tokenCount: this.estimateTokens(chunkText),
|
||||
metadata: {
|
||||
startIndex: safeStart,
|
||||
endIndex: endIndexSafe,
|
||||
},
|
||||
}
|
||||
|
||||
previousEndIndex = endIndexSafe
|
||||
return chunk
|
||||
})
|
||||
|
||||
return await Promise.all(chunkPromises)
|
||||
return buildChunks(chunks, this.chunkOverlap)
|
||||
}
|
||||
}
|
||||
|
||||
239
apps/sim/lib/chunkers/token-chunker.test.ts
Normal file
239
apps/sim/lib/chunkers/token-chunker.test.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { loggerMock } from '@sim/testing'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { TokenChunker } from './token-chunker'
|
||||
|
||||
vi.mock('@sim/logger', () => loggerMock)
|
||||
|
||||
describe('TokenChunker', () => {
|
||||
describe('empty and whitespace input', () => {
|
||||
it.concurrent('should return empty array for empty string', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk('')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
|
||||
it.concurrent('should return empty array for whitespace-only input', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const chunks = await chunker.chunk(' \n\n\t ')
|
||||
expect(chunks).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
describe('small content', () => {
|
||||
it.concurrent('should return single chunk when content fits within chunkSize', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const text = 'This is a short text.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks).toHaveLength(1)
|
||||
expect(chunks[0].text).toBe(text)
|
||||
})
|
||||
})
|
||||
|
||||
describe('token count accuracy', () => {
|
||||
it.concurrent('should compute tokenCount as Math.ceil(text.length / 4)', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const text = 'Hello world'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
|
||||
})
|
||||
|
||||
it.concurrent('should compute tokenCount correctly for longer text', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const text = 'The quick brown fox jumps over the lazy dog.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0].tokenCount).toBe(11)
|
||||
})
|
||||
})
|
||||
|
||||
describe('chunk metadata', () => {
|
||||
it.concurrent(
|
||||
'should include text, tokenCount, and metadata with startIndex and endIndex',
|
||||
async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 100 })
|
||||
const text = 'Some test content here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks[0]).toHaveProperty('text')
|
||||
expect(chunks[0]).toHaveProperty('tokenCount')
|
||||
expect(chunks[0].metadata).toHaveProperty('startIndex')
|
||||
expect(chunks[0].metadata).toHaveProperty('endIndex')
|
||||
expect(chunks[0].metadata.startIndex).toBe(0)
|
||||
expect(chunks[0].metadata.endIndex).toBeGreaterThan(0)
|
||||
}
|
||||
)
|
||||
|
||||
it.concurrent('should have non-negative indices across all chunks', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
|
||||
const text = 'First part of the text. Second part of the text. Third part of the text.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
|
||||
expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('respects chunk size', () => {
|
||||
it.concurrent('should not produce chunks exceeding chunkSize tokens', async () => {
|
||||
const chunkSize = 50
|
||||
const chunker = new TokenChunker({ chunkSize })
|
||||
const text = 'This is a test sentence with several words. '.repeat(30)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('splitting behavior', () => {
|
||||
it.concurrent('should produce multiple chunks for long text', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 50 })
|
||||
const text = 'This is a test sentence. '.repeat(30)
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
})
|
||||
|
||||
it.concurrent('should create more chunks with smaller chunkSize', async () => {
|
||||
const text = 'This is a test sentence with content. '.repeat(20)
|
||||
|
||||
const largeChunker = new TokenChunker({ chunkSize: 200 })
|
||||
const smallChunker = new TokenChunker({ chunkSize: 50 })
|
||||
|
||||
const largeChunks = await largeChunker.chunk(text)
|
||||
const smallChunks = await smallChunker.chunk(text)
|
||||
|
||||
expect(smallChunks.length).toBeGreaterThan(largeChunks.length)
|
||||
})
|
||||
})
|
||||
|
||||
describe('sliding window overlap', () => {
|
||||
it.concurrent('should produce more chunks with overlap than without', async () => {
|
||||
const text =
|
||||
'Alpha bravo charlie delta echo foxtrot golf hotel india juliet kilo lima mike november oscar papa quebec romeo sierra tango uniform victor whiskey xray yankee zulu. '.repeat(
|
||||
5
|
||||
)
|
||||
const withOverlap = new TokenChunker({ chunkSize: 30, chunkOverlap: 10 })
|
||||
const withoutOverlap = new TokenChunker({ chunkSize: 30, chunkOverlap: 0 })
|
||||
|
||||
const overlapChunks = await withOverlap.chunk(text)
|
||||
const noOverlapChunks = await withoutOverlap.chunk(text)
|
||||
|
||||
expect(overlapChunks.length).toBeGreaterThan(noOverlapChunks.length)
|
||||
})
|
||||
|
||||
it.concurrent('should not share text between chunks when chunkOverlap is 0', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
|
||||
const text =
|
||||
'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
if (chunks.length > 1) {
|
||||
const firstChunkEnd = chunks[0].text.slice(-10)
|
||||
expect(chunks[1].text.startsWith(firstChunkEnd)).toBe(false)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('overlap clamped to 50%', () => {
|
||||
it.concurrent('should still work when overlap is set >= chunkSize', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 100 })
|
||||
const text =
|
||||
'First paragraph content here. Second paragraph content here. Third paragraph here.'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it.concurrent('should clamp overlap to 50% of chunkSize', async () => {
|
||||
const chunkerClamped = new TokenChunker({ chunkSize: 20, chunkOverlap: 100 })
|
||||
const chunkerHalf = new TokenChunker({ chunkSize: 20, chunkOverlap: 10 })
|
||||
const text =
|
||||
'Word one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty. '.repeat(
|
||||
5
|
||||
)
|
||||
|
||||
const clampedChunks = await chunkerClamped.chunk(text)
|
||||
const halfChunks = await chunkerHalf.chunk(text)
|
||||
|
||||
expect(clampedChunks.length).toBe(halfChunks.length)
|
||||
})
|
||||
})
|
||||
|
||||
describe('word boundary snapping', () => {
|
||||
it.concurrent('should produce trimmed chunks without leading or trailing spaces', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 20 })
|
||||
const text =
|
||||
'the cat sat on the mat and the dog ran fast over the big red fox and then the bird flew high up in the clear blue sky above the green hill'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1)
|
||||
for (const chunk of chunks) {
|
||||
const trimmed = chunk.text.trim()
|
||||
expect(trimmed).toBe(chunk.text)
|
||||
expect(trimmed.length).toBeGreaterThan(0)
|
||||
}
|
||||
})
|
||||
|
||||
it.concurrent('should produce chunks that start and end on word boundaries', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 15 })
|
||||
const text =
|
||||
'The quick brown fox jumps over the lazy dog and then runs away quickly into the forest'
|
||||
const chunks = await chunker.chunk(text)
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const trimmed = chunk.text.trim()
|
||||
expect(trimmed).toBe(chunk.text)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe('consistent coverage', () => {
|
||||
it.concurrent('should represent all content from original text across chunks', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 30, chunkOverlap: 0 })
|
||||
const originalText =
|
||||
'The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.'
|
||||
const chunks = await chunker.chunk(originalText)
|
||||
|
||||
const allText = chunks.map((c) => c.text).join(' ')
|
||||
expect(allText).toContain('quick')
|
||||
expect(allText).toContain('fox')
|
||||
expect(allText).toContain('lazy')
|
||||
expect(allText).toContain('dog')
|
||||
expect(allText).toContain('liquor')
|
||||
expect(allText).toContain('jugs')
|
||||
})
|
||||
|
||||
it.concurrent('should preserve all words across chunks for longer text', async () => {
|
||||
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
|
||||
const words = [
|
||||
'alpha',
|
||||
'bravo',
|
||||
'charlie',
|
||||
'delta',
|
||||
'echo',
|
||||
'foxtrot',
|
||||
'golf',
|
||||
'hotel',
|
||||
'india',
|
||||
'juliet',
|
||||
]
|
||||
const originalText = `${words.join(' is a word and ')} is also a word.`
|
||||
const chunks = await chunker.chunk(originalText)
|
||||
|
||||
const combined = chunks.map((c) => c.text).join(' ')
|
||||
for (const word of words) {
|
||||
expect(combined).toContain(word)
|
||||
}
|
||||
})
|
||||
})
|
||||
})
|
||||
54
apps/sim/lib/chunkers/token-chunker.ts
Normal file
54
apps/sim/lib/chunkers/token-chunker.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
|
||||
import {
|
||||
buildChunks,
|
||||
cleanText,
|
||||
estimateTokens,
|
||||
resolveChunkerOptions,
|
||||
splitAtWordBoundaries,
|
||||
tokensToChars,
|
||||
} from '@/lib/chunkers/utils'
|
||||
|
||||
const logger = createLogger('TokenChunker')
|
||||
|
||||
export class TokenChunker {
|
||||
private readonly chunkSize: number
|
||||
private readonly chunkOverlap: number
|
||||
private readonly minCharactersPerChunk: number
|
||||
|
||||
constructor(options: ChunkerOptions = {}) {
|
||||
const resolved = resolveChunkerOptions(options)
|
||||
this.chunkSize = resolved.chunkSize
|
||||
this.chunkOverlap = resolved.chunkOverlap
|
||||
this.minCharactersPerChunk = resolved.minCharactersPerChunk
|
||||
}
|
||||
|
||||
async chunk(content: string): Promise<Chunk[]> {
|
||||
if (!content?.trim()) {
|
||||
return []
|
||||
}
|
||||
|
||||
const cleaned = cleanText(content)
|
||||
|
||||
if (estimateTokens(cleaned) <= this.chunkSize) {
|
||||
logger.info('Content fits in single chunk')
|
||||
return buildChunks([cleaned], 0)
|
||||
}
|
||||
|
||||
const chunkSizeChars = tokensToChars(this.chunkSize)
|
||||
const overlapChars = tokensToChars(this.chunkOverlap)
|
||||
const stepChars = this.chunkOverlap > 0 ? chunkSizeChars - overlapChars : undefined
|
||||
|
||||
const rawChunks = splitAtWordBoundaries(cleaned, chunkSizeChars, stepChars)
|
||||
|
||||
const filtered =
|
||||
rawChunks.length > 1
|
||||
? rawChunks.filter((c) => c.length >= this.minCharactersPerChunk)
|
||||
: rawChunks
|
||||
|
||||
const chunks = filtered.length > 0 ? filtered : rawChunks
|
||||
|
||||
logger.info(`Chunked into ${chunks.length} token-based chunks`)
|
||||
return buildChunks(chunks, this.chunkOverlap)
|
||||
}
|
||||
}
|
||||
@@ -1,17 +1,11 @@
|
||||
/**
|
||||
* Options for configuring text chunkers
|
||||
*
|
||||
* Units:
|
||||
* - chunkSize: Maximum chunk size in TOKENS (1 token ≈ 4 characters)
|
||||
* - chunkOverlap: Overlap between chunks in TOKENS
|
||||
* - minCharactersPerChunk: Minimum chunk size in CHARACTERS (filters tiny fragments)
|
||||
* - chunkSize/chunkOverlap: TOKENS (1 token ≈ 4 characters)
|
||||
* - minCharactersPerChunk: CHARACTERS
|
||||
*/
|
||||
export interface ChunkerOptions {
|
||||
/** Maximum chunk size in tokens (default: 1024) */
|
||||
chunkSize?: number
|
||||
/** Overlap between chunks in tokens (default: 0) */
|
||||
chunkOverlap?: number
|
||||
/** Minimum chunk size in characters to avoid tiny fragments (default: 100) */
|
||||
minCharactersPerChunk?: number
|
||||
}
|
||||
|
||||
@@ -51,3 +45,26 @@ export interface DocChunk {
|
||||
export interface DocsChunkerOptions extends ChunkerOptions {
|
||||
baseUrl?: string
|
||||
}
|
||||
|
||||
export type ChunkingStrategy = 'auto' | 'text' | 'regex' | 'recursive' | 'sentence' | 'token'
|
||||
|
||||
export type RecursiveRecipe = 'plain' | 'markdown' | 'code'
|
||||
|
||||
export interface StrategyOptions {
|
||||
pattern?: string
|
||||
separators?: string[]
|
||||
recipe?: RecursiveRecipe
|
||||
}
|
||||
|
||||
export interface SentenceChunkerOptions extends ChunkerOptions {
|
||||
minSentencesPerChunk?: number
|
||||
}
|
||||
|
||||
export interface RecursiveChunkerOptions extends ChunkerOptions {
|
||||
separators?: string[]
|
||||
recipe?: RecursiveRecipe
|
||||
}
|
||||
|
||||
export interface RegexChunkerOptions extends ChunkerOptions {
|
||||
pattern: string
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user