v0.6.36: new chunkers, sockets state machine, google sheets/drive/calendar triggers, docs updates, integrations/models pages improvements

This commit is contained in:
Waleed
2026-04-10 21:58:16 -07:00
committed by GitHub
135 changed files with 9246 additions and 2583 deletions

View File

@@ -1,17 +1,17 @@
---
description: Create webhook triggers for a Sim integration using the generic trigger builder
description: Create webhook or polling triggers for a Sim integration
argument-hint: <service-name>
---
# Add Trigger
You are an expert at creating webhook triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, and how triggers connect to blocks.
You are an expert at creating webhook and polling triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, polling infrastructure, and how triggers connect to blocks.
## Your Task
1. Research what webhook events the service supports
2. Create the trigger files using the generic builder
3. Create a provider handler if custom auth, formatting, or subscriptions are needed
1. Research what webhook events the service supports — if the service lacks reliable webhooks, use polling
2. Create the trigger files using the generic builder (webhook) or manual config (polling)
3. Create a provider handler (webhook) or polling handler (polling)
4. Register triggers and connect them to the block
## Directory Structure
@@ -146,23 +146,37 @@ export const TRIGGER_REGISTRY: TriggerRegistry = {
### Block file (`apps/sim/blocks/blocks/{service}.ts`)
Wire triggers into the block so the trigger UI appears and `generate-docs.ts` discovers them. Two changes are needed:
1. **Spread trigger subBlocks** at the end of the block's `subBlocks` array
2. **Add `triggers` property** after `outputs` with `enabled: true` and `available: [...]`
```typescript
import { getTrigger } from '@/triggers'
export const {Service}Block: BlockConfig = {
// ...
triggers: {
enabled: true,
available: ['{service}_event_a', '{service}_event_b'],
},
subBlocks: [
// Regular tool subBlocks first...
...getTrigger('{service}_event_a').subBlocks,
...getTrigger('{service}_event_b').subBlocks,
],
// ... tools, inputs, outputs ...
triggers: {
enabled: true,
available: ['{service}_event_a', '{service}_event_b'],
},
}
```
**Versioned blocks (V1 + V2):** Many integrations have a hidden V1 block and a visible V2 block. Where you add the trigger wiring depends on how V2 inherits from V1:
- **V2 uses `...V1Block` spread** (e.g., Google Calendar): Add trigger to V1 — V2 inherits both `subBlocks` and `triggers` automatically.
- **V2 defines its own `subBlocks`** (e.g., Google Sheets): Add trigger to V2 (the visible block). V1 is hidden and doesn't need it.
- **Single block, no V2** (e.g., Google Drive): Add trigger directly.
`generate-docs.ts` deduplicates by base type (first match wins). If V1 is processed first without triggers, the V2 triggers won't appear in `integrations.json`. Always verify by checking the output after running the script.
## Provider Handler
All provider-specific webhook logic lives in a single handler file: `apps/sim/lib/webhooks/providers/{service}.ts`.
@@ -327,6 +341,122 @@ export function buildOutputs(): Record<string, TriggerOutput> {
}
```
## Polling Triggers
Use polling when the service lacks reliable webhooks (e.g., Google Sheets, Google Drive, Google Calendar, Gmail, RSS, IMAP). Polling triggers do NOT use `buildTriggerSubBlocks` — they define subBlocks manually.
### Directory Structure
```
apps/sim/triggers/{service}/
├── index.ts # Barrel export
└── poller.ts # TriggerConfig with polling: true
apps/sim/lib/webhooks/polling/
└── {service}.ts # PollingProviderHandler implementation
```
### Polling Handler (`apps/sim/lib/webhooks/polling/{service}.ts`)
```typescript
import { pollingIdempotency } from '@/lib/core/idempotency/service'
import type { PollingProviderHandler, PollWebhookContext } from '@/lib/webhooks/polling/types'
import { markWebhookFailed, markWebhookSuccess, resolveOAuthCredential, updateWebhookProviderConfig } from '@/lib/webhooks/polling/utils'
import { processPolledWebhookEvent } from '@/lib/webhooks/processor'
export const {service}PollingHandler: PollingProviderHandler = {
provider: '{service}',
label: '{Service}',
async pollWebhook(ctx: PollWebhookContext): Promise<'success' | 'failure'> {
const { webhookData, workflowData, requestId, logger } = ctx
const webhookId = webhookData.id
try {
// For OAuth services:
const accessToken = await resolveOAuthCredential(webhookData, '{service}', requestId, logger)
const config = webhookData.providerConfig as unknown as {Service}WebhookConfig
// First poll: seed state, emit nothing
if (!config.lastCheckedTimestamp) {
await updateWebhookProviderConfig(webhookId, { lastCheckedTimestamp: new Date().toISOString() }, logger)
await markWebhookSuccess(webhookId, logger)
return 'success'
}
// Fetch changes since last poll, process with idempotency
// ...
await markWebhookSuccess(webhookId, logger)
return 'success'
} catch (error) {
logger.error(`[${requestId}] Error processing {service} webhook ${webhookId}:`, error)
await markWebhookFailed(webhookId, logger)
return 'failure'
}
},
}
```
**Key patterns:**
- First poll seeds state and emits nothing (avoids flooding with existing data)
- Use `pollingIdempotency.executeWithIdempotency(provider, key, callback)` for dedup
- Use `processPolledWebhookEvent(webhookData, workflowData, payload, requestId)` to fire the workflow
- Use `updateWebhookProviderConfig(webhookId, partialConfig, logger)` for read-merge-write on state
- Use the latest server-side timestamp from API responses (not wall clock) to avoid clock skew
### Trigger Config (`apps/sim/triggers/{service}/poller.ts`)
```typescript
import { {Service}Icon } from '@/components/icons'
import type { TriggerConfig } from '@/triggers/types'
export const {service}PollingTrigger: TriggerConfig = {
id: '{service}_poller',
name: '{Service} Trigger',
provider: '{service}',
description: 'Triggers when ...',
version: '1.0.0',
icon: {Service}Icon,
polling: true, // REQUIRED — routes to polling infrastructure
subBlocks: [
{ id: 'triggerCredentials', type: 'oauth-input', title: 'Credentials', serviceId: '{service}', requiredScopes: [], required: true, mode: 'trigger', supportsCredentialSets: true },
// ... service-specific config fields (dropdowns, inputs, switches) ...
{ id: 'triggerSave', type: 'trigger-save', title: '', hideFromPreview: true, mode: 'trigger', triggerId: '{service}_poller' },
{ id: 'triggerInstructions', type: 'text', title: 'Setup Instructions', hideFromPreview: true, mode: 'trigger', defaultValue: '...' },
],
outputs: {
// Must match the payload shape from processPolledWebhookEvent
},
}
```
### Registration (3 places)
1. **`apps/sim/triggers/constants.ts`** — add provider to `POLLING_PROVIDERS` Set
2. **`apps/sim/lib/webhooks/polling/registry.ts`** — import handler, add to `POLLING_HANDLERS`
3. **`apps/sim/triggers/registry.ts`** — import trigger config, add to `TRIGGER_REGISTRY`
### Helm Cron Job
Add to `helm/sim/values.yaml` under the existing polling cron jobs:
```yaml
{service}WebhookPoll:
schedule: "*/1 * * * *"
concurrencyPolicy: Forbid
url: "http://sim:3000/api/webhooks/poll/{service}"
```
### Reference Implementations
- Simple: `apps/sim/lib/webhooks/polling/rss.ts` + `apps/sim/triggers/rss/poller.ts`
- Complex (OAuth, attachments): `apps/sim/lib/webhooks/polling/gmail.ts` + `apps/sim/triggers/gmail/poller.ts`
- Cursor-based (changes API): `apps/sim/lib/webhooks/polling/google-drive.ts`
- Timestamp-based: `apps/sim/lib/webhooks/polling/google-calendar.ts`
## Checklist
### Trigger Definition
@@ -352,7 +482,18 @@ export function buildOutputs(): Record<string, TriggerOutput> {
- [ ] NO changes to `route.ts`, `provider-subscriptions.ts`, or `deploy.ts`
- [ ] API key field uses `password: true`
### Polling Trigger (if applicable)
- [ ] Handler implements `PollingProviderHandler` at `lib/webhooks/polling/{service}.ts`
- [ ] Trigger config has `polling: true` and defines subBlocks manually (no `buildTriggerSubBlocks`)
- [ ] Provider string matches across: trigger config, handler, `POLLING_PROVIDERS`, polling registry
- [ ] `triggerSave` subBlock `triggerId` matches trigger config `id`
- [ ] First poll seeds state and emits nothing
- [ ] Added provider to `POLLING_PROVIDERS` in `triggers/constants.ts`
- [ ] Added handler to `POLLING_HANDLERS` in `lib/webhooks/polling/registry.ts`
- [ ] Added cron job to `helm/sim/values.yaml`
- [ ] Payload shape matches trigger `outputs` schema
### Testing
- [ ] `bun run type-check` passes
- [ ] Manually verify `formatInput` output keys match trigger `outputs` keys
- [ ] Manually verify output keys match trigger `outputs` keys
- [ ] Trigger UI shows correctly in the block

View File

@@ -1,12 +1,12 @@
# Add Trigger
You are an expert at creating webhook triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, and how triggers connect to blocks.
You are an expert at creating webhook and polling triggers for Sim. You understand the trigger system, the generic `buildTriggerSubBlocks` helper, polling infrastructure, and how triggers connect to blocks.
## Your Task
1. Research what webhook events the service supports
2. Create the trigger files using the generic builder
3. Create a provider handler if custom auth, formatting, or subscriptions are needed
1. Research what webhook events the service supports — if the service lacks reliable webhooks, use polling
2. Create the trigger files using the generic builder (webhook) or manual config (polling)
3. Create a provider handler (webhook) or polling handler (polling)
4. Register triggers and connect them to the block
## Directory Structure
@@ -141,23 +141,37 @@ export const TRIGGER_REGISTRY: TriggerRegistry = {
### Block file (`apps/sim/blocks/blocks/{service}.ts`)
Wire triggers into the block so the trigger UI appears and `generate-docs.ts` discovers them. Two changes are needed:
1. **Spread trigger subBlocks** at the end of the block's `subBlocks` array
2. **Add `triggers` property** after `outputs` with `enabled: true` and `available: [...]`
```typescript
import { getTrigger } from '@/triggers'
export const {Service}Block: BlockConfig = {
// ...
triggers: {
enabled: true,
available: ['{service}_event_a', '{service}_event_b'],
},
subBlocks: [
// Regular tool subBlocks first...
...getTrigger('{service}_event_a').subBlocks,
...getTrigger('{service}_event_b').subBlocks,
],
// ... tools, inputs, outputs ...
triggers: {
enabled: true,
available: ['{service}_event_a', '{service}_event_b'],
},
}
```
**Versioned blocks (V1 + V2):** Many integrations have a hidden V1 block and a visible V2 block. Where you add the trigger wiring depends on how V2 inherits from V1:
- **V2 uses `...V1Block` spread** (e.g., Google Calendar): Add trigger to V1 — V2 inherits both `subBlocks` and `triggers` automatically.
- **V2 defines its own `subBlocks`** (e.g., Google Sheets): Add trigger to V2 (the visible block). V1 is hidden and doesn't need it.
- **Single block, no V2** (e.g., Google Drive): Add trigger directly.
`generate-docs.ts` deduplicates by base type (first match wins). If V1 is processed first without triggers, the V2 triggers won't appear in `integrations.json`. Always verify by checking the output after running the script.
## Provider Handler
All provider-specific webhook logic lives in a single handler file: `apps/sim/lib/webhooks/providers/{service}.ts`.
@@ -322,6 +336,122 @@ export function buildOutputs(): Record<string, TriggerOutput> {
}
```
## Polling Triggers
Use polling when the service lacks reliable webhooks (e.g., Google Sheets, Google Drive, Google Calendar, Gmail, RSS, IMAP). Polling triggers do NOT use `buildTriggerSubBlocks` — they define subBlocks manually.
### Directory Structure
```
apps/sim/triggers/{service}/
├── index.ts # Barrel export
└── poller.ts # TriggerConfig with polling: true
apps/sim/lib/webhooks/polling/
└── {service}.ts # PollingProviderHandler implementation
```
### Polling Handler (`apps/sim/lib/webhooks/polling/{service}.ts`)
```typescript
import { pollingIdempotency } from '@/lib/core/idempotency/service'
import type { PollingProviderHandler, PollWebhookContext } from '@/lib/webhooks/polling/types'
import { markWebhookFailed, markWebhookSuccess, resolveOAuthCredential, updateWebhookProviderConfig } from '@/lib/webhooks/polling/utils'
import { processPolledWebhookEvent } from '@/lib/webhooks/processor'
export const {service}PollingHandler: PollingProviderHandler = {
provider: '{service}',
label: '{Service}',
async pollWebhook(ctx: PollWebhookContext): Promise<'success' | 'failure'> {
const { webhookData, workflowData, requestId, logger } = ctx
const webhookId = webhookData.id
try {
// For OAuth services:
const accessToken = await resolveOAuthCredential(webhookData, '{service}', requestId, logger)
const config = webhookData.providerConfig as unknown as {Service}WebhookConfig
// First poll: seed state, emit nothing
if (!config.lastCheckedTimestamp) {
await updateWebhookProviderConfig(webhookId, { lastCheckedTimestamp: new Date().toISOString() }, logger)
await markWebhookSuccess(webhookId, logger)
return 'success'
}
// Fetch changes since last poll, process with idempotency
// ...
await markWebhookSuccess(webhookId, logger)
return 'success'
} catch (error) {
logger.error(`[${requestId}] Error processing {service} webhook ${webhookId}:`, error)
await markWebhookFailed(webhookId, logger)
return 'failure'
}
},
}
```
**Key patterns:**
- First poll seeds state and emits nothing (avoids flooding with existing data)
- Use `pollingIdempotency.executeWithIdempotency(provider, key, callback)` for dedup
- Use `processPolledWebhookEvent(webhookData, workflowData, payload, requestId)` to fire the workflow
- Use `updateWebhookProviderConfig(webhookId, partialConfig, logger)` for read-merge-write on state
- Use the latest server-side timestamp from API responses (not wall clock) to avoid clock skew
### Trigger Config (`apps/sim/triggers/{service}/poller.ts`)
```typescript
import { {Service}Icon } from '@/components/icons'
import type { TriggerConfig } from '@/triggers/types'
export const {service}PollingTrigger: TriggerConfig = {
id: '{service}_poller',
name: '{Service} Trigger',
provider: '{service}',
description: 'Triggers when ...',
version: '1.0.0',
icon: {Service}Icon,
polling: true, // REQUIRED — routes to polling infrastructure
subBlocks: [
{ id: 'triggerCredentials', type: 'oauth-input', title: 'Credentials', serviceId: '{service}', requiredScopes: [], required: true, mode: 'trigger', supportsCredentialSets: true },
// ... service-specific config fields (dropdowns, inputs, switches) ...
{ id: 'triggerSave', type: 'trigger-save', title: '', hideFromPreview: true, mode: 'trigger', triggerId: '{service}_poller' },
{ id: 'triggerInstructions', type: 'text', title: 'Setup Instructions', hideFromPreview: true, mode: 'trigger', defaultValue: '...' },
],
outputs: {
// Must match the payload shape from processPolledWebhookEvent
},
}
```
### Registration (3 places)
1. **`apps/sim/triggers/constants.ts`** — add provider to `POLLING_PROVIDERS` Set
2. **`apps/sim/lib/webhooks/polling/registry.ts`** — import handler, add to `POLLING_HANDLERS`
3. **`apps/sim/triggers/registry.ts`** — import trigger config, add to `TRIGGER_REGISTRY`
### Helm Cron Job
Add to `helm/sim/values.yaml` under the existing polling cron jobs:
```yaml
{service}WebhookPoll:
schedule: "*/1 * * * *"
concurrencyPolicy: Forbid
url: "http://sim:3000/api/webhooks/poll/{service}"
```
### Reference Implementations
- Simple: `apps/sim/lib/webhooks/polling/rss.ts` + `apps/sim/triggers/rss/poller.ts`
- Complex (OAuth, attachments): `apps/sim/lib/webhooks/polling/gmail.ts` + `apps/sim/triggers/gmail/poller.ts`
- Cursor-based (changes API): `apps/sim/lib/webhooks/polling/google-drive.ts`
- Timestamp-based: `apps/sim/lib/webhooks/polling/google-calendar.ts`
## Checklist
### Trigger Definition
@@ -347,7 +477,18 @@ export function buildOutputs(): Record<string, TriggerOutput> {
- [ ] NO changes to `route.ts`, `provider-subscriptions.ts`, or `deploy.ts`
- [ ] API key field uses `password: true`
### Polling Trigger (if applicable)
- [ ] Handler implements `PollingProviderHandler` at `lib/webhooks/polling/{service}.ts`
- [ ] Trigger config has `polling: true` and defines subBlocks manually (no `buildTriggerSubBlocks`)
- [ ] Provider string matches across: trigger config, handler, `POLLING_PROVIDERS`, polling registry
- [ ] `triggerSave` subBlock `triggerId` matches trigger config `id`
- [ ] First poll seeds state and emits nothing
- [ ] Added provider to `POLLING_PROVIDERS` in `triggers/constants.ts`
- [ ] Added handler to `POLLING_HANDLERS` in `lib/webhooks/polling/registry.ts`
- [ ] Added cron job to `helm/sim/values.yaml`
- [ ] Payload shape matches trigger `outputs` schema
### Testing
- [ ] `bun run type-check` passes
- [ ] Manually verify `formatInput` output keys match trigger `outputs` keys
- [ ] Manually verify output keys match trigger `outputs` keys
- [ ] Trigger UI shows correctly in the block

View File

@@ -21,7 +21,17 @@ Verwenden Sie Ihre eigenen API-Schlüssel für KI-Modellanbieter anstelle der ge
| OpenAI | Knowledge Base-Embeddings, Agent-Block |
| Anthropic | Agent-Block |
| Google | Agent-Block |
| Mistral | Knowledge Base OCR |
| Mistral | Knowledge Base OCR, Agent-Block |
| Fireworks | Agent-Block |
| Firecrawl | Web-Scraping, Crawling, Suche und Extraktion |
| Exa | KI-gestützte Suche und Recherche |
| Serper | Google-Such-API |
| Linkup | Websuche und Inhaltsabruf |
| Parallel AI | Websuche, Extraktion und tiefgehende Recherche |
| Perplexity | KI-gestützter Chat und Websuche |
| Jina AI | Web-Lesen und Suche |
| Google Cloud | Translate, Maps, PageSpeed und Books APIs |
| Brandfetch | Marken-Assets, Logos, Farben und Unternehmensinformationen |
### Einrichtung

View File

@@ -105,9 +105,108 @@ Die Modellaufschlüsselung zeigt:
Die angezeigten Preise entsprechen den Tarifen vom 10. September 2025. Überprüfen Sie die Dokumentation der Anbieter für aktuelle Preise.
</Callout>
## Gehostete Tool-Preise
Wenn Workflows Tool-Blöcke mit den gehosteten API-Schlüsseln von Sim verwenden, werden die Kosten pro Operation berechnet. Verwenden Sie Ihre eigenen Schlüssel über BYOK, um direkt an die Anbieter zu zahlen.
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - Web-Scraping, Crawling, Suche und Extraktion
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - KI-gestützte Suche und Recherche
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - Google-Such-API
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - KI-gestützter Chat und Websuche
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - Websuche und Inhaltsabruf
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - Websuche, Extraktion und tiefgehende Recherche
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005 - $2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - Web-Lesen und Suche
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - Translate, Maps, PageSpeed und Books APIs
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - Marken-Assets, Logos, Farben und Unternehmensinformationen
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## Bring Your Own Key (BYOK)
Sie können Ihre eigenen API-Schlüssel für gehostete Modelle (OpenAI, Anthropic, Google, Mistral) unter **Einstellungen → BYOK** verwenden, um Basispreise zu zahlen. Schlüssel werden verschlüsselt und gelten arbeitsbereichsweit.
Sie können Ihre eigenen API-Schlüssel für unterstützte Anbieter (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) unter **Einstellungen → BYOK** verwenden, um Basispreise zu zahlen. Schlüssel werden verschlüsselt und gelten arbeitsbereichsweit.
## Strategien zur Kostenoptimierung

View File

@@ -110,9 +110,108 @@ The model breakdown shows:
Pricing shown reflects rates as of September 10, 2025. Check provider documentation for current pricing.
</Callout>
## Hosted Tool Pricing
When workflows use tool blocks with Sim's hosted API keys, costs are charged per operation. Use your own keys via BYOK to pay providers directly instead.
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - Web scraping, crawling, search, and extraction
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - AI-powered search and research
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - Google search API
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - AI-powered chat and web search
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - Web search and content retrieval
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - Web search, extraction, and deep research
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005 - $2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - Web reading and search
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - Translate, Maps, PageSpeed, and Books APIs
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - Brand assets, logos, colors, and company info
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## Bring Your Own Key (BYOK)
Use your own API keys for AI model providers instead of Sim's hosted keys to pay base prices with no markup.
Use your own API keys for supported providers instead of Sim's hosted keys to pay base prices with no markup.
### Supported Providers
@@ -121,7 +220,17 @@ Use your own API keys for AI model providers instead of Sim's hosted keys to pay
| OpenAI | Knowledge Base embeddings, Agent block |
| Anthropic | Agent block |
| Google | Agent block |
| Mistral | Knowledge Base OCR |
| Mistral | Knowledge Base OCR, Agent block |
| Fireworks | Agent block |
| Firecrawl | Web scraping, crawling, search, and extraction |
| Exa | AI-powered search and research |
| Serper | Google search API |
| Linkup | Web search and content retrieval |
| Parallel AI | Web search, extraction, and deep research |
| Perplexity | AI-powered chat and web search |
| Jina AI | Web reading and search |
| Google Cloud | Translate, Maps, PageSpeed, and Books APIs |
| Brandfetch | Brand assets, logos, colors, and company info |
### Setup
@@ -152,20 +261,20 @@ Each voice session is billed when it starts. In deployed chat voice mode, each c
## Plans
Sim has two paid plan tiers **Pro** and **Max**. Either can be used individually or with a team. Team plans pool credits across all seats in the organization.
Sim has two paid plan tiers - **Pro** and **Max**. Either can be used individually or with a team. Team plans pool credits across all seats in the organization.
| Plan | Price | Credits Included | Daily Refresh |
|------|-------|------------------|---------------|
| **Community** | $0 | 1,000 (one-time) | |
| **Community** | $0 | 1,000 (one-time) | - |
| **Pro** | $25/mo | 6,000/mo | +50/day |
| **Max** | $100/mo | 25,000/mo | +200/day |
| **Enterprise** | Custom | Custom | |
| **Enterprise** | Custom | Custom | - |
To use Pro or Max with a team, select **Get For Team** in subscription settings and choose the tier and number of seats. Credits are pooled across the organization at the per-seat rate (e.g. Max for Teams with 3 seats = 75,000 credits/mo pooled).
### Daily Refresh Credits
Paid plans include a small daily credit allowance that does not count toward your plan limit. Each day, usage up to the daily refresh amount is excluded from billable usage. This allowance resets every 24 hours and does not carry over use it or lose it.
Paid plans include a small daily credit allowance that does not count toward your plan limit. Each day, usage up to the daily refresh amount is excluded from billable usage. This allowance resets every 24 hours and does not carry over - use it or lose it.
| Plan | Daily Refresh |
|------|---------------|
@@ -252,7 +361,7 @@ Sim uses a **base subscription + overage** billing model:
### How It Works
**Pro Plan ($25/month 6,000 credits):**
**Pro Plan ($25/month - 6,000 credits):**
- Monthly subscription includes 6,000 credits of usage
- Usage under 6,000 credits → No additional charges
- Usage over 6,000 credits (with on-demand enabled) → Pay the overage at month end

View File

@@ -21,7 +21,17 @@ Usa tus propias claves API para proveedores de modelos de IA en lugar de las cla
| OpenAI | Embeddings de base de conocimiento, bloque Agent |
| Anthropic | Bloque Agent |
| Google | Bloque Agent |
| Mistral | OCR de base de conocimiento |
| Mistral | OCR de base de conocimiento, bloque Agent |
| Fireworks | Bloque Agent |
| Firecrawl | Web scraping, crawling, búsqueda y extracción |
| Exa | Búsqueda e investigación impulsada por IA |
| Serper | API de búsqueda de Google |
| Linkup | Búsqueda web y recuperación de contenido |
| Parallel AI | Búsqueda web, extracción e investigación profunda |
| Perplexity | Chat y búsqueda web impulsada por IA |
| Jina AI | Lectura y búsqueda web |
| Google Cloud | APIs de Translate, Maps, PageSpeed y Books |
| Brandfetch | Activos de marca, logos, colores e información de empresas |
### Configuración

View File

@@ -105,9 +105,108 @@ El desglose del modelo muestra:
Los precios mostrados reflejan las tarifas a partir del 10 de septiembre de 2025. Consulta la documentación del proveedor para conocer los precios actuales.
</Callout>
## Precios de herramientas alojadas
Cuando los flujos de trabajo usan bloques de herramientas con las claves API alojadas de Sim, los costos se cobran por operación. Usa tus propias claves a través de BYOK para pagar directamente a los proveedores.
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - Web scraping, crawling, búsqueda y extracción
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - Búsqueda e investigación impulsada por IA
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - API de búsqueda de Google
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - Chat y búsqueda web impulsada por IA
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - Búsqueda web y recuperación de contenido
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - Búsqueda web, extracción e investigación profunda
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005 - $2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - Lectura y búsqueda web
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - APIs de Translate, Maps, PageSpeed y Books
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - Activos de marca, logos, colores e información de empresas
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## Trae tu propia clave (BYOK)
Puedes usar tus propias claves API para modelos alojados (OpenAI, Anthropic, Google, Mistral) en **Configuración → BYOK** para pagar precios base. Las claves están encriptadas y se aplican a todo el espacio de trabajo.
Puedes usar tus propias claves API para proveedores compatibles (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) en **Configuración → BYOK** para pagar precios base. Las claves están encriptadas y se aplican a todo el espacio de trabajo.
## Estrategias de optimización de costos

View File

@@ -21,7 +21,17 @@ Utilisez vos propres clés API pour les fournisseurs de modèles IA au lieu des
| OpenAI | Embeddings de base de connaissances, bloc Agent |
| Anthropic | Bloc Agent |
| Google | Bloc Agent |
| Mistral | OCR de base de connaissances |
| Mistral | OCR de base de connaissances, bloc Agent |
| Fireworks | Bloc Agent |
| Firecrawl | Web scraping, crawling, recherche et extraction |
| Exa | Recherche et investigation alimentées par l'IA |
| Serper | API de recherche Google |
| Linkup | Recherche web et récupération de contenu |
| Parallel AI | Recherche web, extraction et recherche approfondie |
| Perplexity | Chat et recherche web alimentés par l'IA |
| Jina AI | Lecture et recherche web |
| Google Cloud | APIs Translate, Maps, PageSpeed et Books |
| Brandfetch | Ressources de marque, logos, couleurs et informations d'entreprise |
### Configuration

View File

@@ -105,9 +105,108 @@ La répartition des modèles montre :
Les prix indiqués reflètent les tarifs en date du 10 septembre 2025. Consultez la documentation des fournisseurs pour les tarifs actuels.
</Callout>
## Tarification des outils hébergés
Lorsque les workflows utilisent des blocs d'outils avec les clés API hébergées par Sim, les coûts sont facturés par opération. Utilisez vos propres clés via BYOK pour payer directement les fournisseurs.
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - Web scraping, crawling, recherche et extraction
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - Recherche et investigation alimentées par l'IA
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - API de recherche Google
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - Chat et recherche web alimentés par l'IA
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - Recherche web et récupération de contenu
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - Recherche web, extraction et recherche approfondie
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005–$2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - Lecture et recherche web
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - APIs Translate, Maps, PageSpeed et Books
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - Ressources de marque, logos, couleurs et informations d'entreprise
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## Apportez votre propre clé (BYOK)
Vous pouvez utiliser vos propres clés API pour les modèles hébergés (OpenAI, Anthropic, Google, Mistral) dans **Paramètres → BYOK** pour payer les prix de base. Les clés sont chiffrées et s'appliquent à l'ensemble de l'espace de travail.
Vous pouvez utiliser vos propres clés API pour les fournisseurs pris en charge (OpenAI, Anthropic, Google, Mistral, Fireworks, Firecrawl, Exa, Serper, Linkup, Parallel AI, Perplexity, Jina AI, Google Cloud, Brandfetch) dans **Paramètres → BYOK** pour payer les prix de base. Les clés sont chiffrées et s'appliquent à l'ensemble de l'espace de travail.
## Stratégies d'optimisation des coûts

View File

@@ -20,7 +20,17 @@ Simのホストキーの代わりに、AIモデルプロバイダー用の独自
| OpenAI | ナレッジベースの埋め込み、エージェントブロック |
| Anthropic | エージェントブロック |
| Google | エージェントブロック |
| Mistral | ナレッジベースOCR |
| Mistral | ナレッジベースOCR、エージェントブロック |
| Fireworks | エージェントブロック |
| Firecrawl | Webスクレイピング、クローリング、検索、抽出 |
| Exa | AI搭載の検索とリサーチ |
| Serper | Google検索API |
| Linkup | Web検索とコンテンツ取得 |
| Parallel AI | Web検索、抽出、ディープリサーチ |
| Perplexity | AI搭載のチャットとWeb検索 |
| Jina AI | Web閲覧と検索 |
| Google Cloud | Translate、Maps、PageSpeed、Books API |
| Brandfetch | ブランドアセット、ロゴ、カラー、企業情報 |
### セットアップ

View File

@@ -105,9 +105,108 @@ AIブロックを使用するワークフローでは、ログで詳細なコス
表示価格は2025年9月10日時点のレートを反映しています。最新の価格については各プロバイダーのドキュメントをご確認ください。
</Callout>
## ホスティングツールの料金
ワークフローがSimのホスティングAPIキーを使用するツールブロックを利用する場合、操作ごとに料金が発生します。BYOKで独自のキーを使用すると、プロバイダーに直接支払うことができます。
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - Webスクレイピング、クローリング、検索、抽出
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - AI搭載の検索とリサーチ
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - Google検索API
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - AI搭載のチャットとWeb検索
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - Web検索とコンテンツ取得
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - Web検索、抽出、ディープリサーチ
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005–$2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - Web閲覧と検索
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - Translate、Maps、PageSpeed、Books API
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - ブランドアセット、ロゴ、カラー、企業情報
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## Bring Your Own Key (BYOK)
ホストされたモデル（OpenAI、Anthropic、Google、Mistral）に対して、**設定 → BYOK**で独自のAPIキーを使用し、基本価格で支払うことができます。キーは暗号化され、ワークスペース全体に適用されます。
対応プロバイダー（OpenAI、Anthropic、Google、Mistral、Fireworks、Firecrawl、Exa、Serper、Linkup、Parallel AI、Perplexity、Jina AI、Google Cloud、Brandfetch）に対して、**設定 → BYOK**で独自のAPIキーを使用し、基本価格で支払うことができます。キーは暗号化され、ワークスペース全体に適用されます。
## コスト最適化戦略

View File

@@ -20,7 +20,17 @@ Sim 企业版为需要更高安全性、合规性和管理能力的组织提供
| OpenAI | 知识库嵌入、Agent 模块 |
| Anthropic | Agent 模块 |
| Google | Agent 模块 |
| Mistral | 知识库 OCR |
| Mistral | 知识库 OCR、Agent 模块 |
| Fireworks | Agent 模块 |
| Firecrawl | 网页抓取、爬取、搜索和提取 |
| Exa | AI 驱动的搜索和研究 |
| Serper | Google 搜索 API |
| Linkup | 网络搜索和内容检索 |
| Parallel AI | 网络搜索、提取和深度研究 |
| Perplexity | AI 驱动的聊天和网络搜索 |
| Jina AI | 网页阅读和搜索 |
| Google Cloud | Translate、Maps、PageSpeed 和 Books API |
| Brandfetch | 品牌资产、标志、颜色和公司信息 |
### 配置方法

View File

@@ -105,9 +105,108 @@ totalCost = baseExecutionCharge + modelCost
显示的价格为截至 2025 年 9 月 10 日的费率。请查看提供商文档以获取最新价格。
</Callout>
## 托管工具定价
当工作流使用 Sim 托管 API 密钥的工具模块时,费用按操作收取。通过 BYOK 使用你自己的密钥可直接向服务商付费。
<Tabs items={['Firecrawl', 'Exa', 'Serper', 'Perplexity', 'Linkup', 'Parallel AI', 'Jina AI', 'Google Cloud', 'Brandfetch']}>
<Tab>
**Firecrawl** - 网页抓取、爬取、搜索和提取
| Operation | Cost |
|-----------|------|
| Scrape | $0.001 per credit used |
| Crawl | $0.001 per credit used |
| Search | $0.001 per credit used |
| Extract | $0.001 per credit used |
| Map | $0.001 per credit used |
</Tab>
<Tab>
**Exa** - AI 驱动的搜索和研究
| Operation | Cost |
|-----------|------|
| Search | Dynamic (returned by API) |
| Get Contents | Dynamic (returned by API) |
| Find Similar Links | Dynamic (returned by API) |
| Answer | Dynamic (returned by API) |
</Tab>
<Tab>
**Serper** - Google 搜索 API
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.001 |
| Search (>10 results) | $0.002 |
</Tab>
<Tab>
**Perplexity** - AI 驱动的聊天和网络搜索
| Operation | Cost |
|-----------|------|
| Search | $0.005 per request |
| Chat | Token-based (varies by model) |
</Tab>
<Tab>
**Linkup** - 网络搜索和内容检索
| Operation | Cost |
|-----------|------|
| Standard search | ~$0.006 |
| Deep search | ~$0.055 |
</Tab>
<Tab>
**Parallel AI** - 网络搜索、提取和深度研究
| Operation | Cost |
|-----------|------|
| Search (≤10 results) | $0.005 |
| Search (>10 results) | $0.005 + $0.001 per additional result |
| Extract | $0.001 per URL |
| Deep Research | $0.005–$2.40 (varies by processor tier) |
</Tab>
<Tab>
**Jina AI** - 网页阅读和搜索
| Operation | Cost |
|-----------|------|
| Read URL | $0.20 per 1M tokens |
| Search | $0.20 per 1M tokens (minimum 10K tokens) |
</Tab>
<Tab>
**Google Cloud** - Translate、Maps、PageSpeed 和 Books API
| Operation | Cost |
|-----------|------|
| Translate / Detect | $0.00002 per character |
| Maps (Geocode, Directions, Distance Matrix, Elevation, Timezone, Reverse Geocode, Geolocate, Validate Address) | $0.005 per request |
| Maps (Snap to Roads) | $0.01 per request |
| Maps (Place Details) | $0.017 per request |
| Maps (Places Search) | $0.032 per request |
| PageSpeed | Free |
| Books (Search, Details) | Free |
</Tab>
<Tab>
**Brandfetch** - 品牌资产、标志、颜色和公司信息
| Operation | Cost |
|-----------|------|
| Search | Free |
| Get Brand | $0.04 per request |
</Tab>
</Tabs>
## 自带密钥（BYOK）
你可以在 **设置 → BYOK** 中为托管模型（OpenAI、Anthropic、Google、Mistral）使用你自己的 API 密钥,以按基础价格计费。密钥会被加密,并在整个工作区范围内生效。
你可以在 **设置 → BYOK** 中为支持的服务商（OpenAI、Anthropic、Google、Mistral、Fireworks、Firecrawl、Exa、Serper、Linkup、Parallel AI、Perplexity、Jina AI、Google Cloud、Brandfetch）使用你自己的 API 密钥,以按基础价格计费。密钥会被加密,并在整个工作区范围内生效。
## 成本优化策略

View File

@@ -161,7 +161,7 @@ export default async function Page({ params }: { params: Promise<{ slug: string
<h3 className='font-[430] font-season text-lg text-white leading-tight tracking-[-0.01em]'>
{p.title}
</h3>
<p className='line-clamp-2 text-[#F6F6F0]/50 text-sm leading-[150%]'>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{p.description}
</p>
</div>

View File

@@ -110,7 +110,7 @@ export default async function BlogIndex({
<h1 className='text-balance font-[430] font-season text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'>
Latest from Sim
</h1>
<p className='max-w-[360px] font-[430] font-season text-[#F6F6F0]/50 text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
<p className='max-w-[540px] font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
Announcements, insights, and guides for building AI agent workflows.
</p>
</div>
@@ -152,7 +152,7 @@ export default async function BlogIndex({
<h3 className='font-[430] font-season text-lg text-white leading-tight tracking-[-0.01em]'>
{p.title}
</h3>
<p className='line-clamp-2 text-[#F6F6F0]/50 text-sm leading-[150%]'>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{p.description}
</p>
</div>
@@ -191,7 +191,7 @@ export default async function BlogIndex({
<h3 className='font-[430] font-season text-base text-white leading-tight tracking-[-0.01em] lg:text-lg'>
{p.title}
</h3>
<p className='line-clamp-2 text-[#F6F6F0]/40 text-sm leading-[150%]'>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{p.description}
</p>
</div>

View File

@@ -63,10 +63,8 @@ const INTEGRATION_LINKS: FooterItem[] = [
{ label: 'Linear', href: 'https://docs.sim.ai/tools/linear', external: true },
{ label: 'Airtable', href: 'https://docs.sim.ai/tools/airtable', external: true },
{ label: 'Firecrawl', href: 'https://docs.sim.ai/tools/firecrawl', external: true },
{ label: 'Pinecone', href: 'https://docs.sim.ai/tools/pinecone', external: true },
{ label: 'Discord', href: 'https://docs.sim.ai/tools/discord', external: true },
{ label: 'Microsoft Teams', href: 'https://docs.sim.ai/tools/microsoft_teams', external: true },
{ label: 'Outlook', href: 'https://docs.sim.ai/tools/outlook', external: true },
{ label: 'Telegram', href: 'https://docs.sim.ai/tools/telegram', external: true },
]

View File

@@ -1,6 +1,7 @@
'use client'
import { useState } from 'react'
import { AnimatePresence, motion } from 'framer-motion'
import { ChevronDown } from '@/components/emcn'
import { cn } from '@/lib/core/utils/cn'
@@ -15,46 +16,67 @@ interface LandingFAQProps {
export function LandingFAQ({ faqs }: LandingFAQProps) {
const [openIndex, setOpenIndex] = useState<number | null>(0)
const [hoveredIndex, setHoveredIndex] = useState<number | null>(null)
return (
<div className='divide-y divide-[var(--landing-border)]'>
<div>
{faqs.map(({ question, answer }, index) => {
const isOpen = openIndex === index
const isHovered = hoveredIndex === index
const showDivider = index > 0 && hoveredIndex !== index && hoveredIndex !== index - 1
return (
<div key={question}>
<div
className={cn(
'h-px w-full bg-[var(--landing-bg-elevated)]',
index === 0 || !showDivider ? 'invisible' : 'visible'
)}
/>
<button
type='button'
onClick={() => setOpenIndex(isOpen ? null : index)}
className='flex w-full items-start justify-between gap-4 py-5 text-left'
onMouseEnter={() => setHoveredIndex(index)}
onMouseLeave={() => setHoveredIndex(null)}
className='-mx-6 flex w-[calc(100%+3rem)] items-center justify-between gap-4 px-6 py-4 text-left transition-colors hover:bg-[var(--landing-bg-elevated)]'
aria-expanded={isOpen}
>
<span
className={cn(
'font-[500] text-[15px] leading-snug transition-colors',
'text-[15px] leading-snug tracking-[-0.02em] transition-colors',
isOpen
? 'text-[var(--landing-text)]'
: 'text-[var(--landing-text-muted)] hover:text-[var(--landing-text)]'
: 'text-[var(--landing-text-body)] hover:text-[var(--landing-text)]'
)}
>
{question}
</span>
<ChevronDown
className={cn(
'mt-0.5 h-4 w-4 shrink-0 text-[#555] transition-transform duration-200',
'h-3 w-3 shrink-0 text-[var(--landing-text-subtle)] transition-transform duration-200',
isOpen ? 'rotate-180' : 'rotate-0'
)}
aria-hidden='true'
/>
</button>
{isOpen && (
<div className='pb-5'>
<p className='text-[14px] text-[var(--landing-text-muted)] leading-[1.75]'>
{answer}
</p>
</div>
)}
<AnimatePresence initial={false}>
{isOpen && (
<motion.div
initial={{ height: 0, opacity: 0 }}
animate={{ height: 'auto', opacity: 1 }}
exit={{ height: 0, opacity: 0 }}
transition={{ duration: 0.25, ease: [0.4, 0, 0.2, 1] }}
className='overflow-hidden'
>
<div className='pt-2 pb-4'>
<p className='text-[14px] text-[var(--landing-text-body)] leading-[1.75]'>
{answer}
</p>
</div>
</motion.div>
)}
</AnimatePresence>
</div>
)
})}

View File

@@ -0,0 +1,149 @@
import type { ComponentType, SVGProps } from 'react'
import Link from 'next/link'
import {
AgentIcon,
ApiIcon,
McpIcon,
PackageSearchIcon,
TableIcon,
WorkflowIcon,
} from '@/components/icons'
/**
 * A link entry with an icon and a short description, as rendered by
 * `DropdownLink`.
 */
interface ProductLink {
/** Primary text shown for the entry. */
label: string
/** Secondary line rendered beneath the label. */
description: string
/** Destination URL or path. */
href: string
/** When true the entry renders as a plain `<a>` that opens in a new tab; otherwise it renders via `Link`. */
external?: boolean
/** SVG icon component rendered to the left of the text. */
icon: ComponentType<SVGProps<SVGSVGElement>>
}
/**
 * A text-only link entry (no icon or description), used for the `EXPLORE`
 * list.
 */
interface SidebarLink {
/** Text shown for the entry. */
label: string
/** Destination URL or path. */
href: string
/** When true the entry renders as a plain `<a>` that opens in a new tab; otherwise it renders via `Link`. */
external?: boolean
}
/**
 * Entries rendered in the two-column "Platform" grid of `ProductDropdown`,
 * in display order. Every entry here is flagged `external` and points at
 * docs.sim.ai.
 */
const PLATFORM: ProductLink[] = [
{
label: 'Workflows',
description: 'Visual AI automation builder',
href: 'https://docs.sim.ai/getting-started',
external: true,
icon: WorkflowIcon,
},
{
label: 'Agent',
description: 'Build autonomous AI agents',
href: 'https://docs.sim.ai/blocks/agent',
external: true,
icon: AgentIcon,
},
{
label: 'MCP',
description: 'Connect external tools',
href: 'https://docs.sim.ai/mcp',
external: true,
icon: McpIcon,
},
{
label: 'Knowledge Base',
description: 'Retrieval-augmented context',
href: 'https://docs.sim.ai/knowledgebase',
external: true,
icon: PackageSearchIcon,
},
{
label: 'Tables',
description: 'Structured data storage',
href: 'https://docs.sim.ai/tables',
external: true,
icon: TableIcon,
},
{
label: 'API',
description: 'Deploy workflows as endpoints',
href: 'https://docs.sim.ai/api-reference/getting-started',
external: true,
icon: ApiIcon,
},
]
/**
 * Entries rendered in the narrow "Explore" column of `ProductDropdown`, in
 * display order. Entries without `external` are site-relative paths rendered
 * through `Link`; the self-hosting entry opens the docs site in a new tab.
 */
const EXPLORE: SidebarLink[] = [
{ label: 'Models', href: '/models' },
{ label: 'Integrations', href: '/integrations' },
{ label: 'Changelog', href: '/changelog' },
{ label: 'Self-hosting', href: 'https://docs.sim.ai/self-hosting', external: true },
]
/**
 * Renders one icon + label + description row for a {@link ProductLink}.
 *
 * External entries render as a plain anchor that opens in a new tab with
 * `rel='noopener noreferrer'`; all other entries render through `Link`.
 *
 * @param link - The entry to render.
 */
function DropdownLink({ link }: { link: ProductLink }) {
  const { label, description, href, external, icon: Icon } = link

  const rowClassName =
    'group/item flex items-start gap-2.5 rounded-[5px] px-2.5 py-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'

  // Children are built once so both branches below render identical content.
  const iconNode = (
    <Icon className='mt-0.5 h-[15px] w-[15px] shrink-0 text-[var(--landing-text-icon)]' />
  )
  const textNode = (
    <div className='flex flex-col'>
      <span className='font-[430] font-season text-[13px] text-white leading-tight'>{label}</span>
      <span className='font-season text-[12px] text-[var(--landing-text-subtle)] leading-[150%]'>
        {description}
      </span>
    </div>
  )

  if (external) {
    return (
      <a href={href} target='_blank' rel='noopener noreferrer' className={rowClassName}>
        {iconNode}
        {textNode}
      </a>
    )
  }

  return (
    <Link href={href} className={rowClassName}>
      {iconNode}
      {textNode}
    </Link>
  )
}
/**
 * Dropdown panel with two columns separated by a vertical rule:
 * a two-column "Platform" grid of {@link DropdownLink} rows built from
 * `PLATFORM`, and a narrow "Explore" list of text links built from `EXPLORE`.
 */
export function ProductDropdown() {
  const hoverRuleColor = 'bg-[var(--landing-bg-elevated)]'

  /** Uppercase column title followed by a thin horizontal rule. */
  const renderHeading = (title: string) => (
    <div className='mb-1 px-2.5 pt-1'>
      <span className='font-[430] font-season text-[11px] text-[var(--landing-text-subtle)] uppercase tracking-[0.08em]'>
        {title}
      </span>
      <div className={`mt-1.5 h-px ${hoverRuleColor}`} />
    </div>
  )

  const exploreItemClassName =
    'block rounded-[5px] px-2.5 py-1.5 font-[430] font-season text-[13px] text-white transition-colors hover:bg-[var(--landing-bg-elevated)]'

  // External entries open in a new tab via a plain anchor; the rest use Link.
  const exploreItems = EXPLORE.map(({ label, href, external }) =>
    external ? (
      <a
        key={label}
        href={href}
        target='_blank'
        rel='noopener noreferrer'
        className={exploreItemClassName}
      >
        {label}
      </a>
    ) : (
      <Link key={label} href={href} className={exploreItemClassName}>
        {label}
      </Link>
    )
  )

  const platformItems = PLATFORM.map((entry) => <DropdownLink key={entry.label} link={entry} />)

  return (
    <div className='flex w-[560px] rounded-[5px] border border-[var(--landing-bg-elevated)] bg-[var(--landing-bg)] shadow-overlay'>
      <div className='flex-1 p-2'>
        {renderHeading('Platform')}
        <div className='grid grid-cols-2'>{platformItems}</div>
      </div>

      <div className={`w-px self-stretch ${hoverRuleColor}`} />

      <div className='w-[160px] p-2'>
        {renderHeading('Explore')}
        {exploreItems}
      </div>
    </div>
  )
}

View File

@@ -2,13 +2,15 @@
import { useRouter } from 'next/navigation'
import { LandingPromptStorage } from '@/lib/core/utils/browser-storage'
import { cn } from '@/lib/core/utils/cn'
interface TemplateCardButtonProps {
prompt: string
className?: string
children: React.ReactNode
}
export function TemplateCardButton({ prompt, children }: TemplateCardButtonProps) {
export function TemplateCardButton({ prompt, className, children }: TemplateCardButtonProps) {
const router = useRouter()
function handleClick() {
@@ -17,11 +19,7 @@ export function TemplateCardButton({ prompt, children }: TemplateCardButtonProps
}
return (
<button
type='button'
onClick={handleClick}
className='group flex w-full flex-col items-start rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5 text-left transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
>
<button type='button' onClick={handleClick} className={cn('w-full text-left', className)}>
{children}
</button>
)

View File

@@ -283,7 +283,7 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
}
return (
<>
<section className='bg-[var(--landing-bg)]'>
<script
type='application/ld+json'
dangerouslySetInnerHTML={{ __html: JSON.stringify(breadcrumbJsonLd) }}
@@ -301,440 +301,434 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
/>
<div className='mx-auto max-w-[1200px] px-6 py-12 sm:px-8 md:px-12'>
{/* Breadcrumb */}
<nav
aria-label='Breadcrumb'
className='mb-10 flex items-center gap-2 text-[#555] text-[13px]'
>
<Link href='/' className='transition-colors hover:text-[var(--landing-text-muted)]'>
Home
</Link>
<span aria-hidden='true'>/</span>
{/* Hero */}
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
<div className='mb-6'>
<Link
href='/integrations'
className='transition-colors hover:text-[var(--landing-text-muted)]'
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
>
Integrations
</Link>
<span aria-hidden='true'>/</span>
<span className='text-[var(--landing-text-muted)]'>{name}</span>
</nav>
{/* Hero */}
<section aria-labelledby='integration-heading' className='mb-16'>
<div className='mb-6 flex items-center gap-5'>
<IntegrationIcon
bgColor={bgColor}
name={name}
Icon={IconComponent}
className='h-16 w-16 rounded-xl'
iconClassName='h-8 w-8'
fallbackClassName='text-[26px]'
<svg
className='h-3 w-3 shrink-0'
viewBox='0 0 10 10'
fill='none'
xmlns='http://www.w3.org/2000/svg'
aria-hidden='true'
/>
<div>
<p className='mb-0.5 text-[#555] text-[12px]'>Integration</p>
<h1
id='integration-heading'
className='font-[500] text-[36px] text-[var(--landing-text)] leading-tight sm:text-[44px]'
>
{name}
</h1>
</div>
</div>
<p className='mb-8 max-w-[700px] text-[17px] text-[var(--landing-text-muted)] leading-[1.7]'>
{description}
</p>
{/* CTAs */}
<div className='flex flex-wrap gap-2'>
<a
href='https://sim.ai'
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Start building free
</a>
<a
href={docsUrl}
target='_blank'
rel='noopener noreferrer'
className='inline-flex h-[32px] items-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
View docs
<svg
aria-hidden='true'
className='h-3 w-3'
fill='none'
<line
x1='1'
y1='5'
x2='10'
y2='5'
stroke='currentColor'
strokeWidth={2}
viewBox='0 0 24 24'
>
<path d='M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6' />
<polyline points='15 3 21 3 21 9' />
<line x1='10' x2='21' y1='14' y2='3' />
</svg>
</a>
strokeWidth='1.33'
strokeLinecap='square'
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
/>
<path
d='M6.5 2L3.5 5L6.5 8'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
strokeLinejoin='miter'
fill='none'
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
/>
</svg>
Back to Integrations
</Link>
</div>
{/* Hero content */}
<div className='mb-6 flex items-center gap-5'>
<IntegrationIcon
bgColor={bgColor}
name={name}
Icon={IconComponent}
className='h-12 w-12 rounded-[5px]'
iconClassName='h-6 w-6'
fallbackClassName='text-[20px]'
aria-hidden='true'
/>
<div>
<h1
id='integration-heading'
className='text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[36px] lg:text-[44px]'
>
{name}
</h1>
</div>
</div>
<p className='mb-8 max-w-[700px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
{description}
</p>
{/* CTAs */}
<div className='flex flex-wrap gap-2'>
<Link
href='/signup'
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Start building free
</Link>
<a
href={docsUrl}
target='_blank'
rel='noopener noreferrer'
className='group/link inline-flex h-[32px] items-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-season text-[var(--landing-text)] text-sm transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
View docs
<svg
aria-hidden='true'
className='-rotate-45 h-3 w-3 shrink-0'
viewBox='0 0 10 10'
fill='none'
>
<line
x1='0'
y1='5'
x2='9'
y2='5'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
className='origin-left scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
/>
<path
d='M3.5 2L6.5 5L3.5 8'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
strokeLinejoin='miter'
fill='none'
className='transition-transform duration-200 ease-out group-hover/link:translate-x-[30%]'
/>
</svg>
</a>
</div>
</div>
{/* Full-width divider */}
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
{/* Border-railed content */}
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
{/* Overview */}
{longDescription && (
<>
<section aria-labelledby='overview-heading' className='px-6 py-10'>
<h2
id='overview-heading'
className='mb-4 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
Overview
</h2>
<p className='text-[15px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
{longDescription}
</p>
</section>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
{/* How to automate */}
<section aria-labelledby='how-it-works-heading' className='px-6 py-10'>
<h2
id='how-it-works-heading'
className='mb-6 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
How to automate {name} with Sim
</h2>
<ol className='space-y-4' aria-label='Steps to set up automation'>
{[
{
step: '01',
title: 'Create a free account',
body: 'Sign up at sim.ai in seconds. No credit card required. Your workspace is ready immediately.',
},
{
step: '02',
title: `Add a ${name} block`,
body:
authType === 'oauth'
? `Open a workflow, drag a ${name} block onto the canvas, and connect your account with one-click OAuth.`
: authType === 'api-key'
? `Open a workflow, drag a ${name} block onto the canvas, and paste in your ${name} API key.`
: `Open a workflow, drag a ${name} block onto the canvas, and authenticate your account.`,
},
{
step: '03',
title: 'Configure, connect, and run',
body: `Pick the tool you need, wire in an AI agent for reasoning or data transformation, and run. Your ${name} automation is live.`,
},
].map(({ step, title, body }) => (
<li key={step} className='flex gap-4'>
<span
className='mt-0.5 flex h-7 w-7 shrink-0 items-center justify-center rounded-full border border-[var(--landing-border-strong)] font-martian-mono text-[11px] text-[var(--landing-text-subtle)]'
aria-hidden='true'
>
{step}
</span>
<div>
<h3 className='mb-1 text-[15px] text-white tracking-[-0.02em]'>{title}</h3>
<p className='text-[14px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
{body}
</p>
</div>
</li>
))}
</ol>
</section>
{/* Two-column layout */}
<div className='grid grid-cols-1 gap-16 lg:grid-cols-[1fr_300px]'>
{/* Main column */}
<div className='min-w-0 space-y-16'>
{/* Overview */}
{longDescription && (
<section aria-labelledby='overview-heading'>
<h2
id='overview-heading'
className='mb-4 font-[500] text-[20px] text-[var(--landing-text)]'
>
Overview
</h2>
<p className='text-[15px] text-[var(--landing-text-muted)] leading-[1.8]'>
{longDescription}
</p>
</section>
)}
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{/* How to automate — targets "how to connect X" queries */}
<section aria-labelledby='how-it-works-heading'>
<h2
id='how-it-works-heading'
className='mb-6 font-[500] text-[20px] text-[var(--landing-text)]'
>
How to automate {name} with Sim
</h2>
<ol className='space-y-4' aria-label='Steps to set up automation'>
{[
{
step: '01',
title: 'Create a free account',
body: 'Sign up at sim.ai in seconds. No credit card required. Your workspace is ready immediately.',
},
{
step: '02',
title: `Add a ${name} block`,
body:
authType === 'oauth'
? `Open a workflow, drag a ${name} block onto the canvas, and connect your account with one-click OAuth.`
: authType === 'api-key'
? `Open a workflow, drag a ${name} block onto the canvas, and paste in your ${name} API key.`
: `Open a workflow, drag a ${name} block onto the canvas, and authenticate your account.`,
},
{
step: '03',
title: 'Configure, connect, and run',
body: `Pick the tool you need, wire in an AI agent for reasoning or data transformation, and run. Your ${name} automation is live.`,
},
].map(({ step, title, body }) => (
<li key={step} className='flex gap-4'>
<span
className='mt-0.5 flex h-7 w-7 shrink-0 items-center justify-center rounded-full border border-[var(--landing-border-strong)] font-[500] text-[#555] text-[11px]'
aria-hidden='true'
>
{step}
</span>
<div>
<h3 className='mb-1 font-[500] text-[15px] text-[var(--landing-text)]'>
{title}
</h3>
<p className='text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
{body}
</p>
</div>
</li>
))}
</ol>
</section>
{/* Triggers */}
{triggers.length > 0 && (
<section aria-labelledby='triggers-heading'>
{/* Triggers — rows */}
{triggers.length > 0 && (
<section aria-labelledby='triggers-heading'>
<div className='px-6 pt-10 pb-4'>
<div className='mb-2 flex items-center gap-2.5'>
<span className='relative flex h-2 w-2' aria-hidden='true'>
<span className='absolute inline-flex h-full w-full animate-ping rounded-full bg-emerald-400 opacity-75' />
<span className='relative inline-flex h-2 w-2 rounded-full bg-emerald-500' />
</span>
<h2
id='triggers-heading'
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
className='text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
Real-time triggers
</h2>
<p className='mb-4 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
Connect a {name} webhook to Sim and your workflow fires the instant an event
happens no polling, no delay. Sim receives the full event payload and makes
every field available as a variable inside your workflow.
</p>
{/* Event cards */}
<ul
className='grid grid-cols-1 gap-3 sm:grid-cols-2'
aria-label={`${name} trigger events`}
>
{triggers.map((trigger) => (
<li
key={trigger.id}
className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4'
>
<div className='mb-2 flex items-center gap-2'>
<span className='inline-flex items-center gap-1 rounded-[4px] bg-[var(--landing-bg-elevated)] px-1.5 py-0.5 font-[500] text-[11px] text-[var(--landing-text)]'>
<svg
aria-hidden='true'
className='h-2.5 w-2.5'
fill='none'
stroke='currentColor'
strokeWidth={2.5}
viewBox='0 0 24 24'
>
<polygon points='13 2 3 14 12 14 11 22 21 10 12 10 13 2' />
</svg>
Event
</span>
</div>
<p className='font-[500] text-[13px] text-[var(--landing-text)]'>
{trigger.name}
</div>
<p className='text-[14px] text-[var(--landing-text-body)] leading-[150%] tracking-[0.02em]'>
Connect a {name} webhook to Sim and your workflow fires the instant an event happens
no polling, no delay.
</p>
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{triggers.map((trigger) => (
<div key={trigger.id}>
<div className='flex items-start gap-4 px-6 py-4'>
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
<p className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{trigger.name}
</p>
{trigger.description && (
<p className='text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
{trigger.description}
</p>
{trigger.description && (
<p className='mt-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
{trigger.description}
</p>
)}
</div>
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</div>
))}
</section>
)}
{/* Workflow templates — horizontal cards */}
{matchingTemplates.length > 0 && (
<section aria-labelledby='templates-heading'>
<div className='px-6 pt-10 pb-4'>
<h2
id='templates-heading'
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
Workflow templates
</h2>
<p className='text-[14px] text-[var(--landing-text-body)] tracking-[0.02em]'>
Ready-to-use workflows featuring {name}. Click any to build it instantly.
</p>
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{(() => {
const isOdd = matchingTemplates.length % 2 === 1
const pairedTemplates = isOdd ? matchingTemplates.slice(0, -1) : matchingTemplates
const lastTemplate = isOdd ? matchingTemplates[matchingTemplates.length - 1] : null
const resolveTypes = (template: (typeof matchingTemplates)[number]) => [
integration.type,
...template.integrationBlockTypes.filter((bt) => bt !== integration.type),
]
const renderIcons = (allTypes: string[]) =>
allTypes.map((bt, idx) => {
const resolvedBt = byType.get(bt)
? bt
: byType.get(`${bt}_v2`)
? `${bt}_v2`
: byType.get(`${bt}_v3`)
? `${bt}_v3`
: bt
const int = byType.get(resolvedBt)
const ToolIcon = blockTypeToIconMap[resolvedBt]
return (
<span key={bt} className='inline-flex items-center gap-1.5'>
{idx > 0 && (
<span className='text-[#555] text-[11px]' aria-hidden='true'>
</span>
)}
</li>
))}
</ul>
</section>
)}
{/* Workflow templates */}
{matchingTemplates.length > 0 && (
<section aria-labelledby='templates-heading'>
<h2
id='templates-heading'
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
>
Workflow templates
</h2>
<p className='mb-6 text-[14px] text-[var(--landing-text-muted)]'>
Ready-to-use workflows featuring {name}. Click any to build it instantly.
</p>
<ul
className='grid grid-cols-1 gap-4 sm:grid-cols-2'
aria-label='Workflow templates'
>
{matchingTemplates.map((template) => {
const allTypes = [
integration.type,
...template.integrationBlockTypes.filter((bt) => bt !== integration.type),
]
<IntegrationIcon
bgColor={int?.bgColor ?? '#333'}
name={int?.name ?? bt}
Icon={ToolIcon}
as='span'
className='h-6 w-6 rounded-[4px]'
iconClassName='h-3.5 w-3.5'
fallbackClassName='text-[10px]'
aria-hidden='true'
/>
</span>
)
})
return (
<>
{/* Paired rows of 2 */}
{Array.from({ length: Math.ceil(pairedTemplates.length / 2) }, (_, rowIdx) => {
const row = pairedTemplates.slice(rowIdx * 2, rowIdx * 2 + 2)
return (
<li key={template.title}>
<TemplateCardButton prompt={template.prompt}>
{/* Integration pills row */}
<div className='mb-3 flex flex-wrap items-center gap-1.5 text-[12px]'>
{allTypes.map((bt, idx) => {
// Templates may use unversioned keys (e.g. "notion") while the
// icon map has versioned keys ("notion_v2") — fall back to _v2.
const resolvedBt = byType.get(bt)
? bt
: byType.get(`${bt}_v2`)
? `${bt}_v2`
: bt
const int = byType.get(resolvedBt)
const intName = int?.name ?? bt
return (
<span key={bt} className='inline-flex items-center gap-1.5'>
{idx > 0 && (
<span className='text-[#555]' aria-hidden='true'>
</span>
)}
<span className='inline-flex items-center gap-1 rounded-[3px] bg-[var(--landing-bg-elevated)] px-1.5 py-0.5 font-[500] text-[var(--landing-text)]'>
<IntegrationIcon
bgColor={int?.bgColor ?? '#6B7280'}
name={intName}
Icon={blockTypeToIconMap[resolvedBt]}
as='span'
className='h-3.5 w-3.5 rounded-[2px]'
iconClassName='h-2.5 w-2.5'
aria-hidden='true'
/>
{intName}
</span>
</span>
)
})}
</div>
<p className='mb-1 font-[500] text-[14px] text-[var(--landing-text)]'>
{template.title}
</p>
<p className='mt-3 text-[#555] text-[13px] transition-colors group-hover:text-[var(--landing-text-muted)]'>
Try this workflow
</p>
</TemplateCardButton>
</li>
<div key={rowIdx}>
<nav
aria-label={`Template row ${rowIdx + 1}`}
className='flex flex-col sm:flex-row'
>
{row.map((template) => (
<TemplateCardButton
key={template.title}
prompt={template.prompt}
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
>
<div className='flex items-center gap-1.5'>
{renderIcons(resolveTypes(template))}
</div>
<div className='flex flex-col gap-2'>
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{template.title}
</h3>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{template.prompt}
</p>
</div>
</TemplateCardButton>
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</div>
)
})}
</ul>
</section>
)}
{/* Tools */}
{operations.length > 0 && (
<section aria-labelledby='tools-heading'>
<h2
id='tools-heading'
className='mb-2 font-[500] text-[20px] text-[var(--landing-text)]'
>
Supported tools
</h2>
<p className='mb-6 text-[14px] text-[var(--landing-text-muted)]'>
{operations.length} {name} tool{operations.length === 1 ? '' : 's'} available in
Sim
</p>
<ul
className='grid grid-cols-1 gap-2 sm:grid-cols-2'
aria-label={`${name} supported tools`}
>
{operations.map((op) => (
<li
key={op.name}
className='rounded-[6px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3.5 py-3'
>
<p className='font-[500] text-[13px] text-[var(--landing-text)]'>{op.name}</p>
{op.description && (
<p className='mt-0.5 text-[#555] text-[12px] leading-relaxed'>
{op.description}
</p>
)}
</li>
))}
</ul>
</section>
)}
{/* FAQ */}
<section aria-labelledby='faq-heading'>
<h2
id='faq-heading'
className='mb-8 font-[500] text-[20px] text-[var(--landing-text)]'
>
Frequently asked questions
</h2>
<IntegrationFAQ faqs={faqs} />
</section>
</div>
{/* Sidebar */}
<aside className='space-y-5' aria-label='Integration details'>
{/* Quick details */}
<div className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
<h3 className='mb-4 font-[500] text-[14px] text-[var(--landing-text)]'>Details</h3>
<dl className='space-y-3 text-[13px]'>
{operations.length > 0 && (
<div>
<dt className='text-[#555]'>Tools</dt>
<dd className='text-[var(--landing-text)]'>{operations.length} supported</dd>
</div>
)}
{triggers.length > 0 && (
<div>
<dt className='text-[#555]'>Triggers</dt>
<dd className='text-[var(--landing-text)]'>{triggers.length} available</dd>
</div>
)}
<div>
<dt className='text-[#555]'>Auth</dt>
<dd className='text-[var(--landing-text)]'>
{authType === 'oauth'
? 'One-click OAuth'
: authType === 'api-key'
? 'API key'
: 'None required'}
</dd>
</div>
<div>
<dt className='text-[#555]'>Pricing</dt>
<dd className='text-[var(--landing-text)]'>Free to start</dd>
</div>
</dl>
<div className='mt-5 flex flex-col gap-2'>
<a
href='https://sim.ai'
className='flex h-[32px] w-full items-center justify-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] font-[430] font-season text-[13px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Get started free
</a>
<a
href={docsUrl}
target='_blank'
rel='noopener noreferrer'
className='flex h-[32px] w-full items-center justify-center gap-1.5 rounded-[5px] border border-[var(--landing-border-strong)] font-[430] font-season text-[13px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
View docs
<svg
aria-hidden='true'
className='h-3 w-3'
fill='none'
stroke='currentColor'
strokeWidth={2}
viewBox='0 0 24 24'
>
<path d='M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6' />
<polyline points='15 3 21 3 21 9' />
<line x1='10' x2='21' y1='14' y2='3' />
</svg>
</a>
</div>
</div>
{/* Related integrations — internal linking for SEO */}
<div className='rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
{relatedIntegrations.length > 0 && (
<>
<h3 className='mb-4 font-[500] text-[14px] text-[var(--landing-text)]'>
Related integrations
</h3>
<ul className='space-y-2'>
{relatedIntegrations.map((rel) => (
<li key={rel.slug}>
<Link
href={`/integrations/${rel.slug}`}
className='flex items-center gap-2.5 rounded-[6px] p-1.5 text-[13px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
>
<IntegrationIcon
bgColor={rel.bgColor}
name={rel.name}
Icon={blockTypeToIconMap[rel.type]}
as='span'
className='h-6 w-6 rounded-[4px]'
iconClassName='h-3.5 w-3.5'
fallbackClassName='text-[10px]'
aria-hidden='true'
/>
{rel.name}
</Link>
</li>
))}
</ul>
{/* Last template as a full-width row when odd */}
{lastTemplate && (
<>
<TemplateCardButton
prompt={lastTemplate.prompt}
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
<div className='flex items-center gap-1.5'>
{renderIcons(resolveTypes(lastTemplate))}
</div>
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{lastTemplate.title}
</h3>
<p className='line-clamp-1 text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
{lastTemplate.prompt}
</p>
</div>
</TemplateCardButton>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
</>
)}
<Link
href='/integrations'
className={`block text-[#555] text-[12px] transition-colors hover:text-[var(--landing-text-muted)]${relatedIntegrations.length > 0 ? ' mt-4' : ''}`}
)
})()}
</section>
)}
{/* Supported tools — rows */}
{operations.length > 0 && (
<section aria-labelledby='tools-heading'>
<div className='px-6 pt-10 pb-4'>
<h2
id='tools-heading'
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
All integrations
</Link>
Supported tools
</h2>
<p className='text-[14px] text-[var(--landing-text-body)] tracking-[0.02em]'>
{operations.length} {name} tool{operations.length === 1 ? '' : 's'} available in Sim
</p>
</div>
</aside>
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{operations.map((op) => (
<div key={op.name}>
<div className='flex items-start gap-4 px-6 py-4'>
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
<p className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{op.name}
</p>
{op.description && (
<p className='text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
{op.description}
</p>
)}
</div>
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</div>
))}
</section>
)}
{/* FAQ — full width */}
<section aria-labelledby='faq-heading' className='px-6 py-10'>
<h2
id='faq-heading'
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em]'
>
Frequently asked questions
</h2>
<IntegrationFAQ faqs={faqs} />
</section>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{/* Related integrations — horizontal cards with vertical dividers (blog featured pattern) */}
{relatedIntegrations.length > 0 && (
<>
<nav aria-label='Related integrations' className='flex flex-col sm:flex-row'>
{relatedIntegrations.slice(0, 4).map((rel) => (
<Link
key={rel.slug}
href={`/integrations/${rel.slug}`}
className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
>
<IntegrationIcon
bgColor={rel.bgColor}
name={rel.name}
Icon={blockTypeToIconMap[rel.type]}
as='span'
className='h-10 w-10 rounded-[5px]'
aria-hidden='true'
/>
<div className='flex flex-col gap-2'>
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>
{rel.name}
</h3>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{rel.description}
</p>
</div>
</Link>
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
{/* Bottom CTA */}
<section
aria-labelledby='cta-heading'
className='mt-20 rounded-xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-8 text-center sm:p-12'
>
{/* Logo pair: Sim × Integration */}
<section aria-labelledby='cta-heading' className='px-6 py-16 text-center'>
<div className='mx-auto mb-6 flex items-center justify-center gap-3'>
<Image
src='/brandbook/logo/small.png'
@@ -776,22 +770,25 @@ export default async function IntegrationPage({ params }: { params: Promise<{ sl
</div>
<h2
id='cta-heading'
className='mb-3 font-[500] text-[28px] text-[var(--landing-text)] sm:text-[34px]'
className='mb-3 text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[34px]'
>
Start automating {name} today
</h2>
<p className='mx-auto mb-8 max-w-[480px] text-[16px] text-[var(--landing-text-muted)] leading-relaxed'>
<p className='mx-auto mb-8 max-w-[480px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
Build your first AI workflow with {name} in minutes. Connect to every tool your team
uses. Free to start no credit card required.
</p>
<a
href='https://sim.ai'
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--white)] bg-[var(--white)] px-2.5 font-[430] font-season text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
<Link
href='/signup'
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Build for free
</a>
Build for free
</Link>
</section>
</div>
</>
{/* Closing full-width divider */}
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
</section>
)
}

View File

@@ -1,7 +1,7 @@
import type { ComponentType, SVGProps } from 'react'
import Link from 'next/link'
import { Badge } from '@/components/emcn'
import type { Integration } from '@/app/(landing)/integrations/data/types'
import { ChevronArrow } from '@/app/(landing)/models/components/model-primitives'
import { IntegrationIcon } from './integration-icon'
interface IntegrationCardProps {
@@ -9,49 +9,76 @@ interface IntegrationCardProps {
IconComponent?: ComponentType<SVGProps<SVGSVGElement>>
}
/**
 * Featured integration card — matches blog featured post pattern.
 * Used in flex rows separated by border-l dividers.
 *
 * Renders a `Link` to `/integrations/{slug}` that wraps the integration's
 * icon, its name, and a description clamped to two lines.
 *
 * NOTE(review): this span appears to interleave the removed and added lines of
 * a diff (two destructuring statements, duplicated `className` props, and both
 * the badge footer and the name/description column) — confirm which lines
 * belong to the current file before editing the body.
 *
 * @param integration - Integration record supplying `slug`, `name`, `description`, and `bgColor`.
 * @param IconComponent - Optional SVG icon component, passed to `IntegrationIcon` as its `Icon` prop.
 */
export function IntegrationCard({ integration, IconComponent }: IntegrationCardProps) {
const { slug, name, description, bgColor, operationCount, triggerCount } = integration
const { slug, name, description, bgColor } = integration
return (
<Link
href={`/integrations/${slug}`}
className='group flex flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
aria-label={`${name} integration`}
className='group/link flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
>
<IntegrationIcon
bgColor={bgColor}
name={name}
Icon={IconComponent}
className='mb-3 h-10 w-10 rounded-lg'
className='h-10 w-10 rounded-[5px]'
aria-hidden='true'
/>
{/* Name */}
<h3 className='mb-1 font-[500] text-[14px] text-[var(--landing-text)] leading-snug'>
{name}
</h3>
{/* Description — clamped to 2 lines */}
<p className='mb-3 line-clamp-2 flex-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
{description}
</p>
{/* Footer row */}
<div className='flex flex-wrap items-center gap-1.5'>
{operationCount > 0 && (
<Badge className='border-0 bg-[#333] text-[11px] text-[var(--landing-text-muted)]'>
{operationCount} {operationCount === 1 ? 'tool' : 'tools'}
</Badge>
)}
{triggerCount > 0 && (
<Badge className='border-0 bg-[#333] text-[11px] text-[var(--landing-text-muted)]'>
{triggerCount} {triggerCount === 1 ? 'trigger' : 'triggers'}
</Badge>
)}
<span className='ml-auto text-[#555] text-[12px] transition-colors group-hover:text-[var(--landing-text-muted)]'>
Learn more
</span>
<div className='flex flex-col gap-2'>
<h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{name}</h3>
<p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{description}
</p>
</div>
</Link>
)
}
/** Props accepted by `IntegrationRow`. */
interface IntegrationRowProps {
/** Integration whose slug, name, description, and background color the row renders. */
integration: Integration
/** Optional SVG icon component; passed to `IntegrationIcon` as its `Icon` prop. */
IconComponent?: ComponentType<SVGProps<SVGSVGElement>>
}
/**
 * One entry in the integrations list, styled after the blog's "remaining posts" rows.
 *
 * The whole row is a single link to `/integrations/{slug}` holding the icon,
 * the name, a one-line description (hidden below the `sm` breakpoint), and a
 * trailing `ChevronArrow`. A 1px divider is emitted after the link, so callers
 * can stack rows without adding their own separators.
 *
 * @param integration - Record supplying `slug`, `name`, `description`, and `bgColor`.
 * @param IconComponent - Optional SVG icon; passed to `IntegrationIcon` as `Icon`.
 */
export function IntegrationRow({ integration, IconComponent }: IntegrationRowProps) {
  const detailHref = `/integrations/${integration.slug}`
  const linkLabel = `${integration.name} integration`

  return (
    <>
      <Link
        href={detailHref}
        className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
        aria-label={linkLabel}
      >
        {/* Icon is decorative: the link's aria-label already names the integration */}
        <IntegrationIcon
          bgColor={integration.bgColor}
          name={integration.name}
          Icon={IconComponent}
          className='h-8 w-8 shrink-0 rounded-[5px]'
          iconClassName='h-4 w-4'
          fallbackClassName='text-[13px]'
          aria-hidden='true'
        />

        {/* Text column: min-w-0 lets the clamped description shrink inside the flex row */}
        <div className='flex min-w-0 flex-1 flex-col gap-0.5'>
          <h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
            {integration.name}
          </h3>
          <p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
            {integration.description}
          </p>
        </div>

        {/* Trailing arrow — presumably animated via the `group/link` hover state; confirm in model-primitives */}
        <ChevronArrow />
      </Link>

      {/* Row separator */}
      <div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
    </>
  )
}

View File

@@ -4,12 +4,11 @@ import { useMemo, useState } from 'react'
import { Input } from '@/components/emcn'
import { blockTypeToIconMap } from '@/app/(landing)/integrations/data/icon-mapping'
import type { Integration } from '@/app/(landing)/integrations/data/types'
import { IntegrationCard } from './integration-card'
import { IntegrationRow } from './integration-card'
const CATEGORY_LABELS: Record<string, string> = {
ai: 'AI',
analytics: 'Analytics',
automation: 'Automation',
communication: 'Communication',
crm: 'CRM',
'customer-support': 'Customer Support',
@@ -21,12 +20,10 @@ const CATEGORY_LABELS: Record<string, string> = {
email: 'Email',
'file-storage': 'File Storage',
hr: 'HR',
media: 'Media',
productivity: 'Productivity',
'sales-intelligence': 'Sales Intelligence',
sales: 'Sales',
search: 'Search',
security: 'Security',
social: 'Social',
other: 'Other',
} as const
@@ -41,8 +38,10 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
const availableCategories = useMemo(() => {
const counts = new Map<string, number>()
for (const i of integrations) {
if (i.integrationType) {
counts.set(i.integrationType, (counts.get(i.integrationType) || 0) + 1)
if (i.integrationTypes) {
for (const t of i.integrationTypes) {
counts.set(t, (counts.get(t) || 0) + 1)
}
}
}
return Array.from(counts.entries())
@@ -54,7 +53,7 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
let results = integrations
if (activeCategory) {
results = results.filter((i) => i.integrationType === activeCategory)
results = results.filter((i) => i.integrationTypes?.includes(activeCategory))
}
const q = query.trim().toLowerCase()
@@ -75,7 +74,7 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
return (
<div>
<div className='mb-6 flex flex-col gap-4 sm:flex-row sm:items-center'>
<div className='mb-6 flex flex-col gap-4 px-6 sm:flex-row sm:items-center'>
<div className='relative max-w-[480px] flex-1'>
<svg
aria-hidden='true'
@@ -99,14 +98,14 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
</div>
</div>
<div className='mb-8 flex flex-wrap gap-2'>
<div className='mb-6 flex flex-wrap gap-2 px-6'>
<button
type='button'
onClick={() => setActiveCategory(null)}
className={`rounded-md border px-3 py-1 text-[12px] transition-colors ${
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
activeCategory === null
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
}`}
>
All
@@ -116,10 +115,10 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
key={cat}
type='button'
onClick={() => setActiveCategory(activeCategory === cat ? null : cat)}
className={`rounded-md border px-3 py-1 text-[12px] transition-colors ${
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
activeCategory === cat
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
}`}
>
{CATEGORY_LABELS[cat] || cat}
@@ -127,16 +126,18 @@ export function IntegrationGrid({ integrations }: IntegrationGridProps) {
))}
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{filtered.length === 0 ? (
<p className='py-12 text-center text-[#555] text-[15px]'>
<p className='py-12 text-center text-[15px] text-[var(--landing-text-subtle)]'>
No integrations found
{query ? <> for &ldquo;{query}&rdquo;</> : null}
{activeCategory ? <> in {CATEGORY_LABELS[activeCategory] || activeCategory}</> : null}
</p>
) : (
<div className='grid grid-cols-1 gap-4 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4'>
<div>
{filtered.map((integration) => (
<IntegrationCard
<IntegrationRow
key={integration.type}
integration={integration}
IconComponent={blockTypeToIconMap[integration.type]}

View File

@@ -41,9 +41,7 @@ export function IntegrationIcon({
{Icon ? (
<Icon className={cn(iconClassName, 'text-white')} />
) : (
<span className={cn('font-[500] text-white leading-none', fallbackClassName)}>
{name.charAt(0)}
</span>
<span className={cn('text-white leading-none', fallbackClassName)}>{name.charAt(0)}</span>
)}
</Tag>
)

File diff suppressed because it is too large Load Diff

View File

@@ -34,6 +34,6 @@ export interface Integration {
triggerCount: number
authType: AuthType
category: string
integrationType?: string
integrationTypes?: string[]
tags?: string[]
}

View File

@@ -1,5 +1,7 @@
import type { Metadata } from 'next'
import { Badge } from '@/components/emcn'
import { getBaseUrl } from '@/lib/core/utils/urls'
import { IntegrationCard } from './components/integration-card'
import { IntegrationGrid } from './components/integration-grid'
import { RequestIntegrationModal } from './components/request-integration-modal'
import { blockTypeToIconMap } from './data/icon-mapping'
@@ -18,6 +20,14 @@ const TOP_NAMES = [...new Set(POPULAR_WORKFLOWS.flatMap((p) => [p.from, p.to]))]
const baseUrl = getBaseUrl()
/** Curated featured integrations — high-recognition services shown as cards. */
const FEATURED_SLUGS = ['slack', 'notion', 'github', 'gmail'] as const
// Slug → integration lookup, built once at module load from `allIntegrations`.
const bySlug = new Map(allIntegrations.map((i) => [i.slug, i]))
// Resolve the curated slugs in their listed order. Slugs with no matching
// integration are dropped, and the type guard narrows the result to `Integration[]`.
const featured = FEATURED_SLUGS.map((s) => bySlug.get(s)).filter(
(i): i is Integration => i !== undefined
)
export const metadata: Metadata = {
title: 'Integrations',
description: `Connect ${INTEGRATION_COUNT}+ apps and services with Sim's AI workflow automation. Build intelligent pipelines with ${TOP_NAMES.join(', ')}, and more.`,
@@ -90,7 +100,7 @@ export default function IntegrationsPage() {
}
return (
<>
<section className='bg-[var(--landing-bg)]'>
<script
type='application/ld+json'
dangerouslySetInnerHTML={{ __html: JSON.stringify(breadcrumbJsonLd) }}
@@ -100,64 +110,81 @@ export default function IntegrationsPage() {
dangerouslySetInnerHTML={{ __html: JSON.stringify(itemListJsonLd) }}
/>
<div className='mx-auto max-w-[1200px] px-6 py-16 sm:px-8 md:px-12'>
{/* Hero */}
<section aria-labelledby='integrations-heading' className='mb-16'>
{/* Hero */}
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
<Badge
variant='blue'
size='md'
dot
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
>
Integrations
</Badge>
<div className='flex flex-col gap-4 xl:flex-row xl:items-end xl:justify-between'>
<h1
id='integrations-heading'
className='mb-4 text-balance font-[500] text-[40px] text-[var(--landing-text)] leading-tight sm:text-[56px]'
className='text-balance text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
>
Integrations
</h1>
<p className='max-w-[640px] text-[18px] text-[var(--landing-text-muted)] leading-relaxed'>
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
Connect every tool your team uses. Build AI-powered workflows that automate tasks across{' '}
{TOP_NAMES.slice(0, 4).map((name, i, arr) => {
const integration = allIntegrations.find((int) => int.name === name)
const Icon = integration ? blockTypeToIconMap[integration.type] : undefined
return (
<span key={name} className='inline-flex items-center gap-[5px]'>
{Icon && (
<span
aria-hidden='true'
className='inline-flex shrink-0'
style={{ opacity: 0.65 }}
>
<Icon className='h-[0.85em] w-[0.85em]' />
</span>
)}
{name}
{i < arr.length - 1 ? ', ' : ''}
</span>
)
})}
{' and more.'}
{INTEGRATION_COUNT} apps and services.
</p>
</section>
</div>
</div>
{/* Searchable grid — client component */}
{/* Full-width divider */}
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
{/* Border-railed content */}
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
{/* Featured integrations — top */}
{featured.length > 0 && (
<>
<nav aria-label='Featured integrations' className='flex flex-col sm:flex-row'>
{featured.map((integration) => (
<IntegrationCard
key={integration.type}
integration={integration}
IconComponent={blockTypeToIconMap[integration.type]}
/>
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
{/* All Integrations — search, filters, rows */}
<section aria-labelledby='all-integrations-heading'>
<h2
id='all-integrations-heading'
className='mb-8 font-[500] text-[24px] text-[var(--landing-text)]'
>
All Integrations
</h2>
<div className='px-6 pt-10 pb-4'>
<h2
id='all-integrations-heading'
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
>
All Integrations
</h2>
</div>
<IntegrationGrid integrations={allIntegrations} />
</section>
{/* Integration request */}
<div className='mt-16 flex flex-col items-start gap-3 border-[var(--landing-border)] border-t pt-10 sm:flex-row sm:items-center sm:justify-between'>
<div className='flex flex-col items-start gap-3 px-6 py-6 sm:flex-row sm:items-center sm:justify-between'>
<div>
<p className='font-[500] text-[15px] text-[var(--landing-text)]'>
<p className='text-[15px] text-white tracking-[-0.02em]'>
Don&apos;t see the integration you need?
</p>
<p className='mt-0.5 text-[#555] text-[13px]'>
<p className='mt-0.5 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
Let us know and we&apos;ll prioritize it.
</p>
</div>
<RequestIntegrationModal />
</div>
</div>
</>
{/* Closing full-width divider */}
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
</section>
)
}

View File

@@ -3,14 +3,7 @@ import Link from 'next/link'
import { notFound } from 'next/navigation'
import { getBaseUrl } from '@/lib/core/utils/urls'
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
import {
Breadcrumbs,
CapabilityTags,
DetailItem,
ModelCard,
ProviderIcon,
StatCard,
} from '@/app/(landing)/models/components/model-primitives'
import { FeaturedModelCard, ProviderIcon } from '@/app/(landing)/models/components/model-primitives'
import {
ALL_CATALOG_MODELS,
buildModelCapabilityFacts,
@@ -165,66 +158,88 @@ export default async function ModelPage({
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
/>
<div className='mx-auto max-w-[1280px] px-6 py-12 sm:px-8 md:px-12'>
<Breadcrumbs
items={[
{ label: 'Home', href: '/' },
{ label: 'Models', href: '/models' },
{ label: provider.name, href: provider.href },
{ label: model.displayName },
]}
/>
<section className='bg-[var(--landing-bg)]'>
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
<div className='mb-6'>
<Link
href={provider.href}
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
>
<svg
className='h-3 w-3 shrink-0'
viewBox='0 0 10 10'
fill='none'
xmlns='http://www.w3.org/2000/svg'
>
<line
x1='1'
y1='5'
x2='10'
y2='5'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
/>
<path
d='M6.5 2L3.5 5L6.5 8'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
strokeLinejoin='miter'
fill='none'
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
/>
</svg>
Back to {provider.name}
</Link>
</div>
<section aria-labelledby='model-heading' className='mb-14'>
<div className='mb-6 flex items-start gap-4'>
<div className='mb-6 flex items-center gap-5'>
<ProviderIcon
provider={provider}
className='h-16 w-16 rounded-3xl'
className='h-16 w-16 rounded-[5px]'
iconClassName='h-8 w-8'
/>
<div className='min-w-0'>
<p className='text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.12em]'>
<div>
<p className='mb-0.5 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{provider.name} model
</p>
<h1
id='model-heading'
className='font-[500] text-[38px] text-[var(--landing-text)] leading-tight sm:text-[48px]'
className='text-[28px] text-white leading-[100%] tracking-[-0.02em] sm:text-[36px] lg:text-[44px]'
>
{model.displayName}
</h1>
<p className='mt-2 break-all text-[13px] text-[var(--landing-text-muted)]'>
Model ID: {model.id}
</p>
</div>
</div>
<p className='max-w-[820px] text-[17px] text-[var(--landing-text-muted)] leading-relaxed'>
<p className='mb-8 max-w-[700px] text-[var(--landing-text-body)] text-base leading-[150%] tracking-[0.02em]'>
{model.summary}
{model.bestFor ? ` ${model.bestFor}` : ''}
</p>
<div className='mt-8 flex flex-wrap gap-3'>
<Link
href={provider.href}
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--landing-border-strong)] px-3 font-[430] text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
Explore {provider.name} models
</Link>
<div className='flex flex-wrap gap-2'>
<a
href='https://sim.ai'
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--white)] bg-[var(--white)] px-3 font-[430] text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
className='inline-flex h-[32px] items-center gap-2 rounded-[5px] border border-white bg-white px-2.5 font-season text-black text-sm transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Build with this model
</a>
<Link
href={provider.href}
className='inline-flex h-[32px] items-center rounded-[5px] border border-[var(--landing-border-strong)] px-2.5 font-season text-[var(--landing-text)] text-sm transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
All {provider.name} models
</Link>
</div>
</section>
</div>
<section
aria-label='Model stats'
className='mb-16 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'
>
<StatCard label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
<StatCard
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
<InfoRow label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
<InfoRow
label='Cached input'
value={
model.pricing.cachedInput !== undefined
@@ -232,158 +247,72 @@ export default async function ModelPage({
: 'N/A'
}
/>
<StatCard label='Output price' value={`${formatPrice(model.pricing.output)}/1M`} />
<StatCard
<InfoRow label='Output price' value={`${formatPrice(model.pricing.output)}/1M`} />
<InfoRow
label='Context window'
value={model.contextWindow ? formatTokenCount(model.contextWindow) : 'Unknown'}
/>
</section>
<InfoRow
label='Max output'
value={
model.capabilities.maxOutputTokens
? `${formatTokenCount(getEffectiveMaxOutputTokens(model.capabilities))} tokens`
: 'Not published'
}
/>
<InfoRow label='Provider' value={provider.name} />
<InfoRow label='Updated' value={formatUpdatedAt(model.pricing.updatedAt)} />
{model.bestFor ? <InfoRow label='Best for' value={model.bestFor} /> : null}
<div className='grid grid-cols-1 gap-16 lg:grid-cols-[1fr_320px]'>
<div className='min-w-0 space-y-16'>
<section aria-labelledby='pricing-heading'>
<h2
id='pricing-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
>
Pricing and limits
</h2>
<p className='mb-6 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Pricing below is generated directly from the provider registry in Sim. All amounts
are listed per one million tokens.
</p>
{capabilityFacts.length > 0 && (
<>
{capabilityFacts.map((item) => (
<InfoRow key={item.label} label={item.label} value={item.value} />
))}
</>
)}
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
<DetailItem label='Input price' value={`${formatPrice(model.pricing.input)}/1M`} />
<DetailItem
label='Cached input'
value={
model.pricing.cachedInput !== undefined
? `${formatPrice(model.pricing.cachedInput)}/1M`
: 'N/A'
}
/>
<DetailItem
label='Output price'
value={`${formatPrice(model.pricing.output)}/1M`}
/>
<DetailItem label='Updated' value={formatUpdatedAt(model.pricing.updatedAt)} />
<DetailItem
label='Context window'
value={
model.contextWindow
? `${formatTokenCount(model.contextWindow)} tokens`
: 'Unknown'
}
/>
<DetailItem
label='Max output'
value={
model.capabilities.maxOutputTokens
? `${formatTokenCount(getEffectiveMaxOutputTokens(model.capabilities))} tokens`
: 'Not published'
}
/>
<DetailItem label='Provider' value={provider.name} />
{model.bestFor ? <DetailItem label='Best for' value={model.bestFor} /> : null}
</div>
</section>
<section aria-labelledby='capabilities-heading'>
<h2
id='capabilities-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
>
Capabilities
</h2>
<p className='mb-6 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
These capability flags are generated from the provider and model definitions tracked
in Sim.
</p>
<CapabilityTags tags={model.capabilityTags} />
<div className='mt-8 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
{capabilityFacts.map((item) => (
<DetailItem key={item.label} label={item.label} value={item.value} />
{relatedModels.length > 0 && (
<>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<nav aria-label='Related models' className='flex flex-col sm:flex-row'>
{relatedModels.slice(0, 3).map((entry) => (
<FeaturedModelCard key={entry.id} provider={provider} model={entry} />
))}
</div>
</section>
</nav>
</>
)}
{relatedModels.length > 0 && (
<section aria-labelledby='related-models-heading'>
<h2
id='related-models-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
>
Related {provider.name} models
</h2>
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Browse comparable models from the same provider to compare pricing, context
window, and capability coverage.
</p>
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
{relatedModels.map((entry) => (
<ModelCard key={entry.id} provider={provider} model={entry} />
))}
</div>
</section>
)}
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<section
aria-labelledby='model-faq-heading'
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
<section aria-labelledby='model-faq-heading' className='px-6 py-10'>
<h2
id='model-faq-heading'
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
>
<h2
id='model-faq-heading'
className='font-[500] text-[28px] text-[var(--landing-text)]'
>
Frequently asked questions
</h2>
<div className='mt-3'>
<LandingFAQ faqs={faqs} />
</div>
</section>
</div>
<aside className='space-y-5' aria-label='Model details'>
<div className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
<h2 className='mb-4 font-[500] text-[16px] text-[var(--landing-text)]'>
Quick details
</h2>
<div className='space-y-3'>
<DetailItem label='Display name' value={model.displayName} />
<DetailItem label='Provider' value={provider.name} />
<DetailItem
label='Context tracked'
value={model.contextWindow ? 'Yes' : 'Partial'}
/>
<DetailItem
label='Pricing updated'
value={formatUpdatedAt(model.pricing.updatedAt)}
/>
</div>
Frequently asked questions
</h2>
<div>
<LandingFAQ faqs={faqs} />
</div>
<div className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-5'>
<h2 className='mb-4 font-[500] text-[16px] text-[var(--landing-text)]'>
Browse more
</h2>
<div className='space-y-2'>
<Link
href={provider.href}
className='block rounded-xl px-3 py-2 text-[14px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
>
All {provider.name} models
</Link>
<Link
href='/models'
className='block rounded-xl px-3 py-2 text-[14px] text-[var(--landing-text-muted)] transition-colors hover:bg-[var(--landing-bg-elevated)] hover:text-[var(--landing-text)]'
>
Full models directory
</Link>
</div>
</div>
</aside>
</section>
</div>
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
</section>
</>
)
}
function InfoRow({ label, value }: { label: string; value: string }) {
return (
<>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<div className='flex items-baseline justify-between gap-4 px-6 py-4'>
<span className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{label}
</span>
<span className='text-right text-[14px] text-white leading-snug'>{value}</span>
</div>
</>
)

View File

@@ -1,19 +1,21 @@
import type { Metadata } from 'next'
import Link from 'next/link'
import { notFound } from 'next/navigation'
import { Badge } from '@/components/emcn'
import { getBaseUrl } from '@/lib/core/utils/urls'
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
import {
Breadcrumbs,
CapabilityTags,
ModelCard,
ProviderCard,
ChevronArrow,
FeaturedModelCard,
FeaturedProviderCard,
ProviderIcon,
StatCard,
} from '@/app/(landing)/models/components/model-primitives'
import { ModelTimelineChart } from '@/app/(landing)/models/components/model-timeline-chart'
import {
buildProviderFaqs,
formatPrice,
formatTokenCount,
getProviderBySlug,
getProviderCapabilitySummary,
MODEL_PROVIDERS_WITH_CATALOGS,
TOP_MODEL_PROVIDERS,
} from '@/app/(landing)/models/utils'
@@ -95,7 +97,6 @@ export default async function ProviderModelsPage({
}
const faqs = buildProviderFaqs(provider)
const capabilitySummary = getProviderCapabilitySummary(provider)
const relatedProviders = MODEL_PROVIDERS_WITH_CATALOGS.filter(
(entry) => entry.id !== provider.id && TOP_MODEL_PROVIDERS.includes(entry.name)
).slice(0, 4)
@@ -153,142 +154,149 @@ export default async function ProviderModelsPage({
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
/>
<div className='mx-auto max-w-[1280px] px-6 py-12 sm:px-8 md:px-12'>
<Breadcrumbs
items={[
{ label: 'Home', href: '/' },
{ label: 'Models', href: '/models' },
{ label: provider.name },
]}
/>
<section className='bg-[var(--landing-bg)]'>
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
<div className='mb-6'>
<Link
href='/models'
className='group/link inline-flex items-center gap-1.5 font-season text-[var(--landing-text-muted)] text-sm tracking-[0.02em] hover:text-[var(--landing-text)]'
>
<svg
className='h-3 w-3 shrink-0'
viewBox='0 0 10 10'
fill='none'
xmlns='http://www.w3.org/2000/svg'
>
<line
x1='1'
y1='5'
x2='10'
y2='5'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
className='origin-right scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
/>
<path
d='M6.5 2L3.5 5L6.5 8'
stroke='currentColor'
strokeWidth='1.33'
strokeLinecap='square'
strokeLinejoin='miter'
fill='none'
className='group-hover/link:-translate-x-[30%] transition-transform duration-200 ease-out'
/>
</svg>
Back to Models
</Link>
</div>
<section aria-labelledby='provider-heading' className='mb-14'>
<div className='mb-6 flex items-center gap-4'>
<ProviderIcon
provider={provider}
className='h-16 w-16 rounded-3xl'
iconClassName='h-8 w-8'
/>
<div>
<p className='text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.12em]'>
Provider
</p>
<Badge
variant='blue'
size='md'
dot
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
>
Provider
</Badge>
<div className='flex flex-col gap-4 lg:flex-row lg:items-end lg:justify-between'>
<div className='flex items-center gap-4'>
<ProviderIcon
provider={provider}
className='h-12 w-12 rounded-[5px]'
iconClassName='h-6 w-6'
/>
<h1
id='provider-heading'
className='font-[500] text-[38px] text-[var(--landing-text)] leading-tight sm:text-[48px]'
className='font-[430] font-season text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
>
{provider.name} models
</h1>
</div>
<span className='shrink-0 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{provider.modelCount} models
</span>
</div>
</div>
<p className='max-w-[820px] text-[17px] text-[var(--landing-text-muted)] leading-relaxed'>
{provider.summary} Browse every {provider.name} model page generated from Sim&apos;s
provider registry with human-readable names, pricing, context windows, and capability
metadata.
</p>
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
<div className='mt-8 grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
<StatCard label='Models tracked' value={provider.modelCount.toString()} />
<StatCard
label='Default model'
value={provider.defaultModelDisplayName || 'Dynamic'}
compact
/>
<StatCard
label='Metadata coverage'
value={provider.contextInformationAvailable ? 'Tracked' : 'Partial'}
compact
/>
<StatCard
label='Featured models'
value={provider.featuredModels.length.toString()}
compact
/>
</div>
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
{provider.featuredModels.length > 0 && (
<>
<nav aria-label='Featured models' className='flex flex-col sm:flex-row'>
{provider.featuredModels.slice(0, 3).map((model) => (
<FeaturedModelCard key={model.id} provider={provider} model={model} />
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
<div className='mt-6'>
<CapabilityTags tags={provider.providerCapabilityTags} />
</div>
</section>
<ModelTimelineChart models={provider.models} providerId={provider.id} />
<section aria-labelledby='provider-models-heading' className='mb-16'>
<h2
id='provider-models-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
>
All {provider.name} models
</h2>
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Every model below links to a dedicated SEO page with exact pricing, context window,
capability support, and related model recommendations.
</p>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
{provider.models.map((model) => (
<ModelCard key={model.id} provider={provider} model={model} />
))}
</div>
</section>
{provider.models.map((model) => (
<div key={model.id}>
<Link
href={model.href}
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{model.displayName}
</h3>
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
{model.id}
</p>
</div>
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] md:block'>
{formatPrice(model.pricing.input)}/1M in
</span>
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] md:block'>
{formatPrice(model.pricing.output)}/1M out
</span>
{model.contextWindow ? (
<span className='hidden shrink-0 font-martian-mono text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.1em] lg:block'>
{formatTokenCount(model.contextWindow)} ctx
</span>
) : null}
<ChevronArrow />
</Link>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</div>
))}
<section
aria-labelledby='lineup-snapshot-heading'
className='mb-16 rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
>
<h2
id='lineup-snapshot-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
>
Lineup snapshot
</h2>
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
A quick view of the strongest differentiators in the {provider.name} model lineup based
on the metadata currently tracked in Sim.
</p>
{relatedProviders.length > 0 && (
<>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<nav aria-label='Related providers' className='flex flex-col sm:flex-row'>
{relatedProviders.map((entry) => (
<FeaturedProviderCard key={entry.id} provider={entry} />
))}
</nav>
</>
)}
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
{capabilitySummary.map((item) => (
<StatCard key={item.label} label={item.label} value={item.value} compact />
))}
</div>
</section>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{relatedProviders.length > 0 && (
<section aria-labelledby='related-providers-heading' className='mb-16'>
<section aria-labelledby='provider-faq-heading' className='px-6 py-10'>
<h2
id='related-providers-heading'
className='mb-2 font-[500] text-[28px] text-[var(--landing-text)]'
id='provider-faq-heading'
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
>
Compare with other providers
Frequently asked questions
</h2>
<p className='mb-8 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Explore similar provider hubs to compare model lineups, pricing surfaces, and
long-context coverage across the broader AI ecosystem.
</p>
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
{relatedProviders.map((entry) => (
<ProviderCard key={entry.id} provider={entry} />
))}
<div>
<LandingFAQ faqs={faqs} />
</div>
</section>
)}
</div>
<section
aria-labelledby='provider-faq-heading'
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
>
<h2
id='provider-faq-heading'
className='font-[500] text-[28px] text-[var(--landing-text)]'
>
Frequently asked questions
</h2>
<div className='mt-3'>
<LandingFAQ faqs={faqs} />
</div>
</section>
</div>
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
</section>
</>
)
}

View File

@@ -0,0 +1,9 @@
import { MODEL_CATALOG_PROVIDERS } from '@/app/(landing)/models/utils'
/** Maps a provider id to its configured color; providers with a falsy `color` are left out. */
const colorMap = new Map<string, string>()
for (const { id, color } of MODEL_CATALOG_PROVIDERS) {
  if (color) {
    colorMap.set(id, color)
  }
}
/**
 * Looks up the color registered for a provider in `colorMap`.
 *
 * @param providerId - Provider id used as the lookup key.
 * @returns The mapped color string, or `'#888888'` when the id has no entry.
 */
export function getProviderColor(providerId: string): string {
  const fallbackColor = '#888888'
  const registeredColor = colorMap.get(providerId)
  return registeredColor ?? fallbackColor
}

View File

@@ -0,0 +1,245 @@
'use client'
import type { ComponentType } from 'react'
import { useMemo } from 'react'
import Link from 'next/link'
import { getProviderColor } from '@/app/(landing)/models/components/consts'
import type { CatalogModel } from '@/app/(landing)/models/utils'
import {
formatPrice,
formatTokenCount,
MODEL_CATALOG_PROVIDERS,
} from '@/app/(landing)/models/utils'
/**
 * Ids of providers flagged `isReseller` — they host other providers' models and are
 * deprioritized when ranking so the same model is not listed twice.
 */
const RESELLER_PROVIDERS = new Set<string>()
for (const { id, isReseller } of MODEL_CATALOG_PROVIDERS) {
  if (isReseller) {
    RESELLER_PROVIDERS.add(id)
  }
}
/** Icon component per provider id; providers without an `icon` have no entry. */
const PROVIDER_ICON_MAP: Record<
  string,
  ComponentType<{ className?: string }>
> = MODEL_CATALOG_PROVIDERS.reduce<Record<string, ComponentType<{ className?: string }>>>(
  (icons, { id, icon }) => {
    if (icon) {
      icons[id] = icon
    }
    return icons
  },
  {}
)
/**
 * Picks up to ten models for the comparison charts.
 *
 * Models are ranked by a score (reseller penalty + reasoning bonus + context-window
 * bonus, highest first) and then de-duplicated by case-insensitive display name, so
 * the best-ranked entry for each name wins. The input array is not mutated.
 *
 * @param models - Candidate models.
 * @returns At most ten models, in ranked order, with unique display names.
 */
function selectComparisonModels(models: CatalogModel[]): CatalogModel[] {
  const rank = (candidate: CatalogModel): number => {
    const resellerPenalty = RESELLER_PROVIDERS.has(candidate.providerId) ? -50 : 0
    const reasoningBonus =
      candidate.capabilities.reasoningEffort || candidate.capabilities.thinking ? 10 : 0
    const contextBonus = (candidate.contextWindow ?? 0) / 100000
    // Keep this exact addition order so floating-point results match between runs.
    return resellerPenalty + reasoningBonus + contextBonus
  }

  const ranked = models.slice().sort((left, right) => rank(right) - rank(left))

  // A Map iterates in insertion order, so it doubles as the "seen" set and the result list.
  const picked = new Map<string, CatalogModel>()
  for (const candidate of ranked) {
    if (picked.size >= 10) {
      break
    }
    const nameKey = candidate.displayName.toLowerCase()
    if (!picked.has(nameKey)) {
      picked.set(nameKey, candidate)
    }
  }

  return Array.from(picked.values())
}
/** Props for `ModelLabel`. */
interface ModelLabelProps {
  /** Model whose display name and provider icon are shown. */
  model: CatalogModel
}

/**
 * Fixed-width, right-aligned row label: the provider icon (when one is registered in
 * `PROVIDER_ICON_MAP`) followed by the truncated model display name.
 */
function ModelLabel(props: ModelLabelProps) {
  const { displayName, providerId } = props.model
  const ProviderGlyph = PROVIDER_ICON_MAP[providerId]

  return (
    <div className='flex w-[140px] shrink-0 items-center justify-end gap-1.5 sm:w-[180px]'>
      {ProviderGlyph ? <ProviderGlyph className='h-3.5 w-3.5 shrink-0' /> : null}
      <span className='truncate font-medium text-[13px] text-[var(--landing-text)] leading-none tracking-[-0.01em]'>
        {displayName}
      </span>
    </div>
  )
}
/** Props shared by the bar charts in this file. */
interface ChartProps {
  /** Models to plot, one bar per model. */
  models: CatalogModel[]
}

/**
 * Horizontal stacked bar chart of input + output price per model.
 *
 * Models whose combined price is not greater than zero are dropped; the rest are sorted
 * ascending by combined price. Each bar's width is relative to the most expensive model
 * (with a 3% floor so tiny bars stay visible) and is split into an input segment and a
 * lighter output segment. Renders nothing when no model has a positive price.
 */
function StackedCostChart({ models }: ChartProps) {
  const { rows, peakTotal } = useMemo(() => {
    const priced: Array<{ model: CatalogModel; input: number; output: number; total: number }> =
      []
    for (const model of models) {
      const { input, output } = model.pricing
      const total = input + output
      if (total > 0) {
        priced.push({ model, input, output, total })
      }
    }
    priced.sort((cheaper, pricier) => cheaper.total - pricier.total)

    const highest = priced.reduce((best, row) => Math.max(best, row.total), 0)
    return { rows: priced, peakTotal: highest }
  }, [models])

  if (rows.length === 0) {
    return null
  }

  return (
    <div className='flex flex-col gap-3'>
      <div className='flex flex-col gap-1'>
        <h3 className='text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'>
          Cost
        </h3>
        <span className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
          Per 1M tokens
        </span>
      </div>
      <div className='flex flex-col gap-1.5'>
        {rows.map((row) => {
          const { model, input, output, total } = row
          // Share of the widest bar, and share of this bar taken by the input price.
          const barShare = peakTotal > 0 ? (total / peakTotal) * 100 : 0
          const inputShare = total > 0 ? (input / total) * 100 : 0
          const fill = getProviderColor(model.providerId)

          const barStyle = { width: `${Math.max(barShare, 3)}%` }
          const inputStyle = { width: `${inputShare}%`, backgroundColor: fill, opacity: 0.8 }
          const outputStyle = {
            width: `${100 - inputShare}%`,
            backgroundColor: fill,
            opacity: 0.35,
          }

          return (
            <Link
              key={model.id}
              href={model.href}
              className='-mx-2 flex items-center gap-3 rounded-md px-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'
            >
              <ModelLabel model={model} />
              <div className='relative flex h-7 min-w-0 flex-1 items-center'>
                <div className='flex h-full overflow-hidden rounded-r-[3px]' style={barStyle}>
                  <div className='h-full' style={inputStyle} />
                  <div className='h-full' style={outputStyle} />
                </div>
                <span className='ml-2.5 shrink-0 font-mono text-[var(--landing-text-muted)] text-xs'>
                  {formatPrice(input)} input / {formatPrice(output)} output
                </span>
              </div>
            </Link>
          )
        })}
      </div>
    </div>
  )
}
/**
 * Horizontal bar chart of context-window size per model.
 *
 * Models with a `null` or non-positive `contextWindow` are dropped; the rest are sorted
 * ascending. Bar width is relative to the largest context window, with a 3% floor.
 * Renders nothing when no model has a usable context window.
 */
function ContextWindowChart({ models }: ChartProps) {
  const { rows, peakValue } = useMemo(() => {
    const sized: Array<{ model: CatalogModel; value: number }> = []
    for (const model of models) {
      const value = model.contextWindow
      if (value !== null && value > 0) {
        sized.push({ model, value })
      }
    }
    sized.sort((smaller, larger) => smaller.value - larger.value)

    const largest = sized.reduce((best, row) => Math.max(best, row.value), 0)
    return { rows: sized, peakValue: largest }
  }, [models])

  if (rows.length === 0) {
    return null
  }

  return (
    <div className='flex flex-col gap-3'>
      <div className='flex flex-col gap-1'>
        <h3 className='text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'>
          Context window
        </h3>
        <span className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
          Max tokens
        </span>
      </div>
      <div className='flex flex-col gap-1.5'>
        {rows.map((row) => {
          const { model, value } = row
          const share = peakValue > 0 ? (value / peakValue) * 100 : 0
          const barStyle = {
            width: `${Math.max(share, 3)}%`,
            backgroundColor: getProviderColor(model.providerId),
            opacity: 0.8,
          }

          return (
            <Link
              key={model.id}
              href={model.href}
              className='-mx-2 flex items-center gap-3 rounded-md px-2 transition-colors hover:bg-[var(--landing-bg-elevated)]'
            >
              <ModelLabel model={model} />
              <div className='relative flex h-7 min-w-0 flex-1 items-center'>
                <div className='h-full rounded-r-[3px]' style={barStyle} />
                <span className='ml-2.5 shrink-0 font-mono text-[var(--landing-text-muted)] text-xs'>
                  {formatTokenCount(value)}
                </span>
              </div>
            </Link>
          )
        })}
      </div>
    </div>
  )
}
/** Props for `ModelComparisonCharts`. */
interface ModelComparisonChartsProps {
  /** Full list of candidate models; a shortlist is chosen via `selectComparisonModels`. */
  models: CatalogModel[]
}

/**
 * "Compare models" section: a heading plus the cost chart and the context-window chart,
 * both fed the same shortlist. The charts stack vertically by default and sit side by
 * side from the `sm` breakpoint, separated by a divider that switches orientation.
 */
export function ModelComparisonCharts(props: ModelComparisonChartsProps) {
  const { models } = props
  // Recompute the shortlist only when the incoming model list changes.
  const shortlist = useMemo(() => selectComparisonModels(models), [models])

  return (
    <section aria-labelledby='comparison-heading'>
      <div className='px-6 pt-10 pb-4'>
        <h2
          id='comparison-heading'
          className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
        >
          Compare models
        </h2>
        <p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
          Side-by-side comparison of top models across key metrics.
        </p>
      </div>
      <div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
      <div className='flex flex-col sm:flex-row'>
        <div className='flex-1 p-6'>
          <StackedCostChart models={shortlist} />
        </div>
        <div className='h-px w-full bg-[var(--landing-bg-elevated)] sm:h-auto sm:w-px' />
        <div className='flex-1 p-6'>
          <ContextWindowChart models={shortlist} />
        </div>
      </div>
    </section>
  )
}

View File

@@ -3,20 +3,14 @@
import { useMemo, useState } from 'react'
import Link from 'next/link'
import { Input } from '@/components/emcn'
import { SearchIcon } from '@/components/icons'
import { cn } from '@/lib/core/utils/cn'
import {
CapabilityTags,
DetailItem,
ModelCard,
ProviderIcon,
StatCard,
} from '@/app/(landing)/models/components/model-primitives'
import { ChevronArrow, ProviderIcon } from '@/app/(landing)/models/components/model-primitives'
import {
type CatalogModel,
type CatalogProvider,
formatPrice,
formatTokenCount,
MODEL_PROVIDERS_WITH_CATALOGS,
MODEL_PROVIDERS_WITH_DYNAMIC_CATALOGS,
TOTAL_MODELS,
} from '@/app/(landing)/models/utils'
export function ModelDirectory() {
@@ -35,7 +29,7 @@ export function ModelDirectory() {
const normalizedQuery = query.trim().toLowerCase()
const { filteredProviders, filteredDynamicProviders, visibleModelCount } = useMemo(() => {
const { filteredProviders, filteredDynamicProviders } = useMemo(() => {
const filteredProviders = MODEL_PROVIDERS_WITH_CATALOGS.map((provider) => {
const providerMatchesSearch =
normalizedQuery.length > 0 && provider.searchText.includes(normalizedQuery)
@@ -77,15 +71,9 @@ export function ModelDirectory() {
return provider.searchText.includes(normalizedQuery)
})
const visibleModelCount = filteredProviders.reduce(
(count, provider) => count + provider.models.length,
0
)
return {
filteredProviders,
filteredDynamicProviders,
visibleModelCount,
}
}, [activeProviderId, normalizedQuery])
@@ -93,170 +81,143 @@ export function ModelDirectory() {
return (
<div>
<div className='mb-8 flex flex-col gap-4 lg:flex-row lg:items-center lg:justify-between'>
<div className='relative max-w-[560px] flex-1'>
<SearchIcon
<div className='mb-6 flex flex-col gap-4 px-6 sm:flex-row sm:items-center'>
<div className='relative max-w-[480px] flex-1'>
<svg
aria-hidden='true'
className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-4 w-4 text-[var(--landing-text-muted)]'
/>
className='-translate-y-1/2 pointer-events-none absolute top-1/2 left-3 h-4 w-4 text-[#555]'
fill='none'
stroke='currentColor'
strokeWidth={2}
viewBox='0 0 24 24'
>
<circle cx={11} cy={11} r={8} />
<path d='m21 21-4.35-4.35' />
</svg>
<Input
type='search'
placeholder='Search models, providers, capabilities, or pricing details'
placeholder='Search models, providers, or capabilities'
value={query}
onChange={(event) => setQuery(event.target.value)}
className='h-11 border-[var(--landing-border)] bg-[var(--landing-bg-card)] pl-10 text-[var(--landing-text)] placeholder:text-[var(--landing-text-muted)]'
className='pl-9'
aria-label='Search AI models'
/>
</div>
<p className='text-[13px] text-[var(--landing-text-muted)] leading-relaxed'>
Showing {visibleModelCount.toLocaleString('en-US')} of{' '}
{TOTAL_MODELS.toLocaleString('en-US')} models
{activeProviderId ? ' in one provider' : ''}.
</p>
</div>
<div className='mb-10 flex flex-wrap gap-2'>
<FilterButton
isActive={activeProviderId === null}
<div className='mb-6 flex flex-wrap gap-2 px-6'>
<button
type='button'
onClick={() => setActiveProviderId(null)}
label={`All providers (${MODEL_PROVIDERS_WITH_CATALOGS.length})`}
/>
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
activeProviderId === null
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
}`}
>
All
</button>
{providerOptions.map((provider) => (
<FilterButton
<button
key={provider.id}
isActive={activeProviderId === provider.id}
type='button'
onClick={() =>
setActiveProviderId(activeProviderId === provider.id ? null : provider.id)
}
label={`${provider.name} (${provider.count})`}
/>
className={`rounded-[5px] border px-[9px] py-0.5 text-[13.5px] transition-colors ${
activeProviderId === provider.id
? 'border-[var(--landing-border-strong)] bg-[var(--landing-bg-elevated)] text-[var(--landing-text)]'
: 'border-[var(--landing-border-strong)] text-[var(--landing-text)] hover:bg-[var(--landing-bg-elevated)]'
}`}
>
{provider.name}
</button>
))}
</div>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
{!hasResults ? (
<div className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-6 py-12 text-center'>
<h3 className='font-[500] text-[18px] text-[var(--landing-text)]'>No matches found</h3>
<p className='mt-2 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
<div className='px-6 py-12 text-center'>
<h3 className='text-[18px] text-white'>No matches found</h3>
<p className='mt-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
Try a provider name like OpenAI or Anthropic, or search for capabilities like
&nbsp;structured outputs, reasoning, or deep research.
</p>
</div>
) : (
<div className='space-y-10'>
{filteredProviders.map((provider) => (
<section
key={provider.id}
aria-labelledby={`${provider.id}-heading`}
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
>
<div className='mb-6 flex flex-col gap-5 border-[var(--landing-border)] border-b pb-6 lg:flex-row lg:items-start lg:justify-between'>
<div className='min-w-0'>
<div className='mb-3 flex items-center gap-3'>
<ProviderIcon provider={provider} />
<div>
<p className='text-[12px] text-[var(--landing-text-muted)]'>Provider</p>
<h2
id={`${provider.id}-heading`}
className='font-[500] text-[24px] text-[var(--landing-text)]'
>
{provider.name}
</h2>
</div>
</div>
<div>
{filteredProviders.map((provider, index) => (
<section key={provider.id} aria-labelledby={`${provider.id}-heading`}>
{index > 0 && <div className='h-px w-full bg-[var(--landing-bg-elevated)]' />}
<p className='max-w-[720px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
{provider.description}
</p>
<Link
href={provider.href}
className='mt-3 inline-flex text-[#555] text-[13px] transition-colors hover:text-[var(--landing-text-muted)]'
<Link
href={provider.href}
className='group/link flex items-center gap-3 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
<ProviderIcon
provider={provider}
className='h-8 w-8 rounded-[5px]'
iconClassName='h-4 w-4'
/>
<div className='min-w-0 flex-1'>
<h2
id={`${provider.id}-heading`}
className='text-[14px] text-white leading-snug tracking-[-0.02em]'
>
View provider page
</Link>
{provider.name}
</h2>
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
{provider.modelCount} models &middot; {provider.description}
</p>
</div>
<ChevronArrow />
</Link>
<div className='grid shrink-0 grid-cols-2 gap-3 sm:grid-cols-3'>
<StatCard label='Models' value={provider.models.length.toString()} />
<StatCard
label='Default'
value={provider.defaultModelDisplayName || 'Dynamic'}
compact
/>
<StatCard
label='Context info'
value={provider.contextInformationAvailable ? 'Tracked' : 'Limited'}
compact
/>
</div>
</div>
<div className='mb-6'>
<CapabilityTags tags={provider.providerCapabilityTags} />
</div>
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
{provider.models.map((model) => (
<ModelCard key={model.id} provider={provider} model={model} />
))}
</div>
{provider.models.map((model) => (
<ModelRow key={model.id} provider={provider} model={model} />
))}
</section>
))}
{filteredDynamicProviders.length > 0 && (
<section
aria-labelledby='dynamic-catalogs-heading'
className='rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
>
<div className='mb-6'>
<section aria-labelledby='dynamic-catalogs-heading'>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<div className='px-6 pt-8 pb-6'>
<h2
id='dynamic-catalogs-heading'
className='font-[500] text-[24px] text-[var(--landing-text)]'
className='text-[18px] text-white leading-[100%] tracking-[-0.02em] lg:text-[20px]'
>
Dynamic model catalogs
</h2>
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
These providers are supported by Sim, but their model lists are loaded dynamically
at runtime rather than hard-coded into the public catalog.
<p className='mt-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
These providers load their model lists dynamically at runtime.
</p>
</div>
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4'>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<nav aria-label='Dynamic catalog providers' className='flex flex-col lg:flex-row'>
{filteredDynamicProviders.map((provider) => (
<article
<div
key={provider.id}
className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] p-5'
className='flex flex-1 items-center gap-3 border-[var(--landing-bg-elevated)] border-t px-6 py-4 first:border-t-0 lg:border-t-0 lg:border-l lg:first:border-l-0'
>
<div className='mb-4 flex items-center gap-3'>
<ProviderIcon provider={provider} />
<div className='min-w-0'>
<h3 className='font-[500] text-[16px] text-[var(--landing-text)]'>
{provider.name}
</h3>
<p className='text-[12px] text-[var(--landing-text-muted)]'>
{provider.id}
</p>
</div>
<ProviderIcon
provider={provider}
className='h-8 w-8 rounded-[5px]'
iconClassName='h-4 w-4'
/>
<div className='min-w-0 flex-1'>
<h3 className='text-[14px] text-white leading-snug'>{provider.name}</h3>
<p className='line-clamp-1 text-[12px] text-[var(--landing-text-muted)] leading-[150%]'>
{provider.description}
</p>
</div>
<p className='text-[13px] text-[var(--landing-text-muted)] leading-relaxed'>
{provider.description}
</p>
<div className='mt-4 space-y-3 text-[13px]'>
<DetailItem
label='Default'
value={provider.defaultModelDisplayName || 'Selected at runtime'}
/>
<DetailItem label='Catalog source' value='Loaded dynamically inside Sim' />
</div>
<div className='mt-4'>
<CapabilityTags tags={provider.providerCapabilityTags} />
</div>
</article>
</div>
))}
</div>
</nav>
</section>
)}
</div>
@@ -265,27 +226,33 @@ export function ModelDirectory() {
)
}
function FilterButton({
isActive,
onClick,
label,
}: {
isActive: boolean
onClick: () => void
label: string
}) {
function ModelRow({ provider, model }: { provider: CatalogProvider; model: CatalogModel }) {
return (
<button
type='button'
onClick={onClick}
className={cn(
'rounded-full border px-3 py-1.5 text-[12px] transition-colors',
isActive
? 'border-[#555] bg-[#333] text-[var(--landing-text)]'
: 'border-[var(--landing-border)] bg-transparent text-[var(--landing-text-muted)] hover:border-[var(--landing-border-strong)] hover:text-[var(--landing-text)]'
)}
>
{label}
</button>
<>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<Link
href={model.href}
className='group/link flex items-center gap-4 px-6 py-4 transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
<ProviderIcon
provider={provider}
className='h-8 w-8 shrink-0 rounded-[5px]'
iconClassName='h-4 w-4'
/>
<div className='flex min-w-0 flex-1 flex-col gap-0.5'>
<h3 className='text-[14px] text-white leading-snug tracking-[-0.02em]'>
{model.displayName}
</h3>
<p className='line-clamp-1 hidden text-[12px] text-[var(--landing-text-muted)] leading-[150%] sm:block'>
{model.id} &middot; Input {formatPrice(model.pricing.input)}/1M &middot; Output{' '}
{formatPrice(model.pricing.output)}/1M
{model.contextWindow ? ` · ${formatTokenCount(model.contextWindow)} context` : ''}
</p>
</div>
<ChevronArrow />
</Link>
</>
)
}

View File

@@ -12,7 +12,7 @@ export function Breadcrumbs({ items }: { items: Array<{ label: string; href?: st
return (
<nav
aria-label='Breadcrumb'
className='mb-10 flex flex-wrap items-center gap-2 text-[#555] text-[13px]'
className='mb-10 flex flex-wrap items-center gap-2 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'
>
{items.map((item, index) => (
<span key={`${item.label}-${index}`} className='inline-flex items-center gap-2'>
@@ -35,7 +35,7 @@ export function Breadcrumbs({ items }: { items: Array<{ label: string; href?: st
export function ProviderIcon({
provider,
className = 'h-12 w-12 rounded-2xl',
className = 'h-12 w-12 rounded-[5px]',
iconClassName = 'h-6 w-6',
}: {
provider: Pick<CatalogProvider, 'icon' | 'name'>
@@ -51,7 +51,7 @@ export function ProviderIcon({
{Icon ? (
<Icon className={iconClassName} />
) : (
<span className='font-[500] text-[14px] text-[var(--landing-text)]'>
<span className='font-[430] text-[14px] text-[var(--landing-text)]'>
{provider.name.slice(0, 2).toUpperCase()}
</span>
)}
@@ -69,12 +69,12 @@ export function StatCard({
compact?: boolean
}) {
return (
<div className='rounded-2xl border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] px-4 py-3'>
<p className='text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.08em]'>
<div className='rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-elevated)] px-4 py-3'>
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{label}
</p>
<p
className={`mt-1 font-[500] text-[var(--landing-text)] ${
className={`mt-1 font-[430] text-[var(--landing-text)] ${
compact ? 'break-all text-[12px] leading-snug' : 'text-[18px]'
}`}
>
@@ -86,17 +86,49 @@ export function StatCard({
export function DetailItem({ label, value }: { label: string; value: string }) {
return (
<div className='rounded-xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3 py-2'>
<p className='text-[11px] text-[var(--landing-text-muted)] uppercase tracking-[0.08em]'>
<div className='rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] px-3 py-2'>
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{label}
</p>
<p className='mt-1 break-words font-[500] text-[12px] text-[var(--landing-text)] leading-snug'>
<p className='mt-1 break-words font-[430] text-[12px] text-[var(--landing-text)] leading-snug'>
{value}
</p>
</div>
)
}
/**
 * Decorative 10×10 chevron used as a trailing "go" affordance on link rows.
 *
 * The icon is hidden from assistive tech (`aria-hidden`). Its hover animation is
 * driven by the `group-hover/link` variants: the horizontal shaft scales in from
 * the left and the chevron head nudges right, so an ancestor must carry the
 * `group/link` class for the animation to trigger.
 */
export function ChevronArrow() {
  // Stroke attributes shared by the shaft and the chevron head.
  const strokeProps = {
    stroke: 'currentColor',
    strokeWidth: '1.33',
    strokeLinecap: 'square',
  } as const

  return (
    <svg
      className='h-3 w-3 shrink-0 text-[var(--landing-text-subtle)]'
      viewBox='0 0 10 10'
      fill='none'
      xmlns='http://www.w3.org/2000/svg'
      aria-hidden='true'
    >
      {/* Shaft: collapsed (scale-x-0) until the parent link group is hovered */}
      <line
        x1='0'
        y1='5'
        x2='9'
        y2='5'
        {...strokeProps}
        className='origin-left scale-x-0 transition-transform duration-200 ease-out [transform-box:fill-box] group-hover/link:scale-x-100'
      />
      {/* Chevron head: always visible, shifts right on hover */}
      <path
        d='M3.5 2L6.5 5L3.5 8'
        {...strokeProps}
        strokeLinejoin='miter'
        fill='none'
        className='transition-transform duration-200 ease-out group-hover/link:translate-x-[30%]'
      />
    </svg>
  )
}
export function CapabilityTags({ tags }: { tags: string[] }) {
if (tags.length === 0) {
return null
@@ -116,23 +148,76 @@ export function CapabilityTags({ tags }: { tags: string[] }) {
)
}
/**
 * Link tile for a featured provider: icon, provider name, and a two-line
 * clamped description. The whole tile navigates to `provider.href`.
 *
 * Borders are drawn between siblings only (top border when stacked, left border
 * from the `sm` breakpoint up), so the tile is meant to sit in a flex row/column
 * of sibling cards.
 */
export function FeaturedProviderCard({ provider }: { provider: CatalogProvider }) {
  const { href, name, description } = provider

  const icon = (
    <ProviderIcon
      provider={provider}
      className='h-10 w-10 rounded-[5px]'
      iconClassName='h-5 w-5'
    />
  )

  return (
    <Link
      href={href}
      className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
    >
      {icon}
      <div className='flex flex-col gap-2'>
        <h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{name}</h3>
        <p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
          {description}
        </p>
      </div>
    </Link>
  )
}
/**
 * Link tile for a featured model: provider icon, provider name eyebrow, model
 * display name, and a two-line clamped summary. The whole tile navigates to
 * `model.href`.
 *
 * Uses the same sibling-separator border scheme as the featured provider tile
 * (top border when stacked, left border from the `sm` breakpoint up).
 */
export function FeaturedModelCard({
  provider,
  model,
}: {
  provider: CatalogProvider
  model: CatalogModel
}) {
  const { href, displayName, summary } = model

  const icon = (
    <ProviderIcon
      provider={provider}
      className='h-10 w-10 rounded-[5px]'
      iconClassName='h-5 w-5'
    />
  )

  return (
    <Link
      href={href}
      className='group flex flex-1 flex-col gap-4 border-[var(--landing-bg-elevated)] border-t p-6 transition-colors first:border-t-0 hover:bg-[var(--landing-bg-elevated)] sm:border-t-0 sm:border-l sm:first:border-l-0'
    >
      {icon}
      <div className='flex flex-col gap-2'>
        <span className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
          {provider.name}
        </span>
        <h3 className='text-lg text-white leading-tight tracking-[-0.01em]'>{displayName}</h3>
        <p className='line-clamp-2 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
          {summary}
        </p>
      </div>
    </Link>
  )
}
export function ProviderCard({ provider }: { provider: CatalogProvider }) {
return (
<Link
href={provider.href}
className='group flex h-full flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
className='group flex h-full flex-col rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
>
<div className='mb-4 flex items-center gap-3'>
<ProviderIcon provider={provider} />
<div className='min-w-0'>
<h3 className='font-[500] text-[18px] text-[var(--landing-text)]'>{provider.name}</h3>
<p className='text-[12px] text-[var(--landing-text-muted)]'>
<h3 className='font-[430] font-season text-base text-white tracking-[-0.01em]'>
{provider.name}
</h3>
<p className='font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{provider.modelCount} models tracked
</p>
</div>
</div>
<p className='mb-4 flex-1 text-[14px] text-[var(--landing-text-muted)] leading-relaxed'>
<p className='mb-4 flex-1 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{provider.description}
</p>
@@ -165,26 +250,30 @@ export function ModelCard({
return (
<Link
href={model.href}
className='group flex h-full flex-col rounded-lg border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
className='group flex h-full flex-col rounded-[5px] border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-4 transition-colors hover:border-[var(--landing-border-strong)] hover:bg-[var(--landing-bg-elevated)]'
>
<div className='mb-4 flex items-start gap-3'>
<ProviderIcon
provider={provider}
className='h-10 w-10 rounded-xl'
className='h-10 w-10 rounded-[5px]'
iconClassName='h-5 w-5'
/>
<div className='min-w-0 flex-1'>
{showProvider ? (
<p className='mb-1 text-[12px] text-[var(--landing-text-muted)]'>{provider.name}</p>
<p className='mb-1 font-martian-mono text-[var(--landing-text-subtle)] text-xs uppercase tracking-[0.1em]'>
{provider.name}
</p>
) : null}
<h3 className='break-all font-[500] text-[16px] text-[var(--landing-text)] leading-snug'>
<h3 className='break-all font-[430] font-season text-base text-white leading-snug tracking-[-0.01em]'>
{model.displayName}
</h3>
<p className='mt-1 break-all text-[12px] text-[var(--landing-text-muted)]'>{model.id}</p>
<p className='mt-1 break-all font-martian-mono text-[var(--landing-text-subtle)] text-xs tracking-[0.1em]'>
{model.id}
</p>
</div>
</div>
<p className='mb-3 line-clamp-3 flex-1 text-[12px] text-[var(--landing-text-muted)] leading-relaxed'>
<p className='mb-3 line-clamp-3 flex-1 text-[var(--landing-text-muted)] text-sm leading-[150%]'>
{model.summary}
</p>

View File

@@ -0,0 +1,132 @@
'use client'
import { useMemo } from 'react'
import Link from 'next/link'
import { getProviderColor } from '@/app/(landing)/models/components/consts'
import type { CatalogModel } from '@/app/(landing)/models/utils'
/**
 * Formats a date string as a short en-US date rendered in UTC, e.g. `May 13, 2024`.
 *
 * @param date - Any string accepted by the `Date` constructor.
 * @returns The formatted date, or the original `date` string unchanged when
 *   formatting throws (for example when the string does not parse to a valid date).
 */
function formatShortDate(date: string): string {
  const shortDateOptions: Intl.DateTimeFormatOptions = {
    month: 'short',
    day: 'numeric',
    year: 'numeric',
    // Pin to UTC so the rendered day does not depend on the viewer's time zone.
    timeZone: 'UTC',
  }

  try {
    const formatter = new Intl.DateTimeFormat('en-US', shortDateOptions)
    const parsed = new Date(date)
    // `format` throws a RangeError for an invalid Date, which lands in the fallback below.
    return formatter.format(parsed)
  } catch {
    return date
  }
}
/** Props accepted by the `ModelTimelineChart` component. */
interface ModelTimelineChartProps {
/** Catalog models to plot; entries whose `releaseDate` is `null` are left off the timeline. */
models: CatalogModel[]
/** Provider id handed to `getProviderColor` to choose the dot and stem color. */
providerId: string
}
/** Horizontal space, in pixels, allotted to each timeline entry (also used to size the scroll area). */
const ITEM_WIDTH = 150
/**
 * Horizontally scrollable release timeline for a set of catalog models.
 *
 * Models with a `null` release date are dropped; the rest are ordered oldest
 * first and placed at fixed `ITEM_WIDTH` intervals along a horizontal axis.
 * Labels alternate above (even index) and below (odd index) the axis, and each
 * entry links to the model's page. Renders nothing when no model has a
 * release date.
 */
export function ModelTimelineChart({ models, providerId }: ModelTimelineChartProps) {
  const entries = useMemo(() => {
    const dated = models.flatMap((m) => {
      if (m.releaseDate === null) return []
      return [{ model: m, dateStr: m.releaseDate, timestamp: new Date(m.releaseDate).getTime() }]
    })
    dated.sort((a, b) => a.timestamp - b.timestamp)
    return dated
  }, [models])

  if (entries.length === 0) return null

  const color = getProviderColor(providerId)
  const trackMinWidth = `${entries.length * ITEM_WIDTH}px`

  return (
    <section aria-labelledby='timeline-heading'>
      <div className='px-6 pt-10 pb-4'>
        <h2
          id='timeline-heading'
          className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
        >
          Release timeline
        </h2>
        <p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em]'>
          When each model was first publicly available.
        </p>
      </div>

      <div className='overflow-x-auto px-6 pb-8'>
        {/* Fixed-height track: labels above, axis in the middle, labels below */}
        <div className='relative h-[140px]' style={{ minWidth: trackMinWidth }}>
          {/* Axis: 1px line at 70px, the vertical middle of the 140px track */}
          <div className='absolute top-[70px] right-0 left-0 h-px bg-[var(--landing-border-strong)]' />

          {entries.map(({ model, dateStr }, index) => {
            // Each entry owns an ITEM_WIDTH-wide column centered on `centerX`.
            const centerX = index * ITEM_WIDTH + ITEM_WIDTH / 2
            const isAbove = index % 2 === 0

            const stem = (
              <div
                className='w-px'
                style={{ height: '10px', backgroundColor: color, opacity: 0.2 }}
              />
            )

            const labelLines = (
              <>
                <span className='whitespace-nowrap font-medium text-[12px] text-[var(--landing-text)] leading-none tracking-[-0.01em] transition-colors group-hover:text-white'>
                  {model.displayName}
                </span>
                <span className='whitespace-nowrap font-mono text-[10px] text-[var(--landing-text-muted)] leading-none'>
                  {formatShortDate(dateStr)}
                </span>
              </>
            )

            return (
              <Link
                key={model.id}
                href={model.href}
                className='group absolute flex flex-col items-center'
                style={{
                  left: `${centerX}px`,
                  width: `${ITEM_WIDTH}px`,
                  marginLeft: `${-ITEM_WIDTH / 2}px`,
                  top: 0,
                  height: '100%',
                }}
              >
                {/* Dot: 9px tall starting at 66px, so its middle sits on the 1px axis at 70px */}
                <div
                  className='-translate-x-1/2 absolute top-[66px] left-1/2 h-[9px] w-[9px] rounded-full transition-[filter,transform] duration-150 group-hover:scale-150 group-hover:brightness-150'
                  style={{ backgroundColor: color, opacity: 0.85 }}
                />

                {isAbove ? (
                  // Label first, then the stem reaching down toward the dot
                  <div className='-translate-x-1/2 absolute bottom-[74px] left-1/2 flex flex-col items-center'>
                    <div className='flex flex-col items-center gap-0.5 pb-1.5'>{labelLines}</div>
                    {stem}
                  </div>
                ) : (
                  // Stem first, then the label hanging below it
                  <div className='-translate-x-1/2 absolute top-[75px] left-1/2 flex flex-col items-center'>
                    {stem}
                    <div className='flex flex-col items-center gap-0.5 pt-1.5'>{labelLines}</div>
                  </div>
                )}
              </Link>
            )
          })}
        </div>
      </div>
    </section>
  )
}

View File

@@ -1,10 +1,15 @@
import type { Metadata } from 'next'
import Link from 'next/link'
import { Badge } from '@/components/emcn'
import { getBaseUrl } from '@/lib/core/utils/urls'
import { LandingFAQ } from '@/app/(landing)/components/landing-faq'
import { ModelComparisonCharts } from '@/app/(landing)/models/components/model-comparison-charts'
import { ModelDirectory } from '@/app/(landing)/models/components/model-directory'
import { ModelCard, ProviderCard } from '@/app/(landing)/models/components/model-primitives'
import {
FeaturedModelCard,
FeaturedProviderCard,
} from '@/app/(landing)/models/components/model-primitives'
import {
ALL_CATALOG_MODELS,
getPricingBounds,
MODEL_CATALOG_PROVIDERS,
MODEL_PROVIDERS_WITH_CATALOGS,
@@ -17,24 +22,29 @@ const baseUrl = getBaseUrl()
const faqItems = [
{
question: 'What is the Sim AI models directory?',
question: 'Which AI models are best for building agents and automated workflows?',
answer:
'The Sim AI models directory is a public catalog of the language models and providers tracked inside Sim. It shows provider coverage, model IDs, pricing per one million tokens, context windows, and supported capabilities such as reasoning controls, structured outputs, and deep research.',
'The most important factors for agent tasks are reliable tool use (function calling), a large enough context window to track conversation history and tool outputs, and consistent instruction following. In Sim, OpenAI GPT-4.1, Anthropic Claude Sonnet, and Google Gemini 2.5 Pro are popular choices — each supports tool use, structured outputs, and context windows of 128K tokens or more. For cost-sensitive or high-throughput agents, Groq and Cerebras offer significantly faster inference at lower cost.',
},
{
question: 'Can I compare models from multiple providers in one place?',
question: 'What does context window size mean when running an AI agent?',
answer:
'Yes. This page organizes every tracked model by provider and lets you search across providers, model names, and capabilities. You can quickly compare OpenAI, Anthropic, Google, xAI, Mistral, Groq, Cerebras, Fireworks, Bedrock, and more from a single directory.',
'The context window is the total number of tokens a model can process in a single call, including your system prompt, conversation history, tool call results, and any documents you pass in. For agents running multi-step tasks, context fills up quickly — each tool result and each retrieved document adds tokens. A 128K-token context window fits roughly 300 pages of text; models like Gemini 2.5 Pro support up to 1M tokens, enough to hold an entire codebase in a single pass.',
},
{
question: 'Are these model prices shown per million tokens?',
question: 'Are model prices shown per million tokens?',
answer:
'Yes. Input, cached input, and output prices on this page are shown per one million tokens based on the provider metadata tracked in Sim.',
'Yes. Input, cached input, and output prices are all listed per one million tokens, matching how providers bill through their APIs. For agents that chain multiple calls, costs compound quickly — an agent completing 100 turns at 10K tokens each consumes roughly 1M tokens per session. Cached input pricing applies when a provider supports prompt caching, where a repeated prefix like a system prompt is billed at a reduced rate.',
},
{
question: 'Does Sim support providers with dynamic model catalogs too?',
question: 'Which AI models support tool use and function calling?',
answer:
'Yes. Some providers such as OpenRouter, Fireworks, Ollama, and vLLM load their model lists dynamically at runtime. Those providers are still shown here even when their full public model list is not hard-coded into the catalog.',
'Tool use — also called function calling — lets an agent invoke external APIs, query databases, run code, or take any action you define. In Sim, all first-party models from OpenAI, Anthropic, Google, Mistral, Groq, Cerebras, and xAI support tool use. Look for the Tool Use capability tag on any model card in this directory to confirm support.',
},
{
question: 'How do I add a model to a Sim agent workflow?',
answer:
'Open any workflow in Sim, add an Agent block, and select your provider and model from the model picker inside that block. Every model listed in this directory is available in the Agent block. Swapping models takes one click and does not affect the rest of your workflow, making it straightforward to test different models on the same task without rebuilding anything.',
},
]
@@ -82,15 +92,15 @@ export default function ModelsPage() {
const flatModels = MODEL_CATALOG_PROVIDERS.flatMap((provider) =>
provider.models.map((model) => ({ provider, model }))
)
const featuredProviders = MODEL_PROVIDERS_WITH_CATALOGS.slice(0, 6)
const featuredModels = MODEL_PROVIDERS_WITH_CATALOGS.flatMap((provider) =>
provider.featuredModels[0] ? [{ provider, model: provider.featuredModels[0] }] : []
).slice(0, 6)
const heroProviders = ['openai', 'anthropic', 'azure-openai', 'google', 'bedrock']
.map((providerId) => MODEL_CATALOG_PROVIDERS.find((provider) => provider.id === providerId))
.filter(
(provider): provider is (typeof MODEL_CATALOG_PROVIDERS)[number] => provider !== undefined
const featuredProviderOrder = ['anthropic', 'openai', 'google']
const featuredProviders = featuredProviderOrder
.map((id) => MODEL_PROVIDERS_WITH_CATALOGS.find((p) => p.id === id))
.filter((p): p is (typeof MODEL_PROVIDERS_WITH_CATALOGS)[number] => p !== undefined)
const featuredModels = featuredProviders
.map((provider) =>
provider.featuredModels[0] ? { provider, model: provider.featuredModels[0] } : null
)
.filter((entry): entry is NonNullable<typeof entry> => entry !== null)
const breadcrumbJsonLd = {
'@context': 'https://schema.org',
@@ -159,135 +169,89 @@ export default function ModelsPage() {
dangerouslySetInnerHTML={{ __html: JSON.stringify(faqJsonLd) }}
/>
<div className='mx-auto max-w-[1280px] px-6 py-16 sm:px-8 md:px-12'>
<section aria-labelledby='models-heading' className='mb-14'>
<div className='max-w-[840px]'>
<p className='mb-3 text-[12px] text-[var(--landing-text-muted)] uppercase tracking-[0.16em]'>
Public model directory
</p>
<section className='bg-[var(--landing-bg)]'>
<div className='px-5 pt-[60px] lg:px-16 lg:pt-[100px]'>
<Badge
variant='blue'
size='md'
dot
className='mb-5 bg-white/10 font-season text-white uppercase tracking-[0.02em]'
>
Models
</Badge>
<div className='flex flex-col gap-4 xl:flex-row xl:items-end xl:justify-between'>
<h1
id='models-heading'
className='text-balance font-[500] text-[40px] text-[var(--landing-text)] leading-tight sm:text-[56px]'
className='text-balance text-[28px] text-white leading-[100%] tracking-[-0.02em] lg:text-[40px]'
>
Browse AI models by provider, pricing, and capabilities
Models
</h1>
<p className='mt-5 max-w-[760px] text-[18px] text-[var(--landing-text-muted)] leading-relaxed'>
Explore every model tracked in Sim across providers like{' '}
{heroProviders.map((provider, index, allProviders) => {
const Icon = provider.icon
return (
<span key={provider.id}>
<span className='inline-flex items-center gap-1 whitespace-nowrap align-[0.02em]'>
{Icon ? (
<span
aria-hidden='true'
className='relative top-[0.02em] inline-flex shrink-0 text-[var(--landing-text)]'
>
<Icon className='h-[0.82em] w-[0.82em]' />
</span>
) : null}
<span>{provider.name}</span>
</span>
{index < allProviders.length - 1 ? ', ' : ''}
</span>
)
})}
{
' and more. Compare model IDs, token pricing, context windows, and features such as reasoning, structured outputs, and deep research from one clean catalog.'
}
<p className='font-[430] font-season text-[var(--landing-text-muted)] text-sm leading-[150%] tracking-[0.02em] lg:text-base'>
Browse {TOTAL_MODELS} AI models across {TOTAL_MODEL_PROVIDERS} providers. Compare
pricing, context windows, and capabilities.
</p>
</div>
</div>
<div className='mt-8 flex flex-wrap gap-3'>
<a
href='https://sim.ai'
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--white)] bg-[var(--white)] px-3 font-[430] text-[14px] text-[var(--landing-text-dark)] transition-colors hover:border-[#E0E0E0] hover:bg-[#E0E0E0]'
>
Start building free
</a>
<Link
href='/integrations'
className='inline-flex h-[34px] items-center rounded-[6px] border border-[var(--landing-border-strong)] px-3 font-[430] text-[14px] text-[var(--landing-text)] transition-colors hover:bg-[var(--landing-bg-elevated)]'
>
Explore integrations
</Link>
</div>
</section>
<div className='mt-8 h-px w-full bg-[var(--landing-bg-elevated)]' />
<section aria-labelledby='providers-heading' className='mb-16'>
<div className='mb-6'>
<div className='mx-5 border-[var(--landing-bg-elevated)] border-x lg:mx-16'>
{featuredProviders.length > 0 && (
<>
<nav aria-label='Featured providers' className='flex flex-col sm:flex-row'>
{featuredProviders.map((provider) => (
<FeaturedProviderCard key={provider.id} provider={provider} />
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
{featuredModels.length > 0 && (
<>
<nav aria-label='Featured models' className='flex flex-col sm:flex-row'>
{featuredModels.map(({ provider, model }) => (
<FeaturedModelCard key={model.id} provider={provider} model={model} />
))}
</nav>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
</>
)}
<ModelComparisonCharts models={ALL_CATALOG_MODELS} />
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<section aria-labelledby='all-models-heading'>
<div className='px-6 pt-10 pb-4'>
<h2
id='all-models-heading'
className='mb-2 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
>
All models
</h2>
</div>
<ModelDirectory />
</section>
<div className='h-px w-full bg-[var(--landing-bg-elevated)]' />
<section aria-labelledby='faq-heading' className='px-6 py-10'>
<h2
id='providers-heading'
className='font-[500] text-[28px] text-[var(--landing-text)]'
id='faq-heading'
className='mb-8 text-[20px] text-white leading-[100%] tracking-[-0.02em] lg:text-[24px]'
>
Browse by provider
Frequently asked questions
</h2>
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Each provider has its own generated SEO page with model lineup details, featured
models, provider FAQs, and internal links to individual model pages.
</p>
</div>
<div>
<LandingFAQ faqs={faqItems} />
</div>
</section>
</div>
<div className='grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-3'>
{featuredProviders.map((provider) => (
<ProviderCard key={provider.id} provider={provider} />
))}
</div>
</section>
<section aria-labelledby='featured-models-heading' className='mb-16'>
<div className='mb-6'>
<h2
id='featured-models-heading'
className='font-[500] text-[28px] text-[var(--landing-text)]'
>
Featured model pages
</h2>
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
These pages are generated directly from the model registry and target high-intent
search queries around pricing, context windows, and model capabilities.
</p>
</div>
<div className='grid grid-cols-1 gap-4 xl:grid-cols-2'>
{featuredModels.map(({ provider, model }) => (
<ModelCard key={model.id} provider={provider} model={model} showProvider />
))}
</div>
</section>
<section aria-labelledby='all-models-heading'>
<div className='mb-6'>
<h2
id='all-models-heading'
className='font-[500] text-[28px] text-[var(--landing-text)]'
>
All models
</h2>
<p className='mt-2 max-w-[760px] text-[15px] text-[var(--landing-text-muted)] leading-relaxed'>
Search the full catalog by provider, model ID, or capability. Use it to compare
providers, sanity-check pricing, and quickly understand which models fit the workflow
you&apos;re building. All pricing is shown per one million tokens using the metadata
currently tracked in Sim.
</p>
</div>
<ModelDirectory />
</section>
<section
aria-labelledby='faq-heading'
className='mt-16 rounded-3xl border border-[var(--landing-border)] bg-[var(--landing-bg-card)] p-6 sm:p-8'
>
<h2 id='faq-heading' className='font-[500] text-[28px] text-[var(--landing-text)]'>
Frequently asked questions
</h2>
<div className='mt-3'>
<LandingFAQ faqs={faqItems} />
</div>
</section>
</div>
<div className='-mt-px h-px w-full bg-[var(--landing-bg-elevated)]' />
</section>
</>
)
}

View File

@@ -13,12 +13,6 @@ const PROVIDER_PREFIXES: Record<string, string[]> = {
vllm: ['vllm/'],
}
const PROVIDER_NAME_OVERRIDES: Record<string, string> = {
deepseek: 'DeepSeek',
vllm: 'vLLM',
xai: 'xAI',
}
const TOKEN_REPLACEMENTS: Record<string, string> = {
ai: 'AI',
aws: 'AWS',
@@ -108,6 +102,7 @@ export interface CatalogModel {
providerName: string
providerSlug: string
contextWindow: number | null
releaseDate: string | null
pricing: PricingInfo
capabilities: ModelCapabilities
capabilityTags: string[]
@@ -126,6 +121,8 @@ export interface CatalogProvider {
defaultModel: string
defaultModelDisplayName: string
icon?: ComponentType<{ className?: string }>
color?: string
isReseller: boolean
contextInformationAvailable: boolean
providerCapabilityTags: string[]
modelCount: number
@@ -418,10 +415,6 @@ function buildModelSummary(
return parts.filter(Boolean).join(' ')
}
function getProviderDisplayName(providerId: string, providerName: string): string {
return PROVIDER_NAME_OVERRIDES[providerId] ?? providerName
}
function computeModelRelevanceScore(model: CatalogModel): number {
return (
(model.capabilities.reasoningEffort ? 10 : 0) +
@@ -438,7 +431,7 @@ function compareModelsByRelevance(a: CatalogModel, b: CatalogModel): number {
const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
const providerSlug = slugify(provider.id)
const providerDisplayName = getProviderDisplayName(provider.id, provider.name)
const providerDisplayName = provider.name
const providerCapabilityTags = buildCapabilityTags(provider.capabilities ?? {})
const models: CatalogModel[] = provider.models.map((model) => {
@@ -464,6 +457,7 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
providerName: providerDisplayName,
providerSlug,
contextWindow: model.contextWindow ?? null,
releaseDate: model.releaseDate ?? null,
pricing: model.pricing,
capabilities: mergedCapabilities,
capabilityTags,
@@ -507,6 +501,8 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
defaultModel: provider.defaultModel,
defaultModelDisplayName,
icon: provider.icon,
color: provider.color,
isReseller: provider.isReseller ?? false,
contextInformationAvailable: provider.contextInformationAvailable !== false,
providerCapabilityTags,
modelCount: models.length,
@@ -514,7 +510,6 @@ const rawProviders = Object.values(PROVIDER_DEFINITIONS).map((provider) => {
featuredModels,
searchText: [
provider.name,
providerDisplayName,
provider.id,
provider.description,
provider.defaultModel,
@@ -631,7 +626,13 @@ export function buildProviderFaqs(provider: CatalogProvider): CatalogFaq[] {
const cheapestModel = getCheapestProviderModel(provider)
const largestContextModel = getLargestContextProviderModel(provider)
return [
const toolUseModels = provider.models.filter(
(m) =>
m.capabilities.toolUsageControl !== undefined ||
provider.providerCapabilityTags.includes('Tool Use')
)
const faqs: CatalogFaq[] = [
{
question: `What ${provider.name} models are available in Sim?`,
answer: `Sim currently tracks ${provider.modelCount} ${provider.name} model${provider.modelCount === 1 ? '' : 's'} including ${provider.models
@@ -662,10 +663,27 @@ export function buildProviderFaqs(provider: CatalogProvider): CatalogFaq[] {
: `Context window details are not fully available for every ${provider.name} model in the public catalog.`,
},
]
if (toolUseModels.length > 0) {
faqs.push({
question: `Which ${provider.name} models support tool use and function calling in Sim?`,
answer:
toolUseModels.length === provider.modelCount
? `All ${provider.name} models in Sim support tool use and function calling, allowing agents to invoke external APIs, query databases, and run custom actions.`
: `${toolUseModels
.slice(0, 5)
.map((m) => m.displayName)
.join(
', '
)}${toolUseModels.length > 5 ? ', and others' : ''} support tool use and function calling in Sim, enabling agents to invoke external APIs and run custom actions.`,
})
}
return faqs
}
export function buildModelFaqs(provider: CatalogProvider, model: CatalogModel): CatalogFaq[] {
return [
const faqs: CatalogFaq[] = [
{
question: `What is ${model.displayName}?`,
answer: `${model.displayName} is a ${provider.name} model available in Sim. ${model.summary}`,
@@ -677,17 +695,26 @@ export function buildModelFaqs(provider: CatalogProvider, model: CatalogModel):
{
question: `What is the context window for ${model.displayName}?`,
answer: model.contextWindow
? `${model.displayName} supports a listed context window of ${formatTokenCount(model.contextWindow)} tokens in Sim.`
? `${model.displayName} supports a context window of ${formatTokenCount(model.contextWindow)} tokens in Sim. In an agent workflow, this determines how much conversation history, tool outputs, and retrieved documents the model can hold in a single call.`
: `A public context window value is not currently tracked for ${model.displayName}.`,
},
{
question: `What capabilities does ${model.displayName} support?`,
answer:
model.capabilityTags.length > 0
? `${model.displayName} supports ${model.capabilityTags.join(', ')}.`
: `${model.displayName} is available in Sim, but no extra public capability flags are currently tracked for this model.`,
? `${model.displayName} supports the following capabilities in Sim: ${model.capabilityTags.join(', ')}.`
: `${model.displayName} supports standard text generation in Sim. No additional capability flags such as tool use or structured outputs are currently tracked for this model.`,
},
]
if (model.bestFor) {
faqs.push({
question: `What is ${model.displayName} best used for?`,
answer: `${model.bestFor} When used in a Sim workflow, it can be selected in any Agent block from the model picker.`,
})
}
return faqs
}
export function buildModelCapabilityFacts(model: CatalogModel): CapabilityFact[] {

View File

@@ -15,14 +15,6 @@ import { captureServerEvent } from '@/lib/posthog/server'
const logger = createLogger('KnowledgeBaseAPI')
/**
* Schema for creating a knowledge base
*
* Chunking config units:
* - maxSize: tokens (1 token ≈ 4 characters)
* - minSize: characters
* - overlap: tokens (1 token ≈ 4 characters)
*/
const CreateKnowledgeBaseSchema = z.object({
name: z.string().min(1, 'Name is required'),
description: z.string().optional(),
@@ -31,12 +23,20 @@ const CreateKnowledgeBaseSchema = z.object({
embeddingDimension: z.literal(1536).default(1536),
chunkingConfig: z
.object({
/** Maximum chunk size in tokens (1 token ≈ 4 characters) */
maxSize: z.number().min(100).max(4000).default(1024),
/** Minimum chunk size in characters */
minSize: z.number().min(1).max(2000).default(100),
/** Overlap between chunks in tokens (1 token ≈ 4 characters) */
overlap: z.number().min(0).max(500).default(200),
strategy: z
.enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token'])
.default('auto')
.optional(),
strategyOptions: z
.object({
pattern: z.string().max(500).optional(),
separators: z.array(z.string()).optional(),
recipe: z.enum(['plain', 'markdown', 'code']).optional(),
})
.optional(),
})
.default({
maxSize: 1024,
@@ -45,13 +45,31 @@ const CreateKnowledgeBaseSchema = z.object({
})
.refine(
(data) => {
// Convert maxSize from tokens to characters for comparison (1 token ≈ 4 chars)
const maxSizeInChars = data.maxSize * 4
return data.minSize < maxSizeInChars
},
{
message: 'Min chunk size (characters) must be less than max chunk size (tokens × 4)',
}
)
.refine(
(data) => {
return data.overlap < data.maxSize
},
{
message: 'Overlap must be less than max chunk size',
}
)
.refine(
(data) => {
if (data.strategy === 'regex' && !data.strategyOptions?.pattern) {
return false
}
return true
},
{
message: 'Regex pattern is required when using the regex chunking strategy',
}
),
})

View File

@@ -36,6 +36,8 @@ export interface AddResourceDropdownProps {
existingKeys: Set<string>
onAdd: (resource: MothershipResource) => void
onSwitch?: (resourceId: string) => void
/** Resource types to hide from the dropdown (e.g. `['folder', 'task']`). */
excludeTypes?: readonly MothershipResourceType[]
}
export type AvailableItem = { id: string; name: string; isOpen?: boolean; [key: string]: unknown }
@@ -47,7 +49,8 @@ interface AvailableItemsByType {
export function useAvailableResources(
workspaceId: string,
existingKeys: Set<string>
existingKeys: Set<string>,
excludeTypes?: readonly MothershipResourceType[]
): AvailableItemsByType[] {
const { data: workflows = [] } = useWorkflows(workspaceId)
const { data: tables = [] } = useTablesList(workspaceId)
@@ -56,8 +59,9 @@ export function useAvailableResources(
const { data: folders = [] } = useFolders(workspaceId)
const { data: tasks = [] } = useTasks(workspaceId)
return useMemo(
() => [
return useMemo(() => {
const excluded = new Set<MothershipResourceType>(excludeTypes ?? [])
const groups: AvailableItemsByType[] = [
{
type: 'workflow' as const,
items: workflows.map((w) => ({
@@ -107,9 +111,9 @@ export function useAvailableResources(
isOpen: existingKeys.has(`task:${t.id}`),
})),
},
],
[workflows, folders, tables, files, knowledgeBases, tasks, existingKeys]
)
]
return groups.filter((g) => !excluded.has(g.type))
}, [workflows, folders, tables, files, knowledgeBases, tasks, existingKeys, excludeTypes])
}
export function AddResourceDropdown({
@@ -117,11 +121,12 @@ export function AddResourceDropdown({
existingKeys,
onAdd,
onSwitch,
excludeTypes,
}: AddResourceDropdownProps) {
const [open, setOpen] = useState(false)
const [search, setSearch] = useState('')
const [activeIndex, setActiveIndex] = useState(0)
const available = useAvailableResources(workspaceId, existingKeys)
const available = useAvailableResources(workspaceId, existingKeys, excludeTypes)
const handleOpenChange = useCallback((next: boolean) => {
setOpen(next)
@@ -162,9 +167,9 @@ export function AddResourceDropdown({
} else if (e.key === 'ArrowUp') {
e.preventDefault()
setActiveIndex((prev) => Math.max(prev - 1, 0))
} else if (e.key === 'Enter') {
e.preventDefault()
} else if (e.key === 'Enter' || (e.key === 'Tab' && !e.shiftKey)) {
if (filtered.length > 0 && filtered[activeIndex]) {
e.preventDefault()
const { type, item } = filtered[activeIndex]
select({ type, id: item.id, title: item.name }, item.isOpen)
}

View File

@@ -10,7 +10,7 @@ import {
import { Button, Tooltip } from '@/components/emcn'
import { Columns3, Eye, PanelLeft, Pencil } from '@/components/emcn/icons'
import { isEphemeralResource } from '@/lib/copilot/resource-extraction'
import { SIM_RESOURCE_DRAG_TYPE } from '@/lib/copilot/resource-types'
import { SIM_RESOURCE_DRAG_TYPE, SIM_RESOURCES_DRAG_TYPE } from '@/lib/copilot/resource-types'
import { cn } from '@/lib/core/utils/cn'
import type { PreviewMode } from '@/app/workspace/[workspaceId]/files/components/file-viewer'
import { AddResourceDropdown } from '@/app/workspace/[workspaceId]/home/components/mothership-view/components/add-resource-dropdown'
@@ -38,6 +38,62 @@ import { useWorkspaceFiles } from '@/hooks/queries/workspace-files'
const EDGE_ZONE = 40
const SCROLL_SPEED = 8
/** Resource types passed as `excludeTypes` to the tab bar's `AddResourceDropdown`, hiding them from that dropdown. */
const ADD_RESOURCE_EXCLUDED_TYPES: readonly MothershipResourceType[] = ['folder', 'task'] as const
/**
 * Searches outward from position `idx` (never `idx` itself) and returns the id
 * of the closest qualifying resource. Candidates are checked at increasing
 * distance; at equal distance the resource after `idx` is preferred over the
 * one before it.
 *
 * @param resources - Ordered list of resources to search.
 * @param idx - Position to search outward from.
 * @param filter - Ids that qualify, or `null` to accept every resource.
 * @returns The id of the nearest qualifying resource, or `undefined` if none qualifies.
 */
function findNearestId(
  resources: MothershipResource[],
  idx: number,
  filter: Set<string> | null
): string | undefined {
  // Out-of-range positions read as `undefined` and are rejected here.
  const qualifies = (r: MothershipResource | undefined): r is MothershipResource =>
    Boolean(r) && (!filter || filter.has((r as MothershipResource).id))

  let distance = 1
  while (distance < resources.length) {
    const after = resources[idx + distance]
    if (qualifies(after)) return after.id
    const before = resources[idx - distance]
    if (qualifies(before)) return before.id
    distance += 1
  }
  return undefined
}
/**
 * Creates the drag preview used when several tabs are dragged together: a
 * fixed-position flex row, placed far offscreen, holding a deep clone of each
 * selected tab's element found under `scrollNode` (looked up by its
 * `data-resource-tab-id` attribute).
 *
 * The preview is appended to `document.body` before being returned; this
 * function does not remove it, so the caller must do so once the drag image
 * has been captured.
 *
 * @param scrollNode - Element that contains the rendered tabs, or `null`.
 * @param selected - Resources whose tabs should appear in the preview.
 * @returns The attached preview element, or `null` when `scrollNode` is
 *   missing, `selected` is empty, or none of the selected tabs were found.
 */
function buildMultiDragImage(
  scrollNode: HTMLElement | null,
  selected: MothershipResource[]
): HTMLElement | null {
  if (!scrollNode || selected.length === 0) return null

  const preview = document.createElement('div')
  Object.assign(preview.style, {
    position: 'fixed',
    top: '-10000px',
    left: '-10000px',
    display: 'flex',
    alignItems: 'center',
    gap: '6px',
    padding: '4px',
    pointerEvents: 'none',
  })

  for (const { id } of selected) {
    const tab = scrollNode.querySelector<HTMLElement>(
      `[data-resource-tab-id="${CSS.escape(id)}"]`
    )
    if (tab) {
      const copy = tab.cloneNode(true) as HTMLElement
      copy.style.opacity = '0.95'
      preview.appendChild(copy)
    }
  }

  // Nothing was cloned, so there is no preview worth attaching.
  if (preview.childElementCount === 0) return null

  document.body.appendChild(preview)
  return preview
}
const PREVIEW_MODE_ICONS = {
editor: Columns3,
split: Eye,
@@ -125,8 +181,19 @@ export function ResourceTabs({
const [hoveredTabId, setHoveredTabId] = useState<string | null>(null)
const [draggedIdx, setDraggedIdx] = useState<number | null>(null)
const [dropGapIdx, setDropGapIdx] = useState<number | null>(null)
const [selectedIds, setSelectedIds] = useState<Set<string>>(new Set())
const dragStartIdx = useRef<number | null>(null)
const autoScrollRaf = useRef<number | null>(null)
const anchorIdRef = useRef<string | null>(null)
const prevChatIdRef = useRef(chatId)
// Reset selection when switching chats — component instance persists across
// chat switches so stale IDs would otherwise carry over.
if (prevChatIdRef.current !== chatId) {
prevChatIdRef.current = chatId
setSelectedIds(new Set())
anchorIdRef.current = null
}
const existingKeys = useMemo(
() => new Set(resources.map((r) => `${r.type}:${r.id}`)),
@@ -143,34 +210,129 @@ export function ResourceTabs({
[chatId, onAddResource]
)
/**
 * Click handler for a resource tab at position `idx`.
 *
 * - Shift+click selects the contiguous range between the anchor and `idx` and
 *   activates the clicked tab. The anchor itself is not moved by this branch.
 * - Cmd/Ctrl+click toggles the clicked tab in or out of the selection.
 * - A plain click selects only the clicked tab and makes it the anchor.
 *
 * Does nothing if there is no resource at `idx`.
 */
const handleTabClick = useCallback(
(e: React.MouseEvent, idx: number) => {
const resource = resources[idx]
if (!resource) return
// Shift+click: contiguous range from anchor
if (e.shiftKey) {
// Fall back to activeId when no explicit anchor exists (e.g. tab opened via sidebar)
const anchorId = anchorIdRef.current ?? activeId
const anchorIdx = anchorId ? resources.findIndex((r) => r.id === anchorId) : -1
// If the anchor cannot be located, fall through to the branches below.
if (anchorIdx !== -1) {
const start = Math.min(anchorIdx, idx)
const end = Math.max(anchorIdx, idx)
const next = new Set<string>()
for (let i = start; i <= end; i++) next.add(resources[i].id)
setSelectedIds(next)
onSelect(resource.id)
return
}
}
// Cmd/Ctrl+click: toggle individual tab in/out of selection
if (e.metaKey || e.ctrlKey) {
const wasSelected = selectedIds.has(resource.id)
if (wasSelected) {
const next = new Set(selectedIds)
next.delete(resource.id)
setSelectedIds(next)
// Only switch active if we just deselected the currently-active tab
if (activeId === resource.id) {
// Prefer the nearest tab still in the selection; otherwise the nearest tab of any kind.
const fallback =
findNearestId(resources, idx, next) ?? findNearestId(resources, idx, null)
if (fallback) onSelect(fallback)
}
} else {
setSelectedIds((prev) => new Set(prev).add(resource.id))
onSelect(resource.id)
}
// Establish an anchor only when none exists yet; an existing anchor is kept.
if (!anchorIdRef.current) anchorIdRef.current = resource.id
return
}
// Plain click: single-select
anchorIdRef.current = resource.id
setSelectedIds(new Set([resource.id]))
onSelect(resource.id)
},
[resources, onSelect, selectedIds, activeId]
)
const handleRemove = useCallback(
(e: React.MouseEvent, resource: MothershipResource) => {
e.stopPropagation()
if (!chatId) return
if (!isEphemeralResource(resource)) {
removeResource.mutate({ chatId, resourceType: resource.type, resourceId: resource.id })
const isMulti = selectedIds.has(resource.id) && selectedIds.size > 1
const targets = isMulti ? resources.filter((r) => selectedIds.has(r.id)) : [resource]
// Update parent state immediately for all targets
for (const r of targets) {
onRemoveResource(r.type, r.id)
}
// Clear stale selection and anchor for all removed targets
const removedIds = new Set(targets.map((r) => r.id))
setSelectedIds((prev) => {
const next = new Set(prev)
for (const id of removedIds) next.delete(id)
return next
})
if (anchorIdRef.current && removedIds.has(anchorIdRef.current)) {
anchorIdRef.current = null
}
// Serialize mutations so each onMutate sees the cache updated by the prior
// one. Continue on individual failures so remaining removals still fire.
const persistable = targets.filter((r) => !isEphemeralResource(r))
if (persistable.length > 0) {
void (async () => {
for (const r of persistable) {
try {
await removeResource.mutateAsync({
chatId,
resourceType: r.type,
resourceId: r.id,
})
} catch {
// Individual failure — the mutation's onError already rolled back
// this resource in cache. Remaining removals continue.
}
}
})()
}
onRemoveResource(resource.type, resource.id)
},
// eslint-disable-next-line react-hooks/exhaustive-deps
[chatId, onRemoveResource]
[chatId, onRemoveResource, resources, selectedIds]
)
const handleDragStart = useCallback(
(e: React.DragEvent, idx: number) => {
const resource = resources[idx]
if (!resource) return
const selected = resources.filter((r) => selectedIds.has(r.id))
const isMultiDrag = selected.length > 1 && selectedIds.has(resource.id)
if (isMultiDrag) {
e.dataTransfer.effectAllowed = 'copy'
e.dataTransfer.setData(SIM_RESOURCES_DRAG_TYPE, JSON.stringify(selected))
const dragImage = buildMultiDragImage(scrollNodeRef.current, selected)
if (dragImage) {
e.dataTransfer.setDragImage(dragImage, 16, 16)
setTimeout(() => dragImage.remove(), 0)
}
// Skip dragStartIdx so internal reorder is disabled for multi-select drags
dragStartIdx.current = null
setDraggedIdx(null)
return
}
dragStartIdx.current = idx
setDraggedIdx(idx)
e.dataTransfer.effectAllowed = 'copyMove'
e.dataTransfer.setData('text/plain', String(idx))
const resource = resources[idx]
if (resource) {
e.dataTransfer.setData(
SIM_RESOURCE_DRAG_TYPE,
JSON.stringify({ type: resource.type, id: resource.id, title: resource.title })
)
}
e.dataTransfer.setData(
SIM_RESOURCE_DRAG_TYPE,
JSON.stringify({ type: resource.type, id: resource.id, title: resource.title })
)
},
[resources]
[resources, selectedIds]
)
const stopAutoScroll = useCallback(() => {
@@ -308,6 +470,7 @@ export function ResourceTabs({
const isActive = activeId === resource.id
const isHovered = hoveredTabId === resource.id
const isDragging = draggedIdx === idx
const isSelected = selectedIds.has(resource.id) && selectedIds.size > 1
const showGapBefore =
dropGapIdx === idx &&
draggedIdx !== null &&
@@ -329,22 +492,24 @@ export function ResourceTabs({
<Button
variant='subtle'
draggable
data-resource-tab-id={resource.id}
onDragStart={(e) => handleDragStart(e, idx)}
onDragOver={(e) => handleDragOver(e, idx)}
onDragLeave={handleDragLeave}
onDragEnd={handleDragEnd}
onMouseDown={(e) => {
if (e.button === 1 && chatId) {
if (e.button === 1) {
e.preventDefault()
handleRemove(e, resource)
if (chatId) handleRemove(e, resource)
}
}}
onClick={() => onSelect(resource.id)}
onClick={(e) => handleTabClick(e, idx)}
onMouseEnter={() => setHoveredTabId(resource.id)}
onMouseLeave={() => setHoveredTabId(null)}
className={cn(
'group relative shrink-0 bg-transparent px-2 py-1 pr-[22px] text-caption transition-opacity duration-150',
isActive && 'bg-[var(--surface-4)]',
isSelected && !isActive && 'bg-[var(--surface-3)]',
isDragging && 'opacity-30'
)}
>
@@ -394,6 +559,7 @@ export function ResourceTabs({
existingKeys={existingKeys}
onAdd={handleAdd}
onSwitch={onSelect}
excludeTypes={ADD_RESOURCE_EXCLUDED_TYPES}
/>
)}
</div>

View File

@@ -263,7 +263,8 @@ export function AddDocumentsModal({
{isDragging ? 'Drop files here' : 'Drop files here or click to browse'}
</span>
<span className='text-[var(--text-tertiary)] text-xs'>
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB each)
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSONL (max 100MB
each)
</span>
</div>
</Button>

View File

@@ -9,6 +9,8 @@ import { useForm } from 'react-hook-form'
import { z } from 'zod'
import {
Button,
Combobox,
type ComboboxOption,
Input,
Label,
Modal,
@@ -18,6 +20,7 @@ import {
ModalHeader,
Textarea,
} from '@/components/emcn'
import type { StrategyOptions } from '@/lib/chunkers/types'
import { cn } from '@/lib/core/utils/cn'
import { formatFileSize, validateKnowledgeBaseFile } from '@/lib/uploads/utils/file-utils'
import { ACCEPT_ATTRIBUTE } from '@/lib/uploads/utils/validation'
@@ -35,6 +38,20 @@ interface CreateBaseModalProps {
onOpenChange: (open: boolean) => void
}
/** Chunking strategies offered in the create-knowledge-base form, in display order. */
const STRATEGY_OPTIONS = [
  { value: 'auto', label: 'Auto (detect from content)' },
  { value: 'text', label: 'Text (word boundary splitting)' },
  { value: 'recursive', label: 'Recursive (configurable separators)' },
  { value: 'sentence', label: 'Sentence' },
  { value: 'token', label: 'Token (fixed-size)' },
  { value: 'regex', label: 'Regex (custom pattern)' },
] as const

/** The same strategies as plain `{ label, value }` objects for the `Combobox` component. */
const STRATEGY_COMBOBOX_OPTIONS: ComboboxOption[] = STRATEGY_OPTIONS.map(({ label, value }) => ({
  label,
  value,
}))
const FormSchema = z
.object({
name: z
@@ -43,25 +60,24 @@ const FormSchema = z
.max(100, 'Name must be less than 100 characters')
.refine((value) => value.trim().length > 0, 'Name cannot be empty'),
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
/** Minimum chunk size in characters */
minChunkSize: z
.number()
.min(1, 'Min chunk size must be at least 1 character')
.max(2000, 'Min chunk size must be less than 2000 characters'),
/** Maximum chunk size in tokens (1 token ≈ 4 characters) */
maxChunkSize: z
.number()
.min(100, 'Max chunk size must be at least 100 tokens')
.max(4000, 'Max chunk size must be less than 4000 tokens'),
/** Overlap between chunks in tokens */
overlapSize: z
.number()
.min(0, 'Overlap must be non-negative')
.max(500, 'Overlap must be less than 500 tokens'),
strategy: z.enum(['auto', 'text', 'regex', 'recursive', 'sentence', 'token']).default('auto'),
regexPattern: z.string().optional(),
customSeparators: z.string().optional(),
})
.refine(
(data) => {
// Convert maxChunkSize from tokens to characters for comparison (1 token ≈ 4 chars)
const maxChunkSizeInChars = data.maxChunkSize * 4
return data.minChunkSize < maxChunkSizeInChars
},
@@ -70,6 +86,27 @@ const FormSchema = z
path: ['minChunkSize'],
}
)
.refine(
(data) => {
return data.overlapSize < data.maxChunkSize
},
{
message: 'Overlap must be less than max chunk size',
path: ['overlapSize'],
}
)
.refine(
(data) => {
if (data.strategy === 'regex' && !data.regexPattern?.trim()) {
return false
}
return true
},
{
message: 'Regex pattern is required when using the regex strategy',
path: ['regexPattern'],
}
)
type FormValues = z.infer<typeof FormSchema>
@@ -124,6 +161,7 @@ export const CreateBaseModal = memo(function CreateBaseModal({
handleSubmit,
reset,
watch,
setValue,
formState: { errors },
} = useForm<FormValues>({
resolver: zodResolver(FormSchema),
@@ -133,11 +171,15 @@ export const CreateBaseModal = memo(function CreateBaseModal({
minChunkSize: 100,
maxChunkSize: 1024,
overlapSize: 200,
strategy: 'auto',
regexPattern: '',
customSeparators: '',
},
mode: 'onSubmit',
})
const nameValue = watch('name')
const strategyValue = watch('strategy')
useEffect(() => {
if (open) {
@@ -153,6 +195,9 @@ export const CreateBaseModal = memo(function CreateBaseModal({
minChunkSize: 100,
maxChunkSize: 1024,
overlapSize: 200,
strategy: 'auto',
regexPattern: '',
customSeparators: '',
})
}
}, [open, reset])
@@ -255,6 +300,17 @@ export const CreateBaseModal = memo(function CreateBaseModal({
setSubmitStatus(null)
try {
const strategyOptions: StrategyOptions | undefined =
data.strategy === 'regex' && data.regexPattern
? { pattern: data.regexPattern }
: data.strategy === 'recursive' && data.customSeparators?.trim()
? {
separators: data.customSeparators
.split(',')
.map((s) => s.trim().replace(/\\n/g, '\n').replace(/\\t/g, '\t')),
}
: undefined
const newKnowledgeBase = await createKnowledgeBaseMutation.mutateAsync({
name: data.name,
description: data.description || undefined,
@@ -263,6 +319,8 @@ export const CreateBaseModal = memo(function CreateBaseModal({
maxSize: data.maxChunkSize,
minSize: data.minChunkSize,
overlap: data.overlapSize,
...(data.strategy !== 'auto' && { strategy: data.strategy }),
...(strategyOptions && { strategyOptions }),
},
})
@@ -312,7 +370,6 @@ export const CreateBaseModal = memo(function CreateBaseModal({
<div className='space-y-3'>
<div className='flex flex-col gap-2'>
<Label htmlFor='kb-name'>Name</Label>
{/* Hidden decoy fields to prevent browser autofill */}
<input
type='text'
name='fakeusernameremembered'
@@ -403,6 +460,59 @@ export const CreateBaseModal = memo(function CreateBaseModal({
</p>
</div>
<div className='flex flex-col gap-2'>
<Label>Chunking Strategy</Label>
<Combobox
options={STRATEGY_COMBOBOX_OPTIONS}
value={strategyValue}
onChange={(value) => setValue('strategy', value as FormValues['strategy'])}
dropdownWidth='trigger'
align='start'
/>
<p className='text-[var(--text-muted)] text-xs'>
Auto detects the best strategy based on file content type.
</p>
</div>
{strategyValue === 'regex' && (
<div className='flex flex-col gap-2'>
<Label htmlFor='regexPattern'>Regex Pattern</Label>
<Input
id='regexPattern'
placeholder='e.g. \\n\\n or (?<=\\})\\s*(?=\\{)'
{...register('regexPattern')}
className={cn(errors.regexPattern && 'border-[var(--text-error)]')}
autoComplete='off'
data-form-type='other'
/>
{errors.regexPattern && (
<p className='text-[var(--text-error)] text-xs'>
{errors.regexPattern.message}
</p>
)}
<p className='text-[var(--text-muted)] text-xs'>
Text will be split at each match of this regex pattern.
</p>
</div>
)}
{strategyValue === 'recursive' && (
<div className='flex flex-col gap-2'>
<Label htmlFor='customSeparators'>Custom Separators (optional)</Label>
<Input
id='customSeparators'
placeholder='e.g. \n\n, \n, . , '
{...register('customSeparators')}
autoComplete='off'
data-form-type='other'
/>
<p className='text-[var(--text-muted)] text-xs'>
Comma-separated list of delimiters in priority order. Leave empty for default
separators.
</p>
</div>
)}
<div className='flex flex-col gap-2'>
<Label>Upload Documents</Label>
<Button
@@ -431,7 +541,8 @@ export const CreateBaseModal = memo(function CreateBaseModal({
{isDragging ? 'Drop files here' : 'Drop files here or click to browse'}
</span>
<span className='text-[var(--text-tertiary)] text-xs'>
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB each)
PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSONL (max 100MB
each)
</span>
</div>
</Button>

View File

@@ -59,40 +59,61 @@ export function WorkspacePermissionsProvider({ children }: WorkspacePermissionsP
const hasOperationError = useOperationQueueStore((state) => state.hasOperationError)
const addNotification = useNotificationStore((state) => state.addNotification)
const removeNotification = useNotificationStore((state) => state.removeNotification)
const { isReconnecting } = useSocket()
const reconnectingNotificationIdRef = useRef<string | null>(null)
const { isReconnecting, isRetryingWorkflowJoin } = useSocket()
const realtimeStatusNotificationIdRef = useRef<string | null>(null)
const realtimeStatusNotificationMessageRef = useRef<string | null>(null)
const isOfflineMode = hasOperationError
const realtimeStatusMessage = isReconnecting
? 'Reconnecting...'
: isRetryingWorkflowJoin
? 'Joining workflow...'
: null
const clearRealtimeStatusNotification = useCallback(() => {
if (!realtimeStatusNotificationIdRef.current) {
return
}
removeNotification(realtimeStatusNotificationIdRef.current)
realtimeStatusNotificationIdRef.current = null
realtimeStatusNotificationMessageRef.current = null
}, [removeNotification])
useEffect(() => {
if (isReconnecting && !reconnectingNotificationIdRef.current && !isOfflineMode) {
const id = addNotification({
level: 'error',
message: 'Reconnecting...',
})
reconnectingNotificationIdRef.current = id
} else if (!isReconnecting && reconnectingNotificationIdRef.current) {
removeNotification(reconnectingNotificationIdRef.current)
reconnectingNotificationIdRef.current = null
if (isOfflineMode || !realtimeStatusMessage) {
clearRealtimeStatusNotification()
return
}
return () => {
if (reconnectingNotificationIdRef.current) {
removeNotification(reconnectingNotificationIdRef.current)
reconnectingNotificationIdRef.current = null
}
if (
realtimeStatusNotificationIdRef.current &&
realtimeStatusNotificationMessageRef.current === realtimeStatusMessage
) {
return
}
}, [isReconnecting, isOfflineMode, addNotification, removeNotification])
clearRealtimeStatusNotification()
const id = addNotification({
level: 'error',
message: realtimeStatusMessage,
})
realtimeStatusNotificationIdRef.current = id
realtimeStatusNotificationMessageRef.current = realtimeStatusMessage
}, [addNotification, clearRealtimeStatusNotification, isOfflineMode, realtimeStatusMessage])
useEffect(() => {
return clearRealtimeStatusNotification
}, [clearRealtimeStatusNotification])
useEffect(() => {
if (!isOfflineMode || hasShownOfflineNotification) {
return
}
if (reconnectingNotificationIdRef.current) {
removeNotification(reconnectingNotificationIdRef.current)
reconnectingNotificationIdRef.current = null
}
clearRealtimeStatusNotification()
try {
addNotification({
@@ -107,7 +128,7 @@ export function WorkspacePermissionsProvider({ children }: WorkspacePermissionsP
} catch (error) {
logger.error('Failed to add offline notification', { error })
}
}, [addNotification, removeNotification, hasShownOfflineNotification, isOfflineMode])
}, [addNotification, clearRealtimeStatusNotification, hasShownOfflineNotification, isOfflineMode])
const {
data: workspacePermissions,

View File

@@ -5,6 +5,7 @@ import { useViewport } from 'reactflow'
import { getUserColor } from '@/lib/workspaces/colors'
import { usePreventZoom } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks'
import { useSocket } from '@/app/workspace/providers/socket-provider'
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
interface CursorPoint {
x: number
@@ -19,11 +20,16 @@ interface CursorRenderData {
}
const CursorsComponent = () => {
const { presenceUsers, currentSocketId } = useSocket()
const activeWorkflowId = useWorkflowRegistry((state) => state.activeWorkflowId)
const { currentWorkflowId, presenceUsers, currentSocketId } = useSocket()
const viewport = useViewport()
const preventZoomRef = usePreventZoom()
const cursors = useMemo<CursorRenderData[]>(() => {
if (!activeWorkflowId || currentWorkflowId !== activeWorkflowId) {
return []
}
return presenceUsers
.filter((user): user is typeof user & { cursor: CursorPoint } => Boolean(user.cursor))
.filter((user) => user.socketId !== currentSocketId)
@@ -33,7 +39,7 @@ const CursorsComponent = () => {
cursor: user.cursor,
color: getUserColor(user.userId),
}))
}, [currentSocketId, presenceUsers])
}, [activeWorkflowId, currentSocketId, currentWorkflowId, presenceUsers])
if (!cursors.length) {
return null

View File

@@ -98,7 +98,7 @@ export function CredentialSelector({
)
const provider = effectiveProviderId
const isTriggerMode = subBlock.mode === 'trigger'
const isTriggerMode = subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced'
const {
data: rawCredentials = [],

View File

@@ -242,9 +242,13 @@ export const EnvVarDropdown: React.FC<EnvVarDropdownProps> = ({
})
break
case 'Enter':
e.preventDefault()
e.stopPropagation()
handleEnvVarSelect(filteredEnvVars[selectedIndex])
case 'Tab':
if (e.key === 'Tab' && e.shiftKey) break
if (filteredEnvVars[selectedIndex]) {
e.preventDefault()
e.stopPropagation()
handleEnvVarSelect(filteredEnvVars[selectedIndex])
}
break
case 'Escape':
e.preventDefault()

View File

@@ -279,9 +279,11 @@ export const KeyboardNavigationHandler: React.FC<KeyboardNavigationHandlerProps>
}
break
case 'Enter':
e.preventDefault()
e.stopPropagation()
case 'Tab':
if (e.key === 'Tab' && e.shiftKey) break
if (selected && selectedIndex >= 0 && selectedIndex < flatTagList.length) {
e.preventDefault()
e.stopPropagation()
handleTagSelect(selected.tag, selected.group)
}
break

View File

@@ -145,7 +145,9 @@ export function Editor() {
if (!triggerMode) return subBlocks
return subBlocks.filter(
(subBlock) =>
subBlock.mode === 'trigger' || subBlock.type === ('trigger-config' as SubBlockType)
subBlock.mode === 'trigger' ||
subBlock.mode === 'trigger-advanced' ||
subBlock.type === ('trigger-config' as SubBlockType)
)
}, [blockConfig?.subBlocks, triggerMode])

View File

@@ -102,7 +102,9 @@ export function useEditorSubblockLayout(
const subBlocksForCanonical = displayTriggerMode
? (config.subBlocks || []).filter(
(subBlock) =>
subBlock.mode === 'trigger' || subBlock.type === ('trigger-config' as SubBlockType)
subBlock.mode === 'trigger' ||
subBlock.mode === 'trigger-advanced' ||
subBlock.type === ('trigger-config' as SubBlockType)
)
: config.subBlocks || []
const canonicalIndex = buildCanonicalIndex(subBlocksForCanonical)
@@ -137,12 +139,12 @@ export function useEditorSubblockLayout(
}
// Filter by mode if specified
if (block.mode === 'trigger') {
if (block.mode === 'trigger' || block.mode === 'trigger-advanced') {
if (!displayTriggerMode) return false
}
// When in trigger mode, hide blocks that don't have mode: 'trigger'
if (displayTriggerMode && block.mode !== 'trigger') {
// When in trigger mode, hide blocks that don't have mode: 'trigger' or 'trigger-advanced'
if (displayTriggerMode && block.mode !== 'trigger' && block.mode !== 'trigger-advanced') {
return false
}

View File

@@ -534,7 +534,6 @@ const SubBlockRow = memo(function SubBlockRow({
workspaceId
)
const credentialId = dependencyValues.credential
const knowledgeBaseId = dependencyValues.knowledgeBaseId
const dropdownLabel = useMemo(() => {
@@ -576,6 +575,7 @@ const SubBlockRow = memo(function SubBlockRow({
const collectionIdValue = resolveContextValue('collectionId')
const spreadsheetIdValue = resolveContextValue('spreadsheetId')
const fileIdValue = resolveContextValue('fileId')
const credentialId = dependencyValues.credential ?? resolveContextValue('oauthCredential')
const { displayName: selectorDisplayName } = useSelectorDisplayName({
subBlock,

View File

@@ -1153,8 +1153,10 @@ function PreviewEditorContent({
if (subBlock.type === ('trigger-config' as SubBlockType)) {
return effectiveTrigger || isPureTriggerBlock
}
if (subBlock.mode === 'trigger' && !effectiveTrigger) return false
if (effectiveTrigger && subBlock.mode !== 'trigger') return false
if ((subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') && !effectiveTrigger)
return false
if (effectiveTrigger && subBlock.mode !== 'trigger' && subBlock.mode !== 'trigger-advanced')
return false
if (!isSubBlockFeatureEnabled(subBlock)) return false
if (
!isSubBlockVisibleForMode(

View File

@@ -319,11 +319,11 @@ function WorkflowPreviewBlockInner({ data }: NodeProps<WorkflowPreviewBlockData>
if (effectiveTrigger) {
const isValidTriggerSubblock = isPureTriggerBlock
? subBlock.mode === 'trigger' || !subBlock.mode
: subBlock.mode === 'trigger'
? subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced' || !subBlock.mode
: subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced'
if (!isValidTriggerSubblock) return false
} else {
if (subBlock.mode === 'trigger') return false
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') return false
}
/** Skip value-dependent visibility checks in lightweight mode */

View File

@@ -0,0 +1,246 @@
import { describe, expect, it } from 'vitest'
import {
SOCKET_JOIN_RETRY_BASE_DELAY_MS,
SOCKET_JOIN_RETRY_MAX_DELAY_MS,
SocketJoinController,
} from '@/app/workspace/providers/socket-join-controller'
describe('SocketJoinController', () => {
// Scenario: workflow-a is joined and then reported deleted. A further request
// for workflow-a must produce no commands, while a request for a different
// workflow (workflow-b) must produce a join command.
it('blocks rejoining a deleted workflow until the desired workflow changes', () => {
const controller = new SocketJoinController()
expect(controller.setConnected(true)).toEqual([])
expect(controller.requestWorkflow('workflow-a')).toEqual([
{ type: 'join', workflowId: 'workflow-a' },
])
expect(controller.handleJoinSuccess('workflow-a')).toMatchObject({
apply: true,
ignored: false,
commands: [],
workflowId: 'workflow-a',
})
expect(controller.handleWorkflowDeleted('workflow-a')).toEqual({
shouldClearCurrent: true,
commands: [],
})
expect(controller.requestWorkflow('workflow-a')).toEqual([])
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
})
// Scenario: with A joined, B is requested (join issued) and then C is
// requested before B's join resolves (no command issued). B's success must be
// reported as ignored and carry the join command for C; C's success then applies.
it('joins only the latest desired workflow after rapid A to B to C switching', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
controller.handleJoinSuccess('workflow-a')
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
expect(controller.requestWorkflow('workflow-c')).toEqual([])
expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
apply: false,
ignored: true,
workflowId: 'workflow-b',
commands: [{ type: 'join', workflowId: 'workflow-c' }],
})
expect(controller.handleJoinSuccess('workflow-c')).toMatchObject({
apply: true,
ignored: false,
workflowId: 'workflow-c',
commands: [],
})
})
// Scenario: with A joined, B is requested (join issued) and then A is
// requested again before B's join resolves (no command issued). B's success
// must be reported as ignored and carry a join command for A; A's success then applies.
it('rejoins the original workflow when a stale success lands after switching back', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
controller.handleJoinSuccess('workflow-a')
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
expect(controller.requestWorkflow('workflow-a')).toEqual([])
expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
apply: false,
ignored: true,
workflowId: 'workflow-b',
commands: [{ type: 'join', workflowId: 'workflow-a' }],
})
expect(controller.handleJoinSuccess('workflow-a')).toMatchObject({
apply: true,
ignored: false,
workflowId: 'workflow-a',
commands: [],
})
})
// Scenario: a join for B is in flight when the desired workflow is cleared
// (requestWorkflow(null), no command issued). B's late success must be
// reported as ignored and carry a leave command.
it('leaves the room when a late join succeeds after navigating away', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
controller.handleJoinSuccess('workflow-a')
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
expect(controller.requestWorkflow(null)).toEqual([])
expect(controller.handleJoinSuccess('workflow-b')).toMatchObject({
apply: false,
ignored: true,
workflowId: 'workflow-b',
commands: [{ type: 'leave' }],
})
})
// Scenario: with A joined, the join for B fails with a retryable error. The
// result must not apply, must schedule retry attempt 1 at the base delay, and
// the joined workflow must still be A. Retrying B then issues a join for B.
it('preserves the last joined workflow during retryable switch failures', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
expect(controller.requestWorkflow('workflow-a')).toEqual([
{ type: 'join', workflowId: 'workflow-a' },
])
controller.handleJoinSuccess('workflow-a')
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
const errorResult = controller.handleJoinError({
workflowId: 'workflow-b',
retryable: true,
})
expect(errorResult.apply).toBe(false)
expect(errorResult.retryScheduled).toBe(true)
expect(errorResult.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-b',
attempt: 1,
delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
},
])
expect(controller.getJoinedWorkflowId()).toBe('workflow-a')
expect(controller.retryJoin('workflow-b')).toEqual([{ type: 'join', workflowId: 'workflow-b' }])
})
// Scenario: the same join keeps failing with retryable errors; the delay doubles per attempt
// until it reaches SOCKET_JOIN_RETRY_MAX_DELAY_MS.
it('uses capped exponential backoff for retryable join failures', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
// Attempt 1: base delay.
const first = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
expect(first.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-a',
attempt: 1,
delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
},
])
controller.retryJoin('workflow-a')
// Attempt 2: base delay doubled.
const second = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
expect(second.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-a',
attempt: 2,
delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS * 2,
},
])
controller.retryJoin('workflow-a')
// Attempt 3 happens here; its result is not inspected.
controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
controller.retryJoin('workflow-a')
// Attempt 4: base delay times eight, still below the cap.
const fourth = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
expect(fourth.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-a',
attempt: 4,
delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS * 8,
},
])
controller.retryJoin('workflow-a')
// Attempt 5: base delay times sixteen would exceed the maximum, so the cap is used.
const fifth = controller.handleJoinError({ workflowId: 'workflow-a', retryable: true })
expect(fifth.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-a',
attempt: 5,
delayMs: SOCKET_JOIN_RETRY_MAX_DELAY_MS,
},
])
})
// Scenario: switching from A to B fails with a non-retryable error.
it('blocks a permanently failed workflow and leaves the fallback room cleanly', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
controller.handleJoinSuccess('workflow-a')
expect(controller.requestWorkflow('workflow-b')).toEqual([
{ type: 'join', workflowId: 'workflow-b' },
])
const errorResult = controller.handleJoinError({
workflowId: 'workflow-b',
retryable: false,
})
// The failure is final: it is applied, and the previously joined room A is left.
expect(errorResult.apply).toBe(true)
expect(errorResult.commands).toEqual([{ type: 'leave' }])
expect(controller.getJoinedWorkflowId()).toBeNull()
// Asking for the blocked workflow again emits nothing.
expect(controller.requestWorkflow('workflow-b')).toEqual([])
// Asking for a different workflow lifts the block and joins normally.
expect(controller.requestWorkflow('workflow-c')).toEqual([
{ type: 'join', workflowId: 'workflow-c' },
])
})
// Scenario: the controller believes A is joined, but the caller forces a rejoin of A.
it('rejoins the desired workflow when the server session is lost', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
controller.handleJoinSuccess('workflow-a')
// A forced rejoin issues a new join even though A is already recorded as joined.
expect(controller.forceRejoinWorkflow('workflow-a')).toEqual([
{ type: 'join', workflowId: 'workflow-a' },
])
// The joined room is cleared until the new join is confirmed.
expect(controller.getJoinedWorkflowId()).toBeNull()
})
// Scenario: the error payload carries no workflowId, so the pending join is used instead.
it('resolves retryable errors without workflowId against the pending join', () => {
const controller = new SocketJoinController()
controller.setConnected(true)
controller.requestWorkflow('workflow-a')
const errorResult = controller.handleJoinError({ retryable: true })
// The result is attributed to the pending join (A) and handled like any retryable failure.
expect(errorResult.workflowId).toBe('workflow-a')
expect(errorResult.retryScheduled).toBe(true)
expect(errorResult.commands).toEqual([
{
type: 'schedule-retry',
workflowId: 'workflow-a',
attempt: 1,
delayMs: SOCKET_JOIN_RETRY_BASE_DELAY_MS,
},
])
})
})

View File

@@ -0,0 +1,294 @@
/** Delay before the first join retry, in milliseconds; doubled for every further attempt. */
export const SOCKET_JOIN_RETRY_BASE_DELAY_MS = 1000

/** Upper bound for the exponentially growing retry delay, in milliseconds. */
export const SOCKET_JOIN_RETRY_MAX_DELAY_MS = 10000

/**
 * Side effect the controller asks its owner to carry out. The controller itself never
 * touches a socket or a timer; it only returns these commands.
 */
export type SocketJoinCommand =
  | { type: 'cancel-retry' }
  | { type: 'join'; workflowId: string }
  | { type: 'leave' }
  | {
      type: 'schedule-retry'
      workflowId: string
      attempt: number
      delayMs: number
    }

/** Outcome of {@link SocketJoinController.handleJoinSuccess}. */
interface SocketJoinSuccessResult {
  /** True when the confirmed room is the desired, non-blocked one. */
  apply: boolean
  commands: SocketJoinCommand[]
  /** Always the negation of `apply`. */
  ignored: boolean
  workflowId: string
}

/** Outcome of {@link SocketJoinController.handleJoinError}. */
interface SocketJoinErrorResult {
  /** True only for a non-retryable failure of the desired room. */
  apply: boolean
  commands: SocketJoinCommand[]
  /** True when the failure does not concern the desired room. */
  ignored: boolean
  retryScheduled: boolean
  /** Workflow the failure was attributed to, or `null` when none could be resolved. */
  workflowId: string | null
}

/** Outcome of {@link SocketJoinController.handleWorkflowDeleted}. */
interface SocketJoinDeleteResult {
  commands: SocketJoinCommand[]
  /** True when the deleted workflow was the one recorded as joined. */
  shouldClearCurrent: boolean
}

/**
 * Reconciles the workflow room the caller wants with the asynchronous results of socket
 * join requests.
 *
 * Every public method updates internal state and returns the list of
 * {@link SocketJoinCommand}s the owner should execute, in order.
 */
export class SocketJoinController {
  /** Room the caller currently wants to be in (`null` means no room). */
  private desiredWorkflowId: string | null = null
  /** Room the controller currently records as joined. */
  private joinedWorkflowId: string | null = null
  /** Room for which a `join` command was issued and no result has been recorded yet. */
  private pendingJoinWorkflowId: string | null = null
  /** Room that must not be joined until a different target is requested. */
  private blockedWorkflowId: string | null = null
  /** Room the current retry sequence belongs to. */
  private retryWorkflowId: string | null = null
  /** Number of retries scheduled so far in the current sequence. */
  private retryAttempt = 0
  private isConnected = false

  /** Returns the room currently recorded as joined, or `null`. */
  getJoinedWorkflowId(): string | null {
    return this.joinedWorkflowId
  }

  /**
   * Records a connection state change.
   *
   * On disconnect the pending and joined rooms are forgotten and any retry is cancelled;
   * on connect the desired room (if any) is joined.
   */
  setConnected(connected: boolean): SocketJoinCommand[] {
    this.isConnected = connected
    if (connected) {
      return this.flush()
    }

    this.pendingJoinWorkflowId = null
    this.joinedWorkflowId = null
    return this.clearRetryCommands()
  }

  /**
   * Sets the desired room (`null` to want no room at all).
   *
   * A retry sequence for a different room is cancelled, and a block is lifted unless the
   * blocked room itself is requested again.
   */
  requestWorkflow(workflowId: string | null): SocketJoinCommand[] {
    const retryResetCommands = this.takeRetryResetCommands(workflowId)

    this.desiredWorkflowId = workflowId
    if (this.blockedWorkflowId !== workflowId) {
      this.blockedWorkflowId = null
    }

    return retryResetCommands.concat(this.flush())
  }

  /**
   * Requests `workflowId` and, when that alone causes no room change, discards the recorded
   * joined room so that a fresh join is issued.
   */
  forceRejoinWorkflow(workflowId: string | null): SocketJoinCommand[] {
    const commands = this.requestWorkflow(workflowId)

    const roomChangeAlreadyIssued = commands.some(
      ({ type }) => type === 'join' || type === 'leave'
    )
    if (roomChangeAlreadyIssued) {
      return commands
    }

    const target = this.desiredWorkflowId
    const canForceJoin =
      this.isConnected &&
      Boolean(target) &&
      this.pendingJoinWorkflowId !== target &&
      this.blockedWorkflowId !== target
    if (!canForceJoin) {
      return commands
    }

    // Forget the recorded membership so flush() emits a join for the desired room.
    this.joinedWorkflowId = null
    return [...commands, ...this.flush()]
  }

  /**
   * Records that a workflow was deleted: cancels its retry sequence, blocks it if it is the
   * desired room, and drops it from the pending/joined slots.
   */
  handleWorkflowDeleted(workflowId: string): SocketJoinDeleteResult {
    // Only a retry sequence that targets the deleted workflow is cancelled.
    const retryCommands = this.clearRetryCommands(workflowId)

    if (workflowId === this.desiredWorkflowId) {
      this.blockedWorkflowId = workflowId
    }
    if (workflowId === this.pendingJoinWorkflowId) {
      this.pendingJoinWorkflowId = null
    }

    const shouldClearCurrent = workflowId === this.joinedWorkflowId
    if (shouldClearCurrent) {
      this.joinedWorkflowId = null
    }

    return {
      commands: retryCommands.concat(this.flush()),
      shouldClearCurrent,
    }
  }

  /**
   * Records a confirmed join.
   *
   * The room is always recorded as joined; `apply` tells the caller whether it is also the
   * room that is currently wanted. Follow-up commands move towards the desired room.
   */
  handleJoinSuccess(workflowId: string): SocketJoinSuccessResult {
    const retryCommands = this.clearRetryCommands(workflowId)

    this.pendingJoinWorkflowId = null
    this.joinedWorkflowId = workflowId

    const isWantedRoom =
      workflowId === this.desiredWorkflowId && workflowId !== this.blockedWorkflowId

    return {
      apply: isWantedRoom,
      commands: retryCommands.concat(this.flush()),
      ignored: !isWantedRoom,
      workflowId,
    }
  }

  /**
   * Records a failed join.
   *
   * When `workflowId` is missing the failure is attributed to the pending join. Failures
   * that do not concern the desired room are ignored; retryable ones schedule a retry with
   * backoff; all others block the room and leave any room still recorded as joined.
   */
  handleJoinError({
    workflowId,
    retryable,
  }: {
    workflowId?: string | null
    retryable?: boolean
  }): SocketJoinErrorResult {
    const failedWorkflowId = workflowId ?? this.pendingJoinWorkflowId

    if (failedWorkflowId && failedWorkflowId === this.pendingJoinWorkflowId) {
      this.pendingJoinWorkflowId = null
      if (this.joinedWorkflowId === failedWorkflowId) {
        this.joinedWorkflowId = null
      }
    }

    const concernsDesiredRoom =
      Boolean(failedWorkflowId) &&
      failedWorkflowId === this.desiredWorkflowId &&
      failedWorkflowId !== this.blockedWorkflowId

    const retryResetCommands =
      failedWorkflowId === null
        ? this.clearRetryCommands()
        : this.takeRetryResetCommands(failedWorkflowId)

    if (!concernsDesiredRoom) {
      return {
        apply: false,
        commands: retryResetCommands.concat(this.flush()),
        ignored: true,
        retryScheduled: false,
        workflowId: failedWorkflowId,
      }
    }

    if (retryable && failedWorkflowId) {
      return {
        apply: false,
        commands: retryResetCommands.concat(this.scheduleRetry(failedWorkflowId)),
        ignored: false,
        retryScheduled: true,
        workflowId: failedWorkflowId,
      }
    }

    // NOTE(review): retryResetCommands is not forwarded in this branch — confirm it can
    // only be empty when the failure concerns the desired room.
    const leaveCommands = this.blockWorkflow(failedWorkflowId)
    const cancelCommands = this.clearRetryCommands()
    return {
      apply: true,
      commands: [...cancelCommands, ...leaveCommands, ...this.flush()],
      ignored: false,
      retryScheduled: false,
      workflowId: failedWorkflowId,
    }
  }

  /**
   * Called when a scheduled retry fires. Emits a join only if the retry still targets the
   * desired, non-blocked room.
   */
  retryJoin(workflowId: string): SocketJoinCommand[] {
    const retryStillRelevant =
      this.retryWorkflowId === workflowId &&
      this.desiredWorkflowId === workflowId &&
      this.blockedWorkflowId !== workflowId

    return retryStillRelevant ? this.flush() : []
  }

  /**
   * Emits the single command (if any) that moves from the recorded state towards the
   * desired room. Nothing is emitted while disconnected or while a join is pending.
   */
  private flush(): SocketJoinCommand[] {
    if (!this.isConnected || this.pendingJoinWorkflowId) {
      return []
    }

    const target = this.desiredWorkflowId
    if (!target) {
      // No room wanted: leave whatever is still recorded as joined.
      if (!this.joinedWorkflowId) {
        return []
      }
      this.joinedWorkflowId = null
      return [{ type: 'leave' }]
    }

    if (target === this.blockedWorkflowId || target === this.joinedWorkflowId) {
      return []
    }

    this.pendingJoinWorkflowId = target
    return [{ type: 'join', workflowId: target }]
  }

  /**
   * Advances the retry sequence for `workflowId` (restarting at attempt 1 for a new
   * workflow) and returns the matching `schedule-retry` command.
   */
  private scheduleRetry(workflowId: string): SocketJoinCommand[] {
    const attempt = this.retryWorkflowId === workflowId ? this.retryAttempt + 1 : 1
    const exponent = Math.max(0, attempt - 1)
    const uncappedDelayMs = SOCKET_JOIN_RETRY_BASE_DELAY_MS * 2 ** exponent
    const delayMs = Math.min(uncappedDelayMs, SOCKET_JOIN_RETRY_MAX_DELAY_MS)

    this.retryWorkflowId = workflowId
    this.retryAttempt = attempt

    return [{ type: 'schedule-retry', workflowId, attempt, delayMs }]
  }

  /**
   * Cancels the retry sequence unless it targets `nextWorkflowId`. With no argument any
   * active sequence is cancelled.
   */
  private takeRetryResetCommands(nextWorkflowId?: string | null): SocketJoinCommand[] {
    if (this.retryWorkflowId === null) {
      return []
    }
    if (nextWorkflowId !== undefined && this.retryWorkflowId === nextWorkflowId) {
      return []
    }
    return this.resetRetryState()
  }

  /**
   * Cancels the retry sequence only if it targets `workflowId`. With no argument any
   * active sequence is cancelled.
   */
  private clearRetryCommands(workflowId?: string): SocketJoinCommand[] {
    if (this.retryWorkflowId === null) {
      return []
    }
    if (workflowId !== undefined && this.retryWorkflowId !== workflowId) {
      return []
    }
    return this.resetRetryState()
  }

  /** Forgets the active retry sequence and returns the command that cancels its timer. */
  private resetRetryState(): SocketJoinCommand[] {
    this.retryWorkflowId = null
    this.retryAttempt = 0
    return [{ type: 'cancel-retry' }]
  }

  /**
   * Marks `workflowId` (when given) as blocked and leaves whatever room is still recorded
   * as joined.
   */
  private blockWorkflow(workflowId: string | null): SocketJoinCommand[] {
    if (workflowId) {
      this.blockedWorkflowId = workflowId
    }

    if (this.joinedWorkflowId === null || this.joinedWorkflowId === '') {
      return []
    }

    this.joinedWorkflowId = null
    return [{ type: 'leave' }]
  }
}

View File

@@ -0,0 +1,54 @@
import { describe, expect, it } from 'vitest'
import {
isSocketWorkflowVisible,
resolveSocketWorkflowTarget,
} from '@/app/workspace/providers/socket-join-target'
// Covers how the socket's target workflow is chosen from the route and an explicit override,
// and when a workflow counts as the visible one.
describe('socket join target helpers', () => {
  it('uses the route workflow when there is no explicit workflow target', () => {
    const target = resolveSocketWorkflowTarget({
      routeWorkflowId: 'workflow-route',
      explicitWorkflowId: null,
    })

    expect(target).toBe('workflow-route')
  })

  it('prefers the explicit workflow target for embedded workflows', () => {
    const target = resolveSocketWorkflowTarget({
      routeWorkflowId: null,
      explicitWorkflowId: 'workflow-embedded',
    })

    expect(target).toBe('workflow-embedded')
  })

  it('lets an explicit workflow override the route workflow', () => {
    const target = resolveSocketWorkflowTarget({
      routeWorkflowId: 'workflow-route',
      explicitWorkflowId: 'workflow-embedded',
    })

    expect(target).toBe('workflow-embedded')
  })

  it('treats the explicit embedded workflow as visible', () => {
    const visible = isSocketWorkflowVisible({
      workflowId: 'workflow-embedded',
      routeWorkflowId: null,
      explicitWorkflowId: 'workflow-embedded',
    })

    expect(visible).toBe(true)
  })

  it('rejects mismatched workflow visibility', () => {
    const visible = isSocketWorkflowVisible({
      workflowId: 'workflow-other',
      routeWorkflowId: 'workflow-route',
      explicitWorkflowId: null,
    })

    expect(visible).toBe(false)
  })
})

View File

@@ -0,0 +1,28 @@
/** Candidate workflow ids the socket target is chosen from. */
interface ResolveSocketWorkflowTargetArgs {
  /** Workflow id taken from the current route, if any. */
  routeWorkflowId?: string | null
  /** Workflow id requested explicitly; wins over the route when present. */
  explicitWorkflowId?: string | null
}

/**
 * Picks the workflow the socket should target.
 *
 * @returns The explicit id when it is neither `null` nor `undefined`, otherwise the route
 * id, otherwise `null`.
 */
export function resolveSocketWorkflowTarget({
  routeWorkflowId,
  explicitWorkflowId,
}: ResolveSocketWorkflowTargetArgs): string | null {
  if (explicitWorkflowId !== null && explicitWorkflowId !== undefined) {
    return explicitWorkflowId
  }
  if (routeWorkflowId !== null && routeWorkflowId !== undefined) {
    return routeWorkflowId
  }
  return null
}
/** Target candidates plus the workflow whose visibility is being checked. */
interface IsSocketWorkflowVisibleArgs extends ResolveSocketWorkflowTargetArgs {
  workflowId?: string | null
}

/**
 * Tells whether `workflowId` is the workflow the socket currently targets.
 *
 * @returns `false` when `workflowId` is missing or empty; otherwise whether it equals the
 * target resolved from the explicit and route ids.
 */
export function isSocketWorkflowVisible({
  workflowId,
  routeWorkflowId,
  explicitWorkflowId,
}: IsSocketWorkflowVisibleArgs): boolean {
  if (!workflowId) {
    return false
  }

  const activeTargetId = resolveSocketWorkflowTarget({ routeWorkflowId, explicitWorkflowId })
  return workflowId === activeTargetId
}

View File

@@ -15,6 +15,14 @@ import { useParams } from 'next/navigation'
import type { Socket } from 'socket.io-client'
import { getEnv } from '@/lib/core/config/env'
import { generateId } from '@/lib/core/utils/uuid'
import {
type SocketJoinCommand,
SocketJoinController,
} from '@/app/workspace/providers/socket-join-controller'
import {
isSocketWorkflowVisible,
resolveSocketWorkflowTarget,
} from '@/app/workspace/providers/socket-join-target'
import { useOperationQueueStore } from '@/stores/operation-queue/store'
import { useWorkflowRegistry as useWorkflowRegistryStore } from '@/stores/workflows/registry/store'
@@ -53,6 +61,7 @@ interface SocketContextType {
isConnected: boolean
isConnecting: boolean
isReconnecting: boolean
isRetryingWorkflowJoin: boolean
authFailed: boolean
currentWorkflowId: string | null
currentSocketId: string | null
@@ -61,6 +70,7 @@ interface SocketContextType {
leaveWorkflow: () => void
retryConnection: () => void
emitWorkflowOperation: (
workflowId: string,
operation: string,
target: string,
payload: any,
@@ -101,6 +111,7 @@ const SocketContext = createContext<SocketContextType>({
isConnected: false,
isConnecting: false,
isReconnecting: false,
isRetryingWorkflowJoin: false,
authFailed: false,
currentWorkflowId: null,
currentSocketId: null,
@@ -137,18 +148,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
const [isConnected, setIsConnected] = useState(false)
const [isConnecting, setIsConnecting] = useState(false)
const [isReconnecting, setIsReconnecting] = useState(false)
const [isRetryingWorkflowJoin, setIsRetryingWorkflowJoin] = useState(false)
const [currentWorkflowId, setCurrentWorkflowId] = useState<string | null>(null)
const [currentSocketId, setCurrentSocketId] = useState<string | null>(null)
const [presenceUsers, setPresenceUsers] = useState<PresenceUser[]>([])
const [authFailed, setAuthFailed] = useState(false)
const [explicitWorkflowId, setExplicitWorkflowId] = useState<string | null>(null)
const initializedRef = useRef(false)
const socketRef = useRef<Socket | null>(null)
const triggerOfflineMode = useOperationQueueStore((state) => state.triggerOfflineMode)
const currentWorkflowIdRef = useRef<string | null>(null)
const explicitWorkflowIdRef = useRef<string | null>(explicitWorkflowId)
const joinControllerRef = useRef(new SocketJoinController())
const joinRetryTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
const params = useParams()
const urlWorkflowId = params?.workflowId as string | undefined
const urlWorkflowIdRef = useRef(urlWorkflowId)
urlWorkflowIdRef.current = urlWorkflowId
explicitWorkflowIdRef.current = explicitWorkflowId
const eventHandlers = useRef<{
workflowOperation?: (data: any) => void
@@ -164,9 +181,124 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
}>({})
const positionUpdateTimeouts = useRef<Map<string, number>>(new Map())
const isRejoiningRef = useRef<boolean>(false)
const pendingPositionUpdates = useRef<Map<string, any>>(new Map())
// Updates the visible workflow id in both places it is kept: React state (for rendering)
// and the ref (for reads inside socket event handlers).
const setVisibleWorkflowId = useCallback((nextWorkflowId: string | null) => {
  setCurrentWorkflowId(nextWorkflowId)
  currentWorkflowIdRef.current = nextWorkflowId
}, [])
// Resolves the workflow the provider should be joined to, reading the latest route and
// explicit ids from refs so the callback itself never changes identity.
const getRequestedWorkflowId = useCallback(() => {
  const routeWorkflowId = urlWorkflowIdRef.current ?? null
  const explicitWorkflowId = explicitWorkflowIdRef.current
  return resolveSocketWorkflowTarget({ routeWorkflowId, explicitWorkflowId })
}, [])
// Checks whether a workflow (by default the currently visible one) is still the workflow
// selected by the route / explicit target.
const isWorkflowVisible = useCallback((workflowId?: string | null) => {
  const candidateWorkflowId = workflowId ?? currentWorkflowIdRef.current
  const routeWorkflowId = urlWorkflowIdRef.current ?? null
  const explicitWorkflowId = explicitWorkflowIdRef.current

  return isSocketWorkflowVisible({
    workflowId: candidateWorkflowId,
    routeWorkflowId,
    explicitWorkflowId,
  })
}, [])
// Cancels the pending join-retry timer, if one is armed, and forgets its handle.
const clearJoinRetryTimeout = useCallback(() => {
  const pendingTimeout = joinRetryTimeoutRef.current
  if (pendingTimeout === null) {
    return
  }

  clearTimeout(pendingTimeout)
  joinRetryTimeoutRef.current = null
}, [])
// Drops per-workflow transient state: optionally cancels queued operations for
// `workflowId`, then clears every debounced position-update timer and its buffered payload.
const resetVisibleWorkflowState = useCallback((workflowId?: string | null) => {
  if (workflowId) {
    const operationQueue = useOperationQueueStore.getState()
    operationQueue.cancelOperationsForWorkflow(workflowId)
  }

  const positionTimers = positionUpdateTimeouts.current
  positionTimers.forEach((timerId) => clearTimeout(timerId))
  positionTimers.clear()

  pendingPositionUpdates.current.clear()
}, [])
// Clears everything tied to the currently visible workflow: transient update state,
// presence, and the visible workflow id. Queued operations for that workflow are cancelled
// only when `cancelOperations` is true.
const clearJoinedWorkflowState = useCallback(
  (cancelOperations = false) => {
    const workflowToCancel = cancelOperations ? currentWorkflowIdRef.current : null

    resetVisibleWorkflowState(workflowToCancel)
    setPresenceUsers([])
    setVisibleWorkflowId(null)
  },
  [resetVisibleWorkflowState, setVisibleWorkflowId]
)
// Carries out the commands returned by the join controller, in order:
//   cancel-retry   -> stop the retry timer and clear the "retrying" flag
//   leave          -> clear local workflow state, then emit `leave-workflow`
//   join           -> reset transient state, then emit `join-workflow`
//   schedule-retry -> arm a timer that asks the controller to retry the join
const executeJoinCommands = useCallback(
  (commands: SocketJoinCommand[]) => {
    // The socket is read once; every command in this batch uses the same instance.
    const socketInstance = socketRef.current

    for (const command of commands) {
      switch (command.type) {
        case 'cancel-retry': {
          clearJoinRetryTimeout()
          setIsRetryingWorkflowJoin(false)
          break
        }

        case 'leave': {
          setIsRetryingWorkflowJoin(false)
          // Local state is cleared even when there is no socket to notify.
          clearJoinedWorkflowState(true)
          if (socketInstance) {
            logger.info('Leaving current workflow room')
            socketInstance.emit('leave-workflow')
          }
          break
        }

        case 'join': {
          const { workflowId } = command
          const visibleWorkflowId = currentWorkflowIdRef.current
          const isWorkflowSwitch = visibleWorkflowId !== null && visibleWorkflowId !== workflowId

          // Queued operations are cancelled only when moving away from another workflow.
          if (isWorkflowSwitch) {
            resetVisibleWorkflowState(visibleWorkflowId)
          } else {
            resetVisibleWorkflowState()
          }

          if (!socketInstance) {
            logger.warn('Cannot join workflow room: socket not available', {
              workflowId,
            })
            break
          }

          logger.info(`Joining workflow room: ${workflowId}`)
          socketInstance.emit('join-workflow', {
            workflowId,
            tabSessionId: getTabSessionId(),
          })
          break
        }

        case 'schedule-retry': {
          const { workflowId, attempt, delayMs } = command

          // Only one retry timer is kept; a new schedule replaces the previous one.
          clearJoinRetryTimeout()
          setIsRetryingWorkflowJoin(true)
          joinRetryTimeoutRef.current = setTimeout(() => {
            joinRetryTimeoutRef.current = null
            executeJoinCommands(joinControllerRef.current.retryJoin(workflowId))
          }, delayMs)

          logger.warn('Realtime unavailable while joining workflow, scheduling retry', {
            workflowId,
            attempt,
            delayMs,
          })
          break
        }
      }
    }
  },
  [clearJoinRetryTimeout, clearJoinedWorkflowState, resetVisibleWorkflowState]
)
const generateSocketToken = async (): Promise<string> => {
const res = await fetch('/api/auth/socket-token', {
method: 'POST',
@@ -244,17 +376,17 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
connected: socketInstance.connected,
transport: socketInstance.io.engine?.transport?.name,
})
// Note: join-workflow is handled by the useEffect watching isConnected
executeJoinCommands(joinControllerRef.current.setConnected(true))
})
socketInstance.on('disconnect', (reason) => {
setIsConnected(false)
setIsConnecting(false)
setIsRetryingWorkflowJoin(false)
setCurrentSocketId(null)
setCurrentWorkflowId(null)
setPresenceUsers([])
executeJoinCommands(joinControllerRef.current.setConnected(false))
clearJoinedWorkflowState(false)
// socket.active indicates if auto-reconnect will happen
if (socketInstance.active) {
setIsReconnecting(true)
logger.info('Socket disconnected, will auto-reconnect', { reason })
@@ -317,6 +449,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
socketInstance.on('presence-update', (users: PresenceUser[]) => {
if (!isWorkflowVisible()) {
return
}
setPresenceUsers((prev) => {
const prevMap = new Map(prev.map((u) => [u.socketId, u]))
@@ -334,27 +470,52 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
})
// Handle join workflow success - confirms room membership with presence list
socketInstance.on('join-workflow-success', ({ workflowId, presenceUsers }) => {
isRejoiningRef.current = false
// Ignore stale success responses from previous navigation
if (urlWorkflowIdRef.current && workflowId !== urlWorkflowIdRef.current) {
const result = joinControllerRef.current.handleJoinSuccess(workflowId)
if (result.ignored) {
logger.debug(`Ignoring stale join-workflow-success for ${workflowId}`)
return
} else {
setIsRetryingWorkflowJoin(false)
setVisibleWorkflowId(workflowId)
setPresenceUsers(presenceUsers || [])
logger.info(`Successfully joined workflow room: ${workflowId}`, {
presenceCount: presenceUsers?.length || 0,
})
}
setCurrentWorkflowId(workflowId)
setPresenceUsers(presenceUsers || [])
logger.info(`Successfully joined workflow room: ${workflowId}`, {
presenceCount: presenceUsers?.length || 0,
})
executeJoinCommands(result.commands)
})
socketInstance.on('join-workflow-error', ({ error, code }) => {
isRejoiningRef.current = false
logger.error('Failed to join workflow:', { error, code })
if (code === 'ROOM_MANAGER_UNAVAILABLE') {
triggerOfflineMode()
socketInstance.on('join-workflow-error', ({ workflowId, error, code, retryable }) => {
const result = joinControllerRef.current.handleJoinError({ workflowId, retryable })
if (result.ignored) {
logger.debug('Ignoring stale join-workflow-error', {
workflowId: result.workflowId,
error,
code,
})
} else if (result.retryScheduled) {
logger.warn('Retryable workflow join failure, waiting to retry', {
workflowId: result.workflowId,
error,
code,
})
} else if (result.apply) {
setIsRetryingWorkflowJoin(false)
if (result.workflowId) {
useOperationQueueStore.getState().cancelOperationsForWorkflow(result.workflowId)
}
logger.error('Failed to join workflow:', {
workflowId: result.workflowId,
error,
code,
})
}
executeJoinCommands(result.commands)
})
socketInstance.on('workflow-operation', (data) => {
@@ -371,13 +532,11 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
socketInstance.on('workflow-deleted', (data) => {
logger.warn(`Workflow ${data.workflowId} has been deleted`)
setCurrentWorkflowId((current) => {
if (current === data.workflowId) {
setPresenceUsers([])
return null
}
return current
})
const result = joinControllerRef.current.handleWorkflowDeleted(data.workflowId)
if (result.shouldClearCurrent) {
clearJoinedWorkflowState(true)
}
executeJoinCommands(result.commands)
eventHandlers.current.workflowDeleted?.(data)
})
@@ -457,6 +616,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
socketInstance.on('cursor-update', (data) => {
if (!isWorkflowVisible()) {
return
}
setPresenceUsers((prev) => {
const existingIndex = prev.findIndex((user) => user.socketId === data.socketId)
if (existingIndex === -1) {
@@ -471,6 +634,10 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
socketInstance.on('selection-update', (data) => {
if (!isWorkflowVisible()) {
return
}
setPresenceUsers((prev) => {
const existingIndex = prev.findIndex((user) => user.socketId === data.socketId)
if (existingIndex === -1) {
@@ -498,15 +665,11 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
logger.warn('Operation forbidden:', error)
if (error?.type === 'SESSION_ERROR') {
const workflowId = urlWorkflowIdRef.current
const workflowId = getRequestedWorkflowId()
if (workflowId && !isRejoiningRef.current) {
isRejoiningRef.current = true
if (workflowId) {
logger.info(`Session expired, rejoining workflow: ${workflowId}`)
socketInstance.emit('join-workflow', {
workflowId,
tabSessionId: getTabSessionId(),
})
executeJoinCommands(joinControllerRef.current.forceRejoinWorkflow(workflowId))
}
}
})
@@ -514,6 +677,19 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
socketInstance.on('workflow-state', async (workflowData) => {
logger.info('Received workflow state from server')
if (
!workflowData?.id ||
currentWorkflowIdRef.current !== workflowData.id ||
!isWorkflowVisible()
) {
logger.info('Ignoring workflow state for inactive room', {
workflowId: workflowData?.id,
currentWorkflowId: currentWorkflowIdRef.current,
desiredWorkflowId: urlWorkflowIdRef.current,
})
return
}
if (workflowData?.state) {
try {
await rehydrateWorkflowStores(workflowData.id, workflowData.state)
@@ -534,6 +710,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
initializeSocket()
return () => {
clearJoinRetryTimeout()
positionUpdateTimeouts.current.forEach((timeoutId) => {
clearTimeout(timeoutId)
})
@@ -552,77 +729,34 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
const hydrationPhase = useWorkflowRegistryStore((s) => s.hydration.phase)
useEffect(() => {
if (!socket || !isConnected || !urlWorkflowId) return
if (hydrationPhase === 'creating') return
// Skip if already in the correct room
if (currentWorkflowId === urlWorkflowId) return
logger.info(
`URL workflow changed from ${currentWorkflowId} to ${urlWorkflowId}, switching rooms`
)
if (currentWorkflowId) {
logger.info(`Leaving current workflow ${currentWorkflowId} before joining ${urlWorkflowId}`)
socket.emit('leave-workflow')
if (hydrationPhase === 'creating') {
return
}
logger.info(`Joining workflow room: ${urlWorkflowId}`)
socket.emit('join-workflow', {
workflowId: urlWorkflowId,
tabSessionId: getTabSessionId(),
})
}, [socket, isConnected, urlWorkflowId, currentWorkflowId, hydrationPhase])
executeJoinCommands(joinControllerRef.current.requestWorkflow(getRequestedWorkflowId()))
}, [
explicitWorkflowId,
getRequestedWorkflowId,
hydrationPhase,
urlWorkflowId,
executeJoinCommands,
])
const joinWorkflow = useCallback(
(workflowId: string) => {
if (!socket || !user?.id) {
logger.warn('Cannot join workflow: socket or user not available')
if (!user?.id) {
logger.warn('Cannot join workflow: user not available')
return
}
if (currentWorkflowId === workflowId) {
logger.info(`Already in workflow ${workflowId}, skipping join`)
return
}
if (currentWorkflowId) {
logger.info(`Leaving current workflow ${currentWorkflowId} before joining ${workflowId}`)
socket.emit('leave-workflow')
}
logger.info(`Joining workflow: ${workflowId}`)
socket.emit('join-workflow', {
workflowId,
tabSessionId: getTabSessionId(),
})
// currentWorkflowId will be set by join-workflow-success handler
setExplicitWorkflowId(workflowId)
},
[socket, user, currentWorkflowId]
[user]
)
const leaveWorkflow = useCallback(() => {
if (socket && currentWorkflowId) {
logger.info(`Leaving workflow: ${currentWorkflowId}`)
import('@/stores/operation-queue/store')
.then(({ useOperationQueueStore }) => {
useOperationQueueStore.getState().cancelOperationsForWorkflow(currentWorkflowId)
})
.catch((error) => {
logger.warn('Failed to cancel operations for workflow:', error)
})
socket.emit('leave-workflow')
setCurrentWorkflowId(null)
setPresenceUsers([])
positionUpdateTimeouts.current.forEach((timeoutId) => {
clearTimeout(timeoutId)
})
positionUpdateTimeouts.current.clear()
pendingPositionUpdates.current.clear()
}
}, [socket, currentWorkflowId])
setExplicitWorkflowId(null)
}, [])
/**
* Retry socket connection after auth failure.
@@ -640,8 +774,20 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
}, [authFailed])
const emitWorkflowOperation = useCallback(
(operation: string, target: string, payload: any, operationId?: string) => {
if (!socket || !currentWorkflowId) {
(workflowId: string, operation: string, target: string, payload: any, operationId?: string) => {
if (
!socket ||
!currentWorkflowId ||
workflowId !== currentWorkflowId ||
!isWorkflowVisible(workflowId)
) {
logger.debug('Skipping workflow operation emit for inactive room', {
workflowId,
currentWorkflowId,
desiredWorkflowId: urlWorkflowIdRef.current,
operation,
target,
})
return
}
@@ -653,7 +799,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
if (commit) {
socket.emit('workflow-operation', {
workflowId: currentWorkflowId,
workflowId,
operation,
target,
payload,
@@ -670,7 +816,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
}
pendingPositionUpdates.current.set(blockId, {
workflowId: currentWorkflowId,
workflowId,
operation,
target,
payload,
@@ -692,7 +838,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
}
} else {
socket.emit('workflow-operation', {
workflowId: currentWorkflowId,
workflowId,
operation,
target,
payload,
@@ -701,7 +847,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
}
},
[socket, currentWorkflowId]
[socket, currentWorkflowId, isWorkflowVisible]
)
const emitSubblockUpdate = useCallback(
@@ -712,8 +858,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
operationId: string | undefined,
workflowId: string
) => {
if (!socket) {
logger.warn('Cannot emit subblock update: no socket connection', { workflowId, blockId })
if (
!socket ||
workflowId !== currentWorkflowIdRef.current ||
!isWorkflowVisible(workflowId)
) {
const reason = !socket
? 'socket_unavailable'
: workflowId !== currentWorkflowIdRef.current
? 'joined_workflow_mismatch'
: 'workflow_not_visible'
logger.debug('Skipping subblock update emit', {
workflowId,
blockId,
subblockId,
reason,
currentWorkflowId: currentWorkflowIdRef.current,
})
return
}
socket.emit('subblock-update', {
@@ -736,8 +898,24 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
operationId: string | undefined,
workflowId: string
) => {
if (!socket) {
logger.warn('Cannot emit variable update: no socket connection', { workflowId, variableId })
if (
!socket ||
workflowId !== currentWorkflowIdRef.current ||
!isWorkflowVisible(workflowId)
) {
const reason = !socket
? 'socket_unavailable'
: workflowId !== currentWorkflowIdRef.current
? 'joined_workflow_mismatch'
: 'workflow_not_visible'
logger.debug('Skipping variable update emit', {
workflowId,
variableId,
field,
reason,
currentWorkflowId: currentWorkflowIdRef.current,
})
return
}
socket.emit('variable-update', {
@@ -755,7 +933,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
const lastCursorEmit = useRef(0)
const emitCursorUpdate = useCallback(
(cursor: { x: number; y: number } | null) => {
if (!socket || !currentWorkflowId) {
if (!socket || !currentWorkflowId || !isWorkflowVisible(currentWorkflowId)) {
return
}
@@ -772,16 +950,16 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
lastCursorEmit.current = now
}
},
[socket, currentWorkflowId]
[socket, currentWorkflowId, isWorkflowVisible]
)
const emitSelectionUpdate = useCallback(
(selection: { type: 'block' | 'edge' | 'none'; id?: string }) => {
if (socket && currentWorkflowId) {
if (socket && currentWorkflowId && isWorkflowVisible(currentWorkflowId)) {
socket.emit('selection-update', { selection })
}
},
[socket, currentWorkflowId]
[socket, currentWorkflowId, isWorkflowVisible]
)
const onWorkflowOperation = useCallback((handler: (data: any) => void) => {
@@ -830,6 +1008,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
isConnected,
isConnecting,
isReconnecting,
isRetryingWorkflowJoin,
authFailed,
currentWorkflowId,
currentSocketId,
@@ -858,6 +1037,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
isConnected,
isConnecting,
isReconnecting,
isRetryingWorkflowJoin,
authFailed,
currentWorkflowId,
currentSocketId,

View File

@@ -423,7 +423,7 @@ describe.concurrent('Blocks Module', () => {
})
it('should have valid mode values for subBlocks', () => {
const validModes = ['basic', 'advanced', 'both', 'trigger', undefined]
const validModes = ['basic', 'advanced', 'both', 'trigger', 'trigger-advanced', undefined]
const blocks = getAllBlocks()
for (const block of blocks) {
for (const subBlock of block.subBlocks) {
@@ -669,7 +669,9 @@ describe.concurrent('Blocks Module', () => {
for (const block of blocks) {
// Exclude trigger-mode subBlocks — they operate in a separate rendering context
// and their IDs don't participate in canonical param resolution
const nonTriggerSubBlocks = block.subBlocks.filter((sb) => sb.mode !== 'trigger')
const nonTriggerSubBlocks = block.subBlocks.filter(
(sb) => sb.mode !== 'trigger' && sb.mode !== 'trigger-advanced'
)
const allSubBlockIds = new Set(nonTriggerSubBlocks.map((sb) => sb.id))
const canonicalParamIds = new Set(
nonTriggerSubBlocks.filter((sb) => sb.canonicalParamId).map((sb) => sb.canonicalParamId)
@@ -795,6 +797,8 @@ describe.concurrent('Blocks Module', () => {
>()
for (const subBlock of block.subBlocks) {
// Skip trigger-mode subBlocks — they operate in a separate rendering context
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
if (subBlock.canonicalParamId) {
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
canonicalGroups.set(subBlock.canonicalParamId, [])
@@ -861,7 +865,7 @@ describe.concurrent('Blocks Module', () => {
continue
}
// Skip trigger-mode subBlocks — they operate in a separate rendering context
if (subBlock.mode === 'trigger') {
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') {
continue
}
const conditionKey = serializeCondition(subBlock.condition)
@@ -895,8 +899,11 @@ describe.concurrent('Blocks Module', () => {
if (!block.inputs) continue
// Find all canonical groups (subBlocks with canonicalParamId)
// Skip trigger-mode subBlocks — they operate in a separate rendering context
// and are not wired to the block's inputs section
const canonicalGroups = new Map<string, string[]>()
for (const subBlock of block.subBlocks) {
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
if (subBlock.canonicalParamId) {
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
canonicalGroups.set(subBlock.canonicalParamId, [])
@@ -948,8 +955,10 @@ describe.concurrent('Blocks Module', () => {
.replace(/\/\*[\s\S]*?\*\//g, '') // Remove multi-line comments
// Find all canonical groups (subBlocks with canonicalParamId)
// Skip trigger-mode subBlocks — they are not passed through params function
const canonicalGroups = new Map<string, string[]>()
for (const subBlock of block.subBlocks) {
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
if (subBlock.canonicalParamId) {
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
canonicalGroups.set(subBlock.canonicalParamId, [])
@@ -995,8 +1004,11 @@ describe.concurrent('Blocks Module', () => {
for (const block of blocks) {
// Find all canonical groups (subBlocks with canonicalParamId)
// Skip trigger-mode subBlocks — they operate in a separate rendering context
// and may have different required semantics from their block counterparts
const canonicalGroups = new Map<string, typeof block.subBlocks>()
for (const subBlock of block.subBlocks) {
if (subBlock.mode === 'trigger' || subBlock.mode === 'trigger-advanced') continue
if (subBlock.canonicalParamId) {
if (!canonicalGroups.has(subBlock.canonicalParamId)) {
canonicalGroups.set(subBlock.canonicalParamId, [])

View File

@@ -11,7 +11,7 @@ export const ApifyBlock: BlockConfig<RunActorResult> = {
'Integrate Apify into your workflow. Run any Apify actor with custom input and retrieve results. Supports both synchronous and asynchronous execution with automatic dataset fetching.',
docsLink: 'https://docs.sim.ai/tools/apify',
category: 'tools',
integrationType: IntegrationType.Automation,
integrationType: IntegrationType.Search,
tags: ['web-scraping', 'automation', 'data-analytics'],
bgColor: '#E0E0E0',
icon: ApifyIcon,

View File

@@ -12,7 +12,7 @@ export const ApolloBlock: BlockConfig<ApolloResponse> = {
'Integrates Apollo.io into the workflow. Search for people and companies, enrich contact data, manage your CRM contacts and accounts, add contacts to sequences, and create tasks.',
docsLink: 'https://docs.sim.ai/tools/apollo',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'sales-engagement'],
bgColor: '#EBF212',
icon: ApolloIcon,

View File

@@ -11,7 +11,7 @@ export const BrandfetchBlock: BlockConfig<BrandfetchGetBrandResponse | Brandfetc
'Integrate Brandfetch into your workflow. Retrieve brand logos, colors, fonts, and company data by domain, ticker, or name search.',
docsLink: 'https://docs.sim.ai/tools/brandfetch',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'marketing'],
bgColor: '#000000',
icon: BrandfetchIcon,

View File

@@ -11,7 +11,7 @@ export const BrowserUseBlock: BlockConfig<BrowserUseResponse> = {
'Integrate Browser Use into the workflow. Can navigate the web and perform actions as if a real user was interacting with the browser.',
docsLink: 'https://docs.sim.ai/tools/browser_use',
category: 'tools',
integrationType: IntegrationType.Automation,
integrationType: IntegrationType.AI,
tags: ['web-scraping', 'automation', 'agentic'],
bgColor: '#181C1E',
icon: BrowserUseIcon,

View File

@@ -10,7 +10,7 @@ export const ClayBlock: BlockConfig<ClayPopulateResponse> = {
longDescription: 'Integrate Clay into the workflow. Can populate a table with data.',
docsLink: 'https://docs.sim.ai/tools/clay',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'sales-engagement', 'data-analytics'],
bgColor: '#E0E0E0',
icon: ClayIcon,

View File

@@ -11,7 +11,7 @@ export const DagsterBlock: BlockConfig<DagsterResponse> = {
'Connect to a Dagster instance to launch job runs, monitor run status, list available jobs across repositories, terminate or delete runs, reexecute failed runs, fetch run logs, and manage schedules and sensors. API token only required for Dagster+.',
docsLink: 'https://docs.sim.ai/tools/dagster',
category: 'tools',
integrationType: IntegrationType.Automation,
integrationType: IntegrationType.Analytics,
tags: ['data-analytics', 'automation'],
bgColor: '#ffffff',
icon: DagsterIcon,

View File

@@ -10,7 +10,7 @@ export const ElevenLabsBlock: BlockConfig<ElevenLabsBlockResponse> = {
longDescription: 'Integrate ElevenLabs into the workflow. Can convert text to speech.',
docsLink: 'https://docs.sim.ai/tools/elevenlabs',
category: 'tools',
integrationType: IntegrationType.Media,
integrationType: IntegrationType.AI,
tags: ['text-to-speech'],
bgColor: '#181C1E',
icon: ElevenLabsIcon,

View File

@@ -11,7 +11,7 @@ export const EnrichBlock: BlockConfig = {
'Access real-time B2B data intelligence with Enrich.so. Enrich profiles from email addresses, find work emails from LinkedIn, verify email deliverability, search for people and companies, and analyze LinkedIn post engagement.',
docsLink: 'https://docs.enrich.so/',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'data-analytics'],
bgColor: '#E5E5E6',
icon: EnrichSoIcon,

View File

@@ -12,7 +12,7 @@ export const FirecrawlBlock: BlockConfig<FirecrawlResponse> = {
'Integrate Firecrawl into the workflow. Scrape pages, search the web, crawl entire sites, map URL structures, and extract structured data with AI.',
docsLink: 'https://docs.sim.ai/tools/firecrawl',
category: 'tools',
integrationType: IntegrationType.Automation,
integrationType: IntegrationType.Search,
tags: ['web-scraping', 'automation'],
bgColor: '#181C1E',
icon: FirecrawlIcon,

View File

@@ -17,7 +17,7 @@ export const FirefliesBlock: BlockConfig<FirefliesResponse> = {
'Integrate Fireflies.ai into the workflow. Manage meeting transcripts, add bot to live meetings, create soundbites, and more. Can also trigger workflows when transcriptions complete.',
docsLink: 'https://docs.sim.ai/tools/fireflies',
category: 'tools',
integrationType: IntegrationType.Media,
integrationType: IntegrationType.Productivity,
tags: ['meeting', 'speech-to-text', 'note-taking'],
icon: FirefliesIcon,
bgColor: '#100730',
@@ -615,7 +615,7 @@ export const FirefliesV2Block: BlockConfig<FirefliesResponse> = {
name: 'Fireflies',
description: 'Interact with Fireflies.ai meeting transcripts and recordings',
hideFromToolbar: false,
integrationType: IntegrationType.Media,
integrationType: IntegrationType.Productivity,
tags: ['meeting', 'speech-to-text', 'note-taking'],
subBlocks: firefliesV2SubBlocks,
tools: {

View File

@@ -12,7 +12,7 @@ export const GongBlock: BlockConfig<GongResponse> = {
'Integrate Gong into your workflow. Access call recordings, transcripts, user data, activity stats, scorecards, trackers, library content, coaching metrics, and more via the Gong API.',
docsLink: 'https://docs.sim.ai/tools/gong',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['meeting', 'sales-engagement', 'speech-to-text'],
bgColor: '#8039DF',
icon: GongIcon,

View File

@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
import { AuthMode, IntegrationType } from '@/blocks/types'
import { createVersionedToolSelector, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
import type { GoogleCalendarResponse } from '@/tools/google_calendar/types'
import { getTrigger } from '@/triggers'
export const GoogleCalendarBlock: BlockConfig<GoogleCalendarResponse> = {
type: 'google_calendar',
@@ -488,6 +489,7 @@ Return ONLY the natural language event text - no explanations.`,
{ label: 'None (no emails sent)', id: 'none' },
],
},
...getTrigger('google_calendar_poller').subBlocks,
],
tools: {
access: [
@@ -644,6 +646,10 @@ Return ONLY the natural language event text - no explanations.`,
content: { type: 'string', description: 'Operation response content' },
metadata: { type: 'json', description: 'Event or calendar metadata' },
},
triggers: {
enabled: true,
available: ['google_calendar_poller'],
},
}
export const GoogleCalendarV2Block: BlockConfig<GoogleCalendarResponse> = {

View File

@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
import { AuthMode, IntegrationType } from '@/blocks/types'
import { normalizeFileInput, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
import type { GoogleDriveResponse } from '@/tools/google_drive/types'
import { getTrigger } from '@/triggers'
export const GoogleDriveBlock: BlockConfig<GoogleDriveResponse> = {
type: 'google_drive',
@@ -719,6 +720,7 @@ Return ONLY the message text - no subject line, no greetings/signatures, no extr
required: true,
},
// Get Drive Info has no additional fields (just needs credential)
...getTrigger('google_drive_poller').subBlocks,
],
tools: {
access: [
@@ -939,4 +941,8 @@ Return ONLY the message text - no subject line, no greetings/signatures, no extr
deleted: { type: 'boolean', description: 'Whether file was deleted' },
removed: { type: 'boolean', description: 'Whether permission was removed' },
},
triggers: {
enabled: true,
available: ['google_drive_poller'],
},
}

View File

@@ -4,6 +4,7 @@ import type { BlockConfig } from '@/blocks/types'
import { AuthMode, IntegrationType } from '@/blocks/types'
import { createVersionedToolSelector, SERVICE_ACCOUNT_SUBBLOCKS } from '@/blocks/utils'
import type { GoogleSheetsResponse, GoogleSheetsV2Response } from '@/tools/google_sheets/types'
import { getTrigger } from '@/triggers'
// Legacy block - hidden from toolbar
export const GoogleSheetsBlock: BlockConfig<GoogleSheetsResponse> = {
@@ -716,6 +717,7 @@ Return ONLY the JSON array - no explanations, no markdown, no extra text.`,
condition: { field: 'operation', value: 'copy_sheet' },
required: true,
},
...getTrigger('google_sheets_poller').subBlocks,
],
tools: {
access: [
@@ -1068,4 +1070,8 @@ Return ONLY the JSON array - no explanations, no markdown, no extra text.`,
},
},
},
triggers: {
enabled: true,
available: ['google_sheets_poller'],
},
}

View File

@@ -13,7 +13,7 @@ export const GrainBlock: BlockConfig = {
longDescription:
'Integrate Grain into your workflow. Access meeting recordings, transcripts, highlights, and AI-generated summaries. Can also trigger workflows based on Grain webhook events.',
category: 'tools',
integrationType: IntegrationType.Media,
integrationType: IntegrationType.Productivity,
tags: ['meeting', 'note-taking'],
docsLink: 'https://docs.sim.ai/tools/grain',
icon: GrainIcon,

View File

@@ -11,7 +11,7 @@ export const HunterBlock: BlockConfig<HunterResponse> = {
'Integrate Hunter into the workflow. Can search domains, find email addresses, verify email addresses, discover companies, find companies, and count email addresses.',
docsLink: 'https://docs.sim.ai/tools/hunter',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'sales-engagement'],
bgColor: '#E0E0E0',
icon: HunterIOIcon,

View File

@@ -13,7 +13,7 @@ export const LinkedInBlock: BlockConfig<LinkedInResponse> = {
'Integrate LinkedIn into workflows. Share posts to your personal feed and access your LinkedIn profile information.',
docsLink: 'https://docs.sim.ai/tools/linkedin',
category: 'tools',
integrationType: IntegrationType.Social,
integrationType: IntegrationType.Sales,
tags: ['marketing', 'sales-engagement', 'enrichment'],
bgColor: '#0072B1',
icon: LinkedInIcon,

View File

@@ -13,7 +13,7 @@ export const RedditBlock: BlockConfig<RedditResponse> = {
'Integrate Reddit into workflows. Read posts, comments, and search content. Submit posts, vote, reply, edit, manage messages, and access user and subreddit info.',
docsLink: 'https://docs.sim.ai/tools/reddit',
category: 'tools',
integrationType: IntegrationType.Social,
integrationType: IntegrationType.Communication,
tags: ['content-management', 'web-scraping'],
bgColor: '#FF5700',
icon: RedditIcon,

View File

@@ -9,7 +9,7 @@ export const SixtyfourBlock: BlockConfig = {
'Find emails, phone numbers, and enrich lead or company data with contact information, social profiles, and detailed research using Sixtyfour AI.',
docsLink: 'https://docs.sim.ai/tools/sixtyfour',
category: 'tools',
integrationType: IntegrationType.SalesIntelligence,
integrationType: IntegrationType.Sales,
tags: ['enrichment', 'sales-engagement'],
bgColor: '#000000',
icon: SixtyfourIcon,

View File

@@ -13,7 +13,7 @@ export const SpotifyBlock: BlockConfig<ToolResponse> = {
'Integrate Spotify into your workflow. Search for tracks, albums, artists, and playlists. Manage playlists, access your library, control playback, browse podcasts and audiobooks.',
docsLink: 'https://docs.sim.ai/tools/spotify',
category: 'tools',
integrationType: IntegrationType.Media,
integrationType: IntegrationType.Communication,
tags: ['content-management', 'automation'],
hideFromToolbar: true,
bgColor: '#000000',

View File

@@ -35,7 +35,7 @@ export const StagehandBlock: BlockConfig<StagehandResponse> = {
'Integrate Stagehand into the workflow. Can extract structured data from webpages or run an autonomous agent to perform tasks.',
docsLink: 'https://docs.sim.ai/tools/stagehand',
category: 'tools',
integrationType: IntegrationType.Automation,
integrationType: IntegrationType.AI,
tags: ['web-scraping', 'automation', 'agentic'],
bgColor: '#FFC83C',
icon: StagehandIcon,

View File

@@ -12,7 +12,7 @@ export const XBlock: BlockConfig = {
'Integrate X into the workflow. Search tweets, manage bookmarks, follow/block/mute users, like and retweet, view trends, and more.',
docsLink: 'https://docs.sim.ai/tools/x',
category: 'tools',
integrationType: IntegrationType.Social,
integrationType: IntegrationType.Communication,
tags: ['marketing', 'messaging'],
bgColor: '#000000',
icon: xIcon,

View File

@@ -12,7 +12,7 @@ export const YouTubeBlock: BlockConfig<YouTubeResponse> = {
'Integrate YouTube into the workflow. Can search for videos, get trending videos, get video details, get video categories, get channel information, get all videos from a channel, get channel playlists, get playlist items, and get video comments.',
docsLink: 'https://docs.sim.ai/tools/youtube',
category: 'tools',
integrationType: IntegrationType.Media,
integrationType: IntegrationType.Communication,
tags: ['google-workspace', 'marketing', 'content-management'],
bgColor: '#FF0000',
icon: YouTubeIcon,

View File

@@ -19,7 +19,6 @@ export type BlockCategory = 'blocks' | 'tools' | 'triggers'
export enum IntegrationType {
AI = 'ai',
Analytics = 'analytics',
Automation = 'automation',
Communication = 'communication',
CRM = 'crm',
CustomerSupport = 'customer-support',
@@ -31,13 +30,11 @@ export enum IntegrationType {
Email = 'email',
FileStorage = 'file-storage',
HR = 'hr',
Media = 'media',
Other = 'other',
Productivity = 'productivity',
SalesIntelligence = 'sales-intelligence',
Sales = 'sales',
Search = 'search',
Security = 'security',
Social = 'social',
}
export type IntegrationTag =
@@ -275,7 +272,7 @@ export interface SubBlockConfig {
id: string
title?: string
type: SubBlockType
mode?: 'basic' | 'advanced' | 'both' | 'trigger' // Default is 'both' if not specified. 'trigger' means only shown in trigger mode
mode?: 'basic' | 'advanced' | 'both' | 'trigger' | 'trigger-advanced' // Default is 'both' if not specified. 'trigger' means only shown in trigger mode. 'trigger-advanced' is for advanced canonical pair members shown in trigger mode
canonicalParamId?: string
/** Controls parameter visibility in agent/tool-input context */
paramVisibility?: 'user-or-llm' | 'user-only' | 'llm-only' | 'hidden'

View File

@@ -1,6 +1,7 @@
import { createLogger } from '@sim/logger'
import { keepPreviousData, useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
import { toast } from '@/components/emcn'
import type { ChunkingStrategy, StrategyOptions } from '@/lib/chunkers/types'
import type {
ChunkData,
ChunksPagination,
@@ -338,10 +339,7 @@ export interface DocumentChunkSearchParams {
search: string
}
/**
* Fetches all chunks matching a search query by paginating through results.
* This is used for search functionality where we need all matching chunks.
*/
/** Paginates through all matching chunks rather than returning a single page. */
export async function fetchAllDocumentChunks(
{ knowledgeBaseId, documentId, search }: DocumentChunkSearchParams,
signal?: AbortSignal
@@ -376,10 +374,6 @@ export const serializeSearchParams = (params: DocumentChunkSearchParams) =>
search: params.search,
})
/**
* Hook to search for chunks in a document.
* Fetches all matching chunks and returns them for client-side pagination.
*/
export function useDocumentChunkSearchQuery(
params: DocumentChunkSearchParams,
options?: {
@@ -707,6 +701,8 @@ export interface CreateKnowledgeBaseParams {
maxSize: number
minSize: number
overlap: number
strategy?: ChunkingStrategy
strategyOptions?: StrategyOptions
}
}

View File

@@ -152,13 +152,23 @@ export function useCollaborativeWorkflow() {
// Register emit functions with operation queue store
useEffect(() => {
const registeredWorkflowId =
isConnected && currentWorkflowId === activeWorkflowId ? currentWorkflowId : null
registerEmitFunctions(
emitWorkflowOperation,
emitSubblockUpdate,
emitVariableUpdate,
currentWorkflowId
registeredWorkflowId
)
}, [emitWorkflowOperation, emitSubblockUpdate, emitVariableUpdate, currentWorkflowId])
}, [
activeWorkflowId,
currentWorkflowId,
emitWorkflowOperation,
emitSubblockUpdate,
emitVariableUpdate,
isConnected,
])
useEffect(() => {
const handleWorkflowOperation = (data: any) => {

View File

@@ -55,14 +55,17 @@ export function useTriggerConfigAggregation(
let hasAnyValue = false
triggerDef.subBlocks
.filter((sb) => sb.mode === 'trigger' && !SYSTEM_SUBBLOCK_IDS.includes(sb.id))
.filter(
(sb) =>
(sb.mode === 'trigger' || sb.mode === 'trigger-advanced') &&
!SYSTEM_SUBBLOCK_IDS.includes(sb.id)
)
.forEach((subBlock) => {
const fieldValue = subBlockStore.getValue(blockId, subBlock.id)
let valueToUse = fieldValue
if (
(fieldValue === null || fieldValue === undefined || fieldValue === '') &&
subBlock.required &&
subBlock.defaultValue !== undefined
) {
valueToUse = subBlock.defaultValue
@@ -117,7 +120,11 @@ export function populateTriggerFieldsFromConfig(
const subBlockStore = useSubBlockStore.getState()
triggerDef.subBlocks
.filter((sb) => sb.mode === 'trigger' && !SYSTEM_SUBBLOCK_IDS.includes(sb.id))
.filter(
(sb) =>
(sb.mode === 'trigger' || sb.mode === 'trigger-advanced') &&
!SYSTEM_SUBBLOCK_IDS.includes(sb.id)
)
.forEach((subBlock) => {
let configValue: any

View File

@@ -3,12 +3,12 @@ import path from 'path'
import { createLogger } from '@sim/logger'
import { TextChunker } from '@/lib/chunkers/text-chunker'
import type { DocChunk, DocsChunkerOptions } from '@/lib/chunkers/types'
import { estimateTokens } from '@/lib/chunkers/utils'
import { generateEmbeddings } from '@/lib/knowledge/embeddings'
interface HeaderInfo {
level: number
text: string
slug?: string
anchor?: string
position?: number
}
@@ -21,25 +21,21 @@ interface Frontmatter {
const logger = createLogger('DocsChunker')
/**
* Docs-specific chunker that processes .mdx files and tracks header context
*/
export class DocsChunker {
private readonly textChunker: TextChunker
private readonly baseUrl: string
private readonly chunkSize: number
constructor(options: DocsChunkerOptions = {}) {
this.chunkSize = options.chunkSize ?? 300
this.textChunker = new TextChunker({
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
chunkSize: this.chunkSize,
minCharactersPerChunk: options.minCharactersPerChunk ?? 1,
chunkOverlap: options.chunkOverlap ?? 50,
})
this.baseUrl = options.baseUrl ?? 'https://docs.sim.ai'
}
/**
* Process all .mdx files in the docs directory
*/
async chunkAllDocs(docsPath: string): Promise<DocChunk[]> {
const allChunks: DocChunk[] = []
@@ -65,20 +61,17 @@ export class DocsChunker {
}
}
/**
* Process a single .mdx file
*/
async chunkMdxFile(filePath: string, basePath: string): Promise<DocChunk[]> {
const content = await fs.readFile(filePath, 'utf-8')
const relativePath = path.relative(basePath, filePath)
const { data: frontmatter, content: markdownContent } = this.parseFrontmatter(content)
const headers = this.extractHeaders(markdownContent)
const documentUrl = this.generateDocumentUrl(relativePath)
const textChunks = await this.splitContent(markdownContent)
const { chunks: textChunks, cleanedContent } = await this.splitContent(markdownContent)
const headers = this.extractHeaders(cleanedContent)
logger.info(`Generating embeddings for ${textChunks.length} chunks in ${relativePath}`)
const embeddings: number[][] =
@@ -97,7 +90,7 @@ export class DocsChunker {
const chunk: DocChunk = {
text: chunkText,
tokenCount: Math.ceil(chunkText.length / 4), // Simple token estimation
tokenCount: estimateTokens(chunkText),
sourceDocument: relativePath,
headerLink: relevantHeader ? `${documentUrl}#${relevantHeader.anchor}` : documentUrl,
headerText: relevantHeader?.text || frontmatter.title || 'Document Root',
@@ -118,9 +111,6 @@ export class DocsChunker {
return chunks
}
/**
* Find all .mdx files recursively
*/
private async findMdxFiles(dirPath: string): Promise<string[]> {
const files: string[] = []
@@ -140,9 +130,6 @@ export class DocsChunker {
return files
}
/**
* Extract headers and their positions from markdown content
*/
private extractHeaders(content: string): HeaderInfo[] {
const headers: HeaderInfo[] = []
const headerRegex = /^(#{1,6})\s+(.+)$/gm
@@ -164,42 +151,28 @@ export class DocsChunker {
return headers
}
/**
* Generate URL-safe anchor from header text
*/
private generateAnchor(headerText: string): string {
return headerText
.toLowerCase()
.replace(/[^\w\s-]/g, '') // Remove special characters except hyphens
.replace(/\s+/g, '-') // Replace spaces with hyphens
.replace(/-+/g, '-') // Replace multiple hyphens with single
.replace(/^-|-$/g, '') // Remove leading/trailing hyphens
.replace(/[^\w\s-]/g, '')
.replace(/\s+/g, '-')
.replace(/-+/g, '-')
.replace(/^-|-$/g, '')
}
/**
* Generate document URL from relative path
* Handles index.mdx files specially - they are served at the parent directory path
*/
/** index.mdx files are served at the parent directory path */
private generateDocumentUrl(relativePath: string): string {
// Convert file path to URL path
// e.g., "tools/knowledge.mdx" -> "/tools/knowledge"
// e.g., "triggers/index.mdx" -> "/triggers" (NOT "/triggers/index")
let urlPath = relativePath.replace(/\.mdx$/, '').replace(/\\/g, '/') // Handle Windows paths
let urlPath = relativePath.replace(/\.mdx$/, '').replace(/\\/g, '/')
// In fumadocs, index.mdx files are served at the parent directory path
// e.g., "triggers/index" -> "triggers"
if (urlPath.endsWith('/index')) {
urlPath = urlPath.slice(0, -6) // Remove "/index"
urlPath = urlPath.slice(0, -6)
} else if (urlPath === 'index') {
urlPath = '' // Root index.mdx
urlPath = ''
}
return `${this.baseUrl}/${urlPath}`
}
/**
* Find the most relevant header for a given position
*/
private findRelevantHeader(headers: HeaderInfo[], position: number): HeaderInfo | null {
if (headers.length === 0) return null
@@ -216,10 +189,10 @@ export class DocsChunker {
return relevantHeader
}
/**
* Split content into chunks using the existing TextChunker with table awareness
*/
private async splitContent(content: string): Promise<string[]> {
/** Returns both chunks and cleaned content so header extraction uses aligned positions. */
private async splitContent(
content: string
): Promise<{ chunks: string[]; cleanedContent: string }> {
const cleanedContent = this.cleanContent(content)
const tableBoundaries = this.detectTableBoundaries(cleanedContent)
@@ -234,30 +207,23 @@ export class DocsChunker {
const finalChunks = this.enforceSizeLimit(processedChunks)
return finalChunks
return { chunks: finalChunks, cleanedContent }
}
/**
* Clean content by removing MDX-specific elements and excessive whitespace
*/
private cleanContent(content: string): string {
return (
content
// Remove import statements
.replace(/^import\s+.*$/gm, '')
// Remove JSX components and React-style comments
.replace(/<[^>]+>/g, ' ')
.replace(/\{\/\*[\s\S]*?\*\/\}/g, ' ')
// Remove excessive whitespace
.replace(/\n{3,}/g, '\n\n')
.replace(/[ \t]{2,}/g, ' ')
.trim()
)
return content
.replace(/\r\n/g, '\n')
.replace(/\r/g, '\n')
.replace(/^import\s+.*$/gm, '')
.replace(/^export\s+.*$/gm, '')
.replace(/<\/?[a-zA-Z][^>]*>/g, ' ')
.replace(/\{\/\*[\s\S]*?\*\/\}/g, ' ')
.replace(/\{[^{}]*\}/g, ' ')
.replace(/\n{3,}/g, '\n\n')
.replace(/[ \t]{2,}/g, ' ')
.trim()
}
/**
* Parse frontmatter from MDX content
*/
private parseFrontmatter(content: string): { data: Frontmatter; content: string } {
const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/
const match = content.match(frontmatterRegex)
@@ -285,26 +251,25 @@ export class DocsChunker {
return { data, content: markdownContent }
}
/**
* Estimate token count (rough approximation)
*/
private estimateTokens(text: string): number {
return Math.ceil(text.length / 4)
}
/**
* Detect table boundaries in markdown content to avoid splitting them
*/
/** Detects table boundaries to avoid splitting tables across chunks. */
private detectTableBoundaries(content: string): { start: number; end: number }[] {
const tables: { start: number; end: number }[] = []
const lines = content.split('\n')
let inTable = false
let inCodeBlock = false
let tableStart = -1
for (let i = 0; i < lines.length; i++) {
const line = lines[i].trim()
if (line.startsWith('```')) {
inCodeBlock = !inCodeBlock
continue
}
if (inCodeBlock) continue
if (line.includes('|') && line.split('|').length >= 3 && !inTable) {
const nextLine = lines[i + 1]?.trim()
if (nextLine?.includes('|') && nextLine.includes('-')) {
@@ -314,7 +279,7 @@ export class DocsChunker {
} else if (inTable && (!line.includes('|') || line === '' || line.startsWith('#'))) {
tables.push({
start: this.getCharacterPosition(lines, tableStart),
end: this.getCharacterPosition(lines, i - 1) + lines[i - 1]?.length || 0,
end: this.getCharacterPosition(lines, i - 1) + (lines[i - 1]?.length ?? 0),
})
inTable = false
}
@@ -330,16 +295,10 @@ export class DocsChunker {
return tables
}
/**
* Get character position from line number
*/
private getCharacterPosition(lines: string[], lineIndex: number): number {
return lines.slice(0, lineIndex).reduce((acc, line) => acc + line.length + 1, 0)
}
/**
* Merge chunks that would split tables
*/
private mergeTableChunks(
chunks: string[],
tableBoundaries: { start: number; end: number }[],
@@ -354,6 +313,10 @@ export class DocsChunker {
for (const chunk of chunks) {
const chunkStart = originalContent.indexOf(chunk, currentPosition)
if (chunkStart === -1) {
mergedChunks.push(chunk)
continue
}
const chunkEnd = chunkStart + chunk.length
const intersectsTable = tableBoundaries.some(
@@ -373,10 +336,10 @@ export class DocsChunker {
const minStart = Math.min(chunkStart, ...affectedTables.map((t) => t.start))
const maxEnd = Math.max(chunkEnd, ...affectedTables.map((t) => t.end))
const completeChunk = originalContent.slice(minStart, maxEnd)
const completeChunk = originalContent.slice(minStart, maxEnd).trim()
if (!mergedChunks.some((existing) => existing.includes(completeChunk.trim()))) {
mergedChunks.push(completeChunk.trim())
if (completeChunk && !mergedChunks.some((existing) => existing === completeChunk)) {
mergedChunks.push(completeChunk)
}
} else {
mergedChunks.push(chunk)
@@ -388,16 +351,13 @@ export class DocsChunker {
return mergedChunks.filter((chunk) => chunk.length > 50)
}
/**
* Enforce 300 token size limit on chunks
*/
private enforceSizeLimit(chunks: string[]): string[] {
const finalChunks: string[] = []
for (const chunk of chunks) {
const tokens = this.estimateTokens(chunk)
const tokens = estimateTokens(chunk)
if (tokens <= 300) {
if (tokens <= this.chunkSize) {
finalChunks.push(chunk)
} else {
const lines = chunk.split('\n')
@@ -406,7 +366,7 @@ export class DocsChunker {
for (const line of lines) {
const testChunk = currentChunk ? `${currentChunk}\n${line}` : line
if (this.estimateTokens(testChunk) <= 300) {
if (estimateTokens(testChunk) <= this.chunkSize) {
currentChunk = testChunk
} else {
if (currentChunk.trim()) {

View File

@@ -1,5 +1,9 @@
export { DocsChunker } from './docs-chunker'
export { JsonYamlChunker } from './json-yaml-chunker'
export { RecursiveChunker } from './recursive-chunker'
export { RegexChunker } from './regex-chunker'
export { SentenceChunker } from './sentence-chunker'
export { StructuredDataChunker } from './structured-data-chunker'
export { TextChunker } from './text-chunker'
export { TokenChunker } from './token-chunker'
export * from './types'

View File

@@ -30,14 +30,11 @@ describe('JsonYamlChunker', () => {
expect(JsonYamlChunker.isStructuredData('key: value\nother: data')).toBe(true)
})
it('should return true for YAML-like plain text', () => {
// Note: js-yaml is permissive and parses plain text as valid YAML (scalar value)
// This is expected behavior of the YAML parser
expect(JsonYamlChunker.isStructuredData('Hello, this is plain text.')).toBe(true)
it('should return false for plain text parsed as YAML scalar', () => {
expect(JsonYamlChunker.isStructuredData('Hello, this is plain text.')).toBe(false)
})
it('should return false for invalid JSON/YAML with unbalanced braces', () => {
// Only truly malformed content that fails YAML parsing returns false
expect(JsonYamlChunker.isStructuredData('{invalid: json: content: {{')).toBe(false)
})
@@ -61,7 +58,6 @@ describe('JsonYamlChunker', () => {
const json = '{}'
const chunks = await chunker.chunk(json)
// Empty object is valid JSON, should return at least metadata
expect(chunks.length).toBeGreaterThanOrEqual(0)
})
@@ -204,7 +200,6 @@ server:
const json = '[]'
const chunks = await chunker.chunk(json)
// Empty array should not produce chunks with meaningful content
expect(chunks.length).toBeGreaterThanOrEqual(0)
})
@@ -272,7 +267,6 @@ server:
it.concurrent('should fall back to text chunking for invalid JSON', async () => {
const chunker = new JsonYamlChunker({ chunkSize: 100, minCharactersPerChunk: 10 })
// Create content that fails YAML parsing and is long enough to produce chunks
const invalidJson = `{this is not valid json: content: {{${' more content here '.repeat(10)}`
const chunks = await chunker.chunk(invalidJson)
@@ -377,9 +371,7 @@ server:
const json = JSON.stringify({ a: 1, b: 2, c: 3 })
const chunks = await chunker.chunk(json)
// Should produce chunks that are valid
expect(chunks.length).toBeGreaterThan(0)
// The entire small object fits in one chunk
expect(chunks[0].text.length).toBeGreaterThan(0)
})
})

View File

@@ -1,8 +1,7 @@
import { createLogger } from '@sim/logger'
import * as yaml from 'js-yaml'
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
import { getAccurateTokenCount } from '@/lib/tokenization'
import { estimateTokenCount } from '@/lib/tokenization/estimators'
import { estimateTokens } from '@/lib/chunkers/utils'
const logger = createLogger('JsonYamlChunker')
@@ -11,57 +10,31 @@ type JsonValue = JsonPrimitive | JsonObject | JsonArray
type JsonObject = { [key: string]: JsonValue }
type JsonArray = JsonValue[]
function getTokenCount(text: string): number {
try {
return getAccurateTokenCount(text, 'text-embedding-3-small')
} catch (error) {
logger.warn('Tiktoken failed, falling back to estimation')
const estimate = estimateTokenCount(text)
return estimate.count
}
}
/**
* Configuration for JSON/YAML chunking
* Reduced limits to ensure we stay well under OpenAI's 8,191 token limit per embedding request
*/
const JSON_YAML_CHUNKING_CONFIG = {
TARGET_CHUNK_SIZE: 1024, // Target tokens per chunk
MIN_CHARACTERS_PER_CHUNK: 100, // Minimum characters per chunk to filter tiny fragments
MAX_CHUNK_SIZE: 1500, // Maximum tokens per chunk
MAX_DEPTH_FOR_SPLITTING: 5, // Maximum depth to traverse for splitting
}
const MAX_DEPTH = 5
export class JsonYamlChunker {
private chunkSize: number // in tokens
private minCharactersPerChunk: number // in characters
private chunkSize: number
private minCharactersPerChunk: number
constructor(options: ChunkerOptions = {}) {
this.chunkSize = options.chunkSize ?? JSON_YAML_CHUNKING_CONFIG.TARGET_CHUNK_SIZE
this.minCharactersPerChunk =
options.minCharactersPerChunk ?? JSON_YAML_CHUNKING_CONFIG.MIN_CHARACTERS_PER_CHUNK
this.chunkSize = options.chunkSize ?? 1024
this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100
}
/**
* Check if content is structured JSON/YAML data
*/
static isStructuredData(content: string): boolean {
try {
JSON.parse(content)
return true
const parsed = JSON.parse(content)
return typeof parsed === 'object' && parsed !== null
} catch {
try {
yaml.load(content)
return true
const parsed = yaml.load(content)
return typeof parsed === 'object' && parsed !== null
} catch {
return false
}
}
}
/**
* Chunk JSON/YAML content intelligently based on structure
*/
async chunk(content: string): Promise<Chunk[]> {
try {
let data: JsonValue
@@ -70,16 +43,10 @@ export class JsonYamlChunker {
} catch {
data = yaml.load(content) as JsonValue
}
const chunks = this.chunkStructuredData(data)
const chunks = this.chunkStructuredData(data, [], 0)
const tokenCounts = chunks.map((c) => c.tokenCount)
const totalTokens = tokenCounts.reduce((a, b) => a + b, 0)
const maxTokens = Math.max(...tokenCounts)
const avgTokens = Math.round(totalTokens / chunks.length)
logger.info(
`JSON chunking complete: ${chunks.length} chunks, ${totalTokens} total tokens (avg: ${avgTokens}, max: ${maxTokens})`
)
const totalTokens = chunks.reduce((sum, c) => sum + c.tokenCount, 0)
logger.info(`JSON chunking complete: ${chunks.length} chunks, ${totalTokens} total tokens`)
return chunks
} catch (error) {
@@ -88,42 +55,38 @@ export class JsonYamlChunker {
}
}
/**
* Chunk structured data based on its structure
*/
private chunkStructuredData(data: JsonValue, path: string[] = []): Chunk[] {
const chunks: Chunk[] = []
private chunkStructuredData(data: JsonValue, path: string[], depth: number): Chunk[] {
if (Array.isArray(data)) {
return this.chunkArray(data, path)
return this.chunkArray(data, path, depth)
}
if (typeof data === 'object' && data !== null) {
return this.chunkObject(data as JsonObject, path)
return this.chunkObject(data as JsonObject, path, depth)
}
const content = JSON.stringify(data, null, 2)
const tokenCount = getTokenCount(content)
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
const contentTokens = estimateTokens(content)
// Filter tiny fragments using character count
if (content.length >= this.minCharactersPerChunk) {
chunks.push({
text: content,
tokenCount,
metadata: {
startIndex: 0,
endIndex: content.length,
},
})
if (contentTokens > this.chunkSize) {
return this.chunkAsText(contextHeader + content)
}
return chunks
if (content.length < this.minCharactersPerChunk) {
return []
}
const text = contextHeader + content
return [
{
text,
tokenCount: estimateTokens(text),
metadata: { startIndex: 0, endIndex: text.length },
},
]
}
/**
* Chunk an array intelligently
*/
private chunkArray(arr: JsonArray, path: string[]): Chunk[] {
private chunkArray(arr: JsonArray, path: string[], depth: number): Chunk[] {
const chunks: Chunk[] = []
let currentBatch: JsonValue[] = []
let currentTokens = 0
@@ -133,46 +96,30 @@ export class JsonYamlChunker {
for (let i = 0; i < arr.length; i++) {
const item = arr[i]
const itemStr = JSON.stringify(item, null, 2)
const itemTokens = getTokenCount(itemStr)
const itemTokens = estimateTokens(itemStr)
if (itemTokens > this.chunkSize) {
if (currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: getTokenCount(batchContent),
metadata: {
startIndex: i - currentBatch.length,
endIndex: i - 1,
},
})
chunks.push(
this.buildBatchChunk(contextHeader, currentBatch, i - currentBatch.length, i - 1)
)
currentBatch = []
currentTokens = 0
}
if (typeof item === 'object' && item !== null) {
const subChunks = this.chunkStructuredData(item, [...path, `[${i}]`])
chunks.push(...subChunks)
if (depth < MAX_DEPTH && typeof item === 'object' && item !== null) {
chunks.push(...this.chunkStructuredData(item, [...path, `[${i}]`], depth + 1))
} else {
chunks.push({
text: contextHeader + itemStr,
tokenCount: itemTokens,
metadata: {
startIndex: i,
endIndex: i,
},
metadata: { startIndex: i, endIndex: i },
})
}
} else if (currentTokens + itemTokens > this.chunkSize && currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: getTokenCount(batchContent),
metadata: {
startIndex: i - currentBatch.length,
endIndex: i - 1,
},
})
chunks.push(
this.buildBatchChunk(contextHeader, currentBatch, i - currentBatch.length, i - 1)
)
currentBatch = [item]
currentTokens = itemTokens
} else {
@@ -182,121 +129,112 @@ export class JsonYamlChunker {
}
if (currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: getTokenCount(batchContent),
metadata: {
startIndex: arr.length - currentBatch.length,
endIndex: arr.length - 1,
},
})
chunks.push(
this.buildBatchChunk(
contextHeader,
currentBatch,
arr.length - currentBatch.length,
arr.length - 1
)
)
}
return chunks
}
/**
* Chunk an object intelligently
*/
private chunkObject(obj: JsonObject, path: string[]): Chunk[] {
private chunkObject(obj: JsonObject, path: string[], depth: number): Chunk[] {
const chunks: Chunk[] = []
const entries = Object.entries(obj)
const fullContent = JSON.stringify(obj, null, 2)
const fullTokens = getTokenCount(fullContent)
const fullTokens = estimateTokens(fullContent)
if (fullTokens <= this.chunkSize) {
chunks.push({
text: fullContent,
tokenCount: fullTokens,
metadata: {
startIndex: 0,
endIndex: fullContent.length,
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
const text = contextHeader + fullContent
return [
{
text,
tokenCount: estimateTokens(text),
metadata: { startIndex: 0, endIndex: text.length },
},
})
return chunks
]
}
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
let currentObj: JsonObject = {}
let currentTokens = 0
let currentKeys: string[] = []
for (const [key, value] of entries) {
const valueStr = JSON.stringify({ [key]: value }, null, 2)
const valueTokens = getTokenCount(valueStr)
const valueTokens = estimateTokens(valueStr)
if (valueTokens > this.chunkSize) {
if (Object.keys(currentObj).length > 0) {
const objContent = JSON.stringify(currentObj, null, 2)
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: getTokenCount(objContent),
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
tokenCount: estimateTokens(objContent),
metadata: { startIndex: 0, endIndex: objContent.length },
})
currentObj = {}
currentTokens = 0
currentKeys = []
}
if (typeof value === 'object' && value !== null) {
const subChunks = this.chunkStructuredData(value, [...path, key])
chunks.push(...subChunks)
if (depth < MAX_DEPTH && typeof value === 'object' && value !== null) {
chunks.push(...this.chunkStructuredData(value, [...path, key], depth + 1))
} else {
chunks.push({
text: valueStr,
text: contextHeader + valueStr,
tokenCount: valueTokens,
metadata: {
startIndex: 0,
endIndex: valueStr.length,
},
metadata: { startIndex: 0, endIndex: valueStr.length },
})
}
} else if (
currentTokens + valueTokens > this.chunkSize &&
Object.keys(currentObj).length > 0
) {
const objContent = JSON.stringify(currentObj, null, 2)
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: getTokenCount(objContent),
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
tokenCount: estimateTokens(objContent),
metadata: { startIndex: 0, endIndex: objContent.length },
})
currentObj = { [key]: value }
currentTokens = valueTokens
currentKeys = [key]
} else {
currentObj[key] = value
currentTokens += valueTokens
currentKeys.push(key)
}
}
if (Object.keys(currentObj).length > 0) {
const objContent = JSON.stringify(currentObj, null, 2)
const objContent = contextHeader + JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: getTokenCount(objContent),
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
tokenCount: estimateTokens(objContent),
metadata: { startIndex: 0, endIndex: objContent.length },
})
}
return chunks
}
/**
* Fall back to text chunking if JSON parsing fails
*/
private async chunkAsText(content: string): Promise<Chunk[]> {
private buildBatchChunk(
contextHeader: string,
batch: JsonValue[],
startIdx: number,
endIdx: number
): Chunk {
const batchContent = contextHeader + JSON.stringify(batch, null, 2)
return {
text: batchContent,
tokenCount: estimateTokens(batchContent),
metadata: { startIndex: startIdx, endIndex: endIdx },
}
}
private chunkAsText(content: string): Chunk[] {
const chunks: Chunk[] = []
const lines = content.split('\n')
let currentChunk = ''
@@ -304,16 +242,13 @@ export class JsonYamlChunker {
let startIndex = 0
for (const line of lines) {
const lineTokens = getTokenCount(line)
const lineTokens = estimateTokens(line)
if (currentTokens + lineTokens > this.chunkSize && currentChunk) {
chunks.push({
text: currentChunk,
tokenCount: currentTokens,
metadata: {
startIndex,
endIndex: startIndex + currentChunk.length,
},
metadata: { startIndex, endIndex: startIndex + currentChunk.length },
})
startIndex += currentChunk.length + 1
@@ -325,24 +260,17 @@ export class JsonYamlChunker {
}
}
// Filter tiny fragments using character count
if (currentChunk && currentChunk.length >= this.minCharactersPerChunk) {
chunks.push({
text: currentChunk,
tokenCount: currentTokens,
metadata: {
startIndex,
endIndex: startIndex + currentChunk.length,
},
metadata: { startIndex, endIndex: startIndex + currentChunk.length },
})
}
return chunks
}
/**
* Static method for chunking JSON/YAML data with default options
*/
static async chunkJsonYaml(content: string, options: ChunkerOptions = {}): Promise<Chunk[]> {
const chunker = new JsonYamlChunker(options)
return chunker.chunk(content)

View File

@@ -0,0 +1,275 @@
/**
* @vitest-environment node
*/
import { loggerMock } from '@sim/testing'
import { describe, expect, it, vi } from 'vitest'
import { RecursiveChunker } from './recursive-chunker'
vi.mock('@sim/logger', () => loggerMock)
describe('RecursiveChunker', () => {
  type Options = ConstructorParameters<typeof RecursiveChunker>[0]

  /** Builds a chunker from `options` and runs it over `input`. */
  const chunkWith = (options: Options, input: string) => new RecursiveChunker(options).chunk(input)

  /** Asserts that `input` is broken into at least two chunks under `options`. */
  const expectMultipleChunks = async (options: Options, input: string) => {
    const result = await chunkWith(options, input)
    expect(result.length).toBeGreaterThan(1)
  }

  describe('empty and whitespace input', () => {
    it.concurrent('should return empty array for empty string', async () => {
      expect(await chunkWith({ chunkSize: 100 }, '')).toEqual([])
    })

    it.concurrent('should return empty array for whitespace-only input', async () => {
      expect(await chunkWith({ chunkSize: 100 }, ' \n\n\t ')).toEqual([])
    })
  })

  describe('small content', () => {
    it.concurrent('should return single chunk when content fits in one chunk', async () => {
      const input = 'This is a short text.'
      const result = await chunkWith({ chunkSize: 100 }, input)

      expect(result).toHaveLength(1)
      expect(result[0].text).toBe(input)
    })
  })

  describe('paragraph splitting', () => {
    it.concurrent('should split at paragraph boundaries first', () =>
      expectMultipleChunks(
        { chunkSize: 20 },
        'First paragraph with enough content to matter.\n\nSecond paragraph with enough content to matter.\n\nThird paragraph with enough content here.'
      )
    )
  })

  describe('line splitting fallback', () => {
    it.concurrent('should split at newlines when paragraphs are too large', () =>
      expectMultipleChunks(
        { chunkSize: 15 },
        'Line one with content here.\nLine two with content here.\nLine three with content here.\nLine four with content here.'
      )
    )
  })

  describe('sentence splitting fallback', () => {
    it.concurrent('should split at sentence boundaries when lines are too large', () =>
      expectMultipleChunks(
        { chunkSize: 10 },
        'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
      )
    )
  })

  describe('word splitting fallback', () => {
    it.concurrent('should split at spaces when sentences are too large', () =>
      expectMultipleChunks(
        { chunkSize: 5 },
        'word1 word2 word3 word4 word5 word6 word7 word8 word9 word10'
      )
    )
  })

  describe('keep_separator behavior', () => {
    it.concurrent('should prepend separator to subsequent chunks', async () => {
      const result = await chunkWith(
        { chunkSize: 15 },
        'First paragraph content here.\n\nSecond paragraph content here.\n\nThird paragraph content here.'
      )

      // NOTE(review): the second operand is implied by the first, so this only
      // verifies that the second chunk is non-empty.
      if (result.length > 1) {
        const second = result[1].text
        expect(second.startsWith('\n\n') || second.length > 0).toBe(true)
      }
    })
  })

  describe('custom separators', () => {
    it.concurrent('should use custom separators instead of default recipe', () =>
      expectMultipleChunks(
        { chunkSize: 15, separators: ['---', '\n'] },
        'Section one content here with words.---Section two content here with words.---Section three content here.'
      )
    )
  })

  describe('recipe: plain', () => {
    it.concurrent('should use plain recipe by default', () =>
      expectMultipleChunks(
        { chunkSize: 20 },
        'First paragraph with enough words to exceed the chunk size limit.\n\nSecond paragraph with enough words to exceed the chunk size limit.\n\nThird paragraph with enough words to exceed the chunk size limit.'
      )
    )
  })

  describe('recipe: markdown', () => {
    const markdownOptions: Options = { chunkSize: 20, recipe: 'markdown' }

    it.concurrent('should split at heading boundaries for markdown content', () =>
      expectMultipleChunks(
        markdownOptions,
        '\n# Title\n\nParagraph content under the title goes here.\n\n## Subtitle\n\nMore text content under the subtitle goes here.'
      )
    )

    it.concurrent('should handle markdown horizontal rules', async () => {
      const result = await chunkWith(
        markdownOptions,
        'Section one content here.\n---\nSection two content here.\n---\nSection three content here.'
      )
      expect(result.length).toBeGreaterThan(0)
    })
  })

  describe('recipe: code', () => {
    it.concurrent('should split on function and class boundaries', () => {
      const sourceLines = [
        'const x = 1;',
        'function hello() {',
        ' return "hello";',
        '}',
        'function world() {',
        ' return "world";',
        '}',
        'class MyClass {',
        ' constructor() {}',
        ' method() { return true; }',
        '}',
      ]
      return expectMultipleChunks({ chunkSize: 20, recipe: 'code' }, sourceLines.join('\n'))
    })
  })

  describe('chunk size respected', () => {
    it.concurrent('should not exceed chunk size in tokens', async () => {
      const chunkSize = 30
      const result = await chunkWith({ chunkSize }, 'This is a test sentence with content. '.repeat(30))

      // A small slack of 5 tokens is tolerated above the configured size.
      result.forEach((chunk) => {
        expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize + 5)
      })
    })
  })

  describe('overlap', () => {
    it.concurrent('should share text between consecutive chunks when overlap is set', async () => {
      const result = await chunkWith(
        { chunkSize: 20, chunkOverlap: 5 },
        'First paragraph with some content here.\n\nSecond paragraph with different content here.\n\nThird paragraph with more content here.'
      )

      if (result.length > 1) {
        expect(result[1].text.length).toBeGreaterThan(0)
      }
    })

    it.concurrent('should not add overlap when overlap is 0', async () => {
      const result = await chunkWith(
        { chunkSize: 20, chunkOverlap: 0 },
        'First sentence content here. Second sentence content here. Third sentence content here.'
      )

      if (result.length > 1) {
        const tailOfFirst = result[0].text.slice(-10)
        expect(result[1].text.startsWith(tailOfFirst)).toBe(false)
      }
    })
  })

  describe('chunk metadata', () => {
    it.concurrent('should include text, tokenCount, and metadata fields', async () => {
      const input = 'This is test content for metadata.'
      const result = await chunkWith({ chunkSize: 100 }, input)

      expect(result).toHaveLength(1)
      const [only] = result
      expect(only.text).toBe(input)
      // Token counts are expected to equal one token per four characters, rounded up.
      expect(only.tokenCount).toBe(Math.ceil(input.length / 4))
      expect(only.metadata.startIndex).toBeDefined()
      expect(only.metadata.endIndex).toBeDefined()
    })

    it.concurrent('should have startIndex of 0 for the first chunk', async () => {
      const result = await chunkWith({ chunkSize: 100 }, 'Some content here.')
      expect(result[0].metadata.startIndex).toBe(0)
    })

    it.concurrent('should have non-negative indices for all chunks', async () => {
      const result = await chunkWith(
        { chunkSize: 20, chunkOverlap: 5 },
        'First part. Second part. Third part. Fourth part. Fifth part.'
      )

      result.forEach(({ metadata }) => {
        expect(metadata.startIndex).toBeGreaterThanOrEqual(0)
        expect(metadata.endIndex).toBeGreaterThanOrEqual(metadata.startIndex)
      })
    })

    it.concurrent('should have endIndex greater than startIndex for non-empty chunks', async () => {
      const result = await chunkWith(
        { chunkSize: 20 },
        'Multiple sentences here. Another one here. And another. And more content.'
      )

      result.forEach(({ metadata }) => {
        expect(metadata.endIndex).toBeGreaterThan(metadata.startIndex)
      })
    })
  })

  describe('edge cases', () => {
    it.concurrent('should handle very long text', () =>
      expectMultipleChunks({ chunkSize: 100 }, 'This is a sentence. '.repeat(1000))
    )

    it.concurrent('should handle text with no natural separators', () =>
      expectMultipleChunks({ chunkSize: 5 }, 'abcdefghijklmnopqrstuvwxyz'.repeat(5))
    )

    it.concurrent('should handle unicode text', async () => {
      const result = await chunkWith({ chunkSize: 100 }, '这是中文测试。日本語テスト。한국어 테스트.')

      expect(result.length).toBeGreaterThan(0)
      expect(result[0].text).toContain('中文')
    })

    it.concurrent('should use default chunkSize of 1024 tokens', async () => {
      // 2000 characters of input with no explicit chunkSize must stay in one chunk.
      const result = await chunkWith({}, 'Word '.repeat(400))
      expect(result).toHaveLength(1)
    })
  })
})

View File

@@ -0,0 +1,145 @@
import { createLogger } from '@sim/logger'
import type { Chunk, RecursiveChunkerOptions } from '@/lib/chunkers/types'
import {
addOverlap,
buildChunks,
cleanText,
estimateTokens,
resolveChunkerOptions,
splitAtWordBoundaries,
tokensToChars,
} from '@/lib/chunkers/utils'
const logger = createLogger('RecursiveChunker')
/**
 * Separator lists keyed by recipe name.
 *
 * Each list is ordered from the coarsest boundary to the finest; the chunker
 * walks it front to back, moving to the next entry only when the current one
 * cannot split the text. The trailing empty string is a sentinel that hands
 * the text over to word-boundary splitting.
 */
const RECIPES = {
  plain: ['\n\n', '\n', '. ', ' ', ''],
  markdown: [
    // Horizontal rules on their own line: '\n---\n', '\n***\n', '\n___\n'.
    ...['---', '***', '___'].map((rule) => `\n${rule}\n`),
    // Headings of level 1 through 6: '\n# ' ... '\n###### '.
    ...[1, 2, 3, 4, 5, 6].map((level) => `\n${'#'.repeat(level)} `),
    '\n```\n',
    '\n> ',
    '\n\n',
    '\n',
    '. ',
    ' ',
    '',
  ],
  code: [
    // Statement keywords that begin a line, each followed by a single space.
    ...[
      'function',
      'class',
      'export',
      'const',
      'let',
      'var',
      'if',
      'for',
      'while',
      'switch',
      'return',
    ].map((keyword) => `\n${keyword} `),
    '\n\n',
    '\n',
    '; ',
    ' ',
    '',
  ],
} as const
/**
 * Chunker that splits text by trying a list of separators in order, falling
 * back to the next separator whenever a piece is still larger than the
 * configured chunk size.
 *
 * The separator list is either the caller's `separators` option (when it is a
 * non-empty array) or a copy of the recipe named by `recipe` (default `'plain'`).
 */
export class RecursiveChunker {
  private readonly chunkSize: number
  private readonly chunkOverlap: number
  private readonly separators: string[]

  constructor(options: RecursiveChunkerOptions = {}) {
    const { chunkSize, chunkOverlap } = resolveChunkerOptions(options)
    this.chunkSize = chunkSize
    this.chunkOverlap = chunkOverlap

    const custom = options.separators
    this.separators =
      custom && custom.length > 0 ? custom : [...RECIPES[options.recipe ?? 'plain']]
  }

  /** Last-resort split used once no separator is left to try. */
  private splitByWords(text: string): string[] {
    return splitAtWordBoundaries(text, tokensToChars(this.chunkSize))
  }

  /**
   * Splits `text` into pieces of at most `chunkSize` estimated tokens.
   *
   * @param text - Text to split.
   * @param separatorIndex - Position in the separator list to start from.
   * @returns The pieces in document order; blank input yields an empty array.
   */
  private splitRecursively(text: string, separatorIndex = 0): string[] {
    // Already small enough: keep it whole unless it is only whitespace.
    if (estimateTokens(text) <= this.chunkSize) {
      return text.trim() ? [text] : []
    }

    if (separatorIndex >= this.separators.length) {
      return this.splitByWords(text)
    }

    const separator = this.separators[separatorIndex]
    // The empty separator marks the end of the list.
    if (separator === '') {
      return this.splitByWords(text)
    }

    const pieces = text.split(separator).filter((piece) => piece.trim())
    if (pieces.length <= 1) {
      // This separator does not divide the text; move on to the next one.
      return this.splitRecursively(text, separatorIndex + 1)
    }

    const result: string[] = []
    let pending = ''
    const flushPending = (): void => {
      const trimmed = pending.trim()
      if (trimmed) {
        result.push(trimmed)
      }
    }

    for (const piece of pieces) {
      // The separator is re-inserted only between pieces that share a chunk.
      const candidate = pending ? pending + separator + piece : piece
      if (estimateTokens(candidate) <= this.chunkSize) {
        pending = candidate
        continue
      }

      flushPending()

      if (estimateTokens(piece) <= this.chunkSize) {
        pending = piece
        continue
      }

      // The piece alone is too large: break it down with the finer separators.
      for (const finer of this.splitRecursively(piece, separatorIndex + 1)) {
        result.push(finer)
      }
      pending = ''
    }

    flushPending()
    return result
  }

  /**
   * Chunks `content`.
   *
   * @param content - Raw text to chunk.
   * @returns The built chunks, or an empty array when `content` is empty or
   * whitespace-only.
   */
  async chunk(content: string): Promise<Chunk[]> {
    if (!content?.trim()) {
      return []
    }

    const pieces = this.splitRecursively(cleanText(content))
    const chunks =
      this.chunkOverlap > 0 ? addOverlap(pieces, tokensToChars(this.chunkOverlap)) : pieces

    logger.info(`Chunked into ${chunks.length} recursive chunks`)
    return buildChunks(chunks, this.chunkOverlap)
  }
}

View File

@@ -0,0 +1,189 @@
/**
* @vitest-environment node
*/
import { loggerMock } from '@sim/testing'
import { describe, expect, it, vi } from 'vitest'
import { RegexChunker } from './regex-chunker'
vi.mock('@sim/logger', () => loggerMock)
describe('RegexChunker', () => {
  describe('empty and whitespace input', () => {
    it.concurrent('should return empty array for empty string', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n' })
      const chunks = await chunker.chunk('')
      expect(chunks).toEqual([])
    })

    it.concurrent('should return empty array for whitespace-only input', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n' })
      const chunks = await chunker.chunk(' \n\n ')
      expect(chunks).toEqual([])
    })
  })

  describe('small content', () => {
    it.concurrent('should return single chunk when content fits in chunkSize', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
      const text = 'This is a short text.'
      const chunks = await chunker.chunk(text)

      expect(chunks).toHaveLength(1)
      expect(chunks[0].text).toBe(text)
    })
  })

  describe('basic regex splitting', () => {
    it.concurrent('should split on double newlines with pattern \\n\\n', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20 })
      const text =
        'First paragraph content here.\n\nSecond paragraph content here.\n\nThird paragraph content here.'
      const chunks = await chunker.chunk(text)

      expect(chunks.length).toBeGreaterThan(1)
    })
  })

  describe('custom pattern splitting', () => {
    it.concurrent('should split text at --- delimiters', async () => {
      const chunker = new RegexChunker({ pattern: '---', chunkSize: 20 })
      const text =
        'Section one has enough content to fill a chunk on its own here.---Section two also has enough content to fill another chunk here.---Section three needs content too for splitting.'
      const chunks = await chunker.chunk(text)

      expect(chunks.length).toBeGreaterThan(1)
    })
  })

  describe('segment merging', () => {
    it.concurrent('should merge small adjacent segments up to chunkSize', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
      const text = 'Short.\n\nAlso short.\n\nTiny.\n\nSmall too.'
      const chunks = await chunker.chunk(text)

      expect(chunks).toHaveLength(1)
      expect(chunks[0].text).toContain('Short.')
      expect(chunks[0].text).toContain('Also short.')
    })
  })

  describe('oversized segment fallback', () => {
    it.concurrent(
      'should sub-chunk segments larger than chunkSize via word boundaries',
      async () => {
        const chunker = new RegexChunker({ pattern: '---', chunkSize: 10 })
        const longSegment =
          'This is a very long segment with many words that exceeds the chunk size limit significantly. '
        const text = `${longSegment}---${longSegment}`
        const chunks = await chunker.chunk(text)

        // Two oversized segments must yield more than one chunk each.
        expect(chunks.length).toBeGreaterThan(2)
      }
    )
  })

  describe('no-match fallback', () => {
    it.concurrent(
      'should fall back to word-boundary splitting when regex matches nothing',
      async () => {
        const chunker = new RegexChunker({ pattern: '###SPLIT###', chunkSize: 10 })
        const text = 'This is a text with no matching delimiter anywhere in the content at all.'
        const chunks = await chunker.chunk(text)

        expect(chunks.length).toBeGreaterThan(1)
      }
    )
  })

  describe('chunk size respected', () => {
    it.concurrent('should not exceed chunkSize tokens approximately', async () => {
      const chunkSize = 30
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize })
      const text =
        'Paragraph one with some words. '.repeat(5) +
        '\n\n' +
        'Paragraph two with more words. '.repeat(5) +
        '\n\n' +
        'Paragraph three continues here. '.repeat(5)
      const chunks = await chunker.chunk(text)

      // A slack of 10 tokens is tolerated above the configured size.
      for (const chunk of chunks) {
        expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize + 10)
      }
    })
  })

  describe('overlap', () => {
    it.concurrent('should share content between chunks when chunkOverlap > 0', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20, chunkOverlap: 5 })
      const text =
        'First paragraph with enough content.\n\nSecond paragraph with more content.\n\nThird paragraph with even more.'
      const chunks = await chunker.chunk(text)

      // NOTE(review): this only checks that a second chunk, when produced, is
      // non-empty; it does not yet pin which text is carried over as overlap.
      if (chunks.length > 1) {
        expect(chunks[1].text.length).toBeGreaterThan(0)
      }
    })
  })

  describe('chunk metadata', () => {
    it.concurrent('should include text, tokenCount, and metadata with indices', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 100 })
      const text = 'Hello world test content.'
      const chunks = await chunker.chunk(text)

      expect(chunks).toHaveLength(1)
      expect(chunks[0].text).toBe(text)
      // Token counts are expected to equal one token per four characters, rounded up.
      expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
      expect(chunks[0].metadata.startIndex).toBeDefined()
      expect(chunks[0].metadata.endIndex).toBeDefined()
      expect(chunks[0].metadata.startIndex).toBe(0)
    })

    it.concurrent('should have non-negative indices across multiple chunks', async () => {
      const chunker = new RegexChunker({ pattern: '\\n\\n', chunkSize: 20, chunkOverlap: 0 })
      const text = 'First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph here.'
      const chunks = await chunker.chunk(text)

      for (const chunk of chunks) {
        expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
        expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
      }
    })
  })

  describe('invalid regex', () => {
    it.concurrent('should throw error for invalid regex pattern', async () => {
      // Match on the wrapper message so an unrelated exception cannot satisfy the test.
      expect(() => new RegexChunker({ pattern: '[invalid' })).toThrow('Invalid regex pattern')
    })
  })

  describe('empty pattern', () => {
    it.concurrent('should throw error for empty pattern', async () => {
      expect(() => new RegexChunker({ pattern: '' })).toThrow('Regex pattern is required')
    })
  })

  describe('pattern too long', () => {
    it.concurrent('should throw error for pattern exceeding 500 characters', async () => {
      const longPattern = 'a'.repeat(501)
      expect(() => new RegexChunker({ pattern: longPattern })).toThrow(
        'Regex pattern exceeds maximum length of 500 characters'
      )
    })
  })

  describe('ReDoS protection', () => {
    it.concurrent('should accept safe pattern \\n+', async () => {
      expect(() => new RegexChunker({ pattern: '\\n+' })).not.toThrow()
    })

    it.concurrent('should accept safe pattern [,;]', async () => {
      expect(() => new RegexChunker({ pattern: '[,;]' })).not.toThrow()
    })
  })
})

View File

@@ -0,0 +1,144 @@
import { createLogger } from '@sim/logger'
import type { Chunk, RegexChunkerOptions } from '@/lib/chunkers/types'
import {
addOverlap,
buildChunks,
cleanText,
estimateTokens,
resolveChunkerOptions,
splitAtWordBoundaries,
tokensToChars,
} from '@/lib/chunkers/utils'
const logger = createLogger('RegexChunker')
/** Longest pattern, in characters, that `RegexChunker` will compile. */
const MAX_PATTERN_LENGTH = 500

/**
 * Chunker that splits text on a caller-supplied regular expression and packs
 * the resulting segments into chunks of at most `chunkSize` estimated tokens.
 *
 * The pattern is validated once, in the constructor: it must be non-empty, no
 * longer than `MAX_PATTERN_LENGTH`, syntactically valid, and must match a set
 * of probe strings quickly.
 */
export class RegexChunker {
  /** Inputs used to time the compiled pattern before it is accepted. */
  private static readonly BACKTRACKING_PROBES: readonly string[] = [
    'a'.repeat(10000),
    ' '.repeat(10000),
    'a '.repeat(5000),
    'aB1 xY2\n'.repeat(1250),
    `${'a'.repeat(30)}!`,
    `${'a b '.repeat(25)}!`,
  ]

  /** A probe that takes longer than this many milliseconds rejects the pattern. */
  private static readonly MAX_PROBE_MS = 50

  private readonly chunkSize: number
  private readonly chunkOverlap: number
  private readonly regex: RegExp

  /**
   * @param options - Chunker options; `options.pattern` is the regex source.
   * @throws Error when the pattern is empty, too long, invalid, or too slow
   * on the probe inputs.
   */
  constructor(options: RegexChunkerOptions) {
    const resolved = resolveChunkerOptions(options)
    this.chunkSize = resolved.chunkSize
    this.chunkOverlap = resolved.chunkOverlap
    this.regex = this.compilePattern(options.pattern)
  }

  /**
   * Validates `pattern` and compiles it with the global flag.
   *
   * The timing probe is best-effort: each probe runs synchronously to
   * completion before its duration is measured, so it detects slow patterns
   * but cannot interrupt one.
   *
   * @param pattern - Regex source text.
   * @returns The compiled expression with `lastIndex` reset to 0.
   * @throws Error as described on the constructor.
   */
  private compilePattern(pattern: string): RegExp {
    if (!pattern) {
      throw new Error('Regex pattern is required')
    }

    if (pattern.length > MAX_PATTERN_LENGTH) {
      throw new Error(`Regex pattern exceeds maximum length of ${MAX_PATTERN_LENGTH} characters`)
    }

    let regex: RegExp
    let tooSlow: boolean
    try {
      const compiled = new RegExp(pattern, 'g')
      regex = compiled
      tooSlow = RegexChunker.BACKTRACKING_PROBES.some((probe) => {
        compiled.lastIndex = 0
        const startedAt = Date.now()
        compiled.test(probe)
        return Date.now() - startedAt > RegexChunker.MAX_PROBE_MS
      })
    } catch (error) {
      // Anything thrown while compiling or probing is reported as an invalid
      // pattern. The slow-pattern error is raised outside this block, so the
      // two cases are never told apart by inspecting message text.
      throw new Error(
        `Invalid regex pattern "${pattern}": ${error instanceof Error ? error.message : String(error)}`
      )
    }

    if (tooSlow) {
      throw new Error('Regex pattern appears to have catastrophic backtracking')
    }

    regex.lastIndex = 0
    return regex
  }

  /**
   * Chunks `content` by splitting on the configured pattern.
   *
   * Content that already fits in one chunk is returned whole. When the pattern
   * yields at most one non-blank segment, the text is split at word boundaries
   * instead.
   *
   * @param content - Raw text to chunk.
   * @returns The built chunks, or an empty array for empty or whitespace-only input.
   */
  async chunk(content: string): Promise<Chunk[]> {
    if (!content?.trim()) {
      return []
    }

    const cleaned = cleanText(content)

    if (estimateTokens(cleaned) <= this.chunkSize) {
      logger.info('Content fits in single chunk')
      return buildChunks([cleaned], 0)
    }

    this.regex.lastIndex = 0
    // When the pattern contains capture groups, `split` splices the captures
    // into its result, including `undefined` for groups that did not take part
    // in a match. Those entries must be dropped before calling `trim`.
    const segments = cleaned
      .split(this.regex)
      .filter((segment) => typeof segment === 'string' && segment.trim().length > 0)

    if (segments.length <= 1) {
      logger.warn(
        'Regex pattern did not produce any splits, falling back to word-boundary splitting'
      )
      const byWords = splitAtWordBoundaries(cleaned, tokensToChars(this.chunkSize))
      return buildChunks(this.applyOverlap(byWords), this.chunkOverlap)
    }

    const chunks = this.applyOverlap(this.mergeSegments(segments))

    logger.info(`Chunked into ${chunks.length} regex-based chunks`)
    return buildChunks(chunks, this.chunkOverlap)
  }

  /** Adds overlap between consecutive chunks when `chunkOverlap` is positive. */
  private applyOverlap(chunks: string[]): string[] {
    if (this.chunkOverlap <= 0) {
      return chunks
    }
    return addOverlap(chunks, tokensToChars(this.chunkOverlap))
  }

  /**
   * Packs adjacent segments into chunks of at most `chunkSize` estimated
   * tokens, joining segments that share a chunk with a single newline.
   * A segment that is too large on its own is split at word boundaries.
   *
   * @param segments - Non-blank segments in document order.
   * @returns Chunk texts in document order.
   */
  private mergeSegments(segments: string[]): string[] {
    const chunks: string[] = []
    let current = ''

    for (const segment of segments) {
      const candidate = current ? `${current}\n${segment}` : segment

      if (estimateTokens(candidate) <= this.chunkSize) {
        current = candidate
        continue
      }

      if (current.trim()) {
        chunks.push(current.trim())
      }

      if (estimateTokens(segment) > this.chunkSize) {
        const pieces = splitAtWordBoundaries(segment, tokensToChars(this.chunkSize))
        for (const piece of pieces) {
          chunks.push(piece)
        }
        current = ''
      } else {
        current = segment
      }
    }

    if (current.trim()) {
      chunks.push(current.trim())
    }

    return chunks
  }
}

View File

@@ -0,0 +1,286 @@
/**
* @vitest-environment node
*/
import { loggerMock } from '@sim/testing'
import { describe, expect, it, vi } from 'vitest'
import { SentenceChunker } from './sentence-chunker'
vi.mock('@sim/logger', () => loggerMock)
describe('SentenceChunker', () => {
// Inputs with no real content must produce no chunks.
describe('empty and whitespace input', () => {
  const makeChunker = () => new SentenceChunker({ chunkSize: 100 })

  it.concurrent('should return empty array for empty string', async () => {
    const result = await makeChunker().chunk('')
    expect(result).toEqual([])
  })

  it.concurrent('should return empty array for whitespace-only input', async () => {
    const result = await makeChunker().chunk(' \n\n\t ')
    expect(result).toEqual([])
  })

  it.concurrent('should return empty array for null-ish content', async () => {
    // Deliberately bypasses the type system to exercise the runtime guard.
    const result = await makeChunker().chunk(undefined as unknown as string)
    expect(result).toEqual([])
  })
})
// Content below the chunk size comes back untouched as one chunk.
describe('small content (single chunk)', () => {
  it.concurrent('should return single chunk when content fits within chunk size', async () => {
    const input = 'This is a short sentence. Another short one.'
    const expectedTokens = Math.ceil(input.length / 4)

    const result = await new SentenceChunker({ chunkSize: 100 }).chunk(input)

    expect(result).toHaveLength(1)
    expect(result[0].text).toBe(input)
    expect(result[0].tokenCount).toBe(expectedTokens)
  })
})
// Every chunk except the last must end on sentence-terminating punctuation.
describe('sentence boundary splitting', () => {
  it.concurrent('should split text at sentence boundaries', async () => {
    const input =
      'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'

    const result = await new SentenceChunker({ chunkSize: 20 }).chunk(input)

    expect(result.length).toBeGreaterThan(1)
    for (const piece of result.slice(0, -1)) {
      const body = piece.text.trim()
      const finalChar = body[body.length - 1]
      expect(['.', '!', '?']).toContain(finalChar)
    }
  })
})
describe('abbreviation handling', () => {
  it.concurrent('should not split at common abbreviations', async () => {
    const chunker = new SentenceChunker({ chunkSize: 200 })
    const text = 'Mr. Smith went to Washington. He arrived on Jan. 5th.'
    const chunks = await chunker.chunk(text)
    expect(chunks).toHaveLength(1)
    expect(chunks[0].text).toContain('Mr. Smith')
    expect(chunks[0].text).toContain('Jan. 5th')
  })

  it.concurrent('should not split at Dr., Mrs., Ms., Prof., Jr., Sr., St.', async () => {
    const chunker = new SentenceChunker({ chunkSize: 500 })
    const text =
      'Dr. Jones and Mrs. Brown met Prof. Davis at St. Mary hospital. Jr. members joined Sr. staff in Feb. for a review.'
    const chunks = await chunker.chunk(text)
    expect(chunks).toHaveLength(1)
  })

  // The two tests above use inputs that fit in a single chunk, so they would
  // pass even if abbreviations were treated as sentence ends. This case forces
  // a split: the only valid boundary is after "hospital.", so a break at any
  // abbreviation would change the chunk texts.
  it.concurrent(
    'should keep abbreviations intact when content spans multiple chunks',
    async () => {
      const first = 'Dr. Jones and Mrs. Brown met Prof. Davis at St. Mary hospital.'
      const second = 'Jr. members joined Sr. staff in Feb. for a review.'
      const chunker = new SentenceChunker({ chunkSize: 20, chunkOverlap: 0 })
      const chunks = await chunker.chunk(`${first} ${second}`)
      expect(chunks.map((c) => c.text)).toEqual([first, second])
    }
  )
})
// Initials such as "J." and "K." are part of a name, not sentence ends.
describe('single capital initial handling', () => {
  it.concurrent('should not split at single capital letter initials', async () => {
    const input = 'J. K. Rowling wrote books. They are popular.'

    const result = await new SentenceChunker({ chunkSize: 200 }).chunk(input)

    expect(result).toHaveLength(1)
    expect(result[0].text).toContain('J. K. Rowling')
  })
})
// The decimal "3.14" must survive chunking at both a small and a large chunk size.
describe('decimal handling', () => {
  it.concurrent('should not split at decimal numbers', async () => {
    const input = 'The value is 3.14. That is pi.'

    const small = await new SentenceChunker({ chunkSize: 20 }).chunk(input)
    const rejoined = small.map(({ text }) => text).join(' ')
    expect(rejoined).toContain('3.14')

    const large = await new SentenceChunker({ chunkSize: 200 }).chunk(input)
    expect(large).toHaveLength(1)
  })
})
// An ellipsis in the middle of the text stays attached to its sentence.
describe('ellipsis handling', () => {
  it.concurrent('should not split at ellipsis', async () => {
    const input = 'Wait for it... The answer is here. Done.'

    const result = await new SentenceChunker({ chunkSize: 200 }).chunk(input)

    expect(result).toHaveLength(1)
    expect(result[0].text).toContain('Wait for it...')
  })
})
// "?" and "!" terminate sentences and must be preserved in the output.
describe('exclamation and question marks', () => {
  it.concurrent('should split at exclamation and question marks', async () => {
    const input = 'What is this? It is great! I agree.'

    const result = await new SentenceChunker({ chunkSize: 10 }).chunk(input)
    const rejoined = result.map(({ text }) => text).join(' ')

    for (const sentence of ['What is this?', 'It is great!', 'I agree.']) {
      expect(rejoined).toContain(sentence)
    }
  })

  it.concurrent('should treat ? and ! as sentence boundaries', async () => {
    const input = 'What is this thing? It is really great! I strongly agree.'

    const result = await new SentenceChunker({ chunkSize: 15 }).chunk(input)
    expect(result.length).toBeGreaterThanOrEqual(1)

    const rejoined = result.map(({ text }) => text).join(' ')
    for (const mark of ['?', '!']) {
      expect(rejoined).toContain(mark)
    }
  })
})
// The minimum-sentences option takes priority over the token limit.
describe('minSentencesPerChunk', () => {
  it.concurrent('should group at least minSentencesPerChunk sentences per chunk', async () => {
    const input =
      'First sentence. Second sentence. Third sentence. Fourth sentence. Fifth sentence.'
    const grouper = new SentenceChunker({ chunkSize: 100, minSentencesPerChunk: 2 })

    const result = await grouper.chunk(input)

    expect(result.length).toBeGreaterThan(0)
    expect(result).toHaveLength(1)
  })

  it.concurrent('should enforce min sentences even when token limit is reached', async () => {
    const input = 'Short one. Another one. Third one here. Fourth one here.'
    const grouper = new SentenceChunker({ chunkSize: 6, minSentencesPerChunk: 2 })

    const result = await grouper.chunk(input)

    // Count the sentences in the first chunk by splitting after ., ! or ?.
    const sentencesInFirst = result[0].text
      .split(/(?<=[.!?])\s+/)
      .filter((s) => s.trim().length > 0)
    expect(sentencesInFirst.length).toBeGreaterThanOrEqual(2)
  })
})
// A sentence larger than the chunk size has to be broken up on its own.
describe('oversized sentence fallback', () => {
  // Fifty space-separated copies of "word" followed by a single period.
  const buildLongSentence = (): string => `${'word '.repeat(50).trim()}.`

  it.concurrent(
    'should chunk a single very long sentence via word-boundary splitting',
    async () => {
      const result = await new SentenceChunker({ chunkSize: 10 }).chunk(buildLongSentence())

      expect(result.length).toBeGreaterThan(1)
      const rejoined = result.map(({ text }) => text).join(' ')
      expect(rejoined).toContain('word')
    }
  )

  it.concurrent('should handle oversized sentence mixed with normal sentences', async () => {
    const input = `Short sentence. ${buildLongSentence()} Another short one.`

    const result = await new SentenceChunker({ chunkSize: 10 }).chunk(input)

    expect(result.length).toBeGreaterThan(2)
    const rejoined = result.map(({ text }) => text).join(' ')
    expect(rejoined).toContain('Short sentence.')
    expect(rejoined).toContain('Another short one.')
  })
})
describe('sentence-level overlap', () => {
  // Long enough to be split at chunkSize 15, so the assertions below never
  // pass vacuously on a single-chunk result.
  const text =
    'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'

  it.concurrent('should include overlap from previous chunk when chunkOverlap > 0', async () => {
    const plain = await new SentenceChunker({ chunkSize: 15, chunkOverlap: 0 }).chunk(text)
    const overlapped = await new SentenceChunker({ chunkSize: 15, chunkOverlap: 10 }).chunk(text)

    expect(plain.length).toBeGreaterThan(1)
    expect(overlapped).toHaveLength(plain.length)

    // The overlapped second chunk is the plain second chunk with a prefix
    // carried over from the end of the first chunk.
    const ownText = plain[1].text
    const withCarry = overlapped[1].text
    expect(withCarry.endsWith(ownText)).toBe(true)

    const carried = withCarry.slice(0, withCarry.length - ownText.length).trim()
    expect(carried.length).toBeGreaterThan(0)
    expect(plain[0].text.endsWith(carried)).toBe(true)
  })

  it.concurrent('should not add overlap when chunkOverlap is 0', async () => {
    const chunker = new SentenceChunker({ chunkSize: 15, chunkOverlap: 0 })
    const chunks = await chunker.chunk(text)

    expect(chunks.length).toBeGreaterThan(1)
    const chunk1End = chunks[0].text.slice(-20)
    expect(chunks[1].text.startsWith(chunk1End)).toBe(false)

    // With no overlap nothing is repeated, so re-joining the chunks gives back the input.
    expect(chunks.map((c) => c.text).join(' ')).toBe(text)
  })
})
// Shape and basic sanity of the chunk objects returned by the chunker.
describe('chunk metadata', () => {
  it.concurrent('should include text, tokenCount, and metadata in each chunk', async () => {
    const input = 'This is a test sentence. Another sentence follows.'

    const result = await new SentenceChunker({ chunkSize: 100 }).chunk(input)

    expect(result).toHaveLength(1)
    for (const key of ['text', 'tokenCount', 'metadata']) {
      expect(result[0]).toHaveProperty(key)
    }
    for (const key of ['startIndex', 'endIndex']) {
      expect(result[0].metadata).toHaveProperty(key)
    }
  })

  it.concurrent('should have startIndex of 0 for the first chunk', async () => {
    const input = 'First sentence. Second sentence. Third sentence.'

    const result = await new SentenceChunker({ chunkSize: 10 }).chunk(input)

    expect(result[0].metadata.startIndex).toBe(0)
  })

  it.concurrent('should have non-negative indices for all chunks', async () => {
    const input =
      'First sentence here. Second sentence here. Third sentence here. Fourth sentence.'

    const result = await new SentenceChunker({ chunkSize: 10, chunkOverlap: 5 }).chunk(input)

    for (const { metadata } of result) {
      expect(metadata.startIndex).toBeGreaterThanOrEqual(0)
      expect(metadata.endIndex).toBeGreaterThanOrEqual(metadata.startIndex)
    }
  })

  it.concurrent('should have correct tokenCount based on text length', async () => {
    const input = 'Hello world test.'
    const expectedTokens = Math.ceil(input.length / 4)

    const result = await new SentenceChunker({ chunkSize: 100 }).chunk(input)

    expect(result[0].tokenCount).toBe(expectedTokens)
  })
})
// Chunk size drives both the size of each chunk and how many there are.
describe('respects chunk size', () => {
  it.concurrent('should produce chunks within approximate token limit', async () => {
    const limit = 20
    const input =
      'This is the first sentence. Here is the second one. And the third sentence follows. Then comes the fourth. Finally the fifth sentence.'

    const result = await new SentenceChunker({ chunkSize: limit }).chunk(input)

    expect(result.length).toBeGreaterThan(1)
    // The bound is deliberately loose: twice the configured size.
    for (const { tokenCount } of result) {
      expect(tokenCount).toBeLessThanOrEqual(limit * 2)
    }
  })

  it.concurrent('should create more chunks with smaller chunk size', async () => {
    const input =
      'Sentence number one. Sentence number two. Sentence number three. Sentence number four. Sentence number five. Sentence number six.'
    const coarse = new SentenceChunker({ chunkSize: 200 })
    const fine = new SentenceChunker({ chunkSize: 10 })

    const coarseResult = await coarse.chunk(input)
    const fineResult = await fine.chunk(input)

    expect(fineResult.length).toBeGreaterThan(coarseResult.length)
  })
})
})

View File

@@ -0,0 +1,141 @@
import { createLogger } from '@sim/logger'
import type { Chunk, SentenceChunkerOptions } from '@/lib/chunkers/types'
import {
buildChunks,
cleanText,
estimateTokens,
resolveChunkerOptions,
splitAtWordBoundaries,
tokensToChars,
} from '@/lib/chunkers/utils'
const logger = createLogger('SentenceChunker')
/** Never splits mid-sentence unless a single sentence exceeds the limit. */
/**
 * Groups whole sentences into chunks of roughly `chunkSize` estimated tokens.
 * Never splits mid-sentence unless a single sentence exceeds the limit.
 */
export class SentenceChunker {
  /**
   * Matches the whitespace after `.`, `!` or `?`, except where the punctuation
   * follows a listed abbreviation, a single capital letter, another period
   * (ellipsis), or a digit. Compiled once and shared by all instances.
   */
  private static readonly SENTENCE_BOUNDARY =
    /(?<!\b(?:Mr|Mrs|Ms|Dr|Prof|Sr|Jr|St|Rev|Gen|Sgt|Capt|Lt|Col|Maj|No|Fig|Vol|Ch|vs|etc|Inc|Ltd|Corp|Co|approx|dept|est|govt|Ave|Blvd|Rd|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec|i\.e|e\.g)\.)(?<![A-Z]\.)(?<!\.\.)(?<!\d\.)(?<=[.!?])\s+/

  private readonly chunkSize: number
  private readonly chunkOverlap: number
  private readonly minSentencesPerChunk: number

  /**
   * @param options - Chunker options. `minSentencesPerChunk` defaults to 1;
   *   NaN or anything below 1 is treated as 1.
   */
  constructor(options: SentenceChunkerOptions = {}) {
    const resolved = resolveChunkerOptions(options)
    this.chunkSize = resolved.chunkSize
    this.chunkOverlap = resolved.chunkOverlap
    // A NaN minimum would make `length >= min` always false and disable
    // size-based splitting entirely, so fall back to 1 in that case.
    const requestedMin = options.minSentencesPerChunk ?? 1
    this.minSentencesPerChunk = requestedMin >= 1 ? requestedMin : 1
  }

  /** Splits on sentence boundaries while avoiding abbreviations, decimals, and ellipses. */
  private splitSentences(text: string): string[] {
    return text
      .split(SentenceChunker.SENTENCE_BOUNDARY)
      .filter((s) => s.trim().length > 0)
  }

  /**
   * Splits `content` into sentence-aligned chunks.
   *
   * Blank input yields an empty array, and content whose token estimate fits in
   * `chunkSize` is returned as one chunk. Otherwise sentences are accumulated
   * until adding the next one would exceed `chunkSize` and the group already
   * holds `minSentencesPerChunk` sentences. A sentence that is too large on its
   * own is split at word boundaries, one chunk per part.
   *
   * @param content - Raw text to chunk.
   * @returns The chunks produced by `buildChunks`.
   */
  async chunk(content: string): Promise<Chunk[]> {
    if (!content?.trim()) {
      return []
    }

    const cleaned = cleanText(content)
    if (!cleaned.trim()) {
      return []
    }

    // Checked before sentence splitting so small inputs skip the regex pass.
    if (estimateTokens(cleaned) <= this.chunkSize) {
      logger.info('Content fits in single chunk')
      return buildChunks([cleaned], 0)
    }

    const sentences = this.splitSentences(cleaned)
    const chunkSizeChars = tokensToChars(this.chunkSize)
    const chunkSentenceGroups: string[][] = []
    let currentGroup: string[] = []
    let currentTokens = 0

    for (const sentence of sentences) {
      const sentenceTokens = estimateTokens(sentence)

      if (sentenceTokens > this.chunkSize) {
        // Flush the pending group so the oversized sentence keeps its position.
        if (currentGroup.length > 0) {
          chunkSentenceGroups.push(currentGroup)
          currentGroup = []
          currentTokens = 0
        }
        for (const part of splitAtWordBoundaries(sentence, chunkSizeChars)) {
          chunkSentenceGroups.push([part])
        }
        continue
      }

      const wouldExceed = currentTokens + sentenceTokens > this.chunkSize
      const hasMinSentences = currentGroup.length >= this.minSentencesPerChunk

      if (wouldExceed && hasMinSentences) {
        chunkSentenceGroups.push(currentGroup)
        currentGroup = [sentence]
        currentTokens = sentenceTokens
      } else {
        currentGroup.push(sentence)
        currentTokens += sentenceTokens
      }
    }

    if (currentGroup.length > 0) {
      chunkSentenceGroups.push(currentGroup)
    }

    const rawChunks = this.applyOverlapFromGroups(chunkSentenceGroups)

    logger.info(`Chunked into ${rawChunks.length} sentence-based chunks`)
    return buildChunks(rawChunks, this.chunkOverlap)
  }

  /**
   * Applies overlap at the sentence level using original groups to avoid re-splitting.
   *
   * Each chunk after the first is prefixed with as many trailing sentences of
   * the previous group as fit within the overlap budget (in characters). If
   * not even one sentence fits, the trailing characters of the previous group
   * are used instead.
   *
   * @param groups - Sentence groups in document order, one per chunk.
   * @returns One string per group, with sentences joined by a space.
   */
  private applyOverlapFromGroups(groups: string[][]): string[] {
    if (this.chunkOverlap <= 0 || groups.length <= 1) {
      return groups.map((g) => g.join(' '))
    }

    const overlapChars = tokensToChars(this.chunkOverlap)
    const result: string[] = []

    for (let i = 0; i < groups.length; i++) {
      const currentText = groups[i].join(' ')
      if (i === 0) {
        result.push(currentText)
        continue
      }

      const prevGroup = groups[i - 1]
      const overlapSentences: string[] = []
      let overlapLen = 0

      // Walk backwards so the sentences closest to the boundary are kept.
      for (let j = prevGroup.length - 1; j >= 0; j--) {
        if (overlapLen + prevGroup[j].length > overlapChars) break
        overlapSentences.unshift(prevGroup[j])
        overlapLen += prevGroup[j].length
      }

      if (overlapSentences.length > 0) {
        result.push(`${overlapSentences.join(' ')} ${currentText}`)
        continue
      }

      // No complete sentence fits — fall back to character-level overlap
      const tail = prevGroup.join(' ').slice(-overlapChars).trim()
      result.push(tail ? `${tail} ${currentText}` : currentText)
    }

    return result
  }
}

View File

@@ -11,19 +11,16 @@ vi.mock('@sim/logger', () => loggerMock)
describe('StructuredDataChunker', () => {
describe('isStructuredData', () => {
it('should detect CSV content with many columns', () => {
// Detection requires >2 delimiters per line on average
const csv = 'name,age,city,country\nAlice,30,NYC,USA\nBob,25,LA,USA'
expect(StructuredDataChunker.isStructuredData(csv)).toBe(true)
})
it('should detect TSV content with many columns', () => {
// Detection requires >2 delimiters per line on average
const tsv = 'name\tage\tcity\tcountry\nAlice\t30\tNYC\tUSA\nBob\t25\tLA\tUSA'
expect(StructuredDataChunker.isStructuredData(tsv)).toBe(true)
})
it('should detect pipe-delimited content with many columns', () => {
// Detection requires >2 delimiters per line on average
const piped = 'name|age|city|country\nAlice|30|NYC|USA\nBob|25|LA|USA'
expect(StructuredDataChunker.isStructuredData(piped)).toBe(true)
})
@@ -64,7 +61,6 @@ describe('StructuredDataChunker', () => {
it('should handle inconsistent delimiter counts', () => {
const inconsistent = 'name,age\nAlice,30,extra\nBob'
// May or may not detect as structured depending on variance threshold
const result = StructuredDataChunker.isStructuredData(inconsistent)
expect(typeof result).toBe('boolean')
})
@@ -100,7 +96,7 @@ Bob,25`
const chunks = await StructuredDataChunker.chunkStructuredData(csv)
expect(chunks.length).toBeGreaterThan(0)
expect(chunks[0].text).toContain('Rows')
expect(chunks[0].text).toContain('rows of data')
})
it.concurrent('should include sheet name when provided', async () => {
@@ -184,7 +180,6 @@ Alice,30`
const csv = 'name,age,city'
const chunks = await StructuredDataChunker.chunkStructuredData(csv)
// Only header, no data rows
expect(chunks.length).toBeGreaterThanOrEqual(0)
})
@@ -271,9 +266,8 @@ Alice,30`
const chunks = await StructuredDataChunker.chunkStructuredData(csv, { chunkSize: 500 })
expect(chunks.length).toBeGreaterThan(1)
// Verify total rows are distributed across chunks
const totalRowCount = chunks.reduce((sum, chunk) => {
const match = chunk.text.match(/\[Rows (\d+) of data\]/)
const match = chunk.text.match(/\[(\d+) rows of data\]/)
return sum + (match ? Number.parseInt(match[1]) : 0)
}, 0)
expect(totalRowCount).toBeGreaterThan(0)
@@ -319,9 +313,7 @@ Alice,30`
it.concurrent('should not detect with fewer than 3 delimiters per line', async () => {
const sparse = `a,b
1,2`
// Only 1 comma per line, below threshold of >2
const result = StructuredDataChunker.isStructuredData(sparse)
// May or may not pass depending on implementation threshold
expect(typeof result).toBe('boolean')
})
})
@@ -337,7 +329,6 @@ Alice,30`
const chunks = await StructuredDataChunker.chunkStructuredData(csv, { chunkSize: 200 })
expect(chunks.length).toBeGreaterThan(1)
// Each chunk should contain header info
for (const chunk of chunks) {
expect(chunk.text).toContain('Headers:')
}

View File

@@ -1,37 +1,22 @@
import { createLogger } from '@sim/logger'
import type { Chunk, StructuredDataOptions } from '@/lib/chunkers/types'
const logger = createLogger('StructuredDataChunker')
/**
* Default configuration for structured data chunking (CSV, XLSX, etc.)
* These are used when user doesn't provide preferences
*/
const DEFAULT_CONFIG = {
// Target chunk size in tokens
TARGET_CHUNK_SIZE: 1024,
MIN_CHUNK_SIZE: 100,
MAX_CHUNK_SIZE: 4000,
// For spreadsheets, group rows together
ROWS_PER_CHUNK: 100,
MIN_ROWS_PER_CHUNK: 20,
MAX_ROWS_PER_CHUNK: 500,
// For better embeddings quality
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
MAX_HEADER_SIZE: 200, // tokens
/**
 * Estimates the token count of structured text at about 3 characters per
 * token (denser than the ~4 characters per token used for prose).
 * Missing or whitespace-only text counts as 0 tokens.
 */
function estimateStructuredTokens(text: string): number {
  const isBlank = !text || text.trim().length === 0
  return isBlank ? 0 : Math.ceil(text.length / 3)
}
/**
* Smart chunker for structured data (CSV, XLSX) that preserves semantic meaning
* Preserves headers in each chunk for better semantic context
*/
const logger = createLogger('StructuredDataChunker')
const DEFAULT_CONFIG = {
TARGET_CHUNK_SIZE: 1024,
MIN_ROWS_PER_CHUNK: 5,
MAX_ROWS_PER_CHUNK: 500,
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
} as const
export class StructuredDataChunker {
/**
* Chunk structured data intelligently based on rows and semantic boundaries
* Respects user's chunkSize preference when provided
*/
static async chunkStructuredData(
content: string,
options: StructuredDataOptions = {}
@@ -43,15 +28,12 @@ export class StructuredDataChunker {
return chunks
}
// Use user's chunk size or fall back to default
const targetChunkSize = options.chunkSize ?? DEFAULT_CONFIG.TARGET_CHUNK_SIZE
// Detect headers (first line or provided)
const headerLine = options.headers?.join('\t') || lines[0]
const dataStartIndex = options.headers ? 0 : 1
// Calculate optimal rows per chunk based on content and user's target size
const estimatedTokensPerRow = StructuredDataChunker.estimateTokensPerRow(
const estimatedTokensPerRow = StructuredDataChunker.estimateStructuredTokensPerRow(
lines.slice(dataStartIndex, Math.min(10, lines.length))
)
const optimalRowsPerChunk = StructuredDataChunker.calculateOptimalRowsPerChunk(
@@ -65,14 +47,13 @@ export class StructuredDataChunker {
let currentChunkRows: string[] = []
let currentTokenEstimate = 0
const headerTokens = StructuredDataChunker.estimateTokens(headerLine)
const headerTokens = estimateStructuredTokens(headerLine)
let chunkStartRow = dataStartIndex
for (let i = dataStartIndex; i < lines.length; i++) {
const row = lines[i]
const rowTokens = StructuredDataChunker.estimateTokens(row)
const rowTokens = estimateStructuredTokens(row)
// Check if adding this row would exceed our target
const projectedTokens =
currentTokenEstimate +
rowTokens +
@@ -84,7 +65,6 @@ export class StructuredDataChunker {
currentChunkRows.length >= optimalRowsPerChunk
if (shouldCreateChunk && currentChunkRows.length > 0) {
// Create chunk with current rows
const chunkContent = StructuredDataChunker.formatChunk(
headerLine,
currentChunkRows,
@@ -92,7 +72,6 @@ export class StructuredDataChunker {
)
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, i - 1))
// Reset for next chunk
currentChunkRows = []
currentTokenEstimate = 0
chunkStartRow = i
@@ -102,7 +81,6 @@ export class StructuredDataChunker {
currentTokenEstimate += rowTokens
}
// Add remaining rows as final chunk
if (currentChunkRows.length > 0) {
const chunkContent = StructuredDataChunker.formatChunk(
headerLine,
@@ -117,41 +95,28 @@ export class StructuredDataChunker {
return chunks
}
/**
* Format a chunk with headers and context
*/
private static formatChunk(headerLine: string, rows: string[], sheetName?: string): string {
let content = ''
// Add sheet name context if available
if (sheetName) {
content += `=== ${sheetName} ===\n\n`
}
// Add headers for context
if (DEFAULT_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK) {
content += `Headers: ${headerLine}\n`
content += `${'-'.repeat(Math.min(80, headerLine.length))}\n`
}
// Add data rows
content += rows.join('\n')
// Add row count for context
content += `\n\n[Rows ${rows.length} of data]`
content += `\n\n[${rows.length} rows of data]`
return content
}
/**
* Create a chunk object with actual row indices
*/
private static createChunk(content: string, startRow: number, endRow: number): Chunk {
const tokenCount = StructuredDataChunker.estimateTokens(content)
return {
text: content,
tokenCount,
tokenCount: estimateStructuredTokens(content),
metadata: {
startIndex: startRow,
endIndex: endRow,
@@ -159,30 +124,13 @@ export class StructuredDataChunker {
}
}
/**
* Estimate tokens in text (rough approximation)
* For structured data with numbers, uses 1 token per 3 characters
*/
private static estimateTokens(text: string): number {
return Math.ceil(text.length / 3)
}
private static estimateStructuredTokensPerRow(sampleRows: string[]): number {
if (sampleRows.length === 0) return 50
/**
* Estimate average tokens per row from sample
*/
private static estimateTokensPerRow(sampleRows: string[]): number {
if (sampleRows.length === 0) return 50 // default estimate
const totalTokens = sampleRows.reduce(
(sum, row) => sum + StructuredDataChunker.estimateTokens(row),
0
)
const totalTokens = sampleRows.reduce((sum, row) => sum + estimateStructuredTokens(row), 0)
return Math.ceil(totalTokens / sampleRows.length)
}
/**
* Calculate optimal rows per chunk based on token estimates and target size
*/
private static calculateOptimalRowsPerChunk(
tokensPerRow: number,
targetChunkSize: number
@@ -195,11 +143,7 @@ export class StructuredDataChunker {
)
}
/**
* Check if content appears to be structured data
*/
static isStructuredData(content: string, mimeType?: string): boolean {
// Check mime type first
if (mimeType) {
const structuredMimeTypes = [
'text/csv',
@@ -212,20 +156,17 @@ export class StructuredDataChunker {
}
}
// Check content structure
const lines = content.split('\n').slice(0, 10) // Check first 10 lines
const lines = content.split('\n').slice(0, 10)
if (lines.length < 2) return false
// Check for consistent delimiters (comma, tab, pipe)
const delimiters = [',', '\t', '|']
for (const delimiter of delimiters) {
const counts = lines.map(
(line) => (line.match(new RegExp(`\\${delimiter}`, 'g')) || []).length
)
const escaped = delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
const counts = lines.map((line) => (line.match(new RegExp(escaped, 'g')) || []).length)
const avgCount = counts.reduce((a, b) => a + b, 0) / counts.length
// If most lines have similar delimiter counts, it's likely structured
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= 2)) {
const tolerance = Math.max(1, Math.ceil(avgCount * 0.2))
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= tolerance)) {
return true
}
}

View File

@@ -30,7 +30,7 @@ describe('TextChunker', () => {
it.concurrent('should include token count in chunk metadata', async () => {
const chunker = new TextChunker({ chunkSize: 100 })
const text = 'Hello world' // ~3 tokens (11 chars / 4)
const text = 'Hello world'
const chunks = await chunker.chunk(text)
expect(chunks[0].tokenCount).toBe(3)
@@ -201,7 +201,6 @@ describe('TextChunker', () => {
it.concurrent('should use default minCharactersPerChunk of 100', async () => {
const chunker = new TextChunker({ chunkSize: 10 })
// Text with 150+ characters to ensure chunks pass the 100 character minimum
const text = 'This is a longer sentence with more content. '.repeat(5)
const chunks = await chunker.chunk(text)
@@ -266,7 +265,6 @@ describe('TextChunker', () => {
describe('boundary conditions', () => {
it.concurrent('should handle text exactly at chunk size boundary', async () => {
const chunker = new TextChunker({ chunkSize: 10 })
// 40 characters = 10 tokens exactly
const text = 'A'.repeat(40)
const chunks = await chunker.chunk(text)
@@ -276,7 +274,6 @@ describe('TextChunker', () => {
it.concurrent('should handle text one token over chunk size', async () => {
const chunker = new TextChunker({ chunkSize: 10 })
// 44 characters = 11 tokens, just over limit
const text = 'A'.repeat(44)
const chunks = await chunker.chunk(text)
@@ -300,7 +297,6 @@ describe('TextChunker', () => {
})
it.concurrent('should clamp overlap to max 50% of chunk size', async () => {
// Overlap of 60 should be clamped to 10 (50% of chunkSize 20)
const chunker = new TextChunker({ chunkSize: 20, chunkOverlap: 60 })
const text = 'First paragraph here.\n\nSecond paragraph here.\n\nThird paragraph here.'
const chunks = await chunker.chunk(text)
@@ -359,7 +355,6 @@ describe('TextChunker', () => {
it.concurrent('should handle combining diacritics', async () => {
const chunker = new TextChunker({ chunkSize: 100 })
// e + combining acute accent
const text = 'cafe\u0301 resume\u0301 naive\u0308'
const chunks = await chunker.chunk(text)
@@ -368,7 +363,6 @@ describe('TextChunker', () => {
it.concurrent('should handle zero-width characters', async () => {
const chunker = new TextChunker({ chunkSize: 100 })
// Zero-width space, zero-width non-joiner, zero-width joiner
const text = 'Hello\u200B\u200C\u200DWorld'
const chunks = await chunker.chunk(text)
@@ -391,14 +385,12 @@ describe('TextChunker', () => {
const chunks = await chunker.chunk(text)
expect(chunks.length).toBeGreaterThan(1)
// Verify all content is preserved
const totalChars = chunks.reduce((sum, c) => sum + c.text.length, 0)
expect(totalChars).toBeGreaterThan(0)
})
it.concurrent('should handle 1MB of text', async () => {
const chunker = new TextChunker({ chunkSize: 500 })
// 1MB of text
const text = 'Lorem ipsum dolor sit amet. '.repeat(40000)
const chunks = await chunker.chunk(text)
@@ -407,7 +399,6 @@ describe('TextChunker', () => {
it.concurrent('should handle very long single line', async () => {
const chunker = new TextChunker({ chunkSize: 50 })
// Single line with no natural break points
const text = 'Word'.repeat(10000)
const chunks = await chunker.chunk(text)

View File

@@ -1,99 +1,61 @@
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
import {
addOverlap,
buildChunks,
cleanText,
estimateTokens,
resolveChunkerOptions,
splitAtWordBoundaries,
tokensToChars,
} from '@/lib/chunkers/utils'
/**
* Lightweight text chunker optimized for RAG applications
* Uses hierarchical splitting with simple character-based token estimation
*
* Parameters:
* - chunkSize: Maximum chunk size in TOKENS (default: 1024)
* - chunkOverlap: Overlap between chunks in TOKENS (default: 0)
* - minCharactersPerChunk: Minimum characters to keep a chunk (default: 100)
*/
export class TextChunker {
private readonly chunkSize: number // Max chunk size in tokens
private readonly chunkOverlap: number // Overlap in tokens
private readonly minCharactersPerChunk: number // Min characters per chunk
private readonly chunkSize: number
private readonly chunkOverlap: number
// Hierarchical separators ordered from largest to smallest semantic units
private readonly separators = [
'\n\n\n', // Document sections
'\n---\n', // Markdown horizontal rules
'\n***\n', // Markdown horizontal rules (alternative)
'\n___\n', // Markdown horizontal rules (alternative)
'\n# ', // Markdown H1 headings
'\n## ', // Markdown H2 headings
'\n### ', // Markdown H3 headings
'\n#### ', // Markdown H4 headings
'\n##### ', // Markdown H5 headings
'\n###### ', // Markdown H6 headings
'\n\n', // Paragraphs
'\n', // Lines
'. ', // Sentences
'! ', // Exclamations
'? ', // Questions
'; ', // Semicolons
', ', // Commas
' ', // Words
'\n---\n',
'\n***\n',
'\n___\n',
'\n# ',
'\n## ',
'\n### ',
'\n#### ',
'\n##### ',
'\n###### ',
'\n\n',
'\n',
'. ',
'! ',
'? ',
'; ',
', ',
' ',
]
constructor(options: ChunkerOptions = {}) {
this.chunkSize = options.chunkSize ?? 1024
// Clamp overlap to prevent exceeding chunk size (max 50% of chunk size)
const maxOverlap = Math.floor(this.chunkSize * 0.5)
this.chunkOverlap = Math.min(options.chunkOverlap ?? 0, maxOverlap)
this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100
const resolved = resolveChunkerOptions(options)
this.chunkSize = resolved.chunkSize
this.chunkOverlap = resolved.chunkOverlap
}
/**
* Simple token estimation using character count
* 1 token ≈ 4 characters for English text
*/
private estimateTokens(text: string): number {
if (!text?.trim()) return 0
return Math.ceil(text.length / 4)
}
private splitRecursively(text: string, separatorIndex = 0): string[] {
const tokenCount = estimateTokens(text)
/**
* Convert tokens to approximate character count
*/
private tokensToChars(tokens: number): number {
return tokens * 4
}
/**
* Split text recursively using hierarchical separators
*/
private async splitRecursively(text: string, separatorIndex = 0): Promise<string[]> {
const tokenCount = this.estimateTokens(text)
// If chunk is small enough (within max token limit), return it
// Keep chunks even if below minCharactersPerChunk to avoid data loss
if (tokenCount <= this.chunkSize) {
// Only filter out empty/whitespace-only text, not small chunks
return text.trim() ? [text] : []
}
// If we've run out of separators, force split by character count
if (separatorIndex >= this.separators.length) {
const chunks: string[] = []
const targetLength = Math.ceil((text.length * this.chunkSize) / tokenCount)
for (let i = 0; i < text.length; i += targetLength) {
const chunk = text.slice(i, i + targetLength).trim()
// Keep all non-empty chunks to avoid data loss
if (chunk) {
chunks.push(chunk)
}
}
return chunks
const chunkSizeChars = tokensToChars(this.chunkSize)
return splitAtWordBoundaries(text, chunkSizeChars)
}
const separator = this.separators[separatorIndex]
const parts = text.split(separator).filter((part) => part.trim())
// If no split occurred, try next separator
if (parts.length <= 1) {
return await this.splitRecursively(text, separatorIndex + 1)
return this.splitRecursively(text, separatorIndex + 1)
}
const chunks: string[] = []
@@ -102,17 +64,15 @@ export class TextChunker {
for (const part of parts) {
const testChunk = currentChunk + (currentChunk ? separator : '') + part
if (this.estimateTokens(testChunk) <= this.chunkSize) {
if (estimateTokens(testChunk) <= this.chunkSize) {
currentChunk = testChunk
} else {
// Save current chunk - keep even if below minCharactersPerChunk to avoid data loss
if (currentChunk.trim()) {
chunks.push(currentChunk.trim())
}
// If part itself is too large, split it further
if (this.estimateTokens(part) > this.chunkSize) {
const subChunks = await this.splitRecursively(part, separatorIndex + 1)
if (estimateTokens(part) > this.chunkSize) {
const subChunks = this.splitRecursively(part, separatorIndex + 1)
for (const subChunk of subChunks) {
chunks.push(subChunk)
}
@@ -123,7 +83,6 @@ export class TextChunker {
}
}
// Add final chunk if it exists - keep even if below minCharactersPerChunk to avoid data loss
if (currentChunk.trim()) {
chunks.push(currentChunk.trim())
}
@@ -131,111 +90,19 @@ export class TextChunker {
return chunks
}
/**
 * Prefix every chunk except the first with the tail of the chunk before it.
 *
 * The overlap is configured in tokens and converted to characters here. The tail
 * is always cut from the ORIGINAL previous chunk (never from an already-prefixed
 * one), trimmed, and joined to the current chunk with a single space.
 * Trimming is the only clean-up applied, so the tail may begin in the middle of a word.
 *
 * @param chunks - chunk texts in document order
 * @returns the input array itself when overlap is disabled or there are fewer
 *          than two chunks; otherwise a new array of the same length
 */
private addOverlap(chunks: string[]): string[] {
  const overlapDisabled = this.chunkOverlap <= 0
  if (overlapDisabled || chunks.length <= 1) {
    return chunks
  }

  // Token overlap expressed as a character budget
  const maxTailChars = this.tokensToChars(this.chunkOverlap)

  return chunks.map((current, position) => {
    if (position === 0) {
      return current
    }
    const previous = chunks[position - 1]
    // Never ask for more characters than the previous chunk holds
    const tailLength = Math.min(maxTailChars, previous.length)
    const tail = previous.slice(-tailLength).trim()
    // A whitespace-only tail contributes nothing, so the chunk is left untouched
    return tail ? `${tail} ${current}` : current
  })
}
/**
 * Normalise whitespace before chunking.
 *
 * Applied in order: CRLF and lone CR become LF, runs of three or more newlines
 * collapse to two, every tab becomes a space, runs of spaces collapse to one,
 * and finally the result is trimmed.
 */
private cleanText(text: string): string {
  const unixNewlines = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n')
  const compactParagraphs = unixNewlines.replace(/\n{3,}/g, '\n\n')
  const withoutTabs = compactParagraphs.replace(/\t/g, ' ')
  const singleSpaced = withoutTabs.replace(/ {2,}/g, ' ')
  return singleSpaced.trim()
}
/**
* Main chunking method
*/
async chunk(text: string): Promise<Chunk[]> {
if (!text?.trim()) {
return []
}
// Clean the text
const cleanedText = this.cleanText(text)
const cleaned = cleanText(text)
let chunks = this.splitRecursively(cleaned)
// Split into chunks
let chunks = await this.splitRecursively(cleanedText)
if (this.chunkOverlap > 0) {
const overlapChars = tokensToChars(this.chunkOverlap)
chunks = addOverlap(chunks, overlapChars)
}
// Add overlap if configured
chunks = this.addOverlap(chunks)
// Convert to Chunk objects with metadata
let previousEndIndex = 0
const chunkPromises = chunks.map(async (chunkText, index) => {
let startIndex: number
let actualContentLength: number
if (index === 0 || this.chunkOverlap <= 0) {
// First chunk or no overlap - start from previous end
startIndex = previousEndIndex
actualContentLength = chunkText.length
} else {
// Calculate overlap length in characters (converted from tokens)
const prevChunk = chunks[index - 1]
const overlapChars = this.tokensToChars(this.chunkOverlap)
const overlapLength = Math.min(overlapChars, prevChunk.length, chunkText.length)
startIndex = previousEndIndex - overlapLength
actualContentLength = chunkText.length - overlapLength
}
const safeStart = Math.max(0, startIndex)
const endIndexSafe = safeStart + Math.max(0, actualContentLength)
const chunk: Chunk = {
text: chunkText,
tokenCount: this.estimateTokens(chunkText),
metadata: {
startIndex: safeStart,
endIndex: endIndexSafe,
},
}
previousEndIndex = endIndexSafe
return chunk
})
return await Promise.all(chunkPromises)
return buildChunks(chunks, this.chunkOverlap)
}
}

View File

@@ -0,0 +1,239 @@
/**
* @vitest-environment node
*/
import { loggerMock } from '@sim/testing'
import { describe, expect, it, vi } from 'vitest'
import { TokenChunker } from './token-chunker'
// Swap the '@sim/logger' module for the shared logger mock for the whole suite.
vi.mock('@sim/logger', () => loggerMock)
// Behavioural tests for TokenChunker.chunk(). Every case runs via it.concurrent
// and constructs its own chunker, so no state is shared between cases.
describe('TokenChunker', () => {
// Blank input must yield no chunks at all.
describe('empty and whitespace input', () => {
it.concurrent('should return empty array for empty string', async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const chunks = await chunker.chunk('')
expect(chunks).toEqual([])
})
it.concurrent('should return empty array for whitespace-only input', async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const chunks = await chunker.chunk(' \n\n\t ')
expect(chunks).toEqual([])
})
})
// Text well under the token budget comes back as one chunk with unchanged text.
describe('small content', () => {
it.concurrent('should return single chunk when content fits within chunkSize', async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const text = 'This is a short text.'
const chunks = await chunker.chunk(text)
expect(chunks).toHaveLength(1)
expect(chunks[0].text).toBe(text)
})
})
// tokenCount is expected to follow the 4-characters-per-token estimate, rounded up.
describe('token count accuracy', () => {
it.concurrent('should compute tokenCount as Math.ceil(text.length / 4)', async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const text = 'Hello world'
const chunks = await chunker.chunk(text)
expect(chunks[0].tokenCount).toBe(Math.ceil(text.length / 4))
})
it.concurrent('should compute tokenCount correctly for longer text', async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const text = 'The quick brown fox jumps over the lazy dog.'
const chunks = await chunker.chunk(text)
// The sentence is 44 characters long: 44 / 4 = 11
expect(chunks[0].tokenCount).toBe(11)
})
})
// Shape and sanity of the per-chunk metadata (startIndex / endIndex).
describe('chunk metadata', () => {
it.concurrent(
'should include text, tokenCount, and metadata with startIndex and endIndex',
async () => {
const chunker = new TokenChunker({ chunkSize: 100 })
const text = 'Some test content here.'
const chunks = await chunker.chunk(text)
expect(chunks[0]).toHaveProperty('text')
expect(chunks[0]).toHaveProperty('tokenCount')
expect(chunks[0].metadata).toHaveProperty('startIndex')
expect(chunks[0].metadata).toHaveProperty('endIndex')
expect(chunks[0].metadata.startIndex).toBe(0)
expect(chunks[0].metadata.endIndex).toBeGreaterThan(0)
}
)
it.concurrent('should have non-negative indices across all chunks', async () => {
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
const text = 'First part of the text. Second part of the text. Third part of the text.'
const chunks = await chunker.chunk(text)
// Each chunk's range must be non-negative and must not be inverted.
for (const chunk of chunks) {
expect(chunk.metadata.startIndex).toBeGreaterThanOrEqual(0)
expect(chunk.metadata.endIndex).toBeGreaterThanOrEqual(chunk.metadata.startIndex)
}
})
})
// No emitted chunk may exceed the configured token budget.
describe('respects chunk size', () => {
it.concurrent('should not produce chunks exceeding chunkSize tokens', async () => {
const chunkSize = 50
const chunker = new TokenChunker({ chunkSize })
const text = 'This is a test sentence with several words. '.repeat(30)
const chunks = await chunker.chunk(text)
for (const chunk of chunks) {
expect(chunk.tokenCount).toBeLessThanOrEqual(chunkSize)
}
})
})
// Long input is split, and a smaller budget produces more pieces.
describe('splitting behavior', () => {
it.concurrent('should produce multiple chunks for long text', async () => {
const chunker = new TokenChunker({ chunkSize: 50 })
const text = 'This is a test sentence. '.repeat(30)
const chunks = await chunker.chunk(text)
expect(chunks.length).toBeGreaterThan(1)
})
it.concurrent('should create more chunks with smaller chunkSize', async () => {
const text = 'This is a test sentence with content. '.repeat(20)
const largeChunker = new TokenChunker({ chunkSize: 200 })
const smallChunker = new TokenChunker({ chunkSize: 50 })
const largeChunks = await largeChunker.chunk(text)
const smallChunks = await smallChunker.chunk(text)
expect(smallChunks.length).toBeGreaterThan(largeChunks.length)
})
})
// Effect of chunkOverlap on the number and content of chunks.
describe('sliding window overlap', () => {
it.concurrent('should produce more chunks with overlap than without', async () => {
const text =
'Alpha bravo charlie delta echo foxtrot golf hotel india juliet kilo lima mike november oscar papa quebec romeo sierra tango uniform victor whiskey xray yankee zulu. '.repeat(
5
)
// Same text and chunkSize; only chunkOverlap differs between the two chunkers.
const withOverlap = new TokenChunker({ chunkSize: 30, chunkOverlap: 10 })
const withoutOverlap = new TokenChunker({ chunkSize: 30, chunkOverlap: 0 })
const overlapChunks = await withOverlap.chunk(text)
const noOverlapChunks = await withoutOverlap.chunk(text)
expect(overlapChunks.length).toBeGreaterThan(noOverlapChunks.length)
})
it.concurrent('should not share text between chunks when chunkOverlap is 0', async () => {
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
const text =
'First sentence here. Second sentence here. Third sentence here. Fourth sentence here.'
const chunks = await chunker.chunk(text)
// NOTE(review): the assertion is skipped entirely if only one chunk is produced.
if (chunks.length > 1) {
const firstChunkEnd = chunks[0].text.slice(-10)
expect(chunks[1].text.startsWith(firstChunkEnd)).toBe(false)
}
})
})
// An oversized overlap must neither break chunking nor behave differently from
// an overlap of half the chunk size.
describe('overlap clamped to 50%', () => {
it.concurrent('should still work when overlap is set >= chunkSize', async () => {
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 100 })
const text =
'First paragraph content here. Second paragraph content here. Third paragraph here.'
const chunks = await chunker.chunk(text)
expect(chunks.length).toBeGreaterThan(0)
})
it.concurrent('should clamp overlap to 50% of chunkSize', async () => {
// Overlap 100 is expected to give the same chunk count as overlap 10 (half of chunkSize 20).
const chunkerClamped = new TokenChunker({ chunkSize: 20, chunkOverlap: 100 })
const chunkerHalf = new TokenChunker({ chunkSize: 20, chunkOverlap: 10 })
const text =
'Word one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty. '.repeat(
5
)
const clampedChunks = await chunkerClamped.chunk(text)
const halfChunks = await chunkerHalf.chunk(text)
expect(clampedChunks.length).toBe(halfChunks.length)
})
})
// Chunks must come back without leading or trailing whitespace.
describe('word boundary snapping', () => {
it.concurrent('should produce trimmed chunks without leading or trailing spaces', async () => {
const chunker = new TokenChunker({ chunkSize: 20 })
const text =
'the cat sat on the mat and the dog ran fast over the big red fox and then the bird flew high up in the clear blue sky above the green hill'
const chunks = await chunker.chunk(text)
expect(chunks.length).toBeGreaterThan(1)
for (const chunk of chunks) {
const trimmed = chunk.text.trim()
expect(trimmed).toBe(chunk.text)
expect(trimmed.length).toBeGreaterThan(0)
}
})
it.concurrent('should produce chunks that start and end on word boundaries', async () => {
const chunker = new TokenChunker({ chunkSize: 15 })
const text =
'The quick brown fox jumps over the lazy dog and then runs away quickly into the forest'
const chunks = await chunker.chunk(text)
// NOTE(review): despite the test name, only trimmed-ness is asserted here;
// nothing checks that a chunk does not start or end in the middle of a word.
for (const chunk of chunks) {
const trimmed = chunk.text.trim()
expect(trimmed).toBe(chunk.text)
}
})
})
// Spot-checks that words from the input survive chunking (no full-text comparison).
describe('consistent coverage', () => {
it.concurrent('should represent all content from original text across chunks', async () => {
const chunker = new TokenChunker({ chunkSize: 30, chunkOverlap: 0 })
const originalText =
'The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs.'
const chunks = await chunker.chunk(originalText)
const allText = chunks.map((c) => c.text).join(' ')
expect(allText).toContain('quick')
expect(allText).toContain('fox')
expect(allText).toContain('lazy')
expect(allText).toContain('dog')
expect(allText).toContain('liquor')
expect(allText).toContain('jugs')
})
it.concurrent('should preserve all words across chunks for longer text', async () => {
const chunker = new TokenChunker({ chunkSize: 20, chunkOverlap: 0 })
const words = [
'alpha',
'bravo',
'charlie',
'delta',
'echo',
'foxtrot',
'golf',
'hotel',
'india',
'juliet',
]
// Builds "alpha is a word and bravo is a word and ... juliet is also a word."
const originalText = `${words.join(' is a word and ')} is also a word.`
const chunks = await chunker.chunk(originalText)
const combined = chunks.map((c) => c.text).join(' ')
for (const word of words) {
expect(combined).toContain(word)
}
})
})
})

View File

@@ -0,0 +1,54 @@
import { createLogger } from '@sim/logger'
import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types'
import {
buildChunks,
cleanText,
estimateTokens,
resolveChunkerOptions,
splitAtWordBoundaries,
tokensToChars,
} from '@/lib/chunkers/utils'
const logger = createLogger('TokenChunker')
/**
 * Chunker that cuts text into windows sized by an estimated token budget.
 *
 * Options are normalised once through `resolveChunkerOptions`. Token budgets are
 * converted to character lengths with `tokensToChars`, and the actual cutting is
 * delegated to `splitAtWordBoundaries`.
 */
export class TokenChunker {
  private readonly chunkSize: number
  private readonly chunkOverlap: number
  private readonly minCharactersPerChunk: number

  constructor(options: ChunkerOptions = {}) {
    const { chunkSize, chunkOverlap, minCharactersPerChunk } = resolveChunkerOptions(options)
    this.chunkSize = chunkSize
    this.chunkOverlap = chunkOverlap
    this.minCharactersPerChunk = minCharactersPerChunk
  }

  /**
   * Split `content` into chunks.
   *
   * - Missing or whitespace-only content yields an empty array.
   * - Cleaned content whose estimated token count fits the budget is returned as
   *   a single chunk, built with an overlap of 0.
   * - Otherwise the cleaned text is windowed. When an overlap is configured the
   *   window advances by (chunk size - overlap) characters; with no overlap the
   *   step is left undefined so the splitter applies its own default.
   *
   * @param content - raw text to chunk
   * @returns the chunks produced by `buildChunks`
   */
  async chunk(content: string): Promise<Chunk[]> {
    const hasContent = Boolean(content?.trim())
    if (!hasContent) {
      return []
    }

    const normalized = cleanText(content)
    const fitsInOneChunk = estimateTokens(normalized) <= this.chunkSize
    if (fitsInOneChunk) {
      logger.info('Content fits in single chunk')
      return buildChunks([normalized], 0)
    }

    const windowChars = tokensToChars(this.chunkSize)
    const overlapChars = tokensToChars(this.chunkOverlap)
    let strideChars: number | undefined
    if (this.chunkOverlap > 0) {
      strideChars = windowChars - overlapChars
    }

    const pieces = splitAtWordBoundaries(normalized, windowChars, strideChars)
    const chunks = this.dropUndersized(pieces)

    logger.info(`Chunked into ${chunks.length} token-based chunks`)
    return buildChunks(chunks, this.chunkOverlap)
  }

  /**
   * Remove pieces shorter than `minCharactersPerChunk`.
   *
   * A lone piece is always kept, and if every piece would be removed the
   * unfiltered list is returned instead. Removed pieces are discarded, not
   * merged into their neighbours.
   */
  private dropUndersized(pieces: string[]): string[] {
    if (pieces.length <= 1) {
      return pieces
    }
    const kept = pieces.filter((piece) => piece.length >= this.minCharactersPerChunk)
    return kept.length > 0 ? kept : pieces
  }
}

View File

@@ -1,17 +1,11 @@
/**
 * Options for configuring text chunkers. Every field is optional.
 *
 * Units:
 * - chunkSize: maximum chunk size in TOKENS (1 token ≈ 4 characters)
 * - chunkOverlap: overlap between chunks in TOKENS
 * - minCharactersPerChunk: minimum chunk size in CHARACTERS (filters tiny fragments)
 */
export interface ChunkerOptions {
/** Maximum chunk size in tokens (default: 1024) */
chunkSize?: number
/** Overlap between chunks in tokens (default: 0) */
chunkOverlap?: number
/** Minimum chunk size in characters to avoid tiny fragments (default: 100) */
minCharactersPerChunk?: number
}
@@ -51,3 +45,26 @@ export interface DocChunk {
/**
 * ChunkerOptions extended with an optional `baseUrl`.
 * NOTE(review): presumably the base URL used when building links for documentation
 * chunks; confirm against the docs chunker that consumes it.
 */
export interface DocsChunkerOptions extends ChunkerOptions {
baseUrl?: string
}
/**
 * Identifier of a chunking strategy.
 * NOTE(review): 'auto' presumably means the strategy is picked from the content
 * rather than by the caller; confirm where this type is consumed.
 */
export type ChunkingStrategy = 'auto' | 'text' | 'regex' | 'recursive' | 'sentence' | 'token'
/**
 * Named preset for recursive chunking.
 * NOTE(review): presumably each recipe selects a predefined separator list;
 * confirm against the recursive chunker.
 */
export type RecursiveRecipe = 'plain' | 'markdown' | 'code'
/**
 * Optional strategy-specific settings.
 * The fields have the same names and types as `pattern` on RegexChunkerOptions and
 * `separators` / `recipe` on RecursiveChunkerOptions, but all of them are optional here.
 */
export interface StrategyOptions {
pattern?: string
separators?: string[]
recipe?: RecursiveRecipe
}
/**
 * ChunkerOptions extended with an optional `minSentencesPerChunk`.
 * NOTE(review): presumably the minimum number of sentences grouped into one chunk;
 * confirm against the sentence chunker.
 */
export interface SentenceChunkerOptions extends ChunkerOptions {
minSentencesPerChunk?: number
}
/**
 * ChunkerOptions extended with two optional fields: an explicit `separators`
 * list and a named `recipe`.
 * NOTE(review): which of the two wins when both are supplied is not visible here;
 * confirm against the recursive chunker.
 */
export interface RecursiveChunkerOptions extends ChunkerOptions {
separators?: string[]
recipe?: RecursiveRecipe
}
/**
 * ChunkerOptions extended with a `pattern`, the only required field among the
 * chunker option types in this file.
 * NOTE(review): presumably the regular-expression source used to split the text;
 * confirm against the regex chunker.
 */
export interface RegexChunkerOptions extends ChunkerOptions {
pattern: string
}

Some files were not shown because too many files have changed in this diff Show More