Compare commits

...

9 Commits

Author SHA1 Message Date
Waleed
377b84e18c v0.4.6: kb improvements, posthog fixes 2025-10-05 21:48:32 -07:00
Waleed
223ecda80e fix(posthog): add rewrites for posthog reverse proxy routes unconditionally, remove unused POSTHOG_ENABLED envvar (#1548) 2025-10-05 21:27:03 -07:00
Waleed
7dde01e74b fix(kb): force kb uploads to use serve route (#1547) 2025-10-05 17:50:21 -07:00
Vikhyath Mondreti
b768ca845e v0.4.5: copilot updates, kb improvements, payment failure fix 2025-10-04 16:37:41 -07:00
Waleed
86ed32ea10 feat(kb): added json/yaml parser+chunker, added dedicated csv chunker (#1539)
* feat(kb): added json/yaml parser+chunker, added dedicated csv chunker

* ack PR comments

* improved kb upload
2025-10-04 14:59:21 -07:00
Vikhyath Mondreti
0e838940f1 fix(copilot): targeted auto-layout for copilot edits + custom tool persistence (#1546)
* fix autolayout and custom tools persistence

* fix

* fix preserving positions within subflow

* more fixes

* fix resizing

* consolidate constants
2025-10-04 14:52:37 -07:00
Siddharth Ganesan
7cc9a23f99 fix(copilot): tool renaming 2025-10-04 11:52:20 -07:00
Vikhyath Mondreti
c42d2a32f3 feat(copilot): fix context / json parsing edge cases (#1542)
* Add get ops examples

* input format incorrectly created by copilot should not crash workflow

* fix tool edits triggering overall delta

* fix(db): add more options for SSL connection, add envvar for base64 db cert (#1533)

* fix trigger additions

* fix nested outputs for triggers

* add condition subblock sanitization

* fix custom tools json

* Model selector

* fix response format sanitization

* remove dead code

* fix export sanitization

* Update migration

* fix import race cond

* Copilot settings

* fix response format

* stop loops/parallels copilot generation from breaking diff view

* fix lint

* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* fix tests

* fix lint

---------

Co-authored-by: Siddharth Ganesan <siddharthganesan@gmail.com>
Co-authored-by: Waleed <walif6@gmail.com>
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2025-10-03 19:08:57 -07:00
Vikhyath Mondreti
4da355d269 fix(billing-blocked): block platform usage if payment fails for regular subs as well (#1541) 2025-10-03 12:17:53 -07:00
61 changed files with 11162 additions and 1417 deletions

View File

@@ -32,4 +32,4 @@ jobs:
env:
DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: bun run scripts/process-docs-embeddings.ts --clear
run: bun run scripts/process-docs.ts --clear

View File

@@ -233,7 +233,7 @@ describe('Copilot Chat API Route', () => {
model: 'claude-4.5-sonnet',
mode: 'agent',
messageId: 'mock-uuid-1234-5678',
version: '1.0.0',
version: '1.0.1',
chatId: 'chat-123',
}),
})
@@ -303,7 +303,7 @@ describe('Copilot Chat API Route', () => {
model: 'claude-4.5-sonnet',
mode: 'agent',
messageId: 'mock-uuid-1234-5678',
version: '1.0.0',
version: '1.0.1',
chatId: 'chat-123',
}),
})
@@ -361,7 +361,7 @@ describe('Copilot Chat API Route', () => {
model: 'claude-4.5-sonnet',
mode: 'agent',
messageId: 'mock-uuid-1234-5678',
version: '1.0.0',
version: '1.0.1',
chatId: 'chat-123',
}),
})
@@ -453,7 +453,7 @@ describe('Copilot Chat API Route', () => {
model: 'claude-4.5-sonnet',
mode: 'ask',
messageId: 'mock-uuid-1234-5678',
version: '1.0.0',
version: '1.0.1',
chatId: 'chat-123',
}),
})

View File

@@ -0,0 +1,131 @@
import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { auth } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { db } from '@/../../packages/db'
import { settings } from '@/../../packages/db/schema'
const logger = createLogger('CopilotUserModelsAPI')
const DEFAULT_ENABLED_MODELS: Record<string, boolean> = {
'gpt-4o': false,
'gpt-4.1': false,
'gpt-5-fast': false,
'gpt-5': true,
'gpt-5-medium': true,
'gpt-5-high': false,
o3: true,
'claude-4-sonnet': true,
'claude-4.5-sonnet': true,
'claude-4.1-opus': true,
}
// GET - Fetch user's enabled models
export async function GET(request: NextRequest) {
try {
const session = await auth.api.getSession({ headers: request.headers })
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
const userId = session.user.id
// Try to fetch existing settings record
const [userSettings] = await db
.select()
.from(settings)
.where(eq(settings.userId, userId))
.limit(1)
if (userSettings) {
const userModelsMap = (userSettings.copilotEnabledModels as Record<string, boolean>) || {}
// Merge: start with defaults, then override with user's existing preferences
const mergedModels = { ...DEFAULT_ENABLED_MODELS }
for (const [modelId, enabled] of Object.entries(userModelsMap)) {
mergedModels[modelId] = enabled
}
// If we added any new models, update the database
const hasNewModels = Object.keys(DEFAULT_ENABLED_MODELS).some(
(key) => !(key in userModelsMap)
)
if (hasNewModels) {
await db
.update(settings)
.set({
copilotEnabledModels: mergedModels,
updatedAt: new Date(),
})
.where(eq(settings.userId, userId))
}
return NextResponse.json({
enabledModels: mergedModels,
})
}
// If no settings record exists, create one with empty object (client will use defaults)
const [created] = await db
.insert(settings)
.values({
id: userId,
userId,
copilotEnabledModels: {},
})
.returning()
return NextResponse.json({
enabledModels: DEFAULT_ENABLED_MODELS,
})
} catch (error) {
logger.error('Failed to fetch user models', { error })
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
}
}
// PUT - Update user's enabled models
export async function PUT(request: NextRequest) {
try {
const session = await auth.api.getSession({ headers: request.headers })
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
const userId = session.user.id
const body = await request.json()
if (!body.enabledModels || typeof body.enabledModels !== 'object') {
return NextResponse.json({ error: 'enabledModels must be an object' }, { status: 400 })
}
// Check if settings record exists
const [existing] = await db.select().from(settings).where(eq(settings.userId, userId)).limit(1)
if (existing) {
// Update existing record
await db
.update(settings)
.set({
copilotEnabledModels: body.enabledModels,
updatedAt: new Date(),
})
.where(eq(settings.userId, userId))
} else {
// Create new settings record
await db.insert(settings).values({
id: userId,
userId,
copilotEnabledModels: body.enabledModels,
})
}
return NextResponse.json({ success: true })
} catch (error) {
logger.error('Failed to update user models', { error })
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
}
}
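
A minimal usage sketch for the new endpoint, assuming it is mounted at /api/copilot/user-models and the caller holds an authenticated session; the request and response shapes mirror the handlers above (illustrative only, not part of the diff):

// Illustrative sketch: client-side helpers for the user-models route above.
type EnabledModels = Record<string, boolean>

async function getEnabledModels(): Promise<EnabledModels> {
  // GET merges the server-side defaults with any stored preferences.
  const res = await fetch('/api/copilot/user-models')
  if (!res.ok) throw new Error(`GET failed: ${res.status}`)
  const data: { enabledModels: EnabledModels } = await res.json()
  return data.enabledModels
}

async function toggleModel(modelId: string, enabled: boolean): Promise<void> {
  const current = await getEnabledModels()
  // PUT replaces the whole map, so send the merged object back.
  const res = await fetch('/api/copilot/user-models', {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ enabledModels: { ...current, [modelId]: enabled } }),
  })
  if (!res.ok) throw new Error(`PUT failed: ${res.status}`)
}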

View File

@@ -265,9 +265,8 @@ async function handleS3PresignedUrl(
)
}
// For chat images and profile pictures, use direct URLs since they need to be accessible by external services (knowledge base files now go through the serve route)
const finalPath =
uploadType === 'chat' || uploadType === 'knowledge-base' || uploadType === 'profile-pictures'
uploadType === 'chat' || uploadType === 'profile-pictures'
? `https://${config.bucket}.s3.${config.region}.amazonaws.com/${uniqueKey}`
: `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`
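
The effect of this change, sketched with made-up bucket, region, and key values: chat images and profile pictures keep the direct S3 URL, while knowledge-base uploads now resolve to the serve route.

// Illustrative sketch: the two URL shapes the ternary above can produce (bucket, region, and key are invented examples).
const config = { bucket: 'example-bucket', region: 'us-east-1' }
const uniqueKey = 'kb/123/report.pdf'

const directUrl = `https://${config.bucket}.s3.${config.region}.amazonaws.com/${uniqueKey}`
// => https://example-bucket.s3.us-east-1.amazonaws.com/kb/123/report.pdf (still used for chat and profile pictures)

const serveUrl = `/api/files/serve/s3/${encodeURIComponent(uniqueKey)}`
// => /api/files/serve/s3/kb%2F123%2Freport.pdf (now used for knowledge-base uploads)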

View File

@@ -7,6 +7,7 @@ import { getSession } from '@/lib/auth'
import { createLogger } from '@/lib/logs/console/logger'
import { getUserEntityPermissions } from '@/lib/permissions/utils'
import { generateRequestId } from '@/lib/utils'
import { extractAndPersistCustomTools } from '@/lib/workflows/custom-tools-persistence'
import { saveWorkflowToNormalizedTables } from '@/lib/workflows/db-helpers'
import { sanitizeAgentToolsInBlocks } from '@/lib/workflows/validation'
@@ -207,6 +208,21 @@ export async function PUT(request: NextRequest, { params }: { params: Promise<{
)
}
// Extract and persist custom tools to database
try {
const { saved, errors } = await extractAndPersistCustomTools(workflowState, userId)
if (saved > 0) {
logger.info(`[${requestId}] Persisted ${saved} custom tool(s) to database`, { workflowId })
}
if (errors.length > 0) {
logger.warn(`[${requestId}] Some custom tools failed to persist`, { errors, workflowId })
}
} catch (error) {
logger.error(`[${requestId}] Failed to persist custom tools`, { error, workflowId })
}
// Update workflow's lastSynced timestamp
await db
.update(workflow)

View File

@@ -312,7 +312,7 @@ export function EditChunkModal({
<Button
onClick={handleSaveContent}
disabled={!isFormValid || isSaving || !hasUnsavedChanges || isNavigating}
className='bg-[var(--brand-primary-hex)] font-[480] text-muted-foreground shadow-[0_0_0_0_var(--brand-primary-hex)] transition-all duration-200 hover:bg-[var(--brand-primary-hover-hex)] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
className='bg-[var(--brand-primary-hex)] font-[480] text-white shadow-[0_0_0_0_var(--brand-primary-hex)] transition-all duration-200 hover:bg-[var(--brand-primary-hover-hex)] hover:shadow-[0_0_0_4px_rgba(127,47,255,0.15)]'
>
{isSaving ? (
<>

View File

@@ -64,7 +64,7 @@ export function UploadModal({
return `File "${file.name}" is too large. Maximum size is 100MB.`
}
if (!ACCEPTED_FILE_TYPES.includes(file.type)) {
return `File "${file.name}" has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, or HTML files.`
return `File "${file.name}" has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML, or YML files.`
}
return null
}
@@ -193,8 +193,8 @@ export function UploadModal({
{isDragging ? 'Drop files here!' : 'Drop files here or click to browse'}
</p>
<p className='text-muted-foreground text-xs'>
Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max 100MB
each)
Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML,
YML (max 100MB each)
</p>
</div>
</div>

View File

@@ -158,7 +158,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
// Check file type
if (!ACCEPTED_FILE_TYPES.includes(file.type)) {
setFileError(
`File ${file.name} has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, or HTML.`
`File ${file.name} has an unsupported format. Please use PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML, JSON, YAML, or YML.`
)
hasError = true
continue
@@ -501,8 +501,8 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
: 'Drop files here or click to browse'}
</p>
<p className='text-muted-foreground text-xs'>
Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML (max
100MB each)
Supports PDF, DOC, DOCX, TXT, CSV, XLS, XLSX, MD, PPT, PPTX, HTML,
JSON, YAML, YML (max 100MB each)
</p>
</div>
</div>

View File

@@ -84,16 +84,200 @@ class ProcessingError extends KnowledgeUploadError {
}
const UPLOAD_CONFIG = {
BATCH_SIZE: 15, // Upload files in parallel - this is fast and not the bottleneck
MAX_RETRIES: 3, // Standard retry count
RETRY_DELAY: 2000, // Initial retry delay in ms (2 seconds)
RETRY_MULTIPLIER: 2, // Standard exponential backoff (2s, 4s, 8s)
CHUNK_SIZE: 5 * 1024 * 1024,
DIRECT_UPLOAD_THRESHOLD: 4 * 1024 * 1024, // Files > 4MB must use presigned URLs
LARGE_FILE_THRESHOLD: 50 * 1024 * 1024, // Files > 50MB need multipart upload
UPLOAD_TIMEOUT: 60000, // 60 second timeout per upload
MAX_PARALLEL_UPLOADS: 3, // Prevent client saturation; mirrors guidance on limiting simultaneous transfers (@Web)
MAX_RETRIES: 3,
RETRY_DELAY_MS: 2000,
RETRY_BACKOFF: 2,
CHUNK_SIZE: 8 * 1024 * 1024, // 8MB keeps us well above S3 minimum part size while reducing part count (@Web)
DIRECT_UPLOAD_THRESHOLD: 4 * 1024 * 1024,
LARGE_FILE_THRESHOLD: 50 * 1024 * 1024,
BASE_TIMEOUT_MS: 2 * 60 * 1000, // baseline per transfer window per large-file guidance (@Web)
TIMEOUT_PER_MB_MS: 1500,
MAX_TIMEOUT_MS: 10 * 60 * 1000,
MULTIPART_PART_CONCURRENCY: 3,
MULTIPART_MAX_RETRIES: 3,
BATCH_REQUEST_SIZE: 50,
} as const
const calculateUploadTimeoutMs = (fileSize: number) => {
const sizeInMb = fileSize / (1024 * 1024)
const dynamicBudget = UPLOAD_CONFIG.BASE_TIMEOUT_MS + sizeInMb * UPLOAD_CONFIG.TIMEOUT_PER_MB_MS
return Math.min(dynamicBudget, UPLOAD_CONFIG.MAX_TIMEOUT_MS)
}
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms))
const getHighResTime = () =>
typeof performance !== 'undefined' && typeof performance.now === 'function'
? performance.now()
: Date.now()
const formatMegabytes = (bytes: number) => Number((bytes / (1024 * 1024)).toFixed(2))
const calculateThroughputMbps = (bytes: number, durationMs: number) => {
if (!bytes || !durationMs) return 0
return Number((((bytes * 8) / durationMs) * 0.001).toFixed(2))
}
const formatDurationSeconds = (durationMs: number) => Number((durationMs / 1000).toFixed(2))
const runWithConcurrency = async <T, R>(
items: T[],
limit: number,
worker: (item: T, index: number) => Promise<R>
): Promise<Array<PromiseSettledResult<R>>> => {
const results: Array<PromiseSettledResult<R>> = Array(items.length)
if (items.length === 0) {
return results
}
const concurrency = Math.max(1, Math.min(limit, items.length))
let nextIndex = 0
const runners = Array.from({ length: concurrency }, async () => {
while (true) {
const currentIndex = nextIndex++
if (currentIndex >= items.length) {
break
}
try {
const value = await worker(items[currentIndex], currentIndex)
results[currentIndex] = { status: 'fulfilled', value }
} catch (error) {
results[currentIndex] = { status: 'rejected', reason: error }
}
}
})
await Promise.all(runners)
return results
}
const getErrorName = (error: unknown) =>
typeof error === 'object' && error !== null && 'name' in error ? String((error as any).name) : ''
const getErrorMessage = (error: unknown) =>
error instanceof Error ? error.message : typeof error === 'string' ? error : 'Unknown error'
const isAbortError = (error: unknown) => getErrorName(error) === 'AbortError'
const isNetworkError = (error: unknown) => {
if (!(error instanceof Error)) {
return false
}
const message = error.message.toLowerCase()
return (
message.includes('network') ||
message.includes('fetch') ||
message.includes('connection') ||
message.includes('timeout') ||
message.includes('timed out') ||
message.includes('econnreset')
)
}
interface PresignedFileInfo {
path: string
key: string
name: string
size: number
type: string
}
interface PresignedUploadInfo {
fileName: string
presignedUrl: string
fileInfo: PresignedFileInfo
uploadHeaders?: Record<string, string>
directUploadSupported: boolean
presignedUrls?: any
}
const normalizePresignedData = (data: any, context: string): PresignedUploadInfo => {
const presignedUrl = data?.presignedUrl || data?.uploadUrl
const fileInfo = data?.fileInfo
if (!presignedUrl || !fileInfo?.path) {
throw new PresignedUrlError(`Invalid presigned response for ${context}`, data)
}
return {
fileName: data.fileName || fileInfo.name || context,
presignedUrl,
fileInfo: {
path: fileInfo.path,
key: fileInfo.key,
name: fileInfo.name || context,
size: fileInfo.size || data.fileSize || 0,
type: fileInfo.type || data.contentType || '',
},
uploadHeaders: data.uploadHeaders || undefined,
directUploadSupported: data.directUploadSupported !== false,
presignedUrls: data.presignedUrls,
}
}
const getPresignedData = async (
file: File,
timeoutMs: number,
controller?: AbortController
): Promise<PresignedUploadInfo> => {
const localController = controller ?? new AbortController()
const timeoutId = setTimeout(() => localController.abort(), timeoutMs)
const startTime = getHighResTime()
try {
const presignedResponse = await fetch('/api/files/presigned?type=knowledge-base', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileName: file.name,
contentType: file.type,
fileSize: file.size,
}),
signal: localController.signal,
})
if (!presignedResponse.ok) {
let errorDetails: any = null
try {
errorDetails = await presignedResponse.json()
} catch {
// Ignore JSON parsing errors (@Web)
}
logger.error('Presigned URL request failed', {
status: presignedResponse.status,
fileSize: file.size,
})
throw new PresignedUrlError(
`Failed to get presigned URL for ${file.name}: ${presignedResponse.status} ${presignedResponse.statusText}`,
errorDetails
)
}
const presignedData = await presignedResponse.json()
const durationMs = getHighResTime() - startTime
logger.info('Fetched presigned URL', {
fileName: file.name,
sizeMB: formatMegabytes(file.size),
durationMs: formatDurationSeconds(durationMs),
})
return normalizePresignedData(presignedData, file.name)
} finally {
clearTimeout(timeoutId)
if (!controller) {
localController.abort()
}
}
}
export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
const [isUploading, setIsUploading] = useState(false)
const [uploadProgress, setUploadProgress] = useState<UploadProgress>({
@@ -153,85 +337,51 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
const uploadSingleFileWithRetry = async (
file: File,
retryCount = 0,
fileIndex?: number
fileIndex?: number,
presignedOverride?: PresignedUploadInfo
): Promise<UploadedFile> => {
const timeoutMs = calculateUploadTimeoutMs(file.size)
let presignedData: PresignedUploadInfo | undefined
const attempt = retryCount + 1
logger.info('Upload attempt started', {
fileName: file.name,
attempt,
sizeMB: formatMegabytes(file.size),
timeoutMs: formatDurationSeconds(timeoutMs),
})
try {
// Create abort controller for timeout
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), UPLOAD_CONFIG.UPLOAD_TIMEOUT)
const timeoutId = setTimeout(() => controller.abort(), timeoutMs)
try {
// Get presigned URL
const presignedResponse = await fetch('/api/files/presigned?type=knowledge-base', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileName: file.name,
contentType: file.type,
fileSize: file.size,
}),
signal: controller.signal,
})
clearTimeout(timeoutId)
if (!presignedResponse.ok) {
let errorDetails: any = null
try {
errorDetails = await presignedResponse.json()
} catch {
// Ignore JSON parsing errors
}
logger.error('Presigned URL request failed', {
status: presignedResponse.status,
fileSize: file.size,
retryCount,
})
throw new PresignedUrlError(
`Failed to get presigned URL for ${file.name}: ${presignedResponse.status} ${presignedResponse.statusText}`,
errorDetails
)
}
const presignedData = await presignedResponse.json()
presignedData = presignedOverride ?? (await getPresignedData(file, timeoutMs, controller))
if (presignedData.directUploadSupported) {
// Use presigned URLs for all uploads when cloud storage is available
// Check if file needs multipart upload for large files
if (file.size > UPLOAD_CONFIG.LARGE_FILE_THRESHOLD) {
return await uploadFileInChunks(file, presignedData)
return await uploadFileInChunks(file, presignedData, timeoutMs, fileIndex)
}
return await uploadFileDirectly(file, presignedData, fileIndex)
return await uploadFileDirectly(file, presignedData, timeoutMs, controller, fileIndex)
}
// Fallback to traditional upload through API route
// This is only used when cloud storage is not configured
// Must check file size due to Vercel's 4.5MB limit
if (file.size > UPLOAD_CONFIG.DIRECT_UPLOAD_THRESHOLD) {
throw new DirectUploadError(
`File ${file.name} is too large (${(file.size / 1024 / 1024).toFixed(2)}MB) for upload. Cloud storage must be configured for files over 4MB.`,
{ fileSize: file.size, limit: UPLOAD_CONFIG.DIRECT_UPLOAD_THRESHOLD }
)
}
logger.warn(`Using API upload fallback for ${file.name} - cloud storage not configured`)
return await uploadFileThroughAPI(file)
return await uploadFileThroughAPI(file, timeoutMs)
} finally {
clearTimeout(timeoutId)
}
} catch (error) {
const isTimeout = error instanceof Error && error.name === 'AbortError'
const isNetwork =
error instanceof Error &&
(error.message.includes('fetch') ||
error.message.includes('network') ||
error.message.includes('Failed to fetch'))
const isTimeout = isAbortError(error)
const isNetwork = isNetworkError(error)
// Retry logic
if (retryCount < UPLOAD_CONFIG.MAX_RETRIES) {
const delay = UPLOAD_CONFIG.RETRY_DELAY * UPLOAD_CONFIG.RETRY_MULTIPLIER ** retryCount // More aggressive exponential backoff
const delay = UPLOAD_CONFIG.RETRY_DELAY_MS * UPLOAD_CONFIG.RETRY_BACKOFF ** retryCount // More aggressive exponential backoff (@Web)
if (isTimeout || isNetwork) {
logger.warn(
`Upload failed (${isTimeout ? 'timeout' : 'network'}), retrying in ${delay / 1000}s...`,
@@ -243,7 +393,6 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
)
}
// Reset progress to 0 before retry to indicate restart
if (fileIndex !== undefined) {
setUploadProgress((prev) => ({
...prev,
@@ -253,8 +402,14 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
}))
}
await new Promise((resolve) => setTimeout(resolve, delay))
return uploadSingleFileWithRetry(file, retryCount + 1, fileIndex)
await sleep(delay)
const shouldReusePresigned = (isTimeout || isNetwork) && presignedData
return uploadSingleFileWithRetry(
file,
retryCount + 1,
fileIndex,
shouldReusePresigned ? presignedData : undefined
)
}
logger.error('Upload failed after retries', {
@@ -271,12 +426,15 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
*/
const uploadFileDirectly = async (
file: File,
presignedData: any,
presignedData: PresignedUploadInfo,
timeoutMs: number,
outerController: AbortController,
fileIndex?: number
): Promise<UploadedFile> => {
return new Promise((resolve, reject) => {
const xhr = new XMLHttpRequest()
let isCompleted = false // Track if this upload has completed to prevent duplicate state updates
let isCompleted = false
const startTime = getHighResTime()
const timeoutId = setTimeout(() => {
if (!isCompleted) {
@@ -284,7 +442,18 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
xhr.abort()
reject(new Error('Upload timeout'))
}
}, UPLOAD_CONFIG.UPLOAD_TIMEOUT)
}, timeoutMs)
const abortHandler = () => {
if (!isCompleted) {
isCompleted = true
clearTimeout(timeoutId)
xhr.abort()
reject(new DirectUploadError(`Upload aborted for ${file.name}`, {}))
}
}
outerController.signal.addEventListener('abort', abortHandler)
// Track upload progress
xhr.upload.addEventListener('progress', (event) => {
@@ -309,10 +478,19 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
if (!isCompleted) {
isCompleted = true
clearTimeout(timeoutId)
outerController.signal.removeEventListener('abort', abortHandler)
const durationMs = getHighResTime() - startTime
if (xhr.status >= 200 && xhr.status < 300) {
const fullFileUrl = presignedData.fileInfo.path.startsWith('http')
? presignedData.fileInfo.path
: `${window.location.origin}${presignedData.fileInfo.path}`
logger.info('Direct upload completed', {
fileName: file.name,
sizeMB: formatMegabytes(file.size),
durationMs: formatDurationSeconds(durationMs),
throughputMbps: calculateThroughputMbps(file.size, durationMs),
status: xhr.status,
})
resolve(createUploadedFile(file.name, fullFileUrl, file.size, file.type, file))
} else {
logger.error('S3 PUT request failed', {
@@ -335,17 +513,18 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
if (!isCompleted) {
isCompleted = true
clearTimeout(timeoutId)
outerController.signal.removeEventListener('abort', abortHandler)
const durationMs = getHighResTime() - startTime
logger.error('Direct upload network error', {
fileName: file.name,
sizeMB: formatMegabytes(file.size),
durationMs: formatDurationSeconds(durationMs),
})
reject(new DirectUploadError(`Network error uploading ${file.name}`, {}))
}
})
xhr.addEventListener('abort', () => {
if (!isCompleted) {
isCompleted = true
clearTimeout(timeoutId)
reject(new DirectUploadError(`Upload aborted for ${file.name}`, {}))
}
})
xhr.addEventListener('abort', abortHandler)
// Start the upload
xhr.open('PUT', presignedData.presignedUrl)
@@ -365,10 +544,16 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
/**
* Upload large file in chunks (multipart upload)
*/
const uploadFileInChunks = async (file: File, presignedData: any): Promise<UploadedFile> => {
const uploadFileInChunks = async (
file: File,
presignedData: PresignedUploadInfo,
timeoutMs: number,
fileIndex?: number
): Promise<UploadedFile> => {
logger.info(
`Uploading large file ${file.name} (${(file.size / 1024 / 1024).toFixed(2)}MB) using multipart upload`
)
const startTime = getHighResTime()
try {
// Step 1: Initiate multipart upload
@@ -419,37 +604,76 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
// Step 4: Upload parts in parallel (batch them to avoid overwhelming the browser)
const uploadedParts: Array<{ ETag: string; PartNumber: number }> = []
const PARALLEL_UPLOADS = 3 // Upload 3 parts at a time
for (let i = 0; i < presignedUrls.length; i += PARALLEL_UPLOADS) {
const batch = presignedUrls.slice(i, i + PARALLEL_UPLOADS)
const controller = new AbortController()
const multipartTimeoutId = setTimeout(() => controller.abort(), timeoutMs)
const batchPromises = batch.map(async ({ partNumber, url }: any) => {
try {
const uploadPart = async ({ partNumber, url }: any) => {
const start = (partNumber - 1) * chunkSize
const end = Math.min(start + chunkSize, file.size)
const chunk = file.slice(start, end)
const uploadResponse = await fetch(url, {
method: 'PUT',
body: chunk,
headers: {
'Content-Type': file.type,
},
})
for (let attempt = 0; attempt <= UPLOAD_CONFIG.MULTIPART_MAX_RETRIES; attempt++) {
try {
const partResponse = await fetch(url, {
method: 'PUT',
body: chunk,
signal: controller.signal,
headers: {
'Content-Type': file.type,
},
})
if (!uploadResponse.ok) {
throw new Error(`Failed to upload part ${partNumber}: ${uploadResponse.statusText}`)
if (!partResponse.ok) {
throw new Error(`Failed to upload part ${partNumber}: ${partResponse.statusText}`)
}
const etag = partResponse.headers.get('ETag') || ''
logger.info(`Uploaded part ${partNumber}/${numParts}`)
if (fileIndex !== undefined) {
const partProgress = Math.min(100, Math.round((partNumber / numParts) * 100))
setUploadProgress((prev) => ({
...prev,
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
idx === fileIndex ? { ...fs, progress: partProgress } : fs
),
}))
}
return { ETag: etag.replace(/"/g, ''), PartNumber: partNumber }
} catch (partError) {
if (attempt >= UPLOAD_CONFIG.MULTIPART_MAX_RETRIES) {
throw partError
}
const delay = UPLOAD_CONFIG.RETRY_DELAY_MS * UPLOAD_CONFIG.RETRY_BACKOFF ** attempt
logger.warn(
`Part ${partNumber} failed (attempt ${attempt + 1}), retrying in ${Math.round(delay / 1000)}s`
)
await sleep(delay)
}
}
// Get ETag from response headers
const etag = uploadResponse.headers.get('ETag') || ''
logger.info(`Uploaded part ${partNumber}/${numParts}`)
throw new Error(`Retries exhausted for part ${partNumber}`)
}
return { ETag: etag.replace(/"/g, ''), PartNumber: partNumber }
const partResults = await runWithConcurrency(
presignedUrls,
UPLOAD_CONFIG.MULTIPART_PART_CONCURRENCY,
uploadPart
)
partResults.forEach((result) => {
if (result?.status === 'fulfilled') {
uploadedParts.push(result.value)
} else if (result?.status === 'rejected') {
throw result.reason
}
})
const batchResults = await Promise.all(batchPromises)
uploadedParts.push(...batchResults)
} finally {
clearTimeout(multipartTimeoutId)
}
// Step 5: Complete multipart upload
@@ -470,23 +694,37 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
const { path } = await completeResponse.json()
logger.info(`Completed multipart upload for ${file.name}`)
const durationMs = getHighResTime() - startTime
logger.info('Multipart upload metrics', {
fileName: file.name,
sizeMB: formatMegabytes(file.size),
parts: uploadedParts.length,
durationMs: formatDurationSeconds(durationMs),
throughputMbps: calculateThroughputMbps(file.size, durationMs),
})
const fullFileUrl = path.startsWith('http') ? path : `${window.location.origin}${path}`
return createUploadedFile(file.name, fullFileUrl, file.size, file.type, file)
} catch (error) {
logger.error(`Multipart upload failed for ${file.name}:`, error)
const durationMs = getHighResTime() - startTime
logger.warn('Falling back to direct upload after multipart failure', {
fileName: file.name,
sizeMB: formatMegabytes(file.size),
durationMs: formatDurationSeconds(durationMs),
})
// Fall back to direct upload if multipart fails
logger.info('Falling back to direct upload')
return uploadFileDirectly(file, presignedData)
return uploadFileDirectly(file, presignedData, timeoutMs, new AbortController(), fileIndex)
}
}
/**
* Fallback upload through API
*/
const uploadFileThroughAPI = async (file: File): Promise<UploadedFile> => {
const uploadFileThroughAPI = async (file: File, timeoutMs: number): Promise<UploadedFile> => {
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), UPLOAD_CONFIG.UPLOAD_TIMEOUT)
const timeoutId = setTimeout(() => controller.abort(), timeoutMs)
try {
const formData = new FormData()
@@ -559,19 +797,20 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
logger.info(`Starting batch upload of ${files.length} files`)
try {
const BATCH_SIZE = 100 // Process 100 files at a time
const batches = []
// Create all batches
for (let batchStart = 0; batchStart < files.length; batchStart += BATCH_SIZE) {
const batchFiles = files.slice(batchStart, batchStart + BATCH_SIZE)
for (
let batchStart = 0;
batchStart < files.length;
batchStart += UPLOAD_CONFIG.BATCH_REQUEST_SIZE
) {
const batchFiles = files.slice(batchStart, batchStart + UPLOAD_CONFIG.BATCH_REQUEST_SIZE)
const batchIndexOffset = batchStart
batches.push({ batchFiles, batchIndexOffset })
}
logger.info(`Starting parallel processing of ${batches.length} batches`)
// Step 1: Get ALL presigned URLs in parallel
const presignedPromises = batches.map(async ({ batchFiles }, batchIndex) => {
logger.info(
`Getting presigned URLs for batch ${batchIndex + 1}/${batches.length} (${batchFiles.length} files)`
@@ -604,9 +843,8 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
const allPresignedData = await Promise.all(presignedPromises)
logger.info(`Got all presigned URLs, starting uploads`)
// Step 2: Upload all files with global concurrency control
const allUploads = allPresignedData.flatMap(({ batchFiles, presignedData, batchIndex }) => {
const batchIndexOffset = batchIndex * BATCH_SIZE
const batchIndexOffset = batchIndex * UPLOAD_CONFIG.BATCH_REQUEST_SIZE
return batchFiles.map((file, batchFileIndex) => {
const fileIndex = batchIndexOffset + batchFileIndex
@@ -616,16 +854,14 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
})
})
// Process all uploads with concurrency control
for (let i = 0; i < allUploads.length; i += UPLOAD_CONFIG.BATCH_SIZE) {
const concurrentBatch = allUploads.slice(i, i + UPLOAD_CONFIG.BATCH_SIZE)
const uploadPromises = concurrentBatch.map(async ({ file, presigned, fileIndex }) => {
const uploadResults = await runWithConcurrency(
allUploads,
UPLOAD_CONFIG.MAX_PARALLEL_UPLOADS,
async ({ file, presigned, fileIndex }) => {
if (!presigned) {
throw new Error(`No presigned data for file ${file.name}`)
}
// Mark as uploading
setUploadProgress((prev) => ({
...prev,
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
@@ -634,10 +870,8 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
}))
try {
// Upload directly to storage
const result = await uploadFileDirectly(file, presigned, fileIndex)
const result = await uploadSingleFileWithRetry(file, 0, fileIndex, presigned)
// Mark as completed
setUploadProgress((prev) => ({
...prev,
filesCompleted: prev.filesCompleted + 1,
@@ -648,7 +882,6 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
return result
} catch (error) {
// Mark as failed
setUploadProgress((prev) => ({
...prev,
fileStatuses: prev.fileStatuses?.map((fs, idx) =>
@@ -656,30 +889,27 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
? {
...fs,
status: 'failed' as const,
error: error instanceof Error ? error.message : 'Upload failed',
error: getErrorMessage(error),
}
: fs
),
}))
throw error
}
})
const batchResults = await Promise.allSettled(uploadPromises)
for (let j = 0; j < batchResults.length; j++) {
const result = batchResults[j]
if (result.status === 'fulfilled') {
results.push(result.value)
} else {
failedFiles.push({
file: concurrentBatch[j].file,
error:
result.reason instanceof Error ? result.reason : new Error(String(result.reason)),
})
}
}
}
)
uploadResults.forEach((result, idx) => {
if (result?.status === 'fulfilled') {
results.push(result.value)
} else if (result?.status === 'rejected') {
failedFiles.push({
file: allUploads[idx].file,
error:
result.reason instanceof Error ? result.reason : new Error(String(result.reason)),
})
}
})
if (failedFiles.length > 0) {
logger.error(`Failed to upload ${failedFiles.length} files`)
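
A quick sanity check on the dynamic timeout budget above, as a standalone sketch using the same constants: a 100 MB file gets 120 s + 150 s = 270 s, and the 10-minute cap kicks in at roughly 320 MB.

// Illustrative sketch of the timeout budget, using the same constants as UPLOAD_CONFIG above.
const BASE_TIMEOUT_MS = 2 * 60 * 1000
const TIMEOUT_PER_MB_MS = 1500
const MAX_TIMEOUT_MS = 10 * 60 * 1000

const calculateUploadTimeoutMs = (fileSize: number) =>
  Math.min(BASE_TIMEOUT_MS + (fileSize / (1024 * 1024)) * TIMEOUT_PER_MB_MS, MAX_TIMEOUT_MS)

console.log(calculateUploadTimeoutMs(100 * 1024 * 1024)) // 270000 ms (4.5 min)
console.log(calculateUploadTimeoutMs(500 * 1024 * 1024)) // 600000 ms (capped at 10 min)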

View File

@@ -3,6 +3,7 @@
import {
forwardRef,
type KeyboardEvent,
useCallback,
useEffect,
useImperativeHandle,
useRef,
@@ -41,7 +42,6 @@ import {
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
Switch,
Textarea,
Tooltip,
TooltipContent,
@@ -49,6 +49,7 @@ import {
TooltipTrigger,
} from '@/components/ui'
import { useSession } from '@/lib/auth-client'
import { isHosted } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
import { cn } from '@/lib/utils'
import { useCopilotStore } from '@/stores/copilot/store'
@@ -92,6 +93,7 @@ interface UserInputProps {
onModeChange?: (mode: 'ask' | 'agent') => void
value?: string // Controlled value from outside
onChange?: (value: string) => void // Callback when value changes
panelWidth?: number // Panel width to adjust truncation
}
interface UserInputRef {
@@ -112,6 +114,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
onModeChange,
value: controlledValue,
onChange: onControlledChange,
panelWidth = 308,
},
ref
) => {
@@ -179,7 +182,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
const [isLoadingLogs, setIsLoadingLogs] = useState(false)
const { data: session } = useSession()
const { currentChat, workflowId } = useCopilotStore()
const { currentChat, workflowId, enabledModels, setEnabledModels } = useCopilotStore()
const params = useParams()
const workspaceId = params.workspaceId as string
// Track per-chat preference for auto-adding workflow context
@@ -224,6 +227,30 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
}
}, [workflowId])
// Fetch enabled models when dropdown is opened for the first time
const fetchEnabledModelsOnce = useCallback(async () => {
if (!isHosted) return
if (enabledModels !== null) return // Already loaded
try {
const res = await fetch('/api/copilot/user-models')
if (!res.ok) {
logger.error('Failed to fetch enabled models')
return
}
const data = await res.json()
const modelsMap = data.enabledModels || {}
// Convert to array for store (API already merged with defaults)
const enabledArray = Object.entries(modelsMap)
.filter(([_, enabled]) => enabled)
.map(([modelId]) => modelId)
setEnabledModels(enabledArray)
} catch (error) {
logger.error('Error fetching enabled models', { error })
}
}, [enabledModels, setEnabledModels])
// Track the last chat ID we've seen to detect chat changes
const [lastChatId, setLastChatId] = useState<string | undefined>(undefined)
// Track if we just sent a message to avoid re-adding context after submit
@@ -1780,7 +1807,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
const { selectedModel, agentPrefetch, setSelectedModel, setAgentPrefetch } = useCopilotStore()
// Model configurations with their display names
const modelOptions = [
const allModelOptions = [
{ value: 'gpt-5-fast', label: 'gpt-5-fast' },
{ value: 'gpt-5', label: 'gpt-5' },
{ value: 'gpt-5-medium', label: 'gpt-5-medium' },
@@ -1793,23 +1820,36 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
{ value: 'claude-4.1-opus', label: 'claude-4.1-opus' },
] as const
// Filter models based on user preferences (only for hosted)
const modelOptions =
isHosted && enabledModels !== null
? allModelOptions.filter((model) => enabledModels.includes(model.value))
: allModelOptions
const getCollapsedModeLabel = () => {
const model = modelOptions.find((m) => m.value === selectedModel)
return model ? model.label : 'Claude 4.5 Sonnet'
return model ? model.label : 'claude-4.5-sonnet'
}
const getModelIcon = () => {
const colorClass = !agentPrefetch
? 'text-[var(--brand-primary-hover-hex)]'
: 'text-muted-foreground'
// Only Brain and BrainCircuit models show purple when agentPrefetch is false
const isBrainModel = [
'gpt-5',
'gpt-5-medium',
'claude-4-sonnet',
'claude-4.5-sonnet',
].includes(selectedModel)
const isBrainCircuitModel = ['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(selectedModel)
const colorClass =
(isBrainModel || isBrainCircuitModel) && !agentPrefetch
? 'text-[var(--brand-primary-hover-hex)]'
: 'text-muted-foreground'
// Match the dropdown icon logic exactly
if (['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(selectedModel)) {
if (isBrainCircuitModel) {
return <BrainCircuit className={`h-3 w-3 ${colorClass}`} />
}
if (
['gpt-5', 'gpt-5-medium', 'claude-4-sonnet', 'claude-4.5-sonnet'].includes(selectedModel)
) {
if (isBrainModel) {
return <Brain className={`h-3 w-3 ${colorClass}`} />
}
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)) {
@@ -3068,7 +3108,7 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
variant='ghost'
size='sm'
disabled={!onModeChange}
className='flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs'
className='flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs focus-visible:ring-0 focus-visible:ring-offset-0'
>
{getModeIcon()}
<span>{getModeText()}</span>
@@ -3134,191 +3174,183 @@ const UserInput = forwardRef<UserInputRef, UserInputProps>(
</TooltipProvider>
</DropdownMenuContent>
</DropdownMenu>
{
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button
variant='ghost'
size='sm'
className={cn(
'flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs',
!agentPrefetch
? 'border-[var(--brand-primary-hover-hex)] text-[var(--brand-primary-hover-hex)] hover:bg-[color-mix(in_srgb,var(--brand-primary-hover-hex)_8%,transparent)] hover:text-[var(--brand-primary-hover-hex)]'
: 'border-border text-foreground'
)}
title='Choose mode'
>
{getModelIcon()}
<span>
{getCollapsedModeLabel()}
{!agentPrefetch &&
!['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel) && (
<span className='ml-1 font-semibold'>MAX</span>
)}
</span>
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align='start' side='top' className='max-h-[400px] p-0'>
<TooltipProvider delayDuration={100} skipDelayDuration={0}>
<div className='w-[220px]'>
<div className='p-2 pb-0'>
<div className='mb-2 flex items-center justify-between'>
<div className='flex items-center gap-1.5'>
<span className='font-medium text-xs'>MAX mode</span>
<Tooltip>
<TooltipTrigger asChild>
<button
type='button'
className='h-3.5 w-3.5 rounded text-muted-foreground transition-colors hover:text-foreground'
aria-label='MAX mode info'
>
<Info className='h-3.5 w-3.5' />
</button>
</TooltipTrigger>
<TooltipContent
side='right'
sideOffset={6}
align='center'
className='max-w-[220px] border bg-popover p-2 text-[11px] text-popover-foreground leading-snug shadow-md'
>
Significantly increases depth of reasoning
<br />
<span className='text-[10px] text-muted-foreground italic'>
Only available for advanced models
</span>
</TooltipContent>
</Tooltip>
</div>
<Switch
checked={!agentPrefetch}
disabled={['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)}
title={
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel)
? 'MAX mode is only available for advanced models'
: undefined
}
onCheckedChange={(checked) => {
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel))
return
setAgentPrefetch(!checked)
}}
/>
</div>
<div className='my-1.5 flex justify-center'>
<div className='h-px w-[100%] bg-border' />
</div>
</div>
<div className='max-h-[280px] overflow-y-auto px-2 pb-2'>
<div>
<div className='mb-1'>
<span className='font-medium text-xs'>Model</span>
</div>
<div className='space-y-2'>
{/* Helper function to get icon for a model */}
{(() => {
const getModelIcon = (modelValue: string) => {
if (
['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(modelValue)
) {
return (
<BrainCircuit className='h-3 w-3 text-muted-foreground' />
)
}
if (
[
'gpt-5',
'gpt-5-medium',
'claude-4-sonnet',
'claude-4.5-sonnet',
].includes(modelValue)
) {
return <Brain className='h-3 w-3 text-muted-foreground' />
}
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(modelValue)) {
return <Zap className='h-3 w-3 text-muted-foreground' />
}
return <div className='h-3 w-3' />
}
{(() => {
const isBrainModel = [
'gpt-5',
'gpt-5-medium',
'claude-4-sonnet',
'claude-4.5-sonnet',
].includes(selectedModel)
const isBrainCircuitModel = ['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(
selectedModel
)
const showPurple = (isBrainModel || isBrainCircuitModel) && !agentPrefetch
const renderModelOption = (
option: (typeof modelOptions)[number]
) => (
<DropdownMenuItem
key={option.value}
onSelect={() => {
setSelectedModel(option.value)
// Automatically turn off max mode for fast models (Zap icon)
if (
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(
option.value
) &&
!agentPrefetch
) {
setAgentPrefetch(true)
}
}}
className={cn(
'flex h-7 items-center gap-1.5 px-2 py-1 text-left text-xs',
selectedModel === option.value ? 'bg-muted/50' : ''
)}
>
{getModelIcon(option.value)}
<span>{option.label}</span>
</DropdownMenuItem>
)
return (
<DropdownMenu
onOpenChange={(open) => {
if (open) {
fetchEnabledModelsOnce()
}
}}
>
<DropdownMenuTrigger asChild>
<Button
variant='ghost'
size='sm'
className={cn(
'flex h-6 items-center gap-1.5 rounded-full border px-2 py-1 font-medium text-xs focus-visible:ring-0 focus-visible:ring-offset-0',
showPurple
? 'border-[var(--brand-primary-hover-hex)] text-[var(--brand-primary-hover-hex)] hover:bg-[color-mix(in_srgb,var(--brand-primary-hover-hex)_8%,transparent)] hover:text-[var(--brand-primary-hover-hex)]'
: 'border-border text-foreground'
)}
title='Choose mode'
>
{getModelIcon()}
<span className={cn(panelWidth < 360 ? 'max-w-[72px] truncate' : '')}>
{getCollapsedModeLabel()}
{agentPrefetch &&
!['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(selectedModel) && (
<span className='ml-1 font-semibold'>Lite</span>
)}
</span>
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align='start' side='top' className='max-h-[400px] p-0'>
<TooltipProvider delayDuration={100} skipDelayDuration={0}>
<div className='w-[220px]'>
<div className='max-h-[280px] overflow-y-auto p-2'>
<div>
<div className='mb-1'>
<span className='font-medium text-xs'>Model</span>
</div>
<div className='space-y-2'>
{/* Helper function to get icon for a model */}
{(() => {
const getModelIcon = (modelValue: string) => {
if (
['gpt-5-high', 'o3', 'claude-4.1-opus'].includes(modelValue)
) {
return (
<BrainCircuit className='h-3 w-3 text-muted-foreground' />
)
}
if (
[
'gpt-5',
'gpt-5-medium',
'claude-4-sonnet',
'claude-4.5-sonnet',
].includes(modelValue)
) {
return <Brain className='h-3 w-3 text-muted-foreground' />
}
if (['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(modelValue)) {
return <Zap className='h-3 w-3 text-muted-foreground' />
}
return <div className='h-3 w-3' />
}
return (
<>
{/* OpenAI Models */}
<div>
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
OpenAI
</div>
<div className='space-y-0.5'>
{modelOptions
.filter((option) =>
[
'gpt-5-fast',
'gpt-5',
'gpt-5-medium',
'gpt-5-high',
'gpt-4o',
'gpt-4.1',
'o3',
].includes(option.value)
)
.map(renderModelOption)}
</div>
</div>
const renderModelOption = (
option: (typeof modelOptions)[number]
) => (
<DropdownMenuItem
key={option.value}
onSelect={() => {
setSelectedModel(option.value)
// Automatically turn off Lite mode for fast models (Zap icon)
if (
['gpt-4o', 'gpt-4.1', 'gpt-5-fast'].includes(
option.value
) &&
agentPrefetch
) {
setAgentPrefetch(false)
}
}}
className={cn(
'flex h-7 items-center gap-1.5 px-2 py-1 text-left text-xs',
selectedModel === option.value ? 'bg-muted/50' : ''
)}
>
{getModelIcon(option.value)}
<span>{option.label}</span>
</DropdownMenuItem>
)
{/* Anthropic Models */}
<div>
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
Anthropic
return (
<>
{/* OpenAI Models */}
<div>
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
OpenAI
</div>
<div className='space-y-0.5'>
{modelOptions
.filter((option) =>
[
'gpt-5-fast',
'gpt-5',
'gpt-5-medium',
'gpt-5-high',
'gpt-4o',
'gpt-4.1',
'o3',
].includes(option.value)
)
.map(renderModelOption)}
</div>
</div>
<div className='space-y-0.5'>
{modelOptions
.filter((option) =>
[
'claude-4-sonnet',
'claude-4.5-sonnet',
'claude-4.1-opus',
].includes(option.value)
)
.map(renderModelOption)}
{/* Anthropic Models */}
<div>
<div className='px-2 py-1 font-medium text-[10px] text-muted-foreground uppercase'>
Anthropic
</div>
<div className='space-y-0.5'>
{modelOptions
.filter((option) =>
[
'claude-4-sonnet',
'claude-4.5-sonnet',
'claude-4.1-opus',
].includes(option.value)
)
.map(renderModelOption)}
</div>
</div>
</div>
</>
)
})()}
{/* More Models Button (only for hosted) */}
{isHosted && (
<div className='mt-1 border-t pt-1'>
<button
type='button'
onClick={() => {
// Dispatch event to open settings modal on copilot tab
window.dispatchEvent(
new CustomEvent('open-settings', {
detail: { tab: 'copilot' },
})
)
}}
className='w-full rounded-sm px-2 py-1.5 text-left text-muted-foreground text-xs transition-colors hover:bg-muted/50'
>
More Models...
</button>
</div>
)}
</>
)
})()}
</div>
</div>
</div>
</div>
</div>
</TooltipProvider>
</DropdownMenuContent>
</DropdownMenu>
}
</TooltipProvider>
</DropdownMenuContent>
</DropdownMenu>
)
})()}
<Button
variant='ghost'
size='icon'

View File

@@ -440,6 +440,7 @@ export const Copilot = forwardRef<CopilotRef, CopilotProps>(({ panelWidth }, ref
onModeChange={setMode}
value={inputValue}
onChange={setInputValue}
panelWidth={panelWidth}
/>
)}
</>

View File

@@ -155,60 +155,30 @@ export function CreateMenu({ onCreateWorkflow, isCreatingWorkflow = false }: Cre
workspaceId,
})
// Load the imported workflow state into stores immediately (optimistic update)
const { useWorkflowStore } = await import('@/stores/workflows/workflow/store')
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
// Set the workflow as active in the registry to prevent reload
useWorkflowRegistry.setState({ activeWorkflowId: newWorkflowId })
// Set the workflow state immediately
useWorkflowStore.setState({
blocks: workflowData.blocks || {},
edges: workflowData.edges || [],
loops: workflowData.loops || {},
parallels: workflowData.parallels || {},
lastSaved: Date.now(),
})
// Initialize subblock store with the imported blocks
useSubBlockStore.getState().initializeFromWorkflow(newWorkflowId, workflowData.blocks || {})
// Also set subblock values if they exist in the imported data
const subBlockStore = useSubBlockStore.getState()
Object.entries(workflowData.blocks).forEach(([blockId, block]: [string, any]) => {
if (block.subBlocks) {
Object.entries(block.subBlocks).forEach(([subBlockId, subBlock]: [string, any]) => {
if (subBlock.value !== null && subBlock.value !== undefined) {
subBlockStore.setValue(blockId, subBlockId, subBlock.value)
}
})
}
})
// Navigate to the new workflow after setting state
router.push(`/workspace/${workspaceId}/w/${newWorkflowId}`)
logger.info('Workflow imported successfully from JSON')
// Save to database in the background (fire and forget)
fetch(`/api/workflows/${newWorkflowId}/state`, {
// Save workflow state to database first
const response = await fetch(`/api/workflows/${newWorkflowId}/state`, {
method: 'PUT',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(workflowData),
})
.then((response) => {
if (!response.ok) {
logger.error('Failed to persist imported workflow to database')
} else {
logger.info('Imported workflow persisted to database')
}
})
.catch((error) => {
logger.error('Failed to persist imported workflow:', error)
})
if (!response.ok) {
logger.error('Failed to persist imported workflow to database')
throw new Error('Failed to save workflow')
}
logger.info('Imported workflow persisted to database')
// Pre-load the workflow state before navigating
const { setActiveWorkflow } = useWorkflowRegistry.getState()
await setActiveWorkflow(newWorkflowId)
// Navigate to the new workflow (replace to avoid history entry)
router.replace(`/workspace/${workspaceId}/w/${newWorkflowId}`)
logger.info('Workflow imported successfully from JSON')
} catch (error) {
logger.error('Failed to import workflow:', { error })
} finally {

View File

@@ -1,5 +1,5 @@
import { useCallback, useEffect, useState } from 'react'
import { Check, Copy, Plus, Search } from 'lucide-react'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Brain, BrainCircuit, Check, Copy, Plus, Zap } from 'lucide-react'
import {
AlertDialog,
AlertDialogAction,
@@ -10,11 +10,12 @@ import {
AlertDialogHeader,
AlertDialogTitle,
Button,
Input,
Label,
Skeleton,
Switch,
} from '@/components/ui'
import { isHosted } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
import { useCopilotStore } from '@/stores/copilot/store'
const logger = createLogger('CopilotSettings')
@@ -23,26 +24,78 @@ interface CopilotKey {
displayKey: string
}
interface ModelOption {
value: string
label: string
icon: 'brain' | 'brainCircuit' | 'zap'
}
const OPENAI_MODELS: ModelOption[] = [
// Zap models first
{ value: 'gpt-4o', label: 'gpt-4o', icon: 'zap' },
{ value: 'gpt-4.1', label: 'gpt-4.1', icon: 'zap' },
{ value: 'gpt-5-fast', label: 'gpt-5-fast', icon: 'zap' },
// Brain models
{ value: 'gpt-5', label: 'gpt-5', icon: 'brain' },
{ value: 'gpt-5-medium', label: 'gpt-5-medium', icon: 'brain' },
// BrainCircuit models
{ value: 'gpt-5-high', label: 'gpt-5-high', icon: 'brainCircuit' },
{ value: 'o3', label: 'o3', icon: 'brainCircuit' },
]
const ANTHROPIC_MODELS: ModelOption[] = [
// Brain models
{ value: 'claude-4-sonnet', label: 'claude-4-sonnet', icon: 'brain' },
{ value: 'claude-4.5-sonnet', label: 'claude-4.5-sonnet', icon: 'brain' },
// BrainCircuit models
{ value: 'claude-4.1-opus', label: 'claude-4.1-opus', icon: 'brainCircuit' },
]
const ALL_MODELS: ModelOption[] = [...OPENAI_MODELS, ...ANTHROPIC_MODELS]
// Default enabled/disabled state for all models
const DEFAULT_ENABLED_MODELS: Record<string, boolean> = {
'gpt-4o': false,
'gpt-4.1': false,
'gpt-5-fast': false,
'gpt-5': true,
'gpt-5-medium': true,
'gpt-5-high': false,
o3: true,
'claude-4-sonnet': true,
'claude-4.5-sonnet': true,
'claude-4.1-opus': true,
}
const getModelIcon = (iconType: 'brain' | 'brainCircuit' | 'zap') => {
switch (iconType) {
case 'brainCircuit':
return <BrainCircuit className='h-3.5 w-3.5 text-muted-foreground' />
case 'brain':
return <Brain className='h-3.5 w-3.5 text-muted-foreground' />
case 'zap':
return <Zap className='h-3.5 w-3.5 text-muted-foreground' />
}
}
export function Copilot() {
const [keys, setKeys] = useState<CopilotKey[]>([])
const [isLoading, setIsLoading] = useState(true)
const [searchTerm, setSearchTerm] = useState('')
const [enabledModelsMap, setEnabledModelsMap] = useState<Record<string, boolean>>({})
const [isModelsLoading, setIsModelsLoading] = useState(true)
const hasFetchedModels = useRef(false)
const { setEnabledModels: setStoreEnabledModels } = useCopilotStore()
// Create flow state
const [showNewKeyDialog, setShowNewKeyDialog] = useState(false)
const [newKey, setNewKey] = useState<string | null>(null)
const [isCreatingKey] = useState(false)
const [newKeyCopySuccess, setNewKeyCopySuccess] = useState(false)
// Delete flow state
const [deleteKey, setDeleteKey] = useState<CopilotKey | null>(null)
const [showDeleteDialog, setShowDeleteDialog] = useState(false)
// Filter keys based on search term (by masked display value)
const filteredKeys = keys.filter((key) =>
key.displayKey.toLowerCase().includes(searchTerm.toLowerCase())
)
const fetchKeys = useCallback(async () => {
try {
setIsLoading(true)
@@ -58,9 +111,41 @@ export function Copilot() {
}
}, [])
const fetchEnabledModels = useCallback(async () => {
if (hasFetchedModels.current) return
hasFetchedModels.current = true
try {
setIsModelsLoading(true)
const res = await fetch('/api/copilot/user-models')
if (!res.ok) throw new Error(`Failed to fetch: ${res.status}`)
const data = await res.json()
const modelsMap = data.enabledModels || DEFAULT_ENABLED_MODELS
setEnabledModelsMap(modelsMap)
// Convert to array for store (API already merged with defaults)
const enabledArray = Object.entries(modelsMap)
.filter(([_, enabled]) => enabled)
.map(([modelId]) => modelId)
setStoreEnabledModels(enabledArray)
} catch (error) {
logger.error('Failed to fetch enabled models', { error })
setEnabledModelsMap(DEFAULT_ENABLED_MODELS)
setStoreEnabledModels(
Object.keys(DEFAULT_ENABLED_MODELS).filter((key) => DEFAULT_ENABLED_MODELS[key])
)
} finally {
setIsModelsLoading(false)
}
}, [setStoreEnabledModels])
useEffect(() => {
fetchKeys()
}, [fetchKeys])
if (isHosted) {
fetchKeys()
}
fetchEnabledModels()
}, [])
const onGenerate = async () => {
try {
@@ -102,63 +187,97 @@ export function Copilot() {
}
}
const onCopy = async (value: string, keyId?: string) => {
const onCopy = async (value: string) => {
try {
await navigator.clipboard.writeText(value)
if (!keyId) {
setNewKeyCopySuccess(true)
setTimeout(() => setNewKeyCopySuccess(false), 1500)
}
setNewKeyCopySuccess(true)
setTimeout(() => setNewKeyCopySuccess(false), 1500)
} catch (error) {
logger.error('Copy failed', { error })
}
}
const toggleModel = async (modelValue: string, enabled: boolean) => {
const newModelsMap = { ...enabledModelsMap, [modelValue]: enabled }
setEnabledModelsMap(newModelsMap)
// Convert to array for store
const enabledArray = Object.entries(newModelsMap)
.filter(([_, isEnabled]) => isEnabled)
.map(([modelId]) => modelId)
setStoreEnabledModels(enabledArray)
try {
const res = await fetch('/api/copilot/user-models', {
method: 'PUT',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ enabledModels: newModelsMap }),
})
if (!res.ok) {
throw new Error('Failed to update models')
}
} catch (error) {
logger.error('Failed to update enabled models', { error })
// Revert on error
setEnabledModelsMap(enabledModelsMap)
const revertedArray = Object.entries(enabledModelsMap)
.filter(([_, isEnabled]) => isEnabled)
.map(([modelId]) => modelId)
setStoreEnabledModels(revertedArray)
}
}
const enabledCount = Object.values(enabledModelsMap).filter(Boolean).length
const totalCount = ALL_MODELS.length
return (
<div className='relative flex h-full flex-col'>
{/* Fixed Header */}
<div className='px-6 pt-4 pb-2'>
{/* Search Input */}
{isLoading ? (
<Skeleton className='h-9 w-56 rounded-lg' />
) : (
<div className='flex h-9 w-56 items-center gap-2 rounded-lg border bg-transparent pr-2 pl-3'>
<Search className='h-4 w-4 flex-shrink-0 text-muted-foreground' strokeWidth={2} />
<Input
placeholder='Search API keys...'
value={searchTerm}
onChange={(e) => setSearchTerm(e.target.value)}
className='flex-1 border-0 bg-transparent px-0 font-[380] font-sans text-base text-foreground leading-none placeholder:text-muted-foreground focus-visible:ring-0 focus-visible:ring-offset-0'
/>
</div>
)}
</div>
{/* Sticky Header with API Keys (only for hosted) */}
{isHosted && (
<div className='sticky top-0 z-10 border-b bg-background px-6 py-4'>
<div className='space-y-3'>
{/* API Keys Header */}
<div className='flex items-center justify-between'>
<div>
<h3 className='font-semibold text-foreground text-sm'>API Keys</h3>
<p className='text-muted-foreground text-xs'>
Generate keys for programmatic access
</p>
</div>
<Button
onClick={onGenerate}
variant='ghost'
size='sm'
className='h-8 rounded-[8px] border bg-background px-3 shadow-xs hover:bg-muted focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0'
disabled={isLoading}
>
<Plus className='h-3.5 w-3.5 stroke-[2px]' />
Create
</Button>
</div>
{/* Scrollable Content */}
<div className='scrollbar-thin scrollbar-thumb-muted scrollbar-track-transparent min-h-0 flex-1 overflow-y-auto px-6'>
<div className='h-full space-y-2 py-2'>
{isLoading ? (
{/* API Keys List */}
<div className='space-y-2'>
<CopilotKeySkeleton />
<CopilotKeySkeleton />
<CopilotKeySkeleton />
</div>
) : keys.length === 0 ? (
<div className='flex h-full items-center justify-center text-muted-foreground text-sm'>
Click "Generate Key" below to get started
</div>
) : (
<div className='space-y-2'>
{filteredKeys.map((k) => (
<div key={k.id} className='flex flex-col gap-2'>
<Label className='font-normal text-muted-foreground text-xs uppercase'>
Copilot API Key
</Label>
<div className='flex items-center justify-between gap-4'>
<div className='flex items-center gap-3'>
<div className='flex h-8 items-center rounded-[8px] bg-muted px-3'>
<code className='font-mono text-foreground text-xs'>{k.displayKey}</code>
</div>
{isLoading ? (
<>
<CopilotKeySkeleton />
<CopilotKeySkeleton />
</>
) : keys.length === 0 ? (
<div className='py-3 text-center text-muted-foreground text-xs'>
No API keys yet
</div>
) : (
keys.map((k) => (
<div
key={k.id}
className='flex items-center justify-between gap-4 rounded-lg border bg-muted/30 px-3 py-2'
>
<div className='flex min-w-0 items-center gap-3'>
<code className='truncate font-mono text-foreground text-xs'>
{k.displayKey}
</code>
</div>
<Button
@@ -168,44 +287,103 @@ export function Copilot() {
setDeleteKey(k)
setShowDeleteDialog(true)
}}
className='h-8 text-muted-foreground hover:text-foreground'
className='h-7 flex-shrink-0 text-muted-foreground text-xs hover:text-foreground'
>
Delete
</Button>
</div>
</div>
))}
{/* Show message when search has no results but there are keys */}
{searchTerm.trim() && filteredKeys.length === 0 && keys.length > 0 && (
<div className='py-8 text-center text-muted-foreground text-sm'>
No API keys found matching "{searchTerm}"
</div>
))
)}
</div>
)}
</div>
</div>
</div>
)}
{/* Footer */}
<div className='bg-background'>
<div className='flex w-full items-center justify-between px-6 py-4'>
{isLoading ? (
<>
<Skeleton className='h-9 w-[117px] rounded-[8px]' />
<div className='w-[108px]' />
</>
{/* Scrollable Content - Models Section */}
<div className='scrollbar-thin scrollbar-thumb-muted scrollbar-track-transparent flex-1 overflow-y-auto px-6 py-4'>
<div className='space-y-3'>
{/* Models Header */}
<div>
<h3 className='font-semibold text-foreground text-sm'>Models</h3>
<div className='text-muted-foreground text-xs'>
{isModelsLoading ? (
<Skeleton className='mt-0.5 h-3 w-32' />
) : (
<span>
{enabledCount} of {totalCount} enabled
</span>
)}
</div>
</div>
{/* Models List */}
{isModelsLoading ? (
<div className='space-y-2'>
{[1, 2, 3, 4, 5].map((i) => (
<div key={i} className='flex items-center justify-between py-1.5'>
<Skeleton className='h-4 w-32' />
<Skeleton className='h-5 w-9 rounded-full' />
</div>
))}
</div>
) : (
<>
<Button
onClick={onGenerate}
variant='ghost'
className='h-9 rounded-[8px] border bg-background px-3 shadow-xs hover:bg-muted focus:outline-none focus-visible:ring-0 focus-visible:ring-offset-0'
disabled={isLoading}
>
<Plus className='h-4 w-4 stroke-[2px]' />
Create Key
</Button>
</>
<div className='space-y-4'>
{/* OpenAI Models */}
<div>
<div className='mb-2 px-2 font-medium text-[10px] text-muted-foreground uppercase'>
OpenAI
</div>
<div className='space-y-1'>
{OPENAI_MODELS.map((model) => {
const isEnabled = enabledModelsMap[model.value] ?? false
return (
<div
key={model.value}
className='-mx-2 flex items-center justify-between rounded px-2 py-1.5 hover:bg-muted/50'
>
<div className='flex items-center gap-2'>
{getModelIcon(model.icon)}
<span className='text-foreground text-sm'>{model.label}</span>
</div>
<Switch
checked={isEnabled}
onCheckedChange={(checked) => toggleModel(model.value, checked)}
className='scale-90'
/>
</div>
)
})}
</div>
</div>
{/* Anthropic Models */}
<div>
<div className='mb-2 px-2 font-medium text-[10px] text-muted-foreground uppercase'>
Anthropic
</div>
<div className='space-y-1'>
{ANTHROPIC_MODELS.map((model) => {
const isEnabled = enabledModelsMap[model.value] ?? false
return (
<div
key={model.value}
className='-mx-2 flex items-center justify-between rounded px-2 py-1.5 hover:bg-muted/50'
>
<div className='flex items-center gap-2'>
{getModelIcon(model.icon)}
<span className='text-foreground text-sm'>{model.label}</span>
</div>
<Switch
checked={isEnabled}
onCheckedChange={(checked) => toggleModel(model.value, checked)}
className='scale-90'
/>
</div>
)
})}
</div>
</div>
</div>
)}
</div>
</div>
@@ -292,15 +470,9 @@ export function Copilot() {
function CopilotKeySkeleton() {
return (
<div className='flex flex-col gap-2'>
<Skeleton className='h-4 w-32' />
<div className='flex items-center justify-between gap-4'>
<div className='flex items-center gap-3'>
<Skeleton className='h-8 w-20 rounded-[8px]' />
<Skeleton className='h-4 w-24' />
</div>
<Skeleton className='h-8 w-16' />
</div>
<div className='flex items-center justify-between gap-4 rounded-lg border bg-muted/30 px-3 py-2'>
<Skeleton className='h-4 w-48' />
<Skeleton className='h-7 w-14' />
</div>
)
}

View File

@@ -96,7 +96,7 @@ const allNavigationItems: NavigationItem[] = [
},
{
id: 'copilot',
label: 'Copilot Keys',
label: 'Copilot',
icon: Bot,
},
{
@@ -163,9 +163,6 @@ export function SettingsNavigation({
}, [userId, isHosted])
const navigationItems = allNavigationItems.filter((item) => {
if (item.id === 'copilot' && !isHosted) {
return false
}
if (item.hideWhenBillingDisabled && !isBillingEnabled) {
return false
}

View File

@@ -3,7 +3,6 @@
import { useEffect, useRef, useState } from 'react'
import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui'
import { getEnv, isTruthy } from '@/lib/env'
import { isHosted } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
import {
Account,
@@ -181,7 +180,7 @@ export function SettingsModal({ open, onOpenChange }: SettingsModalProps) {
<SSO />
</div>
)}
{isHosted && activeSection === 'copilot' && (
{activeSection === 'copilot' && (
<div className='h-full'>
<Copilot />
</div>

View File

@@ -2,6 +2,7 @@ import { useCallback, useEffect, useRef } from 'react'
import type { Edge } from 'reactflow'
import { useSession } from '@/lib/auth-client'
import { createLogger } from '@/lib/logs/console/logger'
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
import { getBlock } from '@/blocks'
import { resolveOutputType } from '@/blocks/utils'
import { useSocket } from '@/contexts/socket-context'
@@ -761,7 +762,11 @@ export function useCollaborativeWorkflow() {
})
}
const outputs = resolveOutputType(blockConfig.outputs)
// Get outputs based on trigger mode
const isTriggerMode = triggerMode || false
const outputs = isTriggerMode
? getBlockOutputs(type, subBlocks, isTriggerMode)
: resolveOutputType(blockConfig.outputs)
const completeBlockData = {
id,
@@ -775,7 +780,7 @@ export function useCollaborativeWorkflow() {
horizontalHandles: true,
isWide: false,
advancedMode: false,
triggerMode: triggerMode || false,
triggerMode: isTriggerMode,
height: 0, // Default height, will be set by the UI
parentId,
extent,

View File

@@ -137,15 +137,29 @@ export async function handleInvoicePaymentSucceeded(event: Stripe.Event) {
/**
* Handle invoice payment failed webhook
* This is triggered when a user's payment fails for a usage billing invoice
* This is triggered when a user's payment fails for any invoice (subscription or overage)
*/
export async function handleInvoicePaymentFailed(event: Stripe.Event) {
try {
const invoice = event.data.object as Stripe.Invoice
// Check if this is an overage billing invoice
if (invoice.metadata?.type !== 'overage_billing') {
logger.info('Ignoring non-overage billing invoice payment failure', { invoiceId: invoice.id })
const isOverageInvoice = invoice.metadata?.type === 'overage_billing'
let stripeSubscriptionId: string | undefined
if (isOverageInvoice) {
// Overage invoices store subscription ID in metadata
stripeSubscriptionId = invoice.metadata?.subscriptionId as string | undefined
} else {
// Regular subscription invoices have it in parent.subscription_details
const subscription = invoice.parent?.subscription_details?.subscription
stripeSubscriptionId = typeof subscription === 'string' ? subscription : subscription?.id
}
if (!stripeSubscriptionId) {
logger.info('No subscription found on invoice; skipping payment failed handler', {
invoiceId: invoice.id,
isOverageInvoice,
})
return
}
@@ -154,7 +168,7 @@ export async function handleInvoicePaymentFailed(event: Stripe.Event) {
const billingPeriod = invoice.metadata?.billingPeriod || 'unknown'
const attemptCount = invoice.attempt_count || 1
logger.warn('Overage billing invoice payment failed', {
logger.warn('Invoice payment failed', {
invoiceId: invoice.id,
customerId,
failedAmount,
@@ -162,47 +176,59 @@ export async function handleInvoicePaymentFailed(event: Stripe.Event) {
attemptCount,
customerEmail: invoice.customer_email,
hostedInvoiceUrl: invoice.hosted_invoice_url,
isOverageInvoice,
invoiceType: isOverageInvoice ? 'overage' : 'subscription',
})
// Implement dunning management logic here
// For example: suspend service after multiple failures, notify admins, etc.
// Block users after first payment failure
if (attemptCount >= 1) {
logger.error('Multiple payment failures for overage billing', {
logger.error('Payment failure - blocking users', {
invoiceId: invoice.id,
customerId,
attemptCount,
isOverageInvoice,
stripeSubscriptionId,
})
// Block all users under this customer (org members or individual)
// Overage invoices are manual invoices without parent.subscription_details
// We store the subscription ID in metadata when creating them
const stripeSubscriptionId = invoice.metadata?.subscriptionId as string | undefined
if (stripeSubscriptionId) {
const records = await db
.select()
.from(subscriptionTable)
.where(eq(subscriptionTable.stripeSubscriptionId, stripeSubscriptionId))
.limit(1)
if (records.length > 0) {
const sub = records[0]
if (sub.plan === 'team' || sub.plan === 'enterprise') {
const members = await db
.select({ userId: member.userId })
.from(member)
.where(eq(member.organizationId, sub.referenceId))
for (const m of members) {
await db
.update(userStats)
.set({ billingBlocked: true })
.where(eq(userStats.userId, m.userId))
}
} else {
const records = await db
.select()
.from(subscriptionTable)
.where(eq(subscriptionTable.stripeSubscriptionId, stripeSubscriptionId))
.limit(1)
if (records.length > 0) {
const sub = records[0]
if (sub.plan === 'team' || sub.plan === 'enterprise') {
const members = await db
.select({ userId: member.userId })
.from(member)
.where(eq(member.organizationId, sub.referenceId))
for (const m of members) {
await db
.update(userStats)
.set({ billingBlocked: true })
.where(eq(userStats.userId, sub.referenceId))
.where(eq(userStats.userId, m.userId))
}
logger.info('Blocked team/enterprise members due to payment failure', {
organizationId: sub.referenceId,
memberCount: members.length,
isOverageInvoice,
})
} else {
await db
.update(userStats)
.set({ billingBlocked: true })
.where(eq(userStats.userId, sub.referenceId))
logger.info('Blocked user due to payment failure', {
userId: sub.referenceId,
isOverageInvoice,
})
}
} else {
logger.warn('Subscription not found in database for failed payment', {
stripeSubscriptionId,
invoiceId: invoice.id,
})
}
}
} catch (error) {

View File

@@ -1,10 +1,17 @@
import fs from 'fs/promises'
import path from 'path'
import { generateEmbeddings } from '@/lib/embeddings/utils'
import { isDev } from '@/lib/environment'
import { TextChunker } from '@/lib/knowledge/documents/chunker'
import type { DocChunk, DocsChunkerOptions, HeaderInfo } from '@/lib/knowledge/documents/types'
import { createLogger } from '@/lib/logs/console/logger'
import { TextChunker } from './text-chunker'
import type { DocChunk, DocsChunkerOptions } from './types'
interface HeaderInfo {
level: number
text: string
slug?: string
anchor?: string
position?: number
}
interface Frontmatter {
title?: string
@@ -29,7 +36,7 @@ export class DocsChunker {
overlap: options.overlap ?? 50,
})
// Use localhost docs in development, production docs otherwise
this.baseUrl = options.baseUrl ?? (isDev ? 'http://localhost:3001' : 'https://docs.sim.ai')
this.baseUrl = options.baseUrl ?? 'https://docs.sim.ai'
}
/**
@@ -108,9 +115,7 @@ export class DocsChunker {
metadata: {
startIndex: chunkStart,
endIndex: chunkEnd,
hasFrontmatter: i === 0 && content.startsWith('---'),
documentTitle: frontmatter.title,
documentDescription: frontmatter.description,
title: frontmatter.title,
},
}
@@ -200,7 +205,7 @@ export class DocsChunker {
let relevantHeader: HeaderInfo | null = null
for (const header of headers) {
if (header.position <= position) {
if (header.position !== undefined && header.position <= position) {
relevantHeader = header
} else {
break
@@ -285,53 +290,6 @@ export class DocsChunker {
return { data, content: markdownContent }
}
/**
* Split content by headers to respect document structure
*/
private splitByHeaders(
content: string
): Array<{ header: string | null; content: string; level: number }> {
const lines = content.split('\n')
const sections: Array<{ header: string | null; content: string; level: number }> = []
let currentHeader: string | null = null
let currentLevel = 0
let currentContent: string[] = []
for (const line of lines) {
const headerMatch = line.match(/^(#{1,3})\s+(.+)$/) // Only split on H1-H3, not H4-H6
if (headerMatch) {
// Save previous section
if (currentContent.length > 0) {
sections.push({
header: currentHeader,
content: currentContent.join('\n').trim(),
level: currentLevel,
})
}
// Start new section
currentHeader = line
currentLevel = headerMatch[1].length
currentContent = []
} else {
currentContent.push(line)
}
}
// Add final section
if (currentContent.length > 0) {
sections.push({
header: currentHeader,
content: currentContent.join('\n').trim(),
level: currentLevel,
})
}
return sections.filter((section) => section.content.trim().length > 0)
}
/**
* Estimate token count (rough approximation)
*/
@@ -340,175 +298,6 @@ export class DocsChunker {
return Math.ceil(text.length / 4)
}
/**
* Merge small adjacent chunks to reach target size
*/
private mergeSmallChunks(chunks: string[]): string[] {
const merged: string[] = []
let currentChunk = ''
for (const chunk of chunks) {
const currentTokens = this.estimateTokens(currentChunk)
const chunkTokens = this.estimateTokens(chunk)
// If adding this chunk would exceed target size, save current and start new
if (currentTokens > 0 && currentTokens + chunkTokens > 500) {
if (currentChunk.trim()) {
merged.push(currentChunk.trim())
}
currentChunk = chunk
} else {
// Merge with current chunk
currentChunk = currentChunk ? `${currentChunk}\n\n${chunk}` : chunk
}
}
// Add final chunk
if (currentChunk.trim()) {
merged.push(currentChunk.trim())
}
return merged
}
/**
* Chunk a section while preserving tables and structure
*/
private async chunkSection(section: {
header: string | null
content: string
level: number
}): Promise<string[]> {
const content = section.content
const header = section.header
// Check if content contains tables
const hasTable = this.containsTable(content)
if (hasTable) {
// Split by tables and handle each part
return this.splitContentWithTables(content, header)
}
// Regular chunking for text-only content
const chunks = await this.textChunker.chunk(content)
return chunks.map((chunk, index) => {
// Add header to first chunk only
if (index === 0 && header) {
return `${header}\n\n${chunk.text}`.trim()
}
return chunk.text
})
}
/**
* Check if content contains markdown tables
*/
private containsTable(content: string): boolean {
const lines = content.split('\n')
return lines.some((line, index) => {
if (line.includes('|') && line.split('|').length >= 3) {
const nextLine = lines[index + 1]
return nextLine?.includes('|') && nextLine.includes('-')
}
return false
})
}
/**
* Split content that contains tables, keeping tables intact
*/
private splitContentWithTables(content: string, header: string | null): string[] {
const lines = content.split('\n')
const chunks: string[] = []
let currentChunk: string[] = []
let inTable = false
let tableLines: string[] = []
for (let i = 0; i < lines.length; i++) {
const line = lines[i]
// Detect table start
if (line.includes('|') && line.split('|').length >= 3 && !inTable) {
const nextLine = lines[i + 1]
if (nextLine?.includes('|') && nextLine.includes('-')) {
inTable = true
// Save current chunk if it has content
if (currentChunk.length > 0 && currentChunk.join('\n').trim().length > 50) {
const chunkText = currentChunk.join('\n').trim()
const withHeader =
chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
chunks.push(withHeader)
currentChunk = []
}
tableLines = [line]
continue
}
}
if (inTable) {
tableLines.push(line)
// Detect table end
if (!line.includes('|') || line.trim() === '') {
inTable = false
// Save table as its own chunk
const tableText = tableLines
.filter((l) => l.trim())
.join('\n')
.trim()
if (tableText.length > 0) {
const withHeader =
chunks.length === 0 && header ? `${header}\n\n${tableText}` : tableText
chunks.push(withHeader)
}
tableLines = []
// Start new chunk if current line has content
if (line.trim() !== '') {
currentChunk = [line]
}
}
} else {
currentChunk.push(line)
// If chunk is getting large, save it
if (this.estimateTokens(currentChunk.join('\n')) > 250) {
const chunkText = currentChunk.join('\n').trim()
if (chunkText.length > 50) {
const withHeader =
chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
chunks.push(withHeader)
}
currentChunk = []
}
}
}
// Handle remaining content
if (inTable && tableLines.length > 0) {
const tableText = tableLines
.filter((l) => l.trim())
.join('\n')
.trim()
if (tableText.length > 0) {
const withHeader = chunks.length === 0 && header ? `${header}\n\n${tableText}` : tableText
chunks.push(withHeader)
}
} else if (currentChunk.length > 0) {
const chunkText = currentChunk.join('\n').trim()
if (chunkText.length > 50) {
const withHeader = chunks.length === 0 && header ? `${header}\n\n${chunkText}` : chunkText
chunks.push(withHeader)
}
}
return chunks.filter((chunk) => chunk.trim().length > 50)
}
/**
* Detect table boundaries in markdown content to avoid splitting them
*/

View File

@@ -0,0 +1,5 @@
export { DocsChunker } from './docs-chunker'
export { JsonYamlChunker } from './json-yaml-chunker'
export { StructuredDataChunker } from './structured-data-chunker'
export { TextChunker } from './text-chunker'
export * from './types'

View File

@@ -0,0 +1,317 @@
import { estimateTokenCount } from '@/lib/tokenization/estimators'
import type { Chunk, ChunkerOptions } from './types'
function getTokenCount(text: string): number {
const estimate = estimateTokenCount(text)
return estimate.count
}
/**
* Configuration for JSON/YAML chunking
*/
const JSON_YAML_CHUNKING_CONFIG = {
TARGET_CHUNK_SIZE: 2000, // Target tokens per chunk
MIN_CHUNK_SIZE: 100, // Minimum tokens per chunk
MAX_CHUNK_SIZE: 3000, // Maximum tokens per chunk
MAX_DEPTH_FOR_SPLITTING: 5, // Maximum depth to traverse for splitting
}
export class JsonYamlChunker {
private chunkSize: number
private minChunkSize: number
constructor(options: ChunkerOptions = {}) {
this.chunkSize = options.chunkSize || JSON_YAML_CHUNKING_CONFIG.TARGET_CHUNK_SIZE
this.minChunkSize = options.minChunkSize || JSON_YAML_CHUNKING_CONFIG.MIN_CHUNK_SIZE
}
/**
* Check if content is structured JSON/YAML data
*/
static isStructuredData(content: string): boolean {
try {
JSON.parse(content)
return true
} catch {
try {
const yaml = require('js-yaml')
yaml.load(content)
return true
} catch {
return false
}
}
}
/**
* Chunk JSON/YAML content intelligently based on structure
*/
async chunk(content: string): Promise<Chunk[]> {
try {
const data = JSON.parse(content)
return this.chunkStructuredData(data)
} catch (error) {
return this.chunkAsText(content)
}
}
/**
* Chunk structured data based on its structure
*/
private chunkStructuredData(data: any, path: string[] = []): Chunk[] {
const chunks: Chunk[] = []
if (Array.isArray(data)) {
return this.chunkArray(data, path)
}
if (typeof data === 'object' && data !== null) {
return this.chunkObject(data, path)
}
const content = JSON.stringify(data, null, 2)
const tokenCount = getTokenCount(content)
if (tokenCount >= this.minChunkSize) {
chunks.push({
text: content,
tokenCount,
metadata: {
startIndex: 0,
endIndex: content.length,
},
})
}
return chunks
}
/**
* Chunk an array intelligently
*/
private chunkArray(arr: any[], path: string[]): Chunk[] {
const chunks: Chunk[] = []
let currentBatch: any[] = []
let currentTokens = 0
const contextHeader = path.length > 0 ? `// ${path.join('.')}\n` : ''
for (let i = 0; i < arr.length; i++) {
const item = arr[i]
const itemStr = JSON.stringify(item, null, 2)
const itemTokens = getTokenCount(itemStr)
if (itemTokens > this.chunkSize) {
// Save current batch if it has items
if (currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: getTokenCount(batchContent),
metadata: {
startIndex: i - currentBatch.length,
endIndex: i - 1,
},
})
currentBatch = []
currentTokens = 0
}
if (typeof item === 'object' && item !== null) {
const subChunks = this.chunkStructuredData(item, [...path, `[${i}]`])
chunks.push(...subChunks)
} else {
chunks.push({
text: contextHeader + itemStr,
tokenCount: itemTokens,
metadata: {
startIndex: i,
endIndex: i,
},
})
}
} else if (currentTokens + itemTokens > this.chunkSize && currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: currentTokens,
metadata: {
startIndex: i - currentBatch.length,
endIndex: i - 1,
},
})
currentBatch = [item]
currentTokens = itemTokens
} else {
currentBatch.push(item)
currentTokens += itemTokens
}
}
if (currentBatch.length > 0) {
const batchContent = contextHeader + JSON.stringify(currentBatch, null, 2)
chunks.push({
text: batchContent,
tokenCount: currentTokens,
metadata: {
startIndex: arr.length - currentBatch.length,
endIndex: arr.length - 1,
},
})
}
return chunks
}
/**
* Chunk an object intelligently
*/
private chunkObject(obj: Record<string, any>, path: string[]): Chunk[] {
const chunks: Chunk[] = []
const entries = Object.entries(obj)
const fullContent = JSON.stringify(obj, null, 2)
const fullTokens = getTokenCount(fullContent)
if (fullTokens <= this.chunkSize) {
chunks.push({
text: fullContent,
tokenCount: fullTokens,
metadata: {
startIndex: 0,
endIndex: fullContent.length,
},
})
return chunks
}
let currentObj: Record<string, any> = {}
let currentTokens = 0
let currentKeys: string[] = []
for (const [key, value] of entries) {
const valueStr = JSON.stringify({ [key]: value }, null, 2)
const valueTokens = getTokenCount(valueStr)
if (valueTokens > this.chunkSize) {
// Save current object if it has properties
if (Object.keys(currentObj).length > 0) {
const objContent = JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: currentTokens,
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
})
currentObj = {}
currentTokens = 0
currentKeys = []
}
if (typeof value === 'object' && value !== null) {
const subChunks = this.chunkStructuredData(value, [...path, key])
chunks.push(...subChunks)
} else {
chunks.push({
text: valueStr,
tokenCount: valueTokens,
metadata: {
startIndex: 0,
endIndex: valueStr.length,
},
})
}
} else if (
currentTokens + valueTokens > this.chunkSize &&
Object.keys(currentObj).length > 0
) {
const objContent = JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: currentTokens,
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
})
currentObj = { [key]: value }
currentTokens = valueTokens
currentKeys = [key]
} else {
currentObj[key] = value
currentTokens += valueTokens
currentKeys.push(key)
}
}
if (Object.keys(currentObj).length > 0) {
const objContent = JSON.stringify(currentObj, null, 2)
chunks.push({
text: objContent,
tokenCount: currentTokens,
metadata: {
startIndex: 0,
endIndex: objContent.length,
},
})
}
return chunks
}
/**
* Fall back to text chunking if JSON parsing fails.
*/
private async chunkAsText(content: string): Promise<Chunk[]> {
const chunks: Chunk[] = []
const lines = content.split('\n')
let currentChunk = ''
let currentTokens = 0
let startIndex = 0
for (const line of lines) {
const lineTokens = getTokenCount(line)
if (currentTokens + lineTokens > this.chunkSize && currentChunk) {
chunks.push({
text: currentChunk,
tokenCount: currentTokens,
metadata: {
startIndex,
endIndex: startIndex + currentChunk.length,
},
})
startIndex += currentChunk.length + 1
currentChunk = line
currentTokens = lineTokens
} else {
currentChunk = currentChunk ? `${currentChunk}\n${line}` : line
currentTokens += lineTokens
}
}
if (currentChunk && currentTokens >= this.minChunkSize) {
chunks.push({
text: currentChunk,
tokenCount: currentTokens,
metadata: {
startIndex,
endIndex: startIndex + currentChunk.length,
},
})
}
return chunks
}
/**
* Static method for chunking JSON/YAML data with default options.
*/
static async chunkJsonYaml(content: string, options: ChunkerOptions = {}): Promise<Chunk[]> {
const chunker = new JsonYamlChunker(options)
return chunker.chunk(content)
}
}
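// Illustrative usage sketch (editor's example, not part of this diff). The import path
// is an assumption; point it at wherever the chunkers index shown above is exported from.
import { JsonYamlChunker } from '@/lib/knowledge/documents/chunkers'

async function chunkJsonExample(): Promise<void> {
  const content = JSON.stringify({
    users: [
      { id: 1, name: 'Ada' },
      { id: 2, name: 'Grace' },
    ],
  })
  if (JsonYamlChunker.isStructuredData(content)) {
    const chunker = new JsonYamlChunker({ chunkSize: 2000, minChunkSize: 100 })
    const chunks = await chunker.chunk(content)
    // Each chunk carries its token estimate and the span it covers.
    for (const chunk of chunks) {
      console.log(chunk.tokenCount, chunk.metadata.startIndex, chunk.metadata.endIndex)
    }
  }
}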

View File

@@ -0,0 +1,220 @@
import type { Chunk, StructuredDataOptions } from './types'
// Configuration for structured data chunking (CSV, XLSX, etc.)
const STRUCTURED_CHUNKING_CONFIG = {
// Target 2000-3000 tokens per chunk for better semantic meaning
TARGET_CHUNK_SIZE: 2500,
MIN_CHUNK_SIZE: 500,
MAX_CHUNK_SIZE: 4000,
// For spreadsheets, group rows together
ROWS_PER_CHUNK: 100, // Start with 100 rows per chunk
MIN_ROWS_PER_CHUNK: 20,
MAX_ROWS_PER_CHUNK: 500,
// For better embeddings quality
INCLUDE_HEADERS_IN_EACH_CHUNK: true,
MAX_HEADER_SIZE: 200, // tokens
}
/**
* Smart chunker for structured data (CSV, XLSX) that preserves semantic meaning
*/
export class StructuredDataChunker {
/**
* Chunk structured data intelligently based on rows and semantic boundaries
*/
static async chunkStructuredData(
content: string,
options: StructuredDataOptions = {}
): Promise<Chunk[]> {
const chunks: Chunk[] = []
const lines = content.split('\n').filter((line) => line.trim())
if (lines.length === 0) {
return chunks
}
// Detect headers (first line or provided)
const headerLine = options.headers?.join('\t') || lines[0]
const dataStartIndex = options.headers ? 0 : 1
// Calculate optimal rows per chunk based on content
const estimatedTokensPerRow = StructuredDataChunker.estimateTokensPerRow(
lines.slice(dataStartIndex, Math.min(10, lines.length))
)
const optimalRowsPerChunk =
StructuredDataChunker.calculateOptimalRowsPerChunk(estimatedTokensPerRow)
console.log(
`Structured data chunking: ${lines.length} rows, ~${estimatedTokensPerRow} tokens/row, ${optimalRowsPerChunk} rows/chunk`
)
let currentChunkRows: string[] = []
let currentTokenEstimate = 0
const headerTokens = StructuredDataChunker.estimateTokens(headerLine)
let chunkStartRow = dataStartIndex
for (let i = dataStartIndex; i < lines.length; i++) {
const row = lines[i]
const rowTokens = StructuredDataChunker.estimateTokens(row)
// Check if adding this row would exceed our target
const projectedTokens =
currentTokenEstimate +
rowTokens +
(STRUCTURED_CHUNKING_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK ? headerTokens : 0)
const shouldCreateChunk =
(projectedTokens > STRUCTURED_CHUNKING_CONFIG.TARGET_CHUNK_SIZE &&
currentChunkRows.length >= STRUCTURED_CHUNKING_CONFIG.MIN_ROWS_PER_CHUNK) ||
currentChunkRows.length >= optimalRowsPerChunk
if (shouldCreateChunk && currentChunkRows.length > 0) {
// Create chunk with current rows
const chunkContent = StructuredDataChunker.formatChunk(
headerLine,
currentChunkRows,
options.sheetName
)
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, i - 1))
// Reset for next chunk
currentChunkRows = []
currentTokenEstimate = 0
chunkStartRow = i
}
currentChunkRows.push(row)
currentTokenEstimate += rowTokens
}
// Add remaining rows as final chunk
if (currentChunkRows.length > 0) {
const chunkContent = StructuredDataChunker.formatChunk(
headerLine,
currentChunkRows,
options.sheetName
)
chunks.push(StructuredDataChunker.createChunk(chunkContent, chunkStartRow, lines.length - 1))
}
console.log(`Created ${chunks.length} chunks from ${lines.length} rows of structured data`)
return chunks
}
/**
* Format a chunk with headers and context
*/
private static formatChunk(headerLine: string, rows: string[], sheetName?: string): string {
let content = ''
// Add sheet name context if available
if (sheetName) {
content += `=== ${sheetName} ===\n\n`
}
// Add headers for context
if (STRUCTURED_CHUNKING_CONFIG.INCLUDE_HEADERS_IN_EACH_CHUNK) {
content += `Headers: ${headerLine}\n`
content += `${'-'.repeat(Math.min(80, headerLine.length))}\n`
}
// Add data rows
content += rows.join('\n')
// Add row count for context
content += `\n\n[${rows.length} rows of data]`
return content
}
/**
* Create a chunk object with actual row indices
*/
private static createChunk(content: string, startRow: number, endRow: number): Chunk {
const tokenCount = StructuredDataChunker.estimateTokens(content)
return {
text: content,
tokenCount,
metadata: {
startIndex: startRow,
endIndex: endRow,
},
}
}
/**
* Estimate tokens in text (rough approximation)
*/
private static estimateTokens(text: string): number {
// Rough estimate: 1 token per 4 characters for English text
// For structured data with numbers, it's closer to 1 token per 3 characters
return Math.ceil(text.length / 3)
}
/**
* Estimate average tokens per row from sample
*/
private static estimateTokensPerRow(sampleRows: string[]): number {
if (sampleRows.length === 0) return 50 // default estimate
const totalTokens = sampleRows.reduce(
(sum, row) => sum + StructuredDataChunker.estimateTokens(row),
0
)
return Math.ceil(totalTokens / sampleRows.length)
}
/**
* Calculate optimal rows per chunk based on token estimates
*/
private static calculateOptimalRowsPerChunk(tokensPerRow: number): number {
const optimal = Math.floor(STRUCTURED_CHUNKING_CONFIG.TARGET_CHUNK_SIZE / tokensPerRow)
return Math.min(
Math.max(optimal, STRUCTURED_CHUNKING_CONFIG.MIN_ROWS_PER_CHUNK),
STRUCTURED_CHUNKING_CONFIG.MAX_ROWS_PER_CHUNK
)
}
/**
* Check if content appears to be structured data
*/
static isStructuredData(content: string, mimeType?: string): boolean {
// Check mime type first
if (mimeType) {
const structuredMimeTypes = [
'text/csv',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.ms-excel',
'text/tab-separated-values',
]
if (structuredMimeTypes.includes(mimeType)) {
return true
}
}
// Check content structure
const lines = content.split('\n').slice(0, 10) // Check first 10 lines
if (lines.length < 2) return false
// Check for consistent delimiters (comma, tab, pipe)
const delimiters = [',', '\t', '|']
for (const delimiter of delimiters) {
const counts = lines.map(
(line) => (line.match(new RegExp(`\\${delimiter}`, 'g')) || []).length
)
const avgCount = counts.reduce((a, b) => a + b, 0) / counts.length
// If most lines have similar delimiter counts, it's likely structured
if (avgCount > 2 && counts.every((c) => Math.abs(c - avgCount) <= 2)) {
return true
}
}
return false
}
}
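// Illustrative usage sketch (editor's example, not part of this diff). The import path
// mirrors the chunkers index above and is an assumption.
import { StructuredDataChunker } from '@/lib/knowledge/documents/chunkers'

async function chunkCsvExample(csvText: string) {
  if (!StructuredDataChunker.isStructuredData(csvText, 'text/csv')) {
    return []
  }
  const chunks = await StructuredDataChunker.chunkStructuredData(csvText, {
    sheetName: 'Sheet1', // optional context label prepended to each chunk
  })
  // metadata.startIndex / endIndex record the first and last source row in each chunk.
  return chunks.map((c) => ({
    rows: [c.metadata.startIndex, c.metadata.endIndex],
    tokens: c.tokenCount,
  }))
}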

View File

@@ -1,28 +1,4 @@
export interface ChunkMetadata {
startIndex: number
endIndex: number
tokenCount: number
}
export interface TextChunk {
text: string
metadata: ChunkMetadata
}
export interface ChunkerOptions {
chunkSize?: number
minChunkSize?: number
overlap?: number
}
export interface Chunk {
text: string
tokenCount: number
metadata: {
startIndex: number
endIndex: number
}
}
import type { Chunk, ChunkerOptions } from './types'
/**
* Lightweight text chunker optimized for RAG applications

View File

@@ -0,0 +1,53 @@
export interface ChunkMetadata {
startIndex: number
endIndex: number
tokenCount: number
}
export interface TextChunk {
text: string
metadata: ChunkMetadata
}
export interface ChunkerOptions {
chunkSize?: number
minChunkSize?: number
overlap?: number
}
export interface Chunk {
text: string
tokenCount: number
metadata: {
startIndex: number
endIndex: number
}
}
export interface StructuredDataOptions {
headers?: string[]
totalRows?: number
sheetName?: string
}
export interface DocChunk {
text: string
tokenCount: number
sourceDocument: string
headerLink: string
headerText: string
headerLevel: number
embedding: number[]
embeddingModel: string
metadata: {
sourceUrl?: string
headers?: string[]
title?: string
startIndex: number
endIndex: number
}
}
export interface DocsChunkerOptions extends ChunkerOptions {
baseUrl?: string
}
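// Illustrative example of the DocChunk shape (editor's sketch, not part of this diff).
// The embedding values, model name, and header link format are placeholders.
const exampleDocChunk: DocChunk = {
  text: '## Getting Started\n\nInstall the CLI and run your first workflow...',
  tokenCount: 42,
  sourceDocument: 'getting-started.mdx',
  headerLink: 'https://docs.sim.ai/getting-started#getting-started',
  headerText: 'Getting Started',
  headerLevel: 2,
  embedding: [0.0123, -0.0456 /* ...truncated... */],
  embeddingModel: 'text-embedding-3-small',
  metadata: {
    title: 'Getting Started',
    startIndex: 0,
    endIndex: 120,
  },
}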

View File

@@ -0,0 +1,37 @@
import { Loader2, MinusCircle, XCircle, Zap } from 'lucide-react'
import {
BaseClientTool,
type BaseClientToolMetadata,
ClientToolCallState,
} from '@/lib/copilot/tools/client/base-tool'
export class GetOperationsExamplesClientTool extends BaseClientTool {
static readonly id = 'get_operations_examples'
constructor(toolCallId: string) {
super(toolCallId, GetOperationsExamplesClientTool.id, GetOperationsExamplesClientTool.metadata)
}
static readonly metadata: BaseClientToolMetadata = {
displayNames: {
[ClientToolCallState.generating]: { text: 'Designing workflow component', icon: Loader2 },
[ClientToolCallState.pending]: { text: 'Designing workflow component', icon: Loader2 },
[ClientToolCallState.executing]: { text: 'Designing workflow component', icon: Loader2 },
[ClientToolCallState.success]: { text: 'Designed workflow component', icon: Zap },
[ClientToolCallState.error]: { text: 'Failed to design workflow component', icon: XCircle },
[ClientToolCallState.aborted]: {
text: 'Aborted designing workflow component',
icon: MinusCircle,
},
[ClientToolCallState.rejected]: {
text: 'Skipped designing workflow component',
icon: MinusCircle,
},
},
interrupt: undefined,
}
async execute(): Promise<void> {
return
}
}

View File

@@ -20,11 +20,11 @@ export class PlanClientTool extends BaseClientTool {
static readonly metadata: BaseClientToolMetadata = {
displayNames: {
[ClientToolCallState.generating]: { text: 'Crafting an approach', icon: Loader2 },
[ClientToolCallState.pending]: { text: 'Crafting an approach', icon: Loader2 },
[ClientToolCallState.executing]: { text: 'Crafting an approach', icon: Loader2 },
[ClientToolCallState.success]: { text: 'Crafted an approach', icon: ListTodo },
[ClientToolCallState.error]: { text: 'Failed to craft an approach', icon: X },
[ClientToolCallState.generating]: { text: 'Planning', icon: Loader2 },
[ClientToolCallState.pending]: { text: 'Planning', icon: Loader2 },
[ClientToolCallState.executing]: { text: 'Planning an approach', icon: Loader2 },
[ClientToolCallState.success]: { text: 'Finished planning', icon: ListTodo },
[ClientToolCallState.error]: { text: 'Failed to plan', icon: X },
[ClientToolCallState.aborted]: { text: 'Aborted planning', icon: XCircle },
[ClientToolCallState.rejected]: { text: 'Skipped planning approach', icon: XCircle },
},

View File

@@ -4,6 +4,8 @@ import { workflow as workflowTable } from '@sim/db/schema'
import { eq } from 'drizzle-orm'
import type { BaseServerTool } from '@/lib/copilot/tools/server/base-tool'
import { createLogger } from '@/lib/logs/console/logger'
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
import { extractAndPersistCustomTools } from '@/lib/workflows/custom-tools-persistence'
import { loadWorkflowFromNormalizedTables } from '@/lib/workflows/db-helpers'
import { validateWorkflowState } from '@/lib/workflows/validation'
import { getAllBlocks } from '@/blocks/registry'
@@ -22,12 +24,123 @@ interface EditWorkflowParams {
currentUserWorkflow?: string
}
/**
* Topologically sort insert operations to ensure parents are created before children
* Returns sorted array where parent inserts always come before child inserts
*/
function topologicalSortInserts(
inserts: EditWorkflowOperation[],
adds: EditWorkflowOperation[]
): EditWorkflowOperation[] {
if (inserts.length === 0) return []
// Build a map of blockId -> operation for quick lookup
const insertMap = new Map<string, EditWorkflowOperation>()
inserts.forEach((op) => insertMap.set(op.block_id, op))
// Build a set of blocks being added (potential parents)
const addedBlocks = new Set(adds.map((op) => op.block_id))
// Build dependency graph: block -> blocks that depend on it
const dependents = new Map<string, Set<string>>()
const dependencies = new Map<string, Set<string>>()
inserts.forEach((op) => {
const blockId = op.block_id
const parentId = op.params?.subflowId
dependencies.set(blockId, new Set())
if (parentId) {
// Track dependency if parent is being inserted OR being added
// This ensures children wait for parents regardless of operation type
const parentBeingCreated = insertMap.has(parentId) || addedBlocks.has(parentId)
if (parentBeingCreated) {
// Only add dependency if parent is also being inserted (not added)
// Because adds run before inserts, added parents are already created
if (insertMap.has(parentId)) {
dependencies.get(blockId)!.add(parentId)
if (!dependents.has(parentId)) {
dependents.set(parentId, new Set())
}
dependents.get(parentId)!.add(blockId)
}
}
}
})
// Topological sort using Kahn's algorithm
const sorted: EditWorkflowOperation[] = []
const queue: string[] = []
// Start with nodes that have no dependencies (or depend only on added blocks)
inserts.forEach((op) => {
const deps = dependencies.get(op.block_id)!
if (deps.size === 0) {
queue.push(op.block_id)
}
})
while (queue.length > 0) {
const blockId = queue.shift()!
const op = insertMap.get(blockId)
if (op) {
sorted.push(op)
}
// Remove this node from dependencies of others
const children = dependents.get(blockId)
if (children) {
children.forEach((childId) => {
const childDeps = dependencies.get(childId)!
childDeps.delete(blockId)
if (childDeps.size === 0) {
queue.push(childId)
}
})
}
}
// If sorted length doesn't match input, there's a cycle (shouldn't happen with valid operations)
// Just append remaining operations
if (sorted.length < inserts.length) {
inserts.forEach((op) => {
if (!sorted.includes(op)) {
sorted.push(op)
}
})
}
return sorted
}
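// Illustrative sketch (editor's example, not part of this diff). The operation shape
// below is inferred from how EditWorkflowOperation is used in this file. A child
// inserted into a loop that is itself being inserted must come after its parent; a
// parent that is merely being added needs no dependency, since adds run before inserts.
const exampleAdds: EditWorkflowOperation[] = [
  { operation_type: 'add', block_id: 'parallel-1', params: { type: 'parallel', name: 'Parallel 1' } },
]
const exampleInserts: EditWorkflowOperation[] = [
  { operation_type: 'insert_into_subflow', block_id: 'child-1', params: { subflowId: 'loop-1', type: 'agent', name: 'Agent 1' } },
  { operation_type: 'insert_into_subflow', block_id: 'loop-1', params: { subflowId: 'parallel-1', type: 'loop', name: 'Loop 1' } },
]
// Expected order: 'loop-1' first (its parent is in the adds batch), then 'child-1'.
const orderedInserts = topologicalSortInserts(exampleInserts, exampleAdds)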
/**
* Helper to create a block state from operation params
*/
function createBlockFromParams(blockId: string, params: any, parentId?: string): any {
const blockConfig = getAllBlocks().find((b) => b.type === params.type)
// Determine outputs based on trigger mode
const triggerMode = params.triggerMode || false
let outputs: Record<string, any>
if (params.outputs) {
outputs = params.outputs
} else if (blockConfig) {
const subBlocks: Record<string, any> = {}
if (params.inputs) {
Object.entries(params.inputs).forEach(([key, value]) => {
subBlocks[key] = { id: key, type: 'short-input', value: value }
})
}
outputs = triggerMode
? getBlockOutputs(params.type, subBlocks, triggerMode)
: resolveOutputType(blockConfig.outputs)
} else {
outputs = {}
}
const blockState: any = {
id: blockId,
type: params.type,
@@ -38,19 +151,39 @@ function createBlockFromParams(blockId: string, params: any, parentId?: string):
isWide: false,
advancedMode: params.advancedMode || false,
height: 0,
triggerMode: params.triggerMode || false,
triggerMode: triggerMode,
subBlocks: {},
outputs: params.outputs || (blockConfig ? resolveOutputType(blockConfig.outputs) : {}),
outputs: outputs,
data: parentId ? { parentId, extent: 'parent' as const } : {},
}
// Add inputs as subBlocks
if (params.inputs) {
Object.entries(params.inputs).forEach(([key, value]) => {
let sanitizedValue = value
// Special handling for inputFormat - ensure it's an array
if (key === 'inputFormat' && value !== null && value !== undefined) {
if (!Array.isArray(value)) {
// Invalid format, default to empty array
sanitizedValue = []
}
}
// Special handling for tools - normalize to restore sanitized fields
if (key === 'tools' && Array.isArray(value)) {
sanitizedValue = normalizeTools(value)
}
// Special handling for responseFormat - normalize to ensure consistent format
if (key === 'responseFormat' && value) {
sanitizedValue = normalizeResponseFormat(value)
}
blockState.subBlocks[key] = {
id: key,
type: 'short-input',
value: value,
value: sanitizedValue,
}
})
}
@@ -71,6 +204,90 @@ function createBlockFromParams(blockId: string, params: any, parentId?: string):
return blockState
}
/**
* Normalize tools array by adding back fields that were sanitized for training
*/
function normalizeTools(tools: any[]): any[] {
return tools.map((tool) => {
if (tool.type === 'custom-tool') {
// Reconstruct sanitized custom tool fields
const normalized: any = {
...tool,
params: tool.params || {},
isExpanded: tool.isExpanded ?? true,
}
// Ensure schema has proper structure
if (normalized.schema?.function) {
normalized.schema = {
type: 'function',
function: {
name: tool.title, // Derive name from title
description: normalized.schema.function.description,
parameters: normalized.schema.function.parameters,
},
}
}
return normalized
}
// For other tool types, just ensure isExpanded exists
return {
...tool,
isExpanded: tool.isExpanded ?? true,
}
})
}
/**
* Normalize responseFormat to ensure consistent storage
* Handles both string (JSON) and object formats
* Returns pretty-printed JSON for better UI readability
*/
function normalizeResponseFormat(value: any): string {
try {
let obj = value
// If it's already a string, parse it first
if (typeof value === 'string') {
const trimmed = value.trim()
if (!trimmed) {
return ''
}
obj = JSON.parse(trimmed)
}
// If it's an object, stringify it with consistent formatting
if (obj && typeof obj === 'object') {
// Sort keys recursively for consistent comparison
const sortKeys = (item: any): any => {
if (Array.isArray(item)) {
return item.map(sortKeys)
}
if (item !== null && typeof item === 'object') {
return Object.keys(item)
.sort()
.reduce((result: any, key: string) => {
result[key] = sortKeys(item[key])
return result
}, {})
}
return item
}
// Return pretty-printed with 2-space indentation for UI readability
// The sanitizer will normalize it to minified format for comparison
return JSON.stringify(sortKeys(obj), null, 2)
}
return String(value)
} catch (error) {
// If parsing fails, return the original value as string
return String(value)
}
}
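// Illustrative sketch (editor's example, not part of this diff): a JSON string and an
// equivalent object normalize to the same key-sorted, pretty-printed JSON.
const normalizedFromString = normalizeResponseFormat('{"schema":{"type":"object"},"name":"result"}')
const normalizedFromObject = normalizeResponseFormat({ name: 'result', schema: { type: 'object' } })
// Both values equal:
// {
//   "name": "result",
//   "schema": {
//     "type": "object"
//   }
// }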
/**
* Helper to add connections as edges for a block
*/
@@ -106,13 +323,13 @@ function applyOperationsToWorkflowState(
// Log initial state
const logger = createLogger('EditWorkflowServerTool')
logger.debug('Initial blocks before operations:', {
blockCount: Object.keys(modifiedState.blocks || {}).length,
blockTypes: Object.entries(modifiedState.blocks || {}).map(([id, block]: [string, any]) => ({
id,
type: block.type,
hasType: block.type !== undefined,
})),
logger.info('Applying operations to workflow:', {
totalOperations: operations.length,
operationTypes: operations.reduce((acc: any, op) => {
acc[op.operation_type] = (acc[op.operation_type] || 0) + 1
return acc
}, {}),
initialBlockCount: Object.keys(modifiedState.blocks || {}).length,
})
// Reorder operations: delete -> extract -> add -> insert -> edit
@@ -121,17 +338,34 @@ function applyOperationsToWorkflowState(
const adds = operations.filter((op) => op.operation_type === 'add')
const inserts = operations.filter((op) => op.operation_type === 'insert_into_subflow')
const edits = operations.filter((op) => op.operation_type === 'edit')
// Sort insert operations to ensure parents are inserted before children
// This handles cases where a loop/parallel is being added along with its children
const sortedInserts = topologicalSortInserts(inserts, adds)
const orderedOperations: EditWorkflowOperation[] = [
...deletes,
...extracts,
...adds,
...inserts,
...sortedInserts,
...edits,
]
logger.info('Operations after reordering:', {
order: orderedOperations.map(
(op) =>
`${op.operation_type}:${op.block_id}${op.params?.subflowId ? `(parent:${op.params.subflowId})` : ''}`
),
})
for (const operation of orderedOperations) {
const { operation_type, block_id, params } = operation
logger.debug(`Executing operation: ${operation_type} for block ${block_id}`, {
params: params ? Object.keys(params) : [],
currentBlockCount: Object.keys(modifiedState.blocks).length,
})
switch (operation_type) {
case 'delete': {
if (modifiedState.blocks[block_id]) {
@@ -175,14 +409,34 @@ function applyOperationsToWorkflowState(
if (params?.inputs) {
if (!block.subBlocks) block.subBlocks = {}
Object.entries(params.inputs).forEach(([key, value]) => {
let sanitizedValue = value
// Special handling for inputFormat - ensure it's an array
if (key === 'inputFormat' && value !== null && value !== undefined) {
if (!Array.isArray(value)) {
// Invalid format, default to empty array
sanitizedValue = []
}
}
// Special handling for tools - normalize to restore sanitized fields
if (key === 'tools' && Array.isArray(value)) {
sanitizedValue = normalizeTools(value)
}
// Special handling for responseFormat - normalize to ensure consistent format
if (key === 'responseFormat' && value) {
sanitizedValue = normalizeResponseFormat(value)
}
if (!block.subBlocks[key]) {
block.subBlocks[key] = {
id: key,
type: 'short-input',
value: value,
value: sanitizedValue,
}
} else {
block.subBlocks[key].value = value
block.subBlocks[key].value = sanitizedValue
}
})
@@ -335,18 +589,8 @@ function applyOperationsToWorkflowState(
// Create new block with proper structure
const newBlock = createBlockFromParams(block_id, params)
// Handle nested nodes (for loops/parallels created from scratch)
// Set loop/parallel data on parent block BEFORE adding to blocks
if (params.nestedNodes) {
Object.entries(params.nestedNodes).forEach(([childId, childBlock]: [string, any]) => {
const childBlockState = createBlockFromParams(childId, childBlock, block_id)
modifiedState.blocks[childId] = childBlockState
if (childBlock.connections) {
addConnectionsAsEdges(modifiedState, childId, childBlock.connections)
}
})
// Set loop/parallel data on parent block
if (params.type === 'loop') {
newBlock.data = {
...newBlock.data,
@@ -364,8 +608,22 @@ function applyOperationsToWorkflowState(
}
}
// Add parent block FIRST before adding children
// This ensures children can reference valid parentId
modifiedState.blocks[block_id] = newBlock
// Handle nested nodes (for loops/parallels created from scratch)
if (params.nestedNodes) {
Object.entries(params.nestedNodes).forEach(([childId, childBlock]: [string, any]) => {
const childBlockState = createBlockFromParams(childId, childBlock, block_id)
modifiedState.blocks[childId] = childBlockState
if (childBlock.connections) {
addConnectionsAsEdges(modifiedState, childId, childBlock.connections)
}
})
}
// Add connections as edges
if (params.connections) {
addConnectionsAsEdges(modifiedState, block_id, params.connections)
@@ -377,15 +635,28 @@ function applyOperationsToWorkflowState(
case 'insert_into_subflow': {
const subflowId = params?.subflowId
if (!subflowId || !params?.type || !params?.name) {
logger.warn('Missing required params for insert_into_subflow', { block_id, params })
logger.error('Missing required params for insert_into_subflow', { block_id, params })
break
}
const subflowBlock = modifiedState.blocks[subflowId]
if (!subflowBlock || (subflowBlock.type !== 'loop' && subflowBlock.type !== 'parallel')) {
logger.warn('Subflow block not found or invalid type', {
if (!subflowBlock) {
logger.error('Subflow block not found - parent must be created first', {
subflowId,
type: subflowBlock?.type,
block_id,
existingBlocks: Object.keys(modifiedState.blocks),
operationType: 'insert_into_subflow',
})
// This is a critical error - the operation ordering is wrong
// Skip this operation but don't break the entire workflow
break
}
if (subflowBlock.type !== 'loop' && subflowBlock.type !== 'parallel') {
logger.error('Subflow block has invalid type', {
subflowId,
type: subflowBlock.type,
block_id,
})
break
}
@@ -407,10 +678,32 @@ function applyOperationsToWorkflowState(
// Update inputs if provided
if (params.inputs) {
Object.entries(params.inputs).forEach(([key, value]) => {
let sanitizedValue = value
if (key === 'inputFormat' && value !== null && value !== undefined) {
if (!Array.isArray(value)) {
sanitizedValue = []
}
}
// Special handling for tools - normalize to restore sanitized fields
if (key === 'tools' && Array.isArray(value)) {
sanitizedValue = normalizeTools(value)
}
// Special handling for responseFormat - normalize to ensure consistent format
if (key === 'responseFormat' && value) {
sanitizedValue = normalizeResponseFormat(value)
}
if (!existingBlock.subBlocks[key]) {
existingBlock.subBlocks[key] = { id: key, type: 'short-input', value }
existingBlock.subBlocks[key] = {
id: key,
type: 'short-input',
value: sanitizedValue,
}
} else {
existingBlock.subBlocks[key].value = value
existingBlock.subBlocks[key].value = sanitizedValue
}
})
}
@@ -553,7 +846,7 @@ async function getCurrentWorkflowStateFromDb(
export const editWorkflowServerTool: BaseServerTool<EditWorkflowParams, any> = {
name: 'edit_workflow',
async execute(params: EditWorkflowParams): Promise<any> {
async execute(params: EditWorkflowParams, context?: { userId: string }): Promise<any> {
const logger = createLogger('EditWorkflowServerTool')
const { operations, workflowId, currentUserWorkflow } = params
if (!operations || operations.length === 0) throw new Error('operations are required')
@@ -599,6 +892,29 @@ export const editWorkflowServerTool: BaseServerTool<EditWorkflowParams, any> = {
})
}
// Extract and persist custom tools to database
if (context?.userId) {
try {
const finalWorkflowState = validation.sanitizedState || modifiedWorkflowState
const { saved, errors } = await extractAndPersistCustomTools(
finalWorkflowState,
context.userId
)
if (saved > 0) {
logger.info(`Persisted ${saved} custom tool(s) to database`, { workflowId })
}
if (errors.length > 0) {
logger.warn('Some custom tools failed to persist', { errors, workflowId })
}
} catch (error) {
logger.error('Failed to persist custom tools', { error, workflowId })
}
} else {
logger.warn('No userId in context - skipping custom tools persistence', { workflowId })
}
logger.info('edit_workflow successfully applied operations', {
operationCount: operations.length,
blocksCount: Object.keys(modifiedWorkflowState.blocks).length,

View File

@@ -114,7 +114,8 @@ export async function generateEmbeddings(
logger.info(`Using ${config.useAzure ? 'Azure OpenAI' : 'OpenAI'} for embeddings generation`)
const batchSize = 100
// Reduced batch size to prevent API timeouts and improve reliability
const batchSize = 50 // Reduced from 100 to prevent issues with large documents
const allEmbeddings: number[][] = []
for (let i = 0; i < texts.length; i += batchSize) {
@@ -125,6 +126,11 @@ export async function generateEmbeddings(
logger.info(
`Generated embeddings for batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(texts.length / batchSize)}`
)
// Add small delay between batches to avoid rate limiting
if (i + batchSize < texts.length) {
await new Promise((resolve) => setTimeout(resolve, 100))
}
}
return allEmbeddings

View File

@@ -17,8 +17,6 @@ export const env = createEnv({
server: {
// Core Database & Authentication
DATABASE_URL: z.string().url(), // Primary database connection string
DATABASE_SSL: z.enum(['disable', 'prefer', 'require', 'verify-ca', 'verify-full']).optional(), // PostgreSQL SSL mode
DATABASE_SSL_CA: z.string().optional(), // Base64-encoded CA certificate for SSL verification
BETTER_AUTH_URL: z.string().url(), // Base URL for Better Auth service
BETTER_AUTH_SECRET: z.string().min(32), // Secret key for Better Auth JWT signing
DISABLE_REGISTRATION: z.boolean().optional(), // Flag to disable new user registration
@@ -36,7 +34,6 @@ export const env = createEnv({
AGENT_INDEXER_URL: z.string().url().optional(), // URL for agent training data indexer
AGENT_INDEXER_API_KEY: z.string().min(1).optional(), // API key for agent indexer authentication
// Database & Storage
REDIS_URL: z.string().url().optional(), // Redis connection string for caching/sessions
@@ -92,7 +89,6 @@ export const env = createEnv({
TELEMETRY_ENDPOINT: z.string().url().optional(), // Custom telemetry/analytics endpoint
COST_MULTIPLIER: z.number().optional(), // Multiplier for cost calculations
LOG_LEVEL: z.enum(['DEBUG', 'INFO', 'WARN', 'ERROR']).optional(), // Minimum log level to display (defaults to ERROR in production, DEBUG in development)
POSTHOG_ENABLED: z.boolean().optional(), // Enable PostHog analytics and session recording
// External Services
BROWSERBASE_API_KEY: z.string().min(1).optional(), // Browserbase API key for browser automation

View File

@@ -1,108 +1,154 @@
import { existsSync, readFileSync } from 'fs'
import * as Papa from 'papaparse'
import { createReadStream, existsSync } from 'fs'
import { Readable } from 'stream'
import { type Options, parse } from 'csv-parse'
import type { FileParseResult, FileParser } from '@/lib/file-parsers/types'
import { sanitizeTextForUTF8 } from '@/lib/file-parsers/utils'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('CsvParser')
const PARSE_OPTIONS = {
header: true,
skipEmptyLines: true,
transformHeader: (header: string) => sanitizeTextForUTF8(String(header)),
transform: (value: string) => sanitizeTextForUTF8(String(value || '')),
const CONFIG = {
MAX_PREVIEW_ROWS: 1000, // Only keep first 1000 rows for preview
MAX_SAMPLE_ROWS: 100, // Sample for metadata
MAX_ERRORS: 100, // Stop after 100 errors
STREAM_CHUNK_SIZE: 16384, // 16KB chunks for streaming
}
export class CsvParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
try {
if (!filePath) {
throw new Error('No file path provided')
}
if (!existsSync(filePath)) {
throw new Error(`File not found: ${filePath}`)
}
const fileContent = readFileSync(filePath, 'utf8')
const parseResult = Papa.parse(fileContent, PARSE_OPTIONS)
if (parseResult.errors && parseResult.errors.length > 0) {
const errorMessages = parseResult.errors.map((err) => err.message).join(', ')
logger.error('CSV parsing errors:', parseResult.errors)
throw new Error(`Failed to parse CSV file: ${errorMessages}`)
}
const results = parseResult.data as Record<string, any>[]
const headers = parseResult.meta.fields || []
let content = ''
if (headers.length > 0) {
const cleanHeaders = headers.map((h) => sanitizeTextForUTF8(String(h)))
content += `${cleanHeaders.join(', ')}\n`
}
results.forEach((row) => {
const cleanValues = Object.values(row).map((v) => sanitizeTextForUTF8(String(v || '')))
content += `${cleanValues.join(', ')}\n`
})
return {
content: sanitizeTextForUTF8(content),
metadata: {
rowCount: results.length,
headers: headers,
rawData: results,
},
}
} catch (error) {
logger.error('CSV general error:', error)
throw new Error(`Failed to process CSV file: ${(error as Error).message}`)
if (!filePath) {
throw new Error('No file path provided')
}
if (!existsSync(filePath)) {
throw new Error(`File not found: ${filePath}`)
}
const stream = createReadStream(filePath, {
highWaterMark: CONFIG.STREAM_CHUNK_SIZE,
})
return this.parseStream(stream)
}
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
try {
logger.info('Parsing buffer, size:', buffer.length)
const bufferSize = buffer.length
logger.info(
`Parsing CSV buffer, size: ${bufferSize} bytes (${(bufferSize / 1024 / 1024).toFixed(2)} MB)`
)
const fileContent = buffer.toString('utf8')
const stream = Readable.from(buffer, {
highWaterMark: CONFIG.STREAM_CHUNK_SIZE,
})
const parseResult = Papa.parse(fileContent, PARSE_OPTIONS)
return this.parseStream(stream)
}
if (parseResult.errors && parseResult.errors.length > 0) {
const errorMessages = parseResult.errors.map((err) => err.message).join(', ')
logger.error('CSV parsing errors:', parseResult.errors)
throw new Error(`Failed to parse CSV buffer: ${errorMessages}`)
private parseStream(inputStream: NodeJS.ReadableStream): Promise<FileParseResult> {
return new Promise((resolve, reject) => {
let rowCount = 0
let errorCount = 0
let headers: string[] = []
let processedContent = ''
const sampledRows: any[] = []
const errors: string[] = []
let firstRowProcessed = false
let aborted = false
const parserOptions: Options = {
columns: true, // Use first row as headers
skip_empty_lines: true, // Skip empty lines
trim: true, // Trim whitespace
relax_column_count: true, // Allow variable column counts
relax_quotes: true, // Be lenient with quotes
skip_records_with_error: true, // Skip bad records
raw: false,
cast: false,
}
const parser = parse(parserOptions)
parser.on('readable', () => {
let record
while ((record = parser.read()) !== null && !aborted) {
rowCount++
if (!firstRowProcessed && record) {
headers = Object.keys(record).map((h) => sanitizeTextForUTF8(String(h)))
processedContent = `${headers.join(', ')}\n`
firstRowProcessed = true
}
if (rowCount <= CONFIG.MAX_PREVIEW_ROWS) {
try {
const cleanValues = Object.values(record).map((v: any) =>
sanitizeTextForUTF8(String(v || ''))
)
processedContent += `${cleanValues.join(', ')}\n`
if (rowCount <= CONFIG.MAX_SAMPLE_ROWS) {
sampledRows.push(record)
}
} catch (err) {
logger.warn(`Error processing row ${rowCount}:`, err)
}
}
if (rowCount % 10000 === 0) {
logger.info(`Processed ${rowCount} rows...`)
}
}
})
parser.on('skip', (err: any) => {
errorCount++
if (errorCount <= 5) {
const errorMsg = `Row ${err.lines || rowCount}: ${err.message || 'Unknown error'}`
errors.push(errorMsg)
logger.warn('CSV skip:', errorMsg)
}
if (errorCount >= CONFIG.MAX_ERRORS) {
aborted = true
parser.destroy()
reject(new Error(`Too many errors (${errorCount}). File may be corrupted.`))
}
})
parser.on('error', (err: Error) => {
logger.error('CSV parser error:', err)
reject(new Error(`CSV parsing failed: ${err.message}`))
})
parser.on('end', () => {
if (!aborted) {
if (rowCount > CONFIG.MAX_PREVIEW_ROWS) {
processedContent += `\n[... ${rowCount.toLocaleString()} total rows, showing first ${CONFIG.MAX_PREVIEW_ROWS} ...]\n`
}
logger.info(`CSV parsing complete: ${rowCount} rows, ${errorCount} errors`)
resolve({
content: sanitizeTextForUTF8(processedContent),
metadata: {
rowCount,
headers,
errorCount,
errors: errors.slice(0, 10),
truncated: rowCount > CONFIG.MAX_PREVIEW_ROWS,
sampledData: sampledRows,
},
})
}
})
inputStream.on('error', (err) => {
logger.error('Input stream error:', err)
parser.destroy()
reject(new Error(`Stream error: ${err.message}`))
})
inputStream.pipe(parser)
})
}
}
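A minimal usage sketch for the streaming parser (the wrapper function below is an assumption for illustration; only CsvParser and its methods come from this change):
import { CsvParser } from '@/lib/file-parsers/csv-parser'
// Sketch: parseBuffer wraps the Buffer in a Readable and hands it to parseStream,
// so large CSVs are never fully materialized as parsed rows in memory.
async function previewCsv(buffer: Buffer) {
const parser = new CsvParser()
const result = await parser.parseBuffer(buffer)
// rowCount, headers and errorCount come from the single streaming pass above
console.log(result.metadata?.rowCount, result.metadata?.headers)
return result.content // header row plus up to MAX_PREVIEW_ROWS preview rows
}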

View File

@@ -27,8 +27,9 @@ function getParserInstances(): Record<string, FileParser> {
try {
const { CsvParser } = require('@/lib/file-parsers/csv-parser')
parserInstances.csv = new CsvParser()
logger.info('Loaded streaming CSV parser with csv-parse library')
} catch (error) {
logger.error('Failed to load CSV parser:', error)
logger.error('Failed to load streaming CSV parser:', error)
}
try {
@@ -63,6 +64,7 @@ function getParserInstances(): Record<string, FileParser> {
const { XlsxParser } = require('@/lib/file-parsers/xlsx-parser')
parserInstances.xlsx = new XlsxParser()
parserInstances.xls = new XlsxParser()
logger.info('Loaded XLSX parser')
} catch (error) {
logger.error('Failed to load XLSX parser:', error)
}
@@ -82,6 +84,32 @@ function getParserInstances(): Record<string, FileParser> {
} catch (error) {
logger.error('Failed to load HTML parser:', error)
}
try {
const { parseJSON, parseJSONBuffer } = require('@/lib/file-parsers/json-parser')
parserInstances.json = {
parseFile: parseJSON,
parseBuffer: parseJSONBuffer,
}
logger.info('Loaded JSON parser')
} catch (error) {
logger.error('Failed to load JSON parser:', error)
}
try {
const { parseYAML, parseYAMLBuffer } = require('@/lib/file-parsers/yaml-parser')
parserInstances.yaml = {
parseFile: parseYAML,
parseBuffer: parseYAMLBuffer,
}
parserInstances.yml = {
parseFile: parseYAML,
parseBuffer: parseYAMLBuffer,
}
logger.info('Loaded YAML parser')
} catch (error) {
logger.error('Failed to load YAML parser:', error)
}
} catch (error) {
logger.error('Error loading file parsers:', error)
}

View File

@@ -0,0 +1,74 @@
import type { FileParseResult } from './types'
/**
* Parse JSON files
*/
export async function parseJSON(filePath: string): Promise<FileParseResult> {
const fs = await import('fs/promises')
const content = await fs.readFile(filePath, 'utf-8')
try {
// Parse to validate JSON
const jsonData = JSON.parse(content)
// Return pretty-printed JSON for better readability
const formattedContent = JSON.stringify(jsonData, null, 2)
// Extract metadata about the JSON structure
const metadata = {
type: 'json',
isArray: Array.isArray(jsonData),
keys: Array.isArray(jsonData) ? [] : Object.keys(jsonData),
itemCount: Array.isArray(jsonData) ? jsonData.length : undefined,
depth: getJsonDepth(jsonData),
}
return {
content: formattedContent,
metadata,
}
} catch (error) {
throw new Error(`Invalid JSON: ${error instanceof Error ? error.message : 'Unknown error'}`)
}
}
/**
* Parse JSON from buffer
*/
export async function parseJSONBuffer(buffer: Buffer): Promise<FileParseResult> {
const content = buffer.toString('utf-8')
try {
const jsonData = JSON.parse(content)
const formattedContent = JSON.stringify(jsonData, null, 2)
const metadata = {
type: 'json',
isArray: Array.isArray(jsonData),
keys: Array.isArray(jsonData) ? [] : Object.keys(jsonData),
itemCount: Array.isArray(jsonData) ? jsonData.length : undefined,
depth: getJsonDepth(jsonData),
}
return {
content: formattedContent,
metadata,
}
} catch (error) {
throw new Error(`Invalid JSON: ${error instanceof Error ? error.message : 'Unknown error'}`)
}
}
/**
* Calculate the depth of a JSON object
*/
function getJsonDepth(obj: any): number {
if (obj === null || typeof obj !== 'object') return 0
if (Array.isArray(obj)) {
return obj.length > 0 ? 1 + Math.max(...obj.map(getJsonDepth)) : 1
}
const depths = Object.values(obj).map(getJsonDepth)
return depths.length > 0 ? 1 + Math.max(...depths) : 1
}
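A quick illustration of the depth metric (getJsonDepth is module-private, so this is a behavioural sketch rather than a public API):
// Primitives score 0, a flat object or array scores 1, and each nesting level adds 1.
const samples: unknown[] = [42, { a: 1 }, { a: { b: [1, 2] } }]
for (const sample of samples) {
console.log(JSON.stringify(sample), getJsonDepth(sample)) // 0, 1, 3
}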

View File

@@ -6,6 +6,15 @@ import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('XlsxParser')
// Configuration for handling large XLSX files
const CONFIG = {
MAX_PREVIEW_ROWS: 1000, // Only keep first 1000 rows for preview
MAX_SAMPLE_ROWS: 100, // Sample for metadata
ROWS_PER_CHUNK: 50, // Aggregate 50 rows per chunk to reduce chunk count
MAX_CELL_LENGTH: 1000, // Truncate very long cell values
MAX_CONTENT_SIZE: 10 * 1024 * 1024, // 10MB max content size
}
export class XlsxParser implements FileParser {
async parseFile(filePath: string): Promise<FileParseResult> {
try {
@@ -19,7 +28,12 @@ export class XlsxParser implements FileParser {
logger.info(`Parsing XLSX file: ${filePath}`)
const workbook = XLSX.readFile(filePath)
// Read with streaming option for large files
const workbook = XLSX.readFile(filePath, {
dense: true, // Use dense mode for better memory efficiency
sheetStubs: false, // Don't create stub cells
})
return this.processWorkbook(workbook)
} catch (error) {
logger.error('XLSX file parsing error:', error)
@@ -29,13 +43,21 @@ export class XlsxParser implements FileParser {
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
try {
logger.info('Parsing XLSX buffer, size:', buffer.length)
const bufferSize = buffer.length
logger.info(
`Parsing XLSX buffer, size: ${bufferSize} bytes (${(bufferSize / 1024 / 1024).toFixed(2)} MB)`
)
if (!buffer || buffer.length === 0) {
throw new Error('Empty buffer provided')
}
const workbook = XLSX.read(buffer, { type: 'buffer' })
const workbook = XLSX.read(buffer, {
type: 'buffer',
dense: true, // Use dense mode for better memory efficiency
sheetStubs: false, // Don't create stub cells
})
return this.processWorkbook(workbook)
} catch (error) {
logger.error('XLSX buffer parsing error:', error)
@@ -45,44 +67,111 @@ export class XlsxParser implements FileParser {
private processWorkbook(workbook: XLSX.WorkBook): FileParseResult {
const sheetNames = workbook.SheetNames
const sheets: Record<string, any[]> = {}
let content = ''
let totalRows = 0
let truncated = false
let contentSize = 0
const sampledData: any[] = []
for (const sheetName of sheetNames) {
const worksheet = workbook.Sheets[sheetName]
const sheetData = XLSX.utils.sheet_to_json(worksheet, { header: 1 })
sheets[sheetName] = sheetData
totalRows += sheetData.length
// Get sheet dimensions
const range = XLSX.utils.decode_range(worksheet['!ref'] || 'A1')
const rowCount = range.e.r - range.s.r + 1
logger.info(`Processing sheet: ${sheetName} with ${rowCount} rows`)
// Convert to JSON with header row
const sheetData = XLSX.utils.sheet_to_json(worksheet, {
header: 1,
defval: '', // Default value for empty cells
blankrows: false, // Skip blank rows
})
const actualRowCount = sheetData.length
totalRows += actualRowCount
// Store limited sample for metadata
if (sampledData.length < CONFIG.MAX_SAMPLE_ROWS) {
const sampleSize = Math.min(CONFIG.MAX_SAMPLE_ROWS - sampledData.length, actualRowCount)
sampledData.push(...sheetData.slice(0, sampleSize))
}
// Only process limited rows for preview
const rowsToProcess = Math.min(actualRowCount, CONFIG.MAX_PREVIEW_ROWS)
const cleanSheetName = sanitizeTextForUTF8(sheetName)
content += `Sheet: ${cleanSheetName}\n`
content += `=${'='.repeat(cleanSheetName.length + 6)}\n\n`
if (sheetData.length > 0) {
sheetData.forEach((row: unknown, rowIndex: number) => {
if (Array.isArray(row) && row.length > 0) {
const rowString = row
.map((cell) => {
if (cell === null || cell === undefined) {
return ''
}
return sanitizeTextForUTF8(String(cell))
})
.join('\t')
// Add sheet header
const sheetHeader = `\n=== Sheet: ${cleanSheetName} ===\n`
content += sheetHeader
contentSize += sheetHeader.length
content += `${rowString}\n`
if (actualRowCount > 0) {
// Get headers if available
const headers = sheetData[0] as any[]
if (headers && headers.length > 0) {
const headerRow = headers.map((h) => this.truncateCell(h)).join('\t')
content += `${headerRow}\n`
content += `${'-'.repeat(Math.min(80, headerRow.length))}\n`
contentSize += headerRow.length + 82
}
// Process data rows in chunks
let chunkContent = ''
let chunkRowCount = 0
for (let i = 1; i < rowsToProcess; i++) {
const row = sheetData[i] as any[]
if (row && row.length > 0) {
const rowString = row.map((cell) => this.truncateCell(cell)).join('\t')
chunkContent += `${rowString}\n`
chunkRowCount++
// Flush the accumulated rows every ROWS_PER_CHUNK and enforce the content size limit
if (chunkRowCount >= CONFIG.ROWS_PER_CHUNK) {
content += chunkContent
contentSize += chunkContent.length
chunkContent = ''
chunkRowCount = 0
// Check content size limit
if (contentSize > CONFIG.MAX_CONTENT_SIZE) {
truncated = true
break
}
}
}
})
}
// Add remaining chunk content
if (chunkContent && contentSize < CONFIG.MAX_CONTENT_SIZE) {
content += chunkContent
contentSize += chunkContent.length
}
// Add truncation notice if needed
if (actualRowCount > rowsToProcess) {
const notice = `\n[... ${actualRowCount.toLocaleString()} total rows, showing first ${rowsToProcess.toLocaleString()} ...]\n`
content += notice
truncated = true
}
} else {
content += '[Empty sheet]\n'
}
content += '\n'
// Stop processing if content is too large
if (contentSize > CONFIG.MAX_CONTENT_SIZE) {
content += '\n[... Content truncated due to size limits ...]\n'
truncated = true
break
}
}
logger.info(`XLSX parsing completed: ${sheetNames.length} sheets, ${totalRows} total rows`)
logger.info(
`XLSX parsing completed: ${sheetNames.length} sheets, ${totalRows} total rows, truncated: ${truncated}`
)
const cleanContent = sanitizeTextForUTF8(content).trim()
@@ -92,8 +181,25 @@ export class XlsxParser implements FileParser {
sheetCount: sheetNames.length,
sheetNames: sheetNames,
totalRows: totalRows,
sheets: sheets,
truncated: truncated,
sampledData: sampledData.slice(0, CONFIG.MAX_SAMPLE_ROWS),
contentSize: contentSize,
},
}
}
private truncateCell(cell: any): string {
if (cell === null || cell === undefined) {
return ''
}
let cellStr = String(cell)
// Truncate very long cells
if (cellStr.length > CONFIG.MAX_CELL_LENGTH) {
cellStr = `${cellStr.substring(0, CONFIG.MAX_CELL_LENGTH)}...`
}
return sanitizeTextForUTF8(cellStr)
}
}
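A hedged usage sketch showing where the new limits surface to callers (the wrapper function is illustrative, not part of this change):
import { XlsxParser } from '@/lib/file-parsers/xlsx-parser'
async function previewWorkbook(buffer: Buffer) {
const result = await new XlsxParser().parseBuffer(buffer)
// truncated flips to true when MAX_PREVIEW_ROWS or MAX_CONTENT_SIZE is hit for any sheet
if (result.metadata?.truncated) {
console.warn('Workbook preview truncated', result.metadata?.contentSize)
}
return result
}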

View File

@@ -0,0 +1,75 @@
import * as yaml from 'js-yaml'
import type { FileParseResult } from './types'
/**
* Parse YAML files
*/
export async function parseYAML(filePath: string): Promise<FileParseResult> {
const fs = await import('fs/promises')
const content = await fs.readFile(filePath, 'utf-8')
try {
// Parse YAML to validate and extract structure
const yamlData = yaml.load(content)
// Convert to JSON for consistent processing
const jsonContent = JSON.stringify(yamlData, null, 2)
// Extract metadata about the YAML structure
const metadata = {
type: 'yaml',
isArray: Array.isArray(yamlData),
keys: Array.isArray(yamlData) ? [] : Object.keys(yamlData || {}),
itemCount: Array.isArray(yamlData) ? yamlData.length : undefined,
depth: getYamlDepth(yamlData),
}
return {
content: jsonContent,
metadata,
}
} catch (error) {
throw new Error(`Invalid YAML: ${error instanceof Error ? error.message : 'Unknown error'}`)
}
}
/**
* Parse YAML from buffer
*/
export async function parseYAMLBuffer(buffer: Buffer): Promise<FileParseResult> {
const content = buffer.toString('utf-8')
try {
const yamlData = yaml.load(content)
const jsonContent = JSON.stringify(yamlData, null, 2)
const metadata = {
type: 'yaml',
isArray: Array.isArray(yamlData),
keys: Array.isArray(yamlData) ? [] : Object.keys(yamlData || {}),
itemCount: Array.isArray(yamlData) ? yamlData.length : undefined,
depth: getYamlDepth(yamlData),
}
return {
content: jsonContent,
metadata,
}
} catch (error) {
throw new Error(`Invalid YAML: ${error instanceof Error ? error.message : 'Unknown error'}`)
}
}
/**
* Calculate the depth of a YAML/JSON object
*/
function getYamlDepth(obj: any): number {
if (obj === null || typeof obj !== 'object') return 0
if (Array.isArray(obj)) {
return obj.length > 0 ? 1 + Math.max(...obj.map(getYamlDepth)) : 1
}
const depths = Object.values(obj).map(getYamlDepth)
return depths.length > 0 ? 1 + Math.max(...depths) : 1
}
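For reference, a sketch of what the normalization produces (the sample document is illustrative only):
async function yamlExample() {
const sample = Buffer.from('name: sim\nfeatures:\n  - kb\n  - copilot\n')
const parsed = await parseYAMLBuffer(sample)
// parsed.content is pretty-printed JSON: { "name": "sim", "features": ["kb", "copilot"] }
// parsed.metadata: { type: 'yaml', isArray: false, keys: ['name', 'features'], itemCount: undefined, depth: 2 }
return parsed
}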

View File

@@ -1,6 +1,6 @@
import { type Chunk, JsonYamlChunker, StructuredDataChunker, TextChunker } from '@/lib/chunkers'
import { env } from '@/lib/env'
import { parseBuffer, parseFile } from '@/lib/file-parsers'
import { type Chunk, TextChunker } from '@/lib/knowledge/documents/chunker'
import { retryWithExponentialBackoff } from '@/lib/knowledge/documents/utils'
import { createLogger } from '@/lib/logs/console/logger'
import {
@@ -15,8 +15,8 @@ import { mistralParserTool } from '@/tools/mistral/parser'
const logger = createLogger('DocumentProcessor')
const TIMEOUTS = {
FILE_DOWNLOAD: 60000,
MISTRAL_OCR_API: 90000,
FILE_DOWNLOAD: 180000,
MISTRAL_OCR_API: 120000,
} as const
type OCRResult = {
@@ -97,8 +97,32 @@ export async function processDocument(
const { content, processingMethod } = parseResult
const cloudUrl = 'cloudUrl' in parseResult ? parseResult.cloudUrl : undefined
const chunker = new TextChunker({ chunkSize, overlap: chunkOverlap, minChunkSize })
const chunks = await chunker.chunk(content)
let chunks: Chunk[]
const metadata = 'metadata' in parseResult ? parseResult.metadata : {}
const isJsonYaml =
metadata.type === 'json' ||
metadata.type === 'yaml' ||
mimeType.includes('json') ||
mimeType.includes('yaml')
if (isJsonYaml && JsonYamlChunker.isStructuredData(content)) {
logger.info('Using JSON/YAML chunker for structured data')
chunks = await JsonYamlChunker.chunkJsonYaml(content, {
chunkSize,
minChunkSize,
})
} else if (StructuredDataChunker.isStructuredData(content, mimeType)) {
logger.info('Using structured data chunker for spreadsheet/CSV content')
chunks = await StructuredDataChunker.chunkStructuredData(content, {
headers: metadata.headers,
totalRows: metadata.totalRows || metadata.rowCount,
sheetName: metadata.sheetNames?.[0],
})
} else {
const chunker = new TextChunker({ chunkSize, overlap: chunkOverlap, minChunkSize })
chunks = await chunker.chunk(content)
}
const characterCount = content.length
const tokenCount = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0)
@@ -132,22 +156,23 @@ async function parseDocument(
content: string
processingMethod: 'file-parser' | 'mistral-ocr'
cloudUrl?: string
metadata?: any
}> {
const isPDF = mimeType === 'application/pdf'
const hasAzureMistralOCR =
env.OCR_AZURE_API_KEY && env.OCR_AZURE_ENDPOINT && env.OCR_AZURE_MODEL_NAME
const hasMistralOCR = env.MISTRAL_API_KEY
// Check Azure Mistral OCR configuration
if (isPDF && (hasAzureMistralOCR || hasMistralOCR)) {
if (hasAzureMistralOCR) {
logger.info(`Using Azure Mistral OCR: ${filename}`)
return parseWithAzureMistralOCR(fileUrl, filename, mimeType)
}
if (isPDF && hasAzureMistralOCR) {
logger.info(`Using Azure Mistral OCR: ${filename}`)
return parseWithAzureMistralOCR(fileUrl, filename, mimeType)
}
if (isPDF && hasMistralOCR) {
logger.info(`Using Mistral OCR: ${filename}`)
return parseWithMistralOCR(fileUrl, filename, mimeType)
if (hasMistralOCR) {
logger.info(`Using Mistral OCR: ${filename}`)
return parseWithMistralOCR(fileUrl, filename, mimeType)
}
}
logger.info(`Using file parser: ${filename}`)
@@ -200,9 +225,7 @@ async function downloadFileWithTimeout(fileUrl: string): Promise<Buffer> {
}
async function downloadFileForBase64(fileUrl: string): Promise<Buffer> {
// Handle different URL types for Azure Mistral OCR base64 requirement
if (fileUrl.startsWith('data:')) {
// Extract base64 data from data URI
const [, base64Data] = fileUrl.split(',')
if (!base64Data) {
throw new Error('Invalid data URI format')
@@ -210,10 +233,8 @@ async function downloadFileForBase64(fileUrl: string): Promise<Buffer> {
return Buffer.from(base64Data, 'base64')
}
if (fileUrl.startsWith('http')) {
// Download from HTTP(S) URL
return downloadFileWithTimeout(fileUrl)
}
// Local file - read it
const fs = await import('fs/promises')
return fs.readFile(fileUrl)
}
@@ -315,7 +336,6 @@ async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeT
'Azure Mistral OCR'
)
// Azure Mistral OCR accepts data URIs with base64 content
const fileBuffer = await downloadFileForBase64(fileUrl)
const base64Data = fileBuffer.toString('base64')
const dataUri = `data:${mimeType};base64,${base64Data}`
@@ -409,21 +429,25 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
async function parseWithFileParser(fileUrl: string, filename: string, mimeType: string) {
try {
let content: string
let metadata: any = {}
if (fileUrl.startsWith('data:')) {
content = await parseDataURI(fileUrl, filename, mimeType)
} else if (fileUrl.startsWith('http')) {
content = await parseHttpFile(fileUrl, filename)
const result = await parseHttpFile(fileUrl, filename)
content = result.content
metadata = result.metadata || {}
} else {
const result = await parseFile(fileUrl)
content = result.content
metadata = result.metadata || {}
}
if (!content.trim()) {
throw new Error('File parser returned empty content')
}
return { content, processingMethod: 'file-parser' as const, cloudUrl: undefined }
return { content, processingMethod: 'file-parser' as const, cloudUrl: undefined, metadata }
} catch (error) {
logger.error(`File parser failed for ${filename}:`, error)
throw error
@@ -448,7 +472,10 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string)
return result.content
}
async function parseHttpFile(fileUrl: string, filename: string): Promise<string> {
async function parseHttpFile(
fileUrl: string,
filename: string
): Promise<{ content: string; metadata?: any }> {
const buffer = await downloadFileWithTimeout(fileUrl)
const extension = filename.split('.').pop()?.toLowerCase()
@@ -457,5 +484,5 @@ async function parseHttpFile(fileUrl: string, filename: string): Promise<string>
}
const result = await parseBuffer(buffer, extension)
return result.content
return result
}

View File

@@ -17,10 +17,18 @@ import type { DocumentSortField, SortOrder } from './types'
const logger = createLogger('DocumentService')
const TIMEOUTS = {
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 300) * 1000,
OVERALL_PROCESSING: (env.KB_CONFIG_MAX_DURATION || 600) * 1000, // Increased to 10 minutes to match Trigger's timeout
EMBEDDINGS_API: (env.KB_CONFIG_MAX_TIMEOUT || 10000) * 18,
} as const
// Configuration for handling large documents
const LARGE_DOC_CONFIG = {
MAX_CHUNKS_PER_BATCH: 500, // Insert embeddings in batches of 500
MAX_EMBEDDING_BATCH: 50, // Generate embeddings in batches of 50
MAX_FILE_SIZE: 100 * 1024 * 1024, // 100MB max file size
MAX_CHUNKS_PER_DOCUMENT: 100000, // Maximum chunks allowed per document
}
/**
* Create a timeout wrapper for async operations
*/
@@ -448,14 +456,38 @@ export async function processDocumentAsync(
processingOptions.minCharactersPerChunk || 1
)
if (processed.chunks.length > LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT) {
throw new Error(
`Document has ${processed.chunks.length.toLocaleString()} chunks, exceeding maximum of ${LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT.toLocaleString()}. ` +
`This document is unusually large and may need to be split into multiple files or preprocessed to reduce content.`
)
}
const now = new Date()
logger.info(
`[${documentId}] Document parsed successfully, generating embeddings for ${processed.chunks.length} chunks`
)
// Generate embeddings in batches for large documents
const chunkTexts = processed.chunks.map((chunk) => chunk.text)
const embeddings = chunkTexts.length > 0 ? await generateEmbeddings(chunkTexts) : []
const embeddings: number[][] = []
if (chunkTexts.length > 0) {
const batchSize = LARGE_DOC_CONFIG.MAX_EMBEDDING_BATCH
const totalBatches = Math.ceil(chunkTexts.length / batchSize)
logger.info(`[${documentId}] Generating embeddings in ${totalBatches} batches`)
for (let i = 0; i < chunkTexts.length; i += batchSize) {
const batch = chunkTexts.slice(i, i + batchSize)
const batchNum = Math.floor(i / batchSize) + 1
logger.info(`[${documentId}] Processing embedding batch ${batchNum}/${totalBatches}`)
const batchEmbeddings = await generateEmbeddings(batch)
embeddings.push(...batchEmbeddings)
}
}
logger.info(`[${documentId}] Embeddings generated, fetching document tags`)
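// The same batching pattern could be factored into a small generic helper; this is a sketch,
// not code from this change (generateEmbeddings and LARGE_DOC_CONFIG are the names used above):
async function inBatches<T, R>(
items: T[],
batchSize: number,
run: (batch: T[]) => Promise<R[]>
): Promise<R[]> {
const results: R[] = []
for (let i = 0; i < items.length; i += batchSize) {
// Each batch is awaited before the next starts, matching the sequential loop above
results.push(...(await run(items.slice(i, i + batchSize))))
}
return results
}
// e.g. const embeddings = await inBatches(chunkTexts, LARGE_DOC_CONFIG.MAX_EMBEDDING_BATCH, generateEmbeddings)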
@@ -503,8 +535,24 @@ export async function processDocumentAsync(
}))
await db.transaction(async (tx) => {
// Insert embeddings in batches for large documents
if (embeddingRecords.length > 0) {
await tx.insert(embedding).values(embeddingRecords)
const batchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH
const totalBatches = Math.ceil(embeddingRecords.length / batchSize)
logger.info(
`[${documentId}] Inserting ${embeddingRecords.length} embeddings in ${totalBatches} batches`
)
for (let i = 0; i < embeddingRecords.length; i += batchSize) {
const batch = embeddingRecords.slice(i, i + batchSize)
const batchNum = Math.floor(i / batchSize) + 1
await tx.insert(embedding).values(batch)
logger.info(
`[${documentId}] Inserted batch ${batchNum}/${totalBatches} (${batch.length} records)`
)
}
}
await tx

View File

@@ -1,2 +1,2 @@
export const SIM_AGENT_API_URL_DEFAULT = 'https://copilot.sim.ai'
export const SIM_AGENT_VERSION = '1.0.0'
export const SIM_AGENT_VERSION = '1.0.1'

View File

@@ -15,6 +15,9 @@ export const SUPPORTED_DOCUMENT_EXTENSIONS = [
'pptx',
'html',
'htm',
'json',
'yaml',
'yml',
] as const
export type SupportedDocumentExtension = (typeof SUPPORTED_DOCUMENT_EXTENSIONS)[number]
@@ -46,6 +49,9 @@ export const SUPPORTED_MIME_TYPES: Record<SupportedDocumentExtension, string[]>
],
html: ['text/html', 'application/xhtml+xml'],
htm: ['text/html', 'application/xhtml+xml'],
json: ['application/json', 'text/json', 'application/x-json'],
yaml: ['text/yaml', 'text/x-yaml', 'application/yaml', 'application/x-yaml'],
yml: ['text/yaml', 'text/x-yaml', 'application/yaml', 'application/x-yaml'],
}
export const ACCEPTED_FILE_TYPES = Object.values(SUPPORTED_MIME_TYPES).flat()
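A small sketch of how an upload route might validate against these constants (the helper itself is hypothetical):
function isSupportedUpload(extension: string, mimeType: string): boolean {
const ext = extension.toLowerCase()
if (!(SUPPORTED_DOCUMENT_EXTENSIONS as readonly string[]).includes(ext)) return false
// e.g. 'yaml' accepts text/yaml, text/x-yaml, application/yaml and application/x-yaml
return SUPPORTED_MIME_TYPES[ext as SupportedDocumentExtension].includes(mimeType)
}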

View File

@@ -4,6 +4,9 @@ import { assignLayers, groupByLayer } from './layering'
import { calculatePositions } from './positioning'
import type { Edge, LayoutOptions } from './types'
import {
CONTAINER_PADDING,
CONTAINER_PADDING_X,
CONTAINER_PADDING_Y,
DEFAULT_CONTAINER_HEIGHT,
DEFAULT_CONTAINER_WIDTH,
getBlocksByParent,
@@ -12,10 +15,6 @@ import {
const logger = createLogger('AutoLayout:Containers')
const CONTAINER_PADDING = 150
const CONTAINER_HORIZONTAL_PADDING = 180
const CONTAINER_VERTICAL_PADDING = 100
export function layoutContainers(
blocks: Record<string, BlockState>,
edges: Edge[],
@@ -26,7 +25,7 @@ export function layoutContainers(
const containerOptions: LayoutOptions = {
horizontalSpacing: options.horizontalSpacing ? options.horizontalSpacing * 0.85 : 400,
verticalSpacing: options.verticalSpacing ? options.verticalSpacing : 200,
padding: { x: CONTAINER_HORIZONTAL_PADDING, y: CONTAINER_VERTICAL_PADDING },
padding: { x: CONTAINER_PADDING_X, y: CONTAINER_PADDING_Y },
alignment: options.alignment,
}
@@ -68,8 +67,8 @@ export function layoutContainers(
}
// Adjust all child positions to start at proper padding from container edges
const xOffset = CONTAINER_HORIZONTAL_PADDING - minX
const yOffset = CONTAINER_VERTICAL_PADDING - minY
const xOffset = CONTAINER_PADDING_X - minX
const yOffset = CONTAINER_PADDING_Y - minY
for (const node of childNodes.values()) {
childBlocks[node.id].position = {

View File

@@ -100,4 +100,6 @@ export function adjustForNewBlock(
}
export type { LayoutOptions, LayoutResult, AdjustmentOptions, Edge, Loop, Parallel }
export type { TargetedLayoutOptions } from './targeted'
export { applyTargetedLayout, transferBlockHeights } from './targeted'
export { getBlockMetrics, isContainerType } from './utils'

View File

@@ -0,0 +1,352 @@
import { createLogger } from '@/lib/logs/console/logger'
import type { BlockState } from '@/stores/workflows/workflow/types'
import { assignLayers, groupByLayer } from './layering'
import { calculatePositions } from './positioning'
import type { Edge, LayoutOptions } from './types'
import {
CONTAINER_PADDING,
CONTAINER_PADDING_X,
CONTAINER_PADDING_Y,
DEFAULT_CONTAINER_HEIGHT,
DEFAULT_CONTAINER_WIDTH,
getBlockMetrics,
getBlocksByParent,
isContainerType,
prepareBlockMetrics,
ROOT_PADDING_X,
ROOT_PADDING_Y,
} from './utils'
const logger = createLogger('AutoLayout:Targeted')
export interface TargetedLayoutOptions extends LayoutOptions {
changedBlockIds: string[]
verticalSpacing?: number
horizontalSpacing?: number
}
export function applyTargetedLayout(
blocks: Record<string, BlockState>,
edges: Edge[],
options: TargetedLayoutOptions
): Record<string, BlockState> {
const { changedBlockIds, verticalSpacing = 200, horizontalSpacing = 550 } = options
if (!changedBlockIds || changedBlockIds.length === 0) {
return blocks
}
const changedSet = new Set(changedBlockIds)
const blocksCopy: Record<string, BlockState> = JSON.parse(JSON.stringify(blocks))
const groups = getBlocksByParent(blocksCopy)
layoutGroup(null, groups.root, blocksCopy, edges, changedSet, verticalSpacing, horizontalSpacing)
for (const [parentId, childIds] of groups.children.entries()) {
layoutGroup(
parentId,
childIds,
blocksCopy,
edges,
changedSet,
verticalSpacing,
horizontalSpacing
)
}
return blocksCopy
}
function layoutGroup(
parentId: string | null,
childIds: string[],
blocks: Record<string, BlockState>,
edges: Edge[],
changedSet: Set<string>,
verticalSpacing: number,
horizontalSpacing: number
): void {
if (childIds.length === 0) return
const parentBlock = parentId ? blocks[parentId] : undefined
const requestedLayout = childIds.filter((id) => {
const block = blocks[id]
if (!block) return false
// Never reposition containers, only update their dimensions
if (isContainerType(block.type)) return false
return changedSet.has(id)
})
const missingPositions = childIds.filter((id) => {
const block = blocks[id]
if (!block) return false
// Containers with missing positions should still get positioned
return !hasPosition(block)
})
const needsLayoutSet = new Set([...requestedLayout, ...missingPositions])
const needsLayout = Array.from(needsLayoutSet)
if (parentBlock) {
updateContainerDimensions(parentBlock, childIds, blocks)
}
// Always update container dimensions even if no blocks need repositioning
// This ensures containers resize properly when children are added/removed
if (needsLayout.length === 0) {
return
}
const oldPositions = new Map<string, { x: number; y: number }>()
for (const id of childIds) {
const block = blocks[id]
if (!block) continue
oldPositions.set(id, { ...block.position })
}
const layoutPositions = computeLayoutPositions(
childIds,
blocks,
edges,
parentBlock,
horizontalSpacing,
verticalSpacing
)
if (layoutPositions.size === 0) {
// No layout positions computed, but still update container dimensions
if (parentBlock) {
updateContainerDimensions(parentBlock, childIds, blocks)
}
return
}
let offsetX = 0
let offsetY = 0
const anchorId = childIds.find((id) => !needsLayout.includes(id) && layoutPositions.has(id))
if (anchorId) {
const oldPos = oldPositions.get(anchorId)
const newPos = layoutPositions.get(anchorId)
if (oldPos && newPos) {
offsetX = oldPos.x - newPos.x
offsetY = oldPos.y - newPos.y
}
} else {
// No anchor - positions from calculatePositions are already correct relative to padding
// Container positions are parent-relative, root positions are absolute
// The normalization in computeLayoutPositions already handled the padding offset
offsetX = 0
offsetY = 0
}
for (const id of needsLayout) {
const block = blocks[id]
const newPos = layoutPositions.get(id)
if (!block || !newPos) continue
block.position = {
x: newPos.x + offsetX,
y: newPos.y + offsetY,
}
}
}
function computeLayoutPositions(
childIds: string[],
blocks: Record<string, BlockState>,
edges: Edge[],
parentBlock: BlockState | undefined,
horizontalSpacing: number,
verticalSpacing: number
): Map<string, { x: number; y: number }> {
const subsetBlocks: Record<string, BlockState> = {}
for (const id of childIds) {
subsetBlocks[id] = blocks[id]
}
const subsetEdges = edges.filter(
(edge) => childIds.includes(edge.source) && childIds.includes(edge.target)
)
if (Object.keys(subsetBlocks).length === 0) {
return new Map()
}
const nodes = assignLayers(subsetBlocks, subsetEdges)
prepareBlockMetrics(nodes)
const layoutOptions: LayoutOptions = parentBlock
? {
horizontalSpacing: horizontalSpacing * 0.85,
verticalSpacing,
padding: { x: CONTAINER_PADDING_X, y: CONTAINER_PADDING_Y },
alignment: 'center',
}
: {
horizontalSpacing,
verticalSpacing,
padding: { x: ROOT_PADDING_X, y: ROOT_PADDING_Y },
alignment: 'center',
}
calculatePositions(groupByLayer(nodes), layoutOptions)
// Now normalize positions to start from 0,0 relative to the container/root
let minX = Number.POSITIVE_INFINITY
let minY = Number.POSITIVE_INFINITY
let maxX = Number.NEGATIVE_INFINITY
let maxY = Number.NEGATIVE_INFINITY
for (const node of nodes.values()) {
minX = Math.min(minX, node.position.x)
minY = Math.min(minY, node.position.y)
maxX = Math.max(maxX, node.position.x + node.metrics.width)
maxY = Math.max(maxY, node.position.y + node.metrics.height)
}
// Adjust all positions to be relative to the padding offset
const xOffset = (parentBlock ? CONTAINER_PADDING_X : ROOT_PADDING_X) - minX
const yOffset = (parentBlock ? CONTAINER_PADDING_Y : ROOT_PADDING_Y) - minY
const positions = new Map<string, { x: number; y: number }>()
for (const node of nodes.values()) {
positions.set(node.id, {
x: node.position.x + xOffset,
y: node.position.y + yOffset,
})
}
if (parentBlock) {
const calculatedWidth = maxX - minX + CONTAINER_PADDING * 2
const calculatedHeight = maxY - minY + CONTAINER_PADDING * 2
parentBlock.data = {
...parentBlock.data,
width: Math.max(calculatedWidth, DEFAULT_CONTAINER_WIDTH),
height: Math.max(calculatedHeight, DEFAULT_CONTAINER_HEIGHT),
}
}
return positions
}
function getBounds(positions: Map<string, { x: number; y: number }>) {
let minX = Number.POSITIVE_INFINITY
let minY = Number.POSITIVE_INFINITY
for (const pos of positions.values()) {
minX = Math.min(minX, pos.x)
minY = Math.min(minY, pos.y)
}
return { minX, minY }
}
function updateContainerDimensions(
parentBlock: BlockState,
childIds: string[],
blocks: Record<string, BlockState>
): void {
if (childIds.length === 0) {
// No children - use minimum dimensions
parentBlock.data = {
...parentBlock.data,
width: DEFAULT_CONTAINER_WIDTH,
height: DEFAULT_CONTAINER_HEIGHT,
}
parentBlock.layout = {
...parentBlock.layout,
measuredWidth: DEFAULT_CONTAINER_WIDTH,
measuredHeight: DEFAULT_CONTAINER_HEIGHT,
}
return
}
let minX = Number.POSITIVE_INFINITY
let minY = Number.POSITIVE_INFINITY
let maxX = Number.NEGATIVE_INFINITY
let maxY = Number.NEGATIVE_INFINITY
for (const id of childIds) {
const child = blocks[id]
if (!child) continue
const metrics = getBlockMetrics(child)
minX = Math.min(minX, child.position.x)
minY = Math.min(minY, child.position.y)
maxX = Math.max(maxX, child.position.x + metrics.width)
maxY = Math.max(maxY, child.position.y + metrics.height)
}
if (!Number.isFinite(minX) || !Number.isFinite(minY)) {
return
}
// Match the regular autolayout's dimension calculation
const calculatedWidth = maxX - minX + CONTAINER_PADDING * 2
const calculatedHeight = maxY - minY + CONTAINER_PADDING * 2
parentBlock.data = {
...parentBlock.data,
width: Math.max(calculatedWidth, DEFAULT_CONTAINER_WIDTH),
height: Math.max(calculatedHeight, DEFAULT_CONTAINER_HEIGHT),
}
parentBlock.layout = {
...parentBlock.layout,
measuredWidth: parentBlock.data.width,
measuredHeight: parentBlock.data.height,
}
}
function hasPosition(block: BlockState): boolean {
if (!block.position) return false
const { x, y } = block.position
return Number.isFinite(x) && Number.isFinite(y)
}
/**
* Estimate block heights for diff view by using current workflow measurements
* This provides better height estimates than using default values
*/
export function transferBlockHeights(
sourceBlocks: Record<string, BlockState>,
targetBlocks: Record<string, BlockState>
): void {
// Build a map of block type+name to heights from source
const heightMap = new Map<string, { height: number; width: number; isWide: boolean }>()
for (const [id, block] of Object.entries(sourceBlocks)) {
const key = `${block.type}:${block.name}`
heightMap.set(key, {
height: block.height || 100,
width: block.layout?.measuredWidth || (block.isWide ? 480 : 350),
isWide: block.isWide || false,
})
}
// Transfer heights to target blocks
for (const block of Object.values(targetBlocks)) {
const key = `${block.type}:${block.name}`
const measurements = heightMap.get(key)
if (measurements) {
block.height = measurements.height
block.isWide = measurements.isWide
if (!block.layout) {
block.layout = {}
}
block.layout.measuredHeight = measurements.height
block.layout.measuredWidth = measurements.width
}
}
logger.debug('Transferred block heights from source workflow', {
sourceCount: Object.keys(sourceBlocks).length,
targetCount: Object.keys(targetBlocks).length,
heightsMapped: heightMap.size,
})
}
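Blocks are matched by their "type:name" key, so a block renamed between the two states simply keeps default sizing; a rough sketch with stand-in objects (not real BlockState values):
const source = { a: { type: 'agent', name: 'Agent 1', height: 220 } } as unknown as Record<string, BlockState>
const target = { b: { type: 'agent', name: 'Agent 1', height: 0 } } as unknown as Record<string, BlockState>
transferBlockHeights(source, target)
// target.b.height === 220, target.b.layout.measuredHeight === 220, measuredWidth falls back to 350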

View File

@@ -9,6 +9,12 @@ export const DEFAULT_CONTAINER_WIDTH = 500
export const DEFAULT_CONTAINER_HEIGHT = 300
const DEFAULT_PADDING = 40
export const CONTAINER_PADDING = 150
export const CONTAINER_PADDING_X = 180
export const CONTAINER_PADDING_Y = 100
export const ROOT_PADDING_X = 150
export const ROOT_PADDING_Y = 150
function resolveNumeric(value: number | undefined, fallback: number): number {
return typeof value === 'number' && Number.isFinite(value) ? value : fallback
}

View File

@@ -1,16 +1,30 @@
import { getBlock } from '@/blocks'
import type { BlockConfig } from '@/blocks/types'
import { getTrigger } from '@/triggers'
/**
* Get the effective outputs for a block, including dynamic outputs from inputFormat
* and trigger outputs for blocks in trigger mode
*/
export function getBlockOutputs(
blockType: string,
subBlocks?: Record<string, any>
subBlocks?: Record<string, any>,
triggerMode?: boolean
): Record<string, any> {
const blockConfig = getBlock(blockType)
if (!blockConfig) return {}
// If block is in trigger mode, use trigger outputs instead of block outputs
if (triggerMode && blockConfig.triggers?.enabled) {
const triggerId = subBlocks?.triggerId?.value || blockConfig.triggers?.available?.[0]
if (triggerId) {
const trigger = getTrigger(triggerId)
if (trigger?.outputs) {
return trigger.outputs
}
}
}
// Start with the static outputs defined in the config
let outputs = { ...(blockConfig.outputs || {}) }
@@ -32,12 +46,20 @@ export function getBlockOutputs(
startWorkflowValue === 'manual'
) {
// API/manual mode - use inputFormat fields only
const inputFormatValue = subBlocks?.inputFormat?.value
let inputFormatValue = subBlocks?.inputFormat?.value
outputs = {}
if (
inputFormatValue !== null &&
inputFormatValue !== undefined &&
!Array.isArray(inputFormatValue)
) {
inputFormatValue = []
}
if (Array.isArray(inputFormatValue)) {
inputFormatValue.forEach((field: { name?: string; type?: string }) => {
if (field.name && field.name.trim() !== '') {
if (field?.name && field.name.trim() !== '') {
outputs[field.name] = {
type: (field.type || 'any') as any,
description: `Field from input format`,
@@ -52,7 +74,17 @@ export function getBlockOutputs(
// For blocks with inputFormat, add dynamic outputs
if (hasInputFormat(blockConfig) && subBlocks?.inputFormat?.value) {
const inputFormatValue = subBlocks.inputFormat.value
let inputFormatValue = subBlocks.inputFormat.value
// Sanitize inputFormat - ensure it's an array
if (
inputFormatValue !== null &&
inputFormatValue !== undefined &&
!Array.isArray(inputFormatValue)
) {
// Invalid format, default to empty array
inputFormatValue = []
}
if (Array.isArray(inputFormatValue)) {
// For API and Input triggers, only use inputFormat fields
@@ -61,7 +93,7 @@ export function getBlockOutputs(
// Add each field from inputFormat as an output at root level
inputFormatValue.forEach((field: { name?: string; type?: string }) => {
if (field.name && field.name.trim() !== '') {
if (field?.name && field.name.trim() !== '') {
outputs[field.name] = {
type: (field.type || 'any') as any,
description: `Field from input format`,
@@ -88,27 +120,66 @@ function hasInputFormat(blockConfig: BlockConfig): boolean {
/**
* Get output paths for a block (for tag dropdown)
*/
export function getBlockOutputPaths(blockType: string, subBlocks?: Record<string, any>): string[] {
const outputs = getBlockOutputs(blockType, subBlocks)
return Object.keys(outputs)
export function getBlockOutputPaths(
blockType: string,
subBlocks?: Record<string, any>,
triggerMode?: boolean
): string[] {
const outputs = getBlockOutputs(blockType, subBlocks, triggerMode)
// Recursively collect all paths from nested outputs
const paths: string[] = []
function collectPaths(obj: Record<string, any>, prefix = ''): void {
for (const [key, value] of Object.entries(obj)) {
const path = prefix ? `${prefix}.${key}` : key
// If value has 'type' property, it's a leaf node (output definition)
if (value && typeof value === 'object' && 'type' in value) {
paths.push(path)
}
// If value is an object without 'type', recurse into it
else if (value && typeof value === 'object' && !Array.isArray(value)) {
collectPaths(value, path)
}
// Otherwise treat as a leaf node
else {
paths.push(path)
}
}
}
collectPaths(outputs)
return paths
}
/**
* Get the type of a specific output path
* Get the type of a specific output path (supports nested paths like "email.subject")
*/
export function getBlockOutputType(
blockType: string,
outputPath: string,
subBlocks?: Record<string, any>
subBlocks?: Record<string, any>,
triggerMode?: boolean
): string {
const outputs = getBlockOutputs(blockType, subBlocks)
const output = outputs[outputPath]
const outputs = getBlockOutputs(blockType, subBlocks, triggerMode)
if (!output) return 'any'
// Navigate through nested path
const pathParts = outputPath.split('.')
let current: any = outputs
if (typeof output === 'object' && 'type' in output) {
return output.type
for (const part of pathParts) {
if (!current || typeof current !== 'object') {
return 'any'
}
current = current[part]
}
return typeof output === 'string' ? output : 'any'
if (!current) return 'any'
if (typeof current === 'object' && 'type' in current) {
return current.type
}
return typeof current === 'string' ? current : 'any'
}
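For a trigger-mode block whose trigger exposes nested outputs, the new path handling behaves roughly like this (the output shape below is illustrative):
const exampleOutputs = {
email: { subject: { type: 'string' }, from: { type: 'string' } },
timestamp: { type: 'number' },
}
// For such a block, getBlockOutputPaths(...) would surface ['email.subject', 'email.from', 'timestamp'],
// and getBlockOutputType(blockType, 'email.subject', subBlocks, true) would resolve to 'string'.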

View File

@@ -0,0 +1,203 @@
import { db } from '@sim/db'
import { customTools } from '@sim/db/schema'
import { eq } from 'drizzle-orm'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('CustomToolsPersistence')
interface CustomTool {
id?: string
type: 'custom-tool'
title: string
toolId?: string
schema: {
function: {
name?: string
description: string
parameters: Record<string, any>
}
}
code: string
usageControl?: string
}
/**
* Extract all custom tools from agent blocks in the workflow state
*/
export function extractCustomToolsFromWorkflowState(workflowState: any): CustomTool[] {
const customToolsMap = new Map<string, CustomTool>()
if (!workflowState?.blocks) {
return []
}
for (const [blockId, block] of Object.entries(workflowState.blocks)) {
try {
const blockData = block as any
// Only process agent blocks
if (!blockData || blockData.type !== 'agent') {
continue
}
const subBlocks = blockData.subBlocks || {}
const toolsSubBlock = subBlocks.tools
if (!toolsSubBlock?.value) {
continue
}
let tools = toolsSubBlock.value
// Parse if it's a string
if (typeof tools === 'string') {
try {
tools = JSON.parse(tools)
} catch (error) {
logger.warn(`Failed to parse tools in block ${blockId}`, { error })
continue
}
}
if (!Array.isArray(tools)) {
continue
}
// Extract custom tools
for (const tool of tools) {
if (
tool &&
typeof tool === 'object' &&
tool.type === 'custom-tool' &&
tool.title &&
tool.schema?.function &&
tool.code
) {
// Use toolId if available, otherwise generate one from title
const toolKey = tool.toolId || tool.title
// Deduplicate by toolKey (if same tool appears in multiple blocks)
if (!customToolsMap.has(toolKey)) {
customToolsMap.set(toolKey, tool as CustomTool)
}
}
}
} catch (error) {
logger.error(`Error extracting custom tools from block ${blockId}`, { error })
}
}
return Array.from(customToolsMap.values())
}
/**
* Persist custom tools to the database
* Creates new tools or updates existing ones
*/
export async function persistCustomToolsToDatabase(
customToolsList: CustomTool[],
userId: string
): Promise<{ saved: number; errors: string[] }> {
if (!customToolsList || customToolsList.length === 0) {
return { saved: 0, errors: [] }
}
const errors: string[] = []
let saved = 0
try {
await db.transaction(async (tx) => {
for (const tool of customToolsList) {
try {
// Extract the base identifier (without 'custom_' prefix) for database storage
// If toolId exists and has the prefix, strip it; otherwise use title as base
let baseId: string
if (tool.toolId) {
baseId = tool.toolId.startsWith('custom_')
? tool.toolId.replace('custom_', '')
: tool.toolId
} else {
// Use title as the base identifier (agent handler will add 'custom_' prefix)
baseId = tool.title
}
const nowTime = new Date()
// Check if tool already exists
const existingTool = await tx
.select()
.from(customTools)
.where(eq(customTools.id, baseId))
.limit(1)
if (existingTool.length === 0) {
// Create new tool
await tx.insert(customTools).values({
id: baseId,
userId,
title: tool.title,
schema: tool.schema,
code: tool.code,
createdAt: nowTime,
updatedAt: nowTime,
})
logger.info(`Created custom tool: ${tool.title}`, { toolId: baseId })
saved++
} else if (existingTool[0].userId === userId) {
// Update existing tool if it belongs to the user
await tx
.update(customTools)
.set({
title: tool.title,
schema: tool.schema,
code: tool.code,
updatedAt: nowTime,
})
.where(eq(customTools.id, baseId))
logger.info(`Updated custom tool: ${tool.title}`, { toolId: baseId })
saved++
} else {
// Tool exists but belongs to different user - skip
logger.warn(`Skipping custom tool - belongs to different user: ${tool.title}`, {
toolId: baseId,
})
errors.push(`Tool ${tool.title} belongs to a different user`)
}
} catch (error) {
const errorMsg = `Failed to persist tool ${tool.title}: ${error instanceof Error ? error.message : String(error)}`
logger.error(errorMsg, { error })
errors.push(errorMsg)
}
}
})
} catch (error) {
const errorMsg = `Transaction failed while persisting custom tools: ${error instanceof Error ? error.message : String(error)}`
logger.error(errorMsg, { error })
errors.push(errorMsg)
}
return { saved, errors }
}
/**
* Extract and persist custom tools from workflow state in one operation
*/
export async function extractAndPersistCustomTools(
workflowState: any,
userId: string
): Promise<{ saved: number; errors: string[] }> {
const customToolsList = extractCustomToolsFromWorkflowState(workflowState)
if (customToolsList.length === 0) {
logger.debug('No custom tools found in workflow state')
return { saved: 0, errors: [] }
}
logger.info(`Found ${customToolsList.length} custom tool(s) to persist`, {
tools: customToolsList.map((t) => t.title),
})
return await persistCustomToolsToDatabase(customToolsList, userId)
}
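A sketch of the intended call site (the session and workflowState variables are assumptions; the actual wiring lives outside this file):
const { saved, errors } = await extractAndPersistCustomTools(workflowState, session.user.id)
if (errors.length > 0) {
logger.warn(`Persisted ${saved} custom tool(s) with ${errors.length} error(s)`, { errors })
}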

View File

@@ -7,6 +7,206 @@ import type { BlockWithDiff } from './types'
const logger = createLogger('WorkflowDiffEngine')
type ParentIdentifier = string | null
function getParentId(block?: BlockState): ParentIdentifier {
return block?.data?.parentId ?? null
}
function buildEdgeKey(edge: Edge): string {
const sourceHandle = edge.sourceHandle ?? ''
const targetHandle = edge.targetHandle ?? ''
const edgeType = edge.type ?? ''
return `${edge.source}|${sourceHandle}->${edge.target}|${targetHandle}|${edgeType}`
}
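// Example (values illustrative): an edge from 'a' (handle 'out') to 'b' (handle 'in') of type 'default'
// keys as 'a|out->b|in|default'; missing handles and type collapse to empty segments, e.g. 'a|->b||'.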
function groupBlocksByParent(blocks: Record<string, BlockState>): {
root: string[]
children: Map<string, string[]>
} {
const root: string[] = []
const children = new Map<string, string[]>()
for (const [id, block] of Object.entries(blocks)) {
const parentId = getParentId(block)
if (!parentId) {
root.push(id)
continue
}
if (!children.has(parentId)) {
children.set(parentId, [])
}
children.get(parentId)!.push(id)
}
return { root, children }
}
function buildAdjacency(edges: Edge[]): Map<string, Set<string>> {
const adjacency = new Map<string, Set<string>>()
for (const edge of edges) {
if (!adjacency.has(edge.source)) {
adjacency.set(edge.source, new Set())
}
adjacency.get(edge.source)!.add(edge.target)
}
return adjacency
}
function expandImpactedBlocks(
seeds: Set<string>,
proposedBlocks: Record<string, BlockState>,
adjacency: Map<string, Set<string>>
): Set<string> {
const impacted = new Set<string>()
// Only expand to direct downstream neighbors (targets of impacted blocks)
// This ensures we make space for new/moved blocks without relocating unaffected ones
for (const seed of seeds) {
if (!proposedBlocks[seed]) continue
impacted.add(seed)
const seedBlock = proposedBlocks[seed]
const seedParent = getParentId(seedBlock)
const neighbors = adjacency.get(seed)
if (neighbors) {
for (const next of neighbors) {
const nextBlock = proposedBlocks[next]
if (!nextBlock) continue
// Only expand within same parent
if (getParentId(nextBlock) !== seedParent) continue
impacted.add(next)
}
}
}
return impacted
}
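// One-hop expansion example (IDs illustrative): with edges a->b->c and seed {a}, the impacted set is {a, b}:
// b is pulled in as a direct downstream neighbor in the same parent, while c keeps its existing position.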
function computeStructuralLayoutImpact(params: {
baselineBlocks: Record<string, BlockState>
baselineEdges: Edge[]
proposedBlocks: Record<string, BlockState>
proposedEdges: Edge[]
}): {
impactedBlockIds: Set<string>
parentsToRelayout: Set<ParentIdentifier>
} {
const { baselineBlocks, baselineEdges, proposedBlocks, proposedEdges } = params
const impactedBlocks = new Set<string>()
const parentsToRelayout = new Set<ParentIdentifier>()
const baselineIds = new Set(Object.keys(baselineBlocks))
const proposedIds = new Set(Object.keys(proposedBlocks))
for (const id of proposedIds) {
if (!baselineIds.has(id)) {
impactedBlocks.add(id)
parentsToRelayout.add(getParentId(proposedBlocks[id]))
}
}
for (const id of baselineIds) {
if (!proposedIds.has(id)) {
parentsToRelayout.add(getParentId(baselineBlocks[id]))
}
}
for (const id of proposedIds) {
if (!baselineIds.has(id)) {
continue
}
const baselineBlock = baselineBlocks[id]
const proposedBlock = proposedBlocks[id]
const baselineParent = getParentId(baselineBlock)
const proposedParent = getParentId(proposedBlock)
if (baselineParent !== proposedParent) {
impactedBlocks.add(id)
parentsToRelayout.add(baselineParent)
parentsToRelayout.add(proposedParent)
}
}
const baselineEdgeMap = new Map<string, Edge>()
for (const edge of baselineEdges) {
baselineEdgeMap.set(buildEdgeKey(edge), edge)
}
const proposedEdgeMap = new Map<string, Edge>()
for (const edge of proposedEdges) {
proposedEdgeMap.set(buildEdgeKey(edge), edge)
}
for (const [key, edge] of proposedEdgeMap) {
if (baselineEdgeMap.has(key)) {
continue
}
if (proposedBlocks[edge.source]) {
impactedBlocks.add(edge.source)
}
if (proposedBlocks[edge.target]) {
impactedBlocks.add(edge.target)
}
}
for (const [key, edge] of baselineEdgeMap) {
if (proposedEdgeMap.has(key)) {
continue
}
if (proposedBlocks[edge.source]) {
impactedBlocks.add(edge.source)
}
if (proposedBlocks[edge.target]) {
impactedBlocks.add(edge.target)
}
parentsToRelayout.add(getParentId(baselineBlocks[edge.source]))
parentsToRelayout.add(getParentId(baselineBlocks[edge.target]))
}
const adjacency = buildAdjacency(proposedEdges)
const seedBlocks = new Set<string>()
for (const id of impactedBlocks) {
if (proposedBlocks[id]) {
seedBlocks.add(id)
}
}
const expandedImpacts = expandImpactedBlocks(seedBlocks, proposedBlocks, adjacency)
// Add parent containers to impacted set so their updated dimensions get transferred
const parentsWithImpactedChildren = new Set<string>()
for (const blockId of expandedImpacts) {
const block = proposedBlocks[blockId]
if (!block) continue
const parentId = getParentId(block)
if (parentId && proposedBlocks[parentId]) {
parentsWithImpactedChildren.add(parentId)
}
}
for (const parentId of parentsWithImpactedChildren) {
expandedImpacts.add(parentId)
}
return {
impactedBlockIds: expandedImpacts,
parentsToRelayout,
}
}
// Helper function to check if a block has changed
function hasBlockChanged(currentBlock: BlockState, proposedBlock: BlockState): boolean {
// Compare key fields that indicate a change
@@ -122,12 +322,12 @@ export class WorkflowDiffEngine {
private currentDiff: WorkflowDiff | undefined = undefined
/**
* Create a diff from YAML content
* Create a diff from workflow state
*/
async createDiffFromYaml(yamlContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
async createDiff(jsonContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
try {
logger.info('WorkflowDiffEngine.createDiffFromYaml called with:', {
yamlContentLength: yamlContent.length,
logger.info('WorkflowDiffEngine.createDiff called with:', {
jsonContentLength: jsonContent.length,
diffAnalysis: diffAnalysis,
diffAnalysisType: typeof diffAnalysis,
diffAnalysisUndefined: diffAnalysis === undefined,
@@ -163,7 +363,7 @@ export class WorkflowDiffEngine {
// Call the API route to create the diff
const body: any = {
yamlContent,
jsonContent,
currentWorkflowState: mergedBaseline,
}
@@ -211,7 +411,7 @@ export class WorkflowDiffEngine {
const result = await response.json()
logger.info('WorkflowDiffEngine.createDiffFromYaml response:', {
logger.info('WorkflowDiffEngine.createDiff response:', {
success: result.success,
hasDiff: !!result.diff,
errors: result.errors,
@@ -283,24 +483,45 @@ export class WorkflowDiffEngine {
hasDiffAnalysis: !!diffAnalysis,
})
// Get current workflow state for comparison
// Get baseline for comparison
// If we already have a diff, use it as baseline (editing on top of diff)
// Otherwise use the current workflow state
const { useWorkflowStore } = await import('@/stores/workflows/workflow/store')
const currentWorkflowState = useWorkflowStore.getState().getWorkflowState()
// Check if we're editing on top of an existing diff
const baselineForComparison = this.currentDiff?.proposedState || currentWorkflowState
const isEditingOnTopOfDiff = !!this.currentDiff
if (isEditingOnTopOfDiff) {
logger.info('Editing on top of existing diff - using diff as baseline for comparison', {
diffBlockCount: Object.keys(this.currentDiff!.proposedState.blocks).length,
})
}
// Merge subblock values from subblock store to ensure manual edits are included
let mergedBaseline: WorkflowState = currentWorkflowState
try {
mergedBaseline = {
...currentWorkflowState,
blocks: mergeSubblockState(currentWorkflowState.blocks),
let mergedBaseline: WorkflowState = baselineForComparison
// Only merge subblock values if we're comparing against original workflow
// If editing on top of diff, use the diff state as-is
if (!isEditingOnTopOfDiff) {
try {
mergedBaseline = {
...baselineForComparison,
blocks: mergeSubblockState(baselineForComparison.blocks),
}
logger.info('Merged subblock values into baseline for diff creation', {
blockCount: Object.keys(mergedBaseline.blocks || {}).length,
})
} catch (mergeError) {
logger.warn('Failed to merge subblock values into baseline; proceeding with raw state', {
error: mergeError instanceof Error ? mergeError.message : String(mergeError),
})
}
logger.info('Merged subblock values into baseline for diff creation', {
blockCount: Object.keys(mergedBaseline.blocks || {}).length,
})
} catch (mergeError) {
logger.warn('Failed to merge subblock values into baseline; proceeding with raw state', {
error: mergeError instanceof Error ? mergeError.message : String(mergeError),
})
} else {
logger.info(
'Using diff state as baseline without merging subblocks (editing on top of diff)'
)
}
// Build a map of existing blocks by type:name for matching
@@ -349,6 +570,14 @@ export class WorkflowDiffEngine {
id: finalId,
}
// Update parentId in data if it exists and has been remapped
if (finalBlock.data?.parentId && idMap[finalBlock.data.parentId]) {
finalBlock.data = {
...finalBlock.data,
parentId: idMap[finalBlock.data.parentId],
}
}
finalBlocks[finalId] = finalBlock
}
@@ -399,44 +628,153 @@ export class WorkflowDiffEngine {
finalProposedState.parallels = generateParallelBlocks(finalProposedState.blocks)
}
// Transfer block heights from baseline workflow for better measurements in diff view
// If editing on top of diff, this transfers from the diff (which already has good heights)
// Otherwise transfers from original workflow
logger.info('Transferring block heights from baseline workflow', {
isEditingOnTopOfDiff,
baselineBlockCount: Object.keys(mergedBaseline.blocks).length,
})
try {
const { transferBlockHeights } = await import('@/lib/workflows/autolayout')
transferBlockHeights(mergedBaseline.blocks, finalBlocks)
} catch (error) {
logger.warn('Failed to transfer block heights', {
error: error instanceof Error ? error.message : String(error),
})
}
// Apply autolayout to the proposed state
logger.info('Applying autolayout to proposed workflow state')
try {
const { applyAutoLayout: applyNativeAutoLayout } = await import(
'@/lib/workflows/autolayout'
)
// Compute diff analysis if not already provided to determine changed blocks
let tempComputed = diffAnalysis
if (!tempComputed) {
const currentIds = new Set(Object.keys(mergedBaseline.blocks))
const newBlocks: string[] = []
const editedBlocks: string[] = []
const autoLayoutOptions = {
horizontalSpacing: 550,
verticalSpacing: 200,
padding: {
x: 150,
y: 150,
},
alignment: 'center' as const,
for (const [id, block] of Object.entries(finalBlocks)) {
if (!currentIds.has(id)) {
newBlocks.push(id)
} else {
const currentBlock = mergedBaseline.blocks[id]
if (hasBlockChanged(currentBlock, block)) {
editedBlocks.push(id)
}
}
}
tempComputed = { new_blocks: newBlocks, edited_blocks: editedBlocks, deleted_blocks: [] }
}
const layoutResult = applyNativeAutoLayout(
finalBlocks,
finalProposedState.edges,
finalProposedState.loops || {},
finalProposedState.parallels || {},
autoLayoutOptions
)
const { impactedBlockIds } = computeStructuralLayoutImpact({
baselineBlocks: mergedBaseline.blocks,
baselineEdges: mergedBaseline.edges as Edge[],
proposedBlocks: finalBlocks,
proposedEdges: finalEdges,
})
if (layoutResult.success && layoutResult.blocks) {
Object.entries(layoutResult.blocks).forEach(([id, layoutBlock]) => {
const impactedBlockArray = Array.from(impactedBlockIds)
const totalBlocks = Object.keys(finalBlocks).length
const unchangedBlocks = totalBlocks - impactedBlockArray.length
if (impactedBlockArray.length === 0) {
logger.info('No structural changes detected; skipping autolayout', {
totalBlocks,
})
} else if (unchangedBlocks > 0) {
// Use targeted layout - preserves positions of unchanged blocks
logger.info('Using targeted layout for copilot edits (has unchanged blocks)', {
changedBlocks: impactedBlockArray.length,
unchangedBlocks: unchangedBlocks,
totalBlocks: totalBlocks,
percentChanged: Math.round((impactedBlockArray.length / totalBlocks) * 100),
})
const { applyTargetedLayout } = await import('@/lib/workflows/autolayout')
const layoutedBlocks = applyTargetedLayout(finalBlocks, finalProposedState.edges, {
changedBlockIds: impactedBlockArray,
horizontalSpacing: 550,
verticalSpacing: 200,
})
Object.entries(layoutedBlocks).forEach(([id, layoutBlock]) => {
if (finalBlocks[id]) {
finalBlocks[id].position = layoutBlock.position
if (layoutBlock.data) {
finalBlocks[id].data = {
...finalBlocks[id].data,
...layoutBlock.data,
}
}
if (layoutBlock.layout) {
finalBlocks[id].layout = {
...finalBlocks[id].layout,
...layoutBlock.layout,
}
}
if (typeof layoutBlock.height === 'number') {
finalBlocks[id].height = layoutBlock.height
}
if (typeof layoutBlock.isWide === 'boolean') {
finalBlocks[id].isWide = layoutBlock.isWide
}
}
})
logger.info('Successfully applied autolayout to proposed state', {
blocksLayouted: Object.keys(layoutResult.blocks).length,
logger.info('Successfully applied targeted layout to proposed state', {
blocksLayouted: Object.keys(layoutedBlocks).length,
changedBlocks: impactedBlockArray.length,
})
} else {
logger.warn('Autolayout failed, using default positions', {
error: layoutResult.error,
// Use full autolayout only when copilot built 100% of the workflow from scratch
logger.info('Using full autolayout (copilot built 100% of workflow)', {
totalBlocks: totalBlocks,
allBlocksAreNew: impactedBlockArray.length === totalBlocks,
})
const { applyAutoLayout: applyNativeAutoLayout } = await import(
'@/lib/workflows/autolayout'
)
const autoLayoutOptions = {
horizontalSpacing: 550,
verticalSpacing: 200,
padding: {
x: 150,
y: 150,
},
alignment: 'center' as const,
}
const layoutResult = applyNativeAutoLayout(
finalBlocks,
finalProposedState.edges,
finalProposedState.loops || {},
finalProposedState.parallels || {},
autoLayoutOptions
)
if (layoutResult.success && layoutResult.blocks) {
Object.entries(layoutResult.blocks).forEach(([id, layoutBlock]) => {
if (finalBlocks[id]) {
finalBlocks[id].position = layoutBlock.position
}
})
logger.info('Successfully applied full autolayout to proposed state', {
blocksLayouted: Object.keys(layoutResult.blocks).length,
})
} else {
logger.warn('Autolayout failed, using default positions', {
error: layoutResult.error,
})
}
}
} catch (layoutError) {
logger.warn('Error applying autolayout, using default positions', {
@@ -614,23 +952,23 @@ export class WorkflowDiffEngine {
}
/**
* Merge new YAML content into existing diff
* Merge new workflow state into existing diff
* Used for cumulative updates within the same message
*/
async mergeDiffFromYaml(yamlContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
async mergeDiff(jsonContent: string, diffAnalysis?: DiffAnalysis): Promise<DiffResult> {
try {
logger.info('Merging diff from YAML content')
logger.info('Merging diff from workflow state')
// If no existing diff, create a new one
if (!this.currentDiff) {
logger.info('No existing diff, creating new diff')
return this.createDiffFromYaml(yamlContent, diffAnalysis)
return this.createDiff(jsonContent, diffAnalysis)
}
// Call the API route to merge the diff
const body: any = {
existingDiff: this.currentDiff,
yamlContent,
jsonContent,
}
if (diffAnalysis !== undefined && diffAnalysis !== null) {

View File

@@ -18,7 +18,7 @@ export interface CopilotWorkflowState {
export interface CopilotBlockState {
type: string
name: string
inputs?: Record<string, string | number | string[][]>
inputs?: Record<string, string | number | string[][] | object>
outputs: BlockState['outputs']
connections?: Record<string, string | string[]>
nestedNodes?: Record<string, CopilotBlockState>
@@ -83,17 +83,127 @@ function isSensitiveSubBlock(key: string, subBlock: BlockState['subBlocks'][stri
return false
}
/**
* Sanitize condition blocks by removing UI-specific metadata
* Returns cleaned JSON string (not parsed array)
*/
function sanitizeConditions(conditionsJson: string): string {
try {
const conditions = JSON.parse(conditionsJson)
if (!Array.isArray(conditions)) return conditionsJson
// Keep only id, title, and value - remove UI state
const cleaned = conditions.map((cond: any) => ({
id: cond.id,
title: cond.title,
value: cond.value || '',
}))
return JSON.stringify(cleaned)
} catch {
return conditionsJson
}
}
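// Illustrative example, not part of this diff: the isExpanded field below is an assumed piece of
// UI metadata; sanitizeConditions keeps only id, title, and value and re-serializes the array.
const exampleConditions = JSON.stringify([
  { id: 'cond-1', title: 'if', value: '<agent.score> > 5', isExpanded: true },
])
const cleanedExampleConditions = sanitizeConditions(exampleConditions)
// => '[{"id":"cond-1","title":"if","value":"<agent.score> > 5"}]'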
/**
* Sanitize tools array by removing UI state and redundant fields
*/
function sanitizeTools(tools: any[]): any[] {
return tools.map((tool) => {
if (tool.type === 'custom-tool') {
const sanitized: any = {
type: tool.type,
title: tool.title,
toolId: tool.toolId,
usageControl: tool.usageControl,
}
if (tool.schema?.function) {
sanitized.schema = {
function: {
description: tool.schema.function.description,
parameters: tool.schema.function.parameters,
},
}
}
if (tool.code) {
sanitized.code = tool.code
}
return sanitized
}
const { isExpanded, ...cleanTool } = tool
return cleanTool
})
}
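// Illustrative example, not part of this diff (field values are assumed): for a custom tool, UI
// state and schema.function.name are dropped, while description, parameters, and code survive.
const [exampleSanitizedTool] = sanitizeTools([
  {
    type: 'custom-tool',
    title: 'Lookup Order',
    toolId: 'tool_abc123',
    usageControl: 'auto',
    isExpanded: true,
    schema: {
      function: {
        name: 'lookupOrder',
        description: 'Find an order by id',
        parameters: { type: 'object', properties: { orderId: { type: 'string' } } },
      },
    },
    code: 'return { found: true }',
  },
])
// exampleSanitizedTool keeps type/title/toolId/usageControl, schema.function.{description,parameters},
// and code; isExpanded and schema.function.name are not carried over.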
/**
* Sanitize subblocks by removing null values, secrets, and simplifying structure
* Maps each subblock key directly to its value instead of the full object
* Note: responseFormat is kept as an object for better copilot understanding
*/
function sanitizeSubBlocks(
subBlocks: BlockState['subBlocks']
): Record<string, string | number | string[][]> {
const sanitized: Record<string, string | number | string[][]> = {}
): Record<string, string | number | string[][] | object> {
const sanitized: Record<string, string | number | string[][] | object> = {}
Object.entries(subBlocks).forEach(([key, subBlock]) => {
// Skip null/undefined values
// Special handling for responseFormat - process BEFORE null check
// so we can detect when it's added/removed
if (key === 'responseFormat') {
try {
// Handle null/undefined - skip if no value
if (subBlock.value === null || subBlock.value === undefined) {
return
}
let obj = subBlock.value
// Handle string values - parse them first
if (typeof subBlock.value === 'string') {
const trimmed = subBlock.value.trim()
if (!trimmed) {
// Empty string - skip this field
return
}
obj = JSON.parse(trimmed)
}
// Handle object values - normalize keys and keep as object for copilot
if (obj && typeof obj === 'object') {
// Sort keys recursively for consistent comparison
const sortKeys = (item: any): any => {
if (Array.isArray(item)) {
return item.map(sortKeys)
}
if (item !== null && typeof item === 'object') {
return Object.keys(item)
.sort()
.reduce((result: any, key: string) => {
result[key] = sortKeys(item[key])
return result
}, {})
}
return item
}
// Keep as object (not stringified) for better copilot understanding
const normalized = sortKeys(obj)
sanitized[key] = normalized
return
}
// If we get here, obj is not an object (maybe null or primitive) - skip it
return
} catch (error) {
// Invalid JSON - skip this field to avoid crashes
return
}
}
// Skip null/undefined values for other fields
if (subBlock.value === null || subBlock.value === undefined) {
return
}
@@ -112,36 +222,24 @@ function sanitizeSubBlocks(
return
}
// For non-sensitive, non-null values, include them
// Special handling for condition-input type - clean UI metadata
if (subBlock.type === 'condition-input' && typeof subBlock.value === 'string') {
const cleanedConditions: string = sanitizeConditions(subBlock.value)
sanitized[key] = cleanedConditions
return
}
if (key === 'tools' && Array.isArray(subBlock.value)) {
sanitized[key] = sanitizeTools(subBlock.value)
return
}
sanitized[key] = subBlock.value
})
return sanitized
}
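// Illustrative example, not part of this diff: responseFormat values are parsed and key-sorted, so
// two formats that differ only in key order sanitize to the same object. The 'short-input' type is
// a placeholder here; the handling keys off the field name, not the subblock type.
const exampleSanitized = sanitizeSubBlocks({
  responseFormat: { id: 'responseFormat', type: 'short-input', value: '{"schema":{"b":1,"a":2}}' },
})
// exampleSanitized.responseFormat => { schema: { a: 2, b: 1 } } (kept as an object, not re-stringified)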
/**
* Reconstruct full subBlock structure from simplified copilot format
* Uses existing block structure as template for id and type fields
*/
function reconstructSubBlocks(
simplifiedSubBlocks: Record<string, string | number | string[][]>,
existingSubBlocks?: BlockState['subBlocks']
): BlockState['subBlocks'] {
const reconstructed: BlockState['subBlocks'] = {}
Object.entries(simplifiedSubBlocks).forEach(([key, value]) => {
const existingSubBlock = existingSubBlocks?.[key]
reconstructed[key] = {
id: existingSubBlock?.id || key,
type: existingSubBlock?.type || 'short-input',
value,
}
})
return reconstructed
}
/**
* Extract connections for a block from edges and format as operations-style connections
*/
@@ -198,14 +296,16 @@ export function sanitizeForCopilot(state: WorkflowState): CopilotWorkflowState {
const connections = extractConnectionsForBlock(blockId, state.edges)
// For loop/parallel blocks, extract config from block.data instead of subBlocks
let inputs: Record<string, string | number | string[][]> = {}
let inputs: Record<string, string | number | string[][] | object>
if (block.type === 'loop' || block.type === 'parallel') {
// Extract configuration from block.data
if (block.data?.loopType) inputs.loopType = block.data.loopType
if (block.data?.count !== undefined) inputs.iterations = block.data.count
if (block.data?.collection !== undefined) inputs.collection = block.data.collection
if (block.data?.parallelType) inputs.parallelType = block.data.parallelType
const loopInputs: Record<string, string | number | string[][] | object> = {}
if (block.data?.loopType) loopInputs.loopType = block.data.loopType
if (block.data?.count !== undefined) loopInputs.iterations = block.data.count
if (block.data?.collection !== undefined) loopInputs.collection = block.data.collection
if (block.data?.parallelType) loopInputs.parallelType = block.data.parallelType
inputs = loopInputs
} else {
// For regular blocks, sanitize subBlocks
inputs = sanitizeSubBlocks(block.subBlocks)
@@ -277,14 +377,10 @@ export function sanitizeForExport(state: WorkflowState): ExportWorkflowState {
Object.values(clonedState.blocks).forEach((block: any) => {
if (block.subBlocks) {
Object.entries(block.subBlocks).forEach(([key, subBlock]: [string, any]) => {
// Clear OAuth credentials and API keys using regex patterns
// Clear OAuth credentials and API keys based on field name only
if (
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(key) ||
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(
subBlock.type || ''
) ||
(typeof subBlock.value === 'string' &&
/credential|oauth|api[_-]?key|token|secret|auth|password|bearer/i.test(subBlock.value))
subBlock.type === 'oauth-input'
) {
subBlock.value = ''
}

View File

@@ -174,6 +174,17 @@ export function computeEditSequence(
if (!(blockId in startFlattened)) {
const { block, parentId } = endFlattened[blockId]
if (parentId) {
// Check if this block will be included in parent's nestedNodes
const parentData = endFlattened[parentId]
const parentIsNew = parentData && !(parentId in startFlattened)
const parentHasNestedNodes = parentData?.block?.nestedNodes?.[blockId]
// Skip if parent is new and will include this block in nestedNodes
if (parentIsNew && parentHasNestedNodes) {
// Parent's 'add' operation will include this child, skip separate operation
continue
}
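// Example: when the copilot creates a new loop along with a brand-new child block, the child is
// carried in the loop's nestedNodes on the parent's 'add' operation, so it is skipped here rather
// than emitted as a separate operation.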
// Block was added inside a subflow - include full block state
const addParams: EditOperation['params'] = {
subflowId: parentId,
@@ -181,8 +192,14 @@ export function computeEditSequence(
name: block.name,
outputs: block.outputs,
enabled: block.enabled !== undefined ? block.enabled : true,
...(block?.triggerMode !== undefined && { triggerMode: Boolean(block.triggerMode) }),
...(block?.advancedMode !== undefined && { advancedMode: Boolean(block.advancedMode) }),
}
// Only include triggerMode/advancedMode if true
if (block?.triggerMode === true) {
addParams.triggerMode = true
}
if (block?.advancedMode === true) {
addParams.advancedMode = true
}
// Add inputs if present
@@ -208,8 +225,14 @@ export function computeEditSequence(
const addParams: EditOperation['params'] = {
type: block.type,
name: block.name,
...(block?.triggerMode !== undefined && { triggerMode: Boolean(block.triggerMode) }),
...(block?.advancedMode !== undefined && { advancedMode: Boolean(block.advancedMode) }),
}
if (block?.triggerMode === true) {
addParams.triggerMode = true
}
if (block?.advancedMode === true) {
addParams.advancedMode = true
}
// Add inputs if present
@@ -224,10 +247,18 @@ export function computeEditSequence(
addParams.connections = connections
}
// Add nested nodes if present (for loops/parallels created from scratch)
// Add nested nodes if present AND all children are new
// This creates the loop/parallel with children in one operation
// If some children already exist, they'll have separate insert_into_subflow operations
if (block.nestedNodes && Object.keys(block.nestedNodes).length > 0) {
addParams.nestedNodes = block.nestedNodes
subflowsChanged++
const allChildrenNew = Object.keys(block.nestedNodes).every(
(childId) => !(childId in startFlattened)
)
if (allChildrenNew) {
addParams.nestedNodes = block.nestedNodes
subflowsChanged++
}
}
operations.push({
@@ -266,12 +297,14 @@ export function computeEditSequence(
name: endBlock.name,
outputs: endBlock.outputs,
enabled: endBlock.enabled !== undefined ? endBlock.enabled : true,
...(endBlock?.triggerMode !== undefined && {
triggerMode: Boolean(endBlock.triggerMode),
}),
...(endBlock?.advancedMode !== undefined && {
advancedMode: Boolean(endBlock.advancedMode),
}),
}
// Only include triggerMode/advancedMode if true
if (endBlock?.triggerMode === true) {
addParams.triggerMode = true
}
if (endBlock?.advancedMode === true) {
addParams.advancedMode = true
}
const inputs = extractInputValues(endBlock)
@@ -436,12 +469,13 @@ function computeBlockChanges(
hasChanges = true
}
// Check input value changes
// Check input value changes - only include changed fields
const startInputs = extractInputValues(startBlock)
const endInputs = extractInputValues(endBlock)
if (JSON.stringify(startInputs) !== JSON.stringify(endInputs)) {
changes.inputs = endInputs
const changedInputs = computeInputDelta(startInputs, endInputs)
if (Object.keys(changedInputs).length > 0) {
changes.inputs = changedInputs
hasChanges = true
}
@@ -457,6 +491,28 @@ function computeBlockChanges(
return hasChanges ? changes : null
}
/**
* Compute delta between two input objects
* Only returns fields that actually changed or were added
*/
function computeInputDelta(
startInputs: Record<string, any>,
endInputs: Record<string, any>
): Record<string, any> {
const delta: Record<string, any> = {}
for (const key in endInputs) {
if (
!(key in startInputs) ||
JSON.stringify(startInputs[key]) !== JSON.stringify(endInputs[key])
) {
delta[key] = endInputs[key]
}
}
return delta
}
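// Illustrative example, not part of this diff: only fields that changed or were added appear in
// the delta; fields present only in startInputs are not reported, since only endInputs is walked.
const exampleDelta = computeInputDelta(
  { model: 'claude-4.5-sonnet', temperature: 0.2 },
  { model: 'claude-4.5-sonnet', temperature: 0.7, systemPrompt: 'Be concise' }
)
// => { temperature: 0.7, systemPrompt: 'Be concise' }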
/**
* Format edit operations into a human-readable description
*/

View File

@@ -239,10 +239,6 @@ const nextConfig: NextConfig = {
return redirects
},
async rewrites() {
if (!isTruthy(env.POSTHOG_ENABLED)) {
return []
}
return [
{
source: '/ingest/static/:path*',

View File

@@ -70,6 +70,7 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "6.1.0",
"date-fns": "4.1.0",
"encoding": "0.1.13",
"entities": "6.0.1",

View File

@@ -1,98 +0,0 @@
#!/usr/bin/env bun
import path from 'path'
import { DocsChunker } from '@/lib/knowledge/documents/docs-chunker'
import type { DocChunk } from '@/lib/knowledge/documents/types'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('ChunkDocsScript')
/**
* Script to chunk all .mdx files in the docs directory
*/
async function main() {
try {
// Initialize the docs chunker
const chunker = new DocsChunker({
chunkSize: 1024,
minChunkSize: 100,
overlap: 200,
baseUrl: 'https://docs.sim.ai',
})
// Path to the docs content directory
const docsPath = path.join(process.cwd(), '../../apps/docs/content/docs')
logger.info(`Processing docs from: ${docsPath}`)
// Process all .mdx files
const chunks = await chunker.chunkAllDocs(docsPath)
logger.info(`\n=== CHUNKING RESULTS ===`)
logger.info(`Total chunks: ${chunks.length}`)
// Group chunks by document
const chunksByDoc = chunks.reduce<Record<string, DocChunk[]>>((acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = []
}
acc[chunk.sourceDocument].push(chunk)
return acc
}, {})
// Display summary
logger.info(`\n=== DOCUMENT SUMMARY ===`)
for (const [doc, docChunks] of Object.entries(chunksByDoc)) {
logger.info(`${doc}: ${docChunks.length} chunks`)
}
// Display a few sample chunks
logger.info(`\n=== SAMPLE CHUNKS ===`)
chunks.slice(0, 3).forEach((chunk, index) => {
logger.info(`\nChunk ${index + 1}:`)
logger.info(` Source: ${chunk.sourceDocument}`)
logger.info(` Header: ${chunk.headerText} (Level ${chunk.headerLevel})`)
logger.info(` Link: ${chunk.headerLink}`)
logger.info(` Tokens: ${chunk.tokenCount}`)
logger.info(` Embedding: ${chunk.embedding.length} dimensions (${chunk.embeddingModel})`)
logger.info(
` Embedding Preview: [${chunk.embedding
.slice(0, 5)
.map((n) => n.toFixed(4))
.join(', ')}...]`
)
logger.info(` Text Preview: ${chunk.text.slice(0, 100)}...`)
})
// Calculate total token count
const totalTokens = chunks.reduce((sum, chunk) => sum + chunk.tokenCount, 0)
const chunksWithEmbeddings = chunks.filter((chunk) => chunk.embedding.length > 0).length
logger.info(`\n=== STATISTICS ===`)
logger.info(`Total tokens: ${totalTokens}`)
logger.info(`Average tokens per chunk: ${Math.round(totalTokens / chunks.length)}`)
logger.info(`Chunks with embeddings: ${chunksWithEmbeddings}/${chunks.length}`)
if (chunks.length > 0 && chunks[0].embedding.length > 0) {
logger.info(`Embedding model: ${chunks[0].embeddingModel}`)
logger.info(`Embedding dimensions: ${chunks[0].embedding.length}`)
}
const headerLevels = chunks.reduce<Record<number, number>>((acc, chunk) => {
acc[chunk.headerLevel] = (acc[chunk.headerLevel] || 0) + 1
return acc
}, {})
logger.info(`Header level distribution:`)
Object.entries(headerLevels)
.sort(([a], [b]) => Number(a) - Number(b))
.forEach(([level, count]) => {
logger.info(` H${level}: ${count} chunks`)
})
} catch (error) {
logger.error('Error processing docs:', error)
process.exit(1)
}
}
// Run the script
main().catch(console.error)

View File

@@ -1,215 +0,0 @@
#!/usr/bin/env bun
import path from 'path'
import { db } from '@sim/db'
import { docsEmbeddings } from '@sim/db/schema'
import { sql } from 'drizzle-orm'
import { isDev } from '@/lib/environment'
import { DocsChunker } from '@/lib/knowledge/documents/docs-chunker'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('ProcessDocsEmbeddings')
interface ProcessingOptions {
/** Clear existing docs embeddings before processing */
clearExisting?: boolean
/** Path to docs directory */
docsPath?: string
/** Base URL for generating links */
baseUrl?: string
/** Chunk size in tokens */
chunkSize?: number
/** Minimum chunk size in tokens */
minChunkSize?: number
/** Overlap between chunks in tokens */
overlap?: number
}
/**
* Production script to process documentation and save embeddings to database
*/
async function processDocsEmbeddings(options: ProcessingOptions = {}) {
const startTime = Date.now()
let processedChunks = 0
let failedChunks = 0
try {
// Configuration
const config = {
clearExisting: options.clearExisting ?? false,
docsPath: options.docsPath ?? path.join(process.cwd(), '../../apps/docs/content/docs/en'),
baseUrl: options.baseUrl ?? (isDev ? 'http://localhost:3001' : 'https://docs.sim.ai'),
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
minChunkSize: options.minChunkSize ?? 100,
overlap: options.overlap ?? 50,
}
logger.info('🚀 Starting docs embedding processing...')
logger.info(`Configuration:`, {
docsPath: config.docsPath,
baseUrl: config.baseUrl,
chunkSize: config.chunkSize,
clearExisting: config.clearExisting,
})
const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})
logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)
if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}
logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
if (config.clearExisting) {
logger.info('🗑️ Clearing existing docs embeddings...')
try {
const deleteResult = await db.delete(docsEmbeddings)
logger.info(`✅ Successfully deleted existing embeddings`)
} catch (error) {
logger.error('❌ Failed to delete existing embeddings:', error)
throw new Error('Failed to clear existing embeddings')
}
}
const batchSize = 10
logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)
for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize)
try {
const batchData = batch.map((chunk) => ({
chunkText: chunk.text,
sourceDocument: chunk.sourceDocument,
sourceLink: chunk.headerLink,
headerText: chunk.headerText,
headerLevel: chunk.headerLevel,
tokenCount: chunk.tokenCount,
embedding: chunk.embedding,
embeddingModel: chunk.embeddingModel,
metadata: chunk.metadata,
}))
await db.insert(docsEmbeddings).values(batchData)
processedChunks += batch.length
if (i % (batchSize * 5) === 0 || i + batchSize >= chunks.length) {
logger.info(
` 💾 Saved ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`
)
}
} catch (error) {
logger.error(`❌ Failed to save batch ${Math.floor(i / batchSize) + 1}:`, error)
failedChunks += batch.length
}
}
const savedCount = await db
.select({ count: sql<number>`count(*)` })
.from(docsEmbeddings)
.then((result) => result[0]?.count || 0)
const duration = Date.now() - startTime
logger.info(`✅ Processing complete!`)
logger.info(`📊 Results:`)
logger.info(` • Total chunks processed: ${chunks.length}`)
logger.info(` • Successfully saved: ${processedChunks}`)
logger.info(` • Failed: ${failedChunks}`)
logger.info(` • Database total: ${savedCount}`)
logger.info(` • Duration: ${Math.round(duration / 1000)}s`)
const documentStats = chunks.reduce(
(acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = { chunks: 0, tokens: 0 }
}
acc[chunk.sourceDocument].chunks++
acc[chunk.sourceDocument].tokens += chunk.tokenCount
return acc
},
{} as Record<string, { chunks: number; tokens: number }>
)
logger.info(`📋 Document breakdown:`)
Object.entries(documentStats)
.sort(([, a], [, b]) => b.chunks - a.chunks)
.slice(0, 10)
.forEach(([doc, stats]) => {
logger.info(`${doc}: ${stats.chunks} chunks, ${stats.tokens} tokens`)
})
if (Object.keys(documentStats).length > 10) {
logger.info(` • ... and ${Object.keys(documentStats).length - 10} more documents`)
}
return {
success: failedChunks === 0,
processedChunks,
failedChunks,
totalChunks: chunks.length,
databaseCount: savedCount,
duration,
}
} catch (error) {
logger.error('💥 Fatal error during processing:', error)
return {
success: false,
processedChunks,
failedChunks,
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}
/**
* Main function - handle command line arguments
*/
async function main() {
const args = process.argv.slice(2)
const options: ProcessingOptions = {}
if (args.includes('--clear')) {
options.clearExisting = true
}
if (args.includes('--help') || args.includes('-h')) {
console.log(`
Usage: bun run scripts/process-docs-embeddings.ts [options]
Options:
--clear Clear existing docs embeddings before processing
--help, -h Show this help message
Examples:
bun run scripts/process-docs-embeddings.ts
bun run scripts/process-docs-embeddings.ts --clear
`)
process.exit(0)
}
const result = await processDocsEmbeddings(options)
if (!result.success) {
process.exit(1)
}
}
if (import.meta.url.includes('process-docs-embeddings.ts')) {
main().catch((error) => {
logger.error('Script failed:', error)
process.exit(1)
})
}
export { processDocsEmbeddings }

View File

@@ -0,0 +1,256 @@
#!/usr/bin/env bun
import path from 'path'
import { db } from '@sim/db'
import { docsEmbeddings } from '@sim/db/schema'
import { sql } from 'drizzle-orm'
import { type DocChunk, DocsChunker } from '@/lib/chunkers'
import { isDev } from '@/lib/environment'
import { createLogger } from '@/lib/logs/console/logger'
const logger = createLogger('ProcessDocs')
interface ProcessingOptions {
/** Clear existing docs embeddings before processing */
clearExisting?: boolean
/** Path to docs directory */
docsPath?: string
/** Base URL for generating links */
baseUrl?: string
/** Chunk size in tokens */
chunkSize?: number
/** Minimum chunk size */
minChunkSize?: number
/** Overlap between chunks */
overlap?: number
/** Dry run - only display results, don't save to DB */
dryRun?: boolean
/** Verbose output */
verbose?: boolean
}
/**
* Process documentation files and optionally save embeddings to database
*/
async function processDocs(options: ProcessingOptions = {}) {
const config = {
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs'),
baseUrl: options.baseUrl || (isDev ? 'http://localhost:4000' : 'https://docs.sim.ai'),
chunkSize: options.chunkSize || 1024,
minChunkSize: options.minChunkSize || 100,
overlap: options.overlap || 200,
clearExisting: options.clearExisting ?? false,
dryRun: options.dryRun ?? false,
verbose: options.verbose ?? false,
}
let processedChunks = 0
let failedChunks = 0
try {
logger.info('🚀 Starting docs processing with config:', {
docsPath: config.docsPath,
baseUrl: config.baseUrl,
chunkSize: config.chunkSize,
clearExisting: config.clearExisting,
dryRun: config.dryRun,
})
// Initialize the chunker
const chunker = new DocsChunker({
chunkSize: config.chunkSize,
minChunkSize: config.minChunkSize,
overlap: config.overlap,
baseUrl: config.baseUrl,
})
// Process all .mdx files
logger.info(`📚 Processing docs from: ${config.docsPath}`)
const chunks = await chunker.chunkAllDocs(config.docsPath)
if (chunks.length === 0) {
logger.warn('⚠️ No chunks generated from docs')
return { success: false, processedChunks: 0, failedChunks: 0 }
}
logger.info(`📊 Generated ${chunks.length} chunks with embeddings`)
// Group chunks by document for summary
const chunksByDoc = chunks.reduce<Record<string, DocChunk[]>>((acc, chunk) => {
if (!acc[chunk.sourceDocument]) {
acc[chunk.sourceDocument] = []
}
acc[chunk.sourceDocument].push(chunk)
return acc
}, {})
// Display summary
logger.info(`\n=== DOCUMENT SUMMARY ===`)
for (const [doc, docChunks] of Object.entries(chunksByDoc)) {
logger.info(`${doc}: ${docChunks.length} chunks`)
}
// Display sample chunks in verbose or dry-run mode
if (config.verbose || config.dryRun) {
logger.info(`\n=== SAMPLE CHUNKS ===`)
chunks.slice(0, 3).forEach((chunk, index) => {
logger.info(`\nChunk ${index + 1}:`)
logger.info(` Source: ${chunk.sourceDocument}`)
logger.info(` Header: ${chunk.headerText} (Level ${chunk.headerLevel})`)
logger.info(` Link: ${chunk.headerLink}`)
logger.info(` Tokens: ${chunk.tokenCount}`)
logger.info(` Embedding: ${chunk.embedding.length} dimensions (${chunk.embeddingModel})`)
if (config.verbose) {
logger.info(` Text Preview: ${chunk.text.substring(0, 200)}...`)
}
})
}
// If dry run, stop here
if (config.dryRun) {
logger.info('\n✅ Dry run complete - no data saved to database')
return { success: true, processedChunks: chunks.length, failedChunks: 0 }
}
// Clear existing embeddings if requested
if (config.clearExisting) {
logger.info('🗑️ Clearing existing docs embeddings...')
try {
await db.delete(docsEmbeddings)
logger.info(`✅ Successfully deleted existing embeddings`)
} catch (error) {
logger.error('❌ Failed to delete existing embeddings:', error)
throw new Error('Failed to clear existing embeddings')
}
}
// Save chunks to database in batches
const batchSize = 10
logger.info(`💾 Saving chunks to database (batch size: ${batchSize})...`)
for (let i = 0; i < chunks.length; i += batchSize) {
const batch = chunks.slice(i, i + batchSize)
try {
const batchData = batch.map((chunk) => ({
chunkText: chunk.text,
sourceDocument: chunk.sourceDocument,
sourceLink: chunk.headerLink,
headerText: chunk.headerText,
headerLevel: chunk.headerLevel,
tokenCount: chunk.tokenCount,
embedding: chunk.embedding,
embeddingModel: chunk.embeddingModel,
metadata: chunk.metadata,
}))
await db.insert(docsEmbeddings).values(batchData)
processedChunks += batch.length
if (i % (batchSize * 5) === 0 || i + batchSize >= chunks.length) {
logger.info(
` 💾 Saved ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`
)
}
} catch (error) {
logger.error(`❌ Failed to save batch ${Math.floor(i / batchSize) + 1}:`, error)
failedChunks += batch.length
}
}
// Verify final count
const savedCount = await db
.select({ count: sql<number>`count(*)` })
.from(docsEmbeddings)
.then((res) => res[0]?.count || 0)
logger.info(
`\n✅ Processing complete!\n` +
` 📊 Total chunks: ${chunks.length}\n` +
` ✅ Processed: ${processedChunks}\n` +
` ❌ Failed: ${failedChunks}\n` +
` 💾 Total in DB: ${savedCount}`
)
return { success: failedChunks === 0, processedChunks, failedChunks }
} catch (error) {
logger.error('❌ Fatal error during processing:', error)
return { success: false, processedChunks, failedChunks }
}
}
/**
* Main entry point with CLI argument parsing
*/
async function main() {
const args = process.argv.slice(2)
const options: ProcessingOptions = {
clearExisting: args.includes('--clear'),
dryRun: args.includes('--dry-run'),
verbose: args.includes('--verbose'),
}
// Parse custom path if provided
const pathIndex = args.indexOf('--path')
if (pathIndex !== -1 && args[pathIndex + 1]) {
options.docsPath = args[pathIndex + 1]
}
// Parse custom base URL if provided
const urlIndex = args.indexOf('--url')
if (urlIndex !== -1 && args[urlIndex + 1]) {
options.baseUrl = args[urlIndex + 1]
}
// Parse chunk size if provided
const chunkSizeIndex = args.indexOf('--chunk-size')
if (chunkSizeIndex !== -1 && args[chunkSizeIndex + 1]) {
options.chunkSize = Number.parseInt(args[chunkSizeIndex + 1], 10)
}
// Show help if requested
if (args.includes('--help') || args.includes('-h')) {
console.log(`
📚 Process Documentation Script
Usage: bun run process-docs.ts [options]
Options:
--clear Clear existing embeddings before processing
--dry-run Process and display results without saving to DB
--verbose Show detailed output including text previews
--path <path> Custom path to docs directory
--url <url> Custom base URL for links
--chunk-size <n> Custom chunk size in tokens (default: 1024)
--help, -h Show this help message
Examples:
# Dry run to test chunking
bun run process-docs.ts --dry-run
# Process and save to database
bun run process-docs.ts
# Clear existing and reprocess
bun run process-docs.ts --clear
# Custom path with verbose output
bun run process-docs.ts --path ./my-docs --verbose
`)
process.exit(0)
}
const result = await processDocs(options)
process.exit(result.success ? 0 : 1)
}
// Run if executed directly
if (import.meta.url === `file://${process.argv[1]}`) {
main().catch((error) => {
logger.error('Fatal error:', error)
process.exit(1)
})
}
export { processDocs }

View File

@@ -12,6 +12,7 @@ import { GetBlocksAndToolsClientTool } from '@/lib/copilot/tools/client/blocks/g
import { GetBlocksMetadataClientTool } from '@/lib/copilot/tools/client/blocks/get-blocks-metadata'
import { GetTriggerBlocksClientTool } from '@/lib/copilot/tools/client/blocks/get-trigger-blocks'
import { GetExamplesRagClientTool } from '@/lib/copilot/tools/client/examples/get-examples-rag'
import { GetOperationsExamplesClientTool } from '@/lib/copilot/tools/client/examples/get-operations-examples'
import { GetTriggerExamplesClientTool } from '@/lib/copilot/tools/client/examples/get-trigger-examples'
import { ListGDriveFilesClientTool } from '@/lib/copilot/tools/client/gdrive/list-files'
import { ReadGDriveFileClientTool } from '@/lib/copilot/tools/client/gdrive/read-file'
@@ -90,6 +91,7 @@ const CLIENT_TOOL_INSTANTIATORS: Record<string, (id: string) => any> = {
set_global_workflow_variables: (id) => new SetGlobalWorkflowVariablesClientTool(id),
get_trigger_examples: (id) => new GetTriggerExamplesClientTool(id),
get_examples_rag: (id) => new GetExamplesRagClientTool(id),
get_operations_examples: (id) => new GetOperationsExamplesClientTool(id),
}
// Read-only static metadata for class-based tools (no instances)
@@ -120,6 +122,7 @@ export const CLASS_TOOL_METADATA: Record<string, BaseClientToolMetadata | undefi
get_trigger_examples: (GetTriggerExamplesClientTool as any)?.metadata,
get_examples_rag: (GetExamplesRagClientTool as any)?.metadata,
oauth_request_access: (OAuthRequestAccessClientTool as any)?.metadata,
get_operations_examples: (GetOperationsExamplesClientTool as any)?.metadata,
}
function ensureClientToolInstance(toolName: string | undefined, toolCallId: string | undefined) {
@@ -1273,7 +1276,8 @@ async function* parseSSEStream(
const initialState = {
mode: 'agent' as const,
selectedModel: 'claude-4.5-sonnet' as CopilotStore['selectedModel'],
agentPrefetch: true,
agentPrefetch: false,
enabledModels: null as string[] | null, // Null means not loaded yet, empty array means all disabled
isCollapsed: false,
currentChat: null as CopilotChat | null,
chats: [] as CopilotChat[],
@@ -2181,6 +2185,7 @@ export const useCopilotStore = create<CopilotStore>()(
setSelectedModel: (model) => set({ selectedModel: model }),
setAgentPrefetch: (prefetch) => set({ agentPrefetch: prefetch }),
setEnabledModels: (models) => set({ enabledModels: models }),
}))
)

View File

@@ -80,6 +80,7 @@ export interface CopilotState {
| 'claude-4.5-sonnet'
| 'claude-4.1-opus'
agentPrefetch: boolean
enabledModels: string[] | null // Null means not loaded yet, array of model IDs when loaded
isCollapsed: boolean
currentChat: CopilotChat | null
@@ -129,6 +130,7 @@ export interface CopilotActions {
setMode: (mode: CopilotMode) => void
setSelectedModel: (model: CopilotStore['selectedModel']) => void
setAgentPrefetch: (prefetch: boolean) => void
setEnabledModels: (models: string[] | null) => void
setWorkflowId: (workflowId: string | null) => Promise<void>
validateCurrentChat: () => boolean

View File

@@ -60,8 +60,8 @@ interface WorkflowDiffState {
}
interface WorkflowDiffActions {
setProposedChanges: (yamlContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
mergeProposedChanges: (yamlContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
setProposedChanges: (jsonContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
mergeProposedChanges: (jsonContent: string, diffAnalysis?: DiffAnalysis) => Promise<void>
clearDiff: () => void
getCurrentWorkflowForCanvas: () => WorkflowState
toggleDiffView: () => void
@@ -131,10 +131,10 @@ export const useWorkflowDiffStore = create<WorkflowDiffState & WorkflowDiffActio
let result: { success: boolean; diff?: WorkflowDiff; errors?: string[] }
// Handle both YAML string and direct WorkflowState object
// Handle both JSON string and direct WorkflowState object
if (typeof proposedContent === 'string') {
// Legacy YAML path (for backward compatibility)
result = await diffEngine.createDiffFromYaml(proposedContent, diffAnalysis)
// JSON string path (for backward compatibility)
result = await diffEngine.createDiff(proposedContent, diffAnalysis)
} else {
// Direct WorkflowState path (new, more efficient)
result = await diffEngine.createDiffFromWorkflowState(proposedContent, diffAnalysis)
@@ -214,13 +214,13 @@ export const useWorkflowDiffStore = create<WorkflowDiffState & WorkflowDiffActio
}
},
mergeProposedChanges: async (yamlContent: string, diffAnalysis?: DiffAnalysis) => {
logger.info('Merging proposed changes via YAML')
mergeProposedChanges: async (jsonContent: string, diffAnalysis?: DiffAnalysis) => {
logger.info('Merging proposed changes from workflow state')
// First, set isDiffReady to false to prevent premature rendering
batchedUpdate({ isDiffReady: false, diffError: null })
const result = await diffEngine.mergeDiffFromYaml(yamlContent, diffAnalysis)
const result = await diffEngine.mergeDiff(jsonContent, diffAnalysis)
if (result.success && result.diff) {
// Validate proposed workflow using serializer round-trip to catch canvas-breaking issues

View File

@@ -2,6 +2,7 @@ import type { Edge } from 'reactflow'
import { create } from 'zustand'
import { devtools } from 'zustand/middleware'
import { createLogger } from '@/lib/logs/console/logger'
import { getBlockOutputs } from '@/lib/workflows/block-outputs'
import { getBlock } from '@/blocks'
import { resolveOutputType } from '@/blocks/utils'
import {
@@ -166,7 +167,11 @@ export const useWorkflowStore = create<WorkflowStoreWithHistory>()(
}
})
const outputs = resolveOutputType(blockConfig.outputs)
// Get outputs based on trigger mode
const triggerMode = blockProperties?.triggerMode ?? false
const outputs = triggerMode
? getBlockOutputs(type, subBlocks, triggerMode)
: resolveOutputType(blockConfig.outputs)
const newState = {
blocks: {
@@ -182,7 +187,7 @@ export const useWorkflowStore = create<WorkflowStoreWithHistory>()(
horizontalHandles: blockProperties?.horizontalHandles ?? true,
isWide: blockProperties?.isWide ?? false,
advancedMode: blockProperties?.advancedMode ?? false,
triggerMode: blockProperties?.triggerMode ?? false,
triggerMode: triggerMode,
height: blockProperties?.height ?? 0,
layout: {},
data: nodeData,

View File

@@ -229,19 +229,6 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
}
}
// Log the request (with sensitive data redacted)
logger.info('Mistral OCR request:', {
url: url.toString(),
hasApiKey: !!params.apiKey,
model: requestBody.model,
options: {
includesImages: requestBody.include_image_base64 ?? 'not specified',
pages: requestBody.pages ?? 'all pages',
imageLimit: requestBody.image_limit ?? 'no limit',
imageMinSize: requestBody.image_min_size ?? 'no minimum',
},
})
return requestBody
},
},

View File

@@ -104,6 +104,7 @@
"clsx": "^2.1.1",
"cmdk": "^1.0.0",
"croner": "^9.0.0",
"csv-parse": "6.1.0",
"date-fns": "4.1.0",
"encoding": "0.1.13",
"entities": "6.0.1",
@@ -1664,6 +1665,8 @@
"csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],
"csv-parse": ["csv-parse@6.1.0", "", {}, "sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw=="],
"d3-color": ["d3-color@3.1.0", "", {}, "sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA=="],
"d3-dispatch": ["d3-dispatch@3.0.1", "", {}, "sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg=="],

View File

@@ -0,0 +1 @@
ALTER TABLE "settings" ADD COLUMN "copilot_enabled_models" jsonb DEFAULT '{}' NOT NULL;

File diff suppressed because it is too large

View File

@@ -666,6 +666,13 @@
"when": 1759182244521,
"tag": "0095_cheerful_albert_cleary",
"breakpoints": true
},
{
"idx": 96,
"version": "7",
"when": 1759534968812,
"tag": "0096_tranquil_arachne",
"breakpoints": true
}
]
}

View File

@@ -375,6 +375,9 @@ export const settings = pgTable('settings', {
showFloatingControls: boolean('show_floating_controls').notNull().default(true),
showTrainingControls: boolean('show_training_controls').notNull().default(false),
// Copilot preferences - maps model_id to enabled/disabled boolean
copilotEnabledModels: jsonb('copilot_enabled_models').notNull().default('{}'),
updatedAt: timestamp('updated_at').notNull().defaultNow(),
})
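// Illustrative sketch, not part of this diff: copilot_enabled_models stores a model_id -> boolean
// map, e.g. { "claude-4.5-sonnet": true, "claude-4.1-opus": false }. A hypothetical helper (not in
// this codebase) could turn it into the enabledModels string[] the copilot store expects:
function toEnabledModelIds(copilotEnabledModels: Record<string, boolean>): string[] {
  return Object.entries(copilotEnabledModels)
    .filter(([, enabled]) => enabled)
    .map(([modelId]) => modelId)
}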