Files
sim/apps/sim/app/api/knowledge/utils.ts
Waleed e37b4a926d feat(audit-log): add persistent audit log system with comprehensive route instrumentation (#3242)
* feat(audit-log): add persistent audit log system with comprehensive route instrumentation

* fix(audit-log): address PR review — nullable workspaceId, enum usage, remove redundant queries

- Make audit_log.workspace_id nullable with ON DELETE SET NULL (logs survive workspace/user deletion)
- Make audit_log.actor_id nullable with ON DELETE SET NULL
- Replace all 53 routes' string literal action/resourceType with AuditAction.X and AuditResourceType.X enums
- Fix empty workspaceId ('') → null for OAuth, form, and org routes to avoid FK violations
- Remove redundant DB queries in chat manage route (use checkChatAccess return data)
- Fix organization routes to pass workspaceId: null instead of organizationId

* fix(audit-log): replace remaining workspaceId '' fallbacks with null

* fix(audit-log): credential-set org IDs, workspace deletion FK, actorId fallback, string literal action

* reran migrations

* fix(mcp,audit): tighten env var domain bypass, add post-resolution check, form workspaceId

- Only bypass MCP domain check when env var is in hostname/authority, not path/query
- Add post-resolution validateMcpDomain call in test-connection endpoint
- Match client-side isDomainAllowed to same hostname-only bypass logic
- Return workspaceId from checkFormAccess, use in form audit logs
- Add 49 comprehensive domain-check tests covering all edge cases

* fix(mcp): stateful regex lastIndex bug, RFC 3986 authority parsing

- Remove /g flag from module-level ENV_VAR_PATTERN to avoid lastIndex state
- Create fresh regex instances per call in server-side hasEnvVarInHostname
- Fix authority extraction to terminate at /, ?, or # per RFC 3986
- Prevents bypass via https://evil.com?token={{SECRET}} (no path)
- Add test cases for query-only and fragment-only env var URLs (53 total)

* fix(audit-log): try/catch for never-throw contract, accept null actorName/Email, fix misleading action

- Wrap recordAudit body in try/catch so nanoid() or header extraction can't throw
- Accept string | null for actorName and actorEmail (session.user.name can be null)
- Normalize null -> undefined before insert to match DB column types
- Fix org members route: ORG_MEMBER_ADDED -> ORG_INVITATION_CREATED (sends invite, not adds member)

* improvement(audit-log): add resource names and specific invitation actions

* fix(audit-log): use validated chat record, add mock sync tests
2026-02-18 00:54:52 -08:00

405 lines
10 KiB
TypeScript

import { db } from '@sim/db'
import { document, embedding, knowledgeBase } from '@sim/db/schema'
import { and, eq, isNull } from 'drizzle-orm'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
/**
 * Shape of a knowledge base record as used by the access helpers in this
 * module. Presumably mirrors a row of the `knowledgeBase` table — confirm
 * against the schema.
 */
export interface KnowledgeBaseData {
  /** Identifier of the knowledge base. */
  id: string
  /** Identifier of the owning user; compared against the caller for ownership checks. */
  userId: string
  /** Workspace the knowledge base is attached to, if any. */
  workspaceId?: string | null
  name: string
  description?: string | null
  tokenCount: number
  embeddingModel: string
  embeddingDimension: number
  /** Opaque chunking settings; narrow before use. */
  chunkingConfig: unknown
  /** Deletion timestamp; the access checks only consider rows where this is null. */
  deletedAt?: Date | null
  createdAt: Date
  updatedAt: Date
}
/**
 * Shape of a document record inside a knowledge base, including its
 * processing state and the fixed set of typed tag slots.
 */
export interface DocumentData {
  /** Identifier of the document. */
  id: string
  /** Identifier of the knowledge base this document belongs to. */
  knowledgeBaseId: string

  // File details
  filename: string
  fileUrl: string
  fileSize: number
  mimeType: string

  // Content counters
  chunkCount: number
  tokenCount: number
  characterCount: number

  // Processing lifecycle ('completed' is the status required for chunk access)
  processingStatus: string
  processingStartedAt?: Date | null
  processingCompletedAt?: Date | null
  processingError?: string | null

  enabled: boolean
  /** Deletion timestamp; the access checks only consider rows where this is null. */
  deletedAt?: Date | null
  uploadedAt: Date

  // Text tags (7 slots)
  tag1?: string | null
  tag2?: string | null
  tag3?: string | null
  tag4?: string | null
  tag5?: string | null
  tag6?: string | null
  tag7?: string | null

  // Number tags (5 slots)
  number1?: number | null
  number2?: number | null
  number3?: number | null
  number4?: number | null
  number5?: number | null

  // Date tags (2 slots)
  date1?: Date | null
  date2?: Date | null

  // Boolean tags (3 slots)
  boolean1?: boolean | null
  boolean2?: boolean | null
  boolean3?: boolean | null
}
/**
 * Shape of a single chunk ("embedding") record belonging to a document,
 * including the same typed tag slots that documents carry.
 */
export interface EmbeddingData {
  /** Identifier of the chunk. */
  id: string
  /** Identifier of the knowledge base the chunk belongs to. */
  knowledgeBaseId: string
  /** Identifier of the document the chunk belongs to. */
  documentId: string

  // Chunk position and content
  chunkIndex: number
  chunkHash: string
  content: string
  contentLength: number
  tokenCount: number

  // Embedding values and the model name recorded with them
  embedding?: number[] | null
  embeddingModel: string

  // Offsets of the chunk (presumably into the source document text — confirm)
  startOffset: number
  endOffset: number

  // Text tags (7 slots)
  tag1?: string | null
  tag2?: string | null
  tag3?: string | null
  tag4?: string | null
  tag5?: string | null
  tag6?: string | null
  tag7?: string | null

  // Number tags (5 slots)
  number1?: number | null
  number2?: number | null
  number3?: number | null
  number4?: number | null
  number5?: number | null

  // Date tags (2 slots)
  date1?: Date | null
  date2?: Date | null

  // Boolean tags (3 slots)
  boolean1?: boolean | null
  boolean2?: boolean | null
  boolean3?: boolean | null

  enabled: boolean
  createdAt: Date
  updatedAt: Date
}
/** Successful knowledge base access check, carrying a summary of the knowledge base. */
export interface KnowledgeBaseAccessResult {
  hasAccess: true
  knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId' | 'name'>
}

/** Failed knowledge base access check. */
export interface KnowledgeBaseAccessDenied {
  hasAccess: false
  /** Set to true when no matching knowledge base row was found. */
  notFound?: boolean
  reason?: string
}

/** Outcome of a knowledge base access check, discriminated on `hasAccess`. */
export type KnowledgeBaseAccessCheck =
  | KnowledgeBaseAccessResult
  | KnowledgeBaseAccessDenied
/** Successful document access check, carrying the document and its knowledge base summary. */
export interface DocumentAccessResult {
  hasAccess: true
  document: DocumentData
  knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId' | 'name'>
}

/** Failed document access check; `reason` is always provided. */
export interface DocumentAccessDenied {
  hasAccess: false
  /** Set to true when the knowledge base or the document was not found. */
  notFound?: boolean
  reason: string
}

/** Outcome of a document access check, discriminated on `hasAccess`. */
export type DocumentAccessCheck =
  | DocumentAccessResult
  | DocumentAccessDenied
/** Successful chunk access check, carrying the chunk, its document and the knowledge base summary. */
export interface ChunkAccessResult {
  hasAccess: true
  chunk: EmbeddingData
  document: DocumentData
  knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId' | 'name'>
}

/** Failed chunk access check; `reason` is always provided. */
export interface ChunkAccessDenied {
  hasAccess: false
  /** Set to true when the knowledge base, document or chunk was not found. */
  notFound?: boolean
  reason: string
}

/** Outcome of a chunk access check, discriminated on `hasAccess`. */
export type ChunkAccessCheck =
  | ChunkAccessResult
  | ChunkAccessDenied
/**
 * Decide whether a user may access a knowledge base.
 *
 * Access is granted when the user owns the knowledge base, or when the
 * knowledge base is attached to a workspace on which the user holds any
 * permission (the permission lookup returns a non-null value).
 *
 * @param knowledgeBaseId - ID of the knowledge base to look up
 * @param userId - ID of the user requesting access
 * @returns A granted result with the knowledge base summary, a denial with
 *   `notFound: true` when no non-deleted row matches, or a plain denial
 */
export async function checkKnowledgeBaseAccess(
  knowledgeBaseId: string,
  userId: string
): Promise<KnowledgeBaseAccessCheck> {
  const rows = await db
    .select({
      id: knowledgeBase.id,
      userId: knowledgeBase.userId,
      workspaceId: knowledgeBase.workspaceId,
      name: knowledgeBase.name,
    })
    .from(knowledgeBase)
    .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
    .limit(1)

  // No live (non-deleted) knowledge base with this ID.
  if (rows.length === 0) {
    return { hasAccess: false, notFound: true }
  }

  const record = rows[0]

  // Owners always have access; no workspace lookup needed.
  if (record.userId === userId) {
    return { hasAccess: true, knowledgeBase: record }
  }

  // Without a workspace there is no other path to access.
  if (!record.workspaceId) {
    return { hasAccess: false }
  }

  // Any workspace permission level is sufficient here.
  const permission = await getUserEntityPermissions(userId, 'workspace', record.workspaceId)
  if (permission === null) {
    return { hasAccess: false }
  }

  return { hasAccess: true, knowledgeBase: record }
}
/**
 * Decide whether a user may modify a knowledge base.
 *
 * Write access is granted when either:
 * 1. the user owns the knowledge base, or
 * 2. the knowledge base is attached to a workspace on which the user holds
 *    the `write` or `admin` permission.
 *
 * @param knowledgeBaseId - ID of the knowledge base to look up
 * @param userId - ID of the user requesting write access
 * @returns A granted result with the knowledge base summary, a denial with
 *   `notFound: true` when no non-deleted row matches, or a plain denial
 */
export async function checkKnowledgeBaseWriteAccess(
  knowledgeBaseId: string,
  userId: string
): Promise<KnowledgeBaseAccessCheck> {
  const rows = await db
    .select({
      id: knowledgeBase.id,
      userId: knowledgeBase.userId,
      workspaceId: knowledgeBase.workspaceId,
      name: knowledgeBase.name,
    })
    .from(knowledgeBase)
    .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
    .limit(1)

  // No live (non-deleted) knowledge base with this ID.
  if (rows.length === 0) {
    return { hasAccess: false, notFound: true }
  }

  const record = rows[0]

  // Owners can always write; no workspace lookup needed.
  if (record.userId === userId) {
    return { hasAccess: true, knowledgeBase: record }
  }

  // Without a workspace there is no other path to write access.
  if (!record.workspaceId) {
    return { hasAccess: false }
  }

  // Only the 'write' and 'admin' workspace permissions allow modification.
  const permission = await getUserEntityPermissions(userId, 'workspace', record.workspaceId)
  const canWrite = permission === 'write' || permission === 'admin'
  if (!canWrite) {
    return { hasAccess: false }
  }

  return { hasAccess: true, knowledgeBase: record }
}
/**
 * Check if a user has write access to a specific document.
 *
 * Write access is granted only when the user has write access to the given
 * knowledge base AND the document is a non-deleted member of that same
 * knowledge base.
 *
 * @param knowledgeBaseId - ID of the knowledge base the document must belong to
 * @param documentId - ID of the document to look up
 * @param userId - ID of the user requesting write access
 * @returns A granted result with the document and knowledge base summary, or
 *   a denial with a `reason` (and `notFound: true` when the knowledge base or
 *   the document could not be found)
 */
export async function checkDocumentWriteAccess(
  knowledgeBaseId: string,
  documentId: string,
  userId: string
): Promise<DocumentAccessCheck> {
  // First check if user has write access to the knowledge base
  const kbAccess = await checkKnowledgeBaseWriteAccess(knowledgeBaseId, userId)
  if (!kbAccess.hasAccess) {
    return {
      hasAccess: false,
      notFound: kbAccess.notFound,
      reason: kbAccess.notFound ? 'Knowledge base not found' : 'Unauthorized knowledge base access',
    }
  }

  // Check if document exists inside the authorized knowledge base
  const doc = await db
    .select({
      id: document.id,
      filename: document.filename,
      fileUrl: document.fileUrl,
      fileSize: document.fileSize,
      mimeType: document.mimeType,
      chunkCount: document.chunkCount,
      tokenCount: document.tokenCount,
      characterCount: document.characterCount,
      enabled: document.enabled,
      processingStatus: document.processingStatus,
      processingError: document.processingError,
      uploadedAt: document.uploadedAt,
      processingStartedAt: document.processingStartedAt,
      processingCompletedAt: document.processingCompletedAt,
      knowledgeBaseId: document.knowledgeBaseId,
      // Text tags
      tag1: document.tag1,
      tag2: document.tag2,
      tag3: document.tag3,
      tag4: document.tag4,
      tag5: document.tag5,
      tag6: document.tag6,
      tag7: document.tag7,
      // Number tags (5 slots)
      number1: document.number1,
      number2: document.number2,
      number3: document.number3,
      number4: document.number4,
      number5: document.number5,
      // Date tags (2 slots)
      date1: document.date1,
      date2: document.date2,
      // Boolean tags (3 slots)
      boolean1: document.boolean1,
      boolean2: document.boolean2,
      boolean3: document.boolean3,
    })
    .from(document)
    .where(
      and(
        eq(document.id, documentId),
        // Scope the lookup to the knowledge base that was just authorized.
        // Without this, write access to one knowledge base would grant write
        // access to documents that live in any other knowledge base.
        eq(document.knowledgeBaseId, knowledgeBaseId),
        isNull(document.deletedAt)
      )
    )
    .limit(1)

  if (doc.length === 0) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  return {
    hasAccess: true,
    document: doc[0] as DocumentData,
    // kbAccess is narrowed to the granted variant by the early return above.
    knowledgeBase: kbAccess.knowledgeBase,
  }
}
/**
 * Decide whether a user may access a document within a knowledge base.
 *
 * The user must pass the knowledge base access check, and the document must
 * be a non-deleted row belonging to that knowledge base.
 *
 * @param knowledgeBaseId - ID of the knowledge base the document must belong to
 * @param documentId - ID of the document to look up
 * @param userId - ID of the user requesting access
 * @returns A granted result with the document and knowledge base summary, or
 *   a denial with a `reason` (and `notFound: true` when the knowledge base or
 *   the document could not be found)
 */
export async function checkDocumentAccess(
  knowledgeBaseId: string,
  documentId: string,
  userId: string
): Promise<DocumentAccessCheck> {
  // Gate on knowledge base access before touching the document table.
  const kbCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, userId)
  if (!kbCheck.hasAccess) {
    const reason = kbCheck.notFound
      ? 'Knowledge base not found'
      : 'Unauthorized knowledge base access'
    return { hasAccess: false, notFound: kbCheck.notFound, reason }
  }

  // The document must be live and belong to the requested knowledge base.
  const documentRows = await db
    .select()
    .from(document)
    .where(
      and(
        eq(document.id, documentId),
        eq(document.knowledgeBaseId, knowledgeBaseId),
        isNull(document.deletedAt)
      )
    )
    .limit(1)

  if (documentRows.length === 0) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  const foundDocument = documentRows[0] as DocumentData
  return {
    hasAccess: true,
    document: foundDocument,
    knowledgeBase: kbCheck.knowledgeBase,
  }
}
/**
 * Decide whether a user may access a chunk of a document in a knowledge base.
 *
 * Checks run in order: knowledge base access, existence of the non-deleted
 * document inside that knowledge base, document processing status equal to
 * `'completed'`, and finally existence of the chunk under that document.
 *
 * @param knowledgeBaseId - ID of the knowledge base the document must belong to
 * @param documentId - ID of the document the chunk must belong to
 * @param chunkId - ID of the chunk to look up
 * @param userId - ID of the user requesting access
 * @returns A granted result with the chunk, document and knowledge base
 *   summary, or a denial with a `reason` (and `notFound: true` when the
 *   knowledge base, document or chunk could not be found)
 */
export async function checkChunkAccess(
  knowledgeBaseId: string,
  documentId: string,
  chunkId: string,
  userId: string
): Promise<ChunkAccessCheck> {
  // Gate on knowledge base access before touching documents or chunks.
  const kbCheck = await checkKnowledgeBaseAccess(knowledgeBaseId, userId)
  if (!kbCheck.hasAccess) {
    const reason = kbCheck.notFound
      ? 'Knowledge base not found'
      : 'Unauthorized knowledge base access'
    return { hasAccess: false, notFound: kbCheck.notFound, reason }
  }

  // The parent document must be live and belong to the requested knowledge base.
  const documentRows = await db
    .select()
    .from(document)
    .where(
      and(
        eq(document.id, documentId),
        eq(document.knowledgeBaseId, knowledgeBaseId),
        isNull(document.deletedAt)
      )
    )
    .limit(1)

  if (documentRows.length === 0) {
    return { hasAccess: false, notFound: true, reason: 'Document not found' }
  }

  const parentDocument = documentRows[0] as DocumentData

  // Chunks are only exposed once the document has finished processing.
  const status = parentDocument.processingStatus
  if (status !== 'completed') {
    return {
      hasAccess: false,
      reason: `Document is not ready for access (status: ${status})`,
    }
  }

  // The chunk must belong to the document that was just validated.
  const chunkRows = await db
    .select()
    .from(embedding)
    .where(and(eq(embedding.id, chunkId), eq(embedding.documentId, documentId)))
    .limit(1)

  if (chunkRows.length === 0) {
    return { hasAccess: false, notFound: true, reason: 'Chunk not found' }
  }

  return {
    hasAccess: true,
    chunk: chunkRows[0] as EmbeddingData,
    document: parentDocument,
    knowledgeBase: kbCheck.knowledgeBase,
  }
}