Files
sim/apps/sim/lib/knowledge/service.ts
Vikhyath Mondreti be3cdcf981 Merge pull request #3179 from simstudioai/improvement/file-download-timeouts
improvement(timeouts): files/base64 should use max timeouts + auth centralization
2026-02-10 15:57:06 -08:00

268 lines
7.7 KiB
TypeScript

import { randomUUID } from 'crypto'
import { db } from '@sim/db'
import { document, knowledgeBase, permissions } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, count, eq, isNotNull, isNull, or } from 'drizzle-orm'
import type {
ChunkingConfig,
CreateKnowledgeBaseData,
KnowledgeBaseWithCounts,
} from '@/lib/knowledge/types'
import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils'
// Logger shared by every function in this module, created under the name 'KnowledgeBaseService'
const logger = createLogger('KnowledgeBaseService')
/**
 * List the non-deleted knowledge bases a user can access, each with its live document count.
 *
 * Access rules:
 * - With a truthy `workspaceId`: knowledge bases in that workspace for which the user has a
 *   workspace permission row, plus the user's own knowledge bases that have no workspace (legacy).
 * - Otherwise: knowledge bases the user owns directly, plus any whose workspace the user has a
 *   permission row for.
 *
 * @param userId - ID of the user whose accessible knowledge bases are listed
 * @param workspaceId - Optional workspace to scope the listing to
 * @returns Matching knowledge bases ordered by creation time, with `docCount` as a number
 */
export async function getKnowledgeBases(
  userId: string,
  workspaceId?: string | null
): Promise<KnowledgeBaseWithCounts[]> {
  // Only documents that have not been soft-deleted are counted
  const liveDocumentOfKb = and(
    eq(document.knowledgeBaseId, knowledgeBase.id),
    isNull(document.deletedAt)
  )

  // Joins the user's permission row (if any) on the knowledge base's workspace
  const userWorkspacePermission = and(
    eq(permissions.entityType, 'workspace'),
    eq(permissions.entityId, knowledgeBase.workspaceId),
    eq(permissions.userId, userId)
  )

  // The permissions join is a LEFT JOIN, so a non-null userId means a permission row matched
  const hasWorkspacePermission = isNotNull(permissions.userId)
  const ownedByUser = eq(knowledgeBase.userId, userId)

  const accessFilter = workspaceId
    ? or(
        // Knowledge bases in the requested workspace (user must have workspace permissions)
        and(eq(knowledgeBase.workspaceId, workspaceId), hasWorkspacePermission),
        // Fallback: user-owned knowledge bases without a workspace (legacy)
        and(ownedByUser, isNull(knowledgeBase.workspaceId))
      )
    : // No workspace filter: direct ownership or permission on the owning workspace
      or(ownedByUser, hasWorkspacePermission)

  const rows = await db
    .select({
      id: knowledgeBase.id,
      name: knowledgeBase.name,
      description: knowledgeBase.description,
      tokenCount: knowledgeBase.tokenCount,
      embeddingModel: knowledgeBase.embeddingModel,
      embeddingDimension: knowledgeBase.embeddingDimension,
      chunkingConfig: knowledgeBase.chunkingConfig,
      createdAt: knowledgeBase.createdAt,
      updatedAt: knowledgeBase.updatedAt,
      workspaceId: knowledgeBase.workspaceId,
      docCount: count(document.id),
    })
    .from(knowledgeBase)
    .leftJoin(document, liveDocumentOfKb)
    .leftJoin(permissions, userWorkspacePermission)
    .where(and(isNull(knowledgeBase.deletedAt), accessFilter))
    .groupBy(knowledgeBase.id)
    .orderBy(knowledgeBase.createdAt)

  return rows.map((row) => {
    return {
      ...row,
      chunkingConfig: row.chunkingConfig as ChunkingConfig,
      // Normalize the aggregate to a plain number
      docCount: Number(row.docCount),
    }
  })
}
/**
 * Create a new knowledge base in a workspace.
 *
 * The caller must hold `admin` or `write` permission on `data.workspaceId`; any other
 * permission result is rejected before anything is inserted.
 *
 * @param data - Name, optional description, owner, workspace, embedding and chunking settings
 * @param requestId - Request identifier used to prefix the log line
 * @returns The newly created knowledge base with zero tokens and zero documents
 * @throws Error if the user lacks `admin`/`write` permission on the workspace
 */
export async function createKnowledgeBase(
  data: CreateKnowledgeBaseData,
  requestId: string
): Promise<KnowledgeBaseWithCounts> {
  const kbId = randomUUID()
  // Captured before the permission lookup so both timestamps reflect when the call started
  const createdAt = new Date()

  const permission = await getUserEntityPermissions(data.userId, 'workspace', data.workspaceId)
  const canCreate = permission === 'admin' || permission === 'write'
  if (!canCreate) {
    throw new Error('User does not have permission to create knowledge bases in this workspace')
  }

  // Fields shared by the inserted row and the record handed back to the caller
  const created = {
    id: kbId,
    name: data.name,
    description: data.description ?? null,
    tokenCount: 0,
    embeddingModel: data.embeddingModel,
    embeddingDimension: data.embeddingDimension,
    chunkingConfig: data.chunkingConfig,
    createdAt,
    updatedAt: createdAt,
    workspaceId: data.workspaceId,
  }

  await db.insert(knowledgeBase).values({
    ...created,
    userId: data.userId,
    deletedAt: null,
  })

  logger.info(`[${requestId}] Created knowledge base: ${data.name} (${kbId})`)

  // A freshly created knowledge base has no documents yet
  return { ...created, docCount: 0 }
}
/**
 * Update a knowledge base and return its refreshed record.
 *
 * Only fields present on `updates` (i.e. not `undefined`) are written; `updatedAt` is always
 * bumped. Supplying `chunkingConfig` also sets the embedding model and dimension to
 * `text-embedding-3-small` / 1536.
 *
 * Soft-deleted knowledge bases are treated as missing: they are neither modified nor returned,
 * which matches the `deletedAt IS NULL` rule applied by `getKnowledgeBaseById`.
 *
 * @param knowledgeBaseId - ID of the knowledge base to update
 * @param updates - Fields to change; omitted fields are left untouched
 * @param requestId - Request identifier used to prefix the log line
 * @returns The updated knowledge base with its count of non-deleted documents
 * @throws Error if no non-deleted knowledge base with the given ID exists
 */
export async function updateKnowledgeBase(
  knowledgeBaseId: string,
  updates: {
    name?: string
    description?: string
    workspaceId?: string | null
    chunkingConfig?: {
      maxSize: number
      minSize: number
      overlap: number
    }
  },
  requestId: string
): Promise<KnowledgeBaseWithCounts> {
  const now = new Date()
  const updateData: {
    updatedAt: Date
    name?: string
    description?: string | null
    workspaceId?: string | null
    chunkingConfig?: {
      maxSize: number
      minSize: number
      overlap: number
    }
    embeddingModel?: string
    embeddingDimension?: number
  } = {
    updatedAt: now,
  }

  if (updates.name !== undefined) updateData.name = updates.name
  if (updates.description !== undefined) updateData.description = updates.description
  // `null` is a meaningful value here (detach from workspace), so only `undefined` is skipped
  if (updates.workspaceId !== undefined) updateData.workspaceId = updates.workspaceId
  if (updates.chunkingConfig !== undefined) {
    updateData.chunkingConfig = updates.chunkingConfig
    updateData.embeddingModel = 'text-embedding-3-small'
    updateData.embeddingDimension = 1536
  }

  // Restrict both the write and the read-back to rows that have not been soft-deleted,
  // so a deleted knowledge base cannot be silently modified or returned as if it were live.
  const activeKnowledgeBase = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )

  await db.update(knowledgeBase).set(updateData).where(activeKnowledgeBase)

  const updatedKb = await db
    .select({
      id: knowledgeBase.id,
      name: knowledgeBase.name,
      description: knowledgeBase.description,
      tokenCount: knowledgeBase.tokenCount,
      embeddingModel: knowledgeBase.embeddingModel,
      embeddingDimension: knowledgeBase.embeddingDimension,
      chunkingConfig: knowledgeBase.chunkingConfig,
      createdAt: knowledgeBase.createdAt,
      updatedAt: knowledgeBase.updatedAt,
      workspaceId: knowledgeBase.workspaceId,
      docCount: count(document.id),
    })
    .from(knowledgeBase)
    .leftJoin(
      document,
      // Count only documents that have not been soft-deleted
      and(eq(document.knowledgeBaseId, knowledgeBase.id), isNull(document.deletedAt))
    )
    .where(activeKnowledgeBase)
    .groupBy(knowledgeBase.id)
    .limit(1)

  if (updatedKb.length === 0) {
    throw new Error(`Knowledge base ${knowledgeBaseId} not found`)
  }

  logger.info(`[${requestId}] Updated knowledge base: ${knowledgeBaseId}`)

  return {
    ...updatedKb[0],
    chunkingConfig: updatedKb[0].chunkingConfig as ChunkingConfig,
    docCount: Number(updatedKb[0].docCount),
  }
}
/**
 * Look up one non-deleted knowledge base by ID, together with its live document count.
 *
 * @param knowledgeBaseId - ID of the knowledge base to fetch
 * @returns The knowledge base, or `null` when no non-deleted row has that ID
 */
export async function getKnowledgeBaseById(
  knowledgeBaseId: string
): Promise<KnowledgeBaseWithCounts | null> {
  // Soft-deleted documents are excluded from the count
  const liveDocumentOfKb = and(
    eq(document.knowledgeBaseId, knowledgeBase.id),
    isNull(document.deletedAt)
  )
  // Soft-deleted knowledge bases are treated as absent
  const activeKbWithId = and(
    eq(knowledgeBase.id, knowledgeBaseId),
    isNull(knowledgeBase.deletedAt)
  )

  const [row] = await db
    .select({
      id: knowledgeBase.id,
      name: knowledgeBase.name,
      description: knowledgeBase.description,
      tokenCount: knowledgeBase.tokenCount,
      embeddingModel: knowledgeBase.embeddingModel,
      embeddingDimension: knowledgeBase.embeddingDimension,
      chunkingConfig: knowledgeBase.chunkingConfig,
      createdAt: knowledgeBase.createdAt,
      updatedAt: knowledgeBase.updatedAt,
      workspaceId: knowledgeBase.workspaceId,
      docCount: count(document.id),
    })
    .from(knowledgeBase)
    .leftJoin(document, liveDocumentOfKb)
    .where(activeKbWithId)
    .groupBy(knowledgeBase.id)
    .limit(1)

  if (!row) {
    return null
  }

  return {
    ...row,
    chunkingConfig: row.chunkingConfig as ChunkingConfig,
    docCount: Number(row.docCount),
  }
}
/**
 * Soft-delete a knowledge base by stamping `deletedAt` (and `updatedAt`) with the current time.
 *
 * The update only touches rows that are not already soft-deleted, so calling this again for
 * the same ID is a no-op and the original deletion timestamp is preserved. The log line is
 * written whether or not a row was changed.
 *
 * @param knowledgeBaseId - ID of the knowledge base to soft-delete
 * @param requestId - Request identifier used to prefix the log line
 */
export async function deleteKnowledgeBase(
  knowledgeBaseId: string,
  requestId: string
): Promise<void> {
  const now = new Date()

  await db
    .update(knowledgeBase)
    .set({
      deletedAt: now,
      updatedAt: now,
    })
    // Skip rows that were already deleted so their first deletion time is not overwritten
    .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))

  logger.info(`[${requestId}] Soft deleted knowledge base: ${knowledgeBaseId}`)
}