From 7cc45749134bcef0ef3b9c4e7b1c2b7e2eb6c435 Mon Sep 17 00:00:00 2001 From: Emir Karabeg <78010029+emir-karabeg@users.noreply.github.com> Date: Thu, 28 Aug 2025 16:07:22 -0700 Subject: [PATCH] improvement(knowledge): search returns document name (#1167) --- apps/sim/app/api/knowledge/search/route.ts | 63 ++++++++++--------- apps/sim/app/api/knowledge/search/utils.ts | 13 +++- .../components/create-menu/create-menu.tsx | 2 +- apps/sim/tools/knowledge/search.ts | 1 + apps/sim/tools/knowledge/types.ts | 1 + 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/apps/sim/app/api/knowledge/search/route.ts b/apps/sim/app/api/knowledge/search/route.ts index 2177cb6f2..7f2f598db 100644 --- a/apps/sim/app/api/knowledge/search/route.ts +++ b/apps/sim/app/api/knowledge/search/route.ts @@ -77,14 +77,13 @@ export async function POST(request: NextRequest) { ? validatedData.knowledgeBaseIds : [validatedData.knowledgeBaseIds] - // Check access permissions for each knowledge base using proper workspace-based permissions - const accessibleKbIds: string[] = [] - for (const kbId of knowledgeBaseIds) { - const accessCheck = await checkKnowledgeBaseAccess(kbId, userId) - if (accessCheck.hasAccess) { - accessibleKbIds.push(kbId) - } - } + // Check access permissions in parallel for performance + const accessChecks = await Promise.all( + knowledgeBaseIds.map((kbId) => checkKnowledgeBaseAccess(kbId, userId)) + ) + const accessibleKbIds: string[] = knowledgeBaseIds.filter( + (_, idx) => accessChecks[idx]?.hasAccess + ) // Map display names to tag slots for filtering let mappedFilters: Record = {} @@ -137,7 +136,10 @@ export async function POST(request: NextRequest) { // Generate query embedding only if query is provided const hasQuery = validatedData.query && validatedData.query.trim().length > 0 - const queryEmbedding = hasQuery ? await generateSearchEmbedding(validatedData.query!) : null + // Start embedding generation early and await when needed + const queryEmbeddingPromise = hasQuery + ? generateSearchEmbedding(validatedData.query!) + : Promise.resolve(null) // Check if any requested knowledge bases were not accessible const inaccessibleKbIds = knowledgeBaseIds.filter((id) => !accessibleKbIds.includes(id)) @@ -165,7 +167,7 @@ export async function POST(request: NextRequest) { // Tag + Vector search logger.debug(`[${requestId}] Executing tag + vector search with filters:`, mappedFilters) const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK) - const queryVector = JSON.stringify(queryEmbedding) + const queryVector = JSON.stringify(await queryEmbeddingPromise) results = await handleTagAndVectorSearch({ knowledgeBaseIds: accessibleKbIds, @@ -178,7 +180,7 @@ export async function POST(request: NextRequest) { // Vector-only search logger.debug(`[${requestId}] Executing vector-only search`) const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK) - const queryVector = JSON.stringify(queryEmbedding) + const queryVector = JSON.stringify(await queryEmbeddingPromise) results = await handleVectorOnlySearch({ knowledgeBaseIds: accessibleKbIds, @@ -213,24 +215,28 @@ export async function POST(request: NextRequest) { } // Fetch tag definitions for display name mapping (reuse the same fetch from filtering) + const tagDefsResults = await Promise.all( + accessibleKbIds.map(async (kbId) => { + try { + const tagDefs = await getDocumentTagDefinitions(kbId) + const map: Record = {} + tagDefs.forEach((def) => { + map[def.tagSlot] = def.displayName + }) + return { kbId, map } + } catch (error) { + logger.warn( + `[${requestId}] Failed to fetch tag definitions for display mapping:`, + error + ) + return { kbId, map: {} as Record } + } + }) + ) const tagDefinitionsMap: Record> = {} - for (const kbId of accessibleKbIds) { - try { - const tagDefs = await getDocumentTagDefinitions(kbId) - - tagDefinitionsMap[kbId] = {} - tagDefs.forEach((def) => { - tagDefinitionsMap[kbId][def.tagSlot] = def.displayName - }) - logger.debug( - `[${requestId}] Display mapping - KB ${kbId} tag definitions:`, - tagDefinitionsMap[kbId] - ) - } catch (error) { - logger.warn(`[${requestId}] Failed to fetch tag definitions for display mapping:`, error) - tagDefinitionsMap[kbId] = {} - } - } + tagDefsResults.forEach(({ kbId, map }) => { + tagDefinitionsMap[kbId] = map + }) return NextResponse.json({ success: true, @@ -260,6 +266,7 @@ export async function POST(request: NextRequest) { id: result.id, content: result.content, documentId: result.documentId, + documentName: (result as any).documentName || undefined, chunkIndex: result.chunkIndex, tags, // Clean display name mapped tags similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches diff --git a/apps/sim/app/api/knowledge/search/utils.ts b/apps/sim/app/api/knowledge/search/utils.ts index 7a72e2703..f85dece2f 100644 --- a/apps/sim/app/api/knowledge/search/utils.ts +++ b/apps/sim/app/api/knowledge/search/utils.ts @@ -1,7 +1,7 @@ import { and, eq, inArray, sql } from 'drizzle-orm' import { createLogger } from '@/lib/logs/console/logger' import { db } from '@/db' -import { embedding } from '@/db/schema' +import { document, embedding } from '@/db/schema' const logger = createLogger('KnowledgeSearchUtils') @@ -9,6 +9,7 @@ export interface SearchResult { id: string content: string documentId: string + documentName: string | null chunkIndex: number tag1: string | null tag2: string | null @@ -130,6 +131,7 @@ async function executeVectorSearchOnIds( id: embedding.id, content: embedding.content, documentId: embedding.documentId, + documentName: document.filename, chunkIndex: embedding.chunkIndex, tag1: embedding.tag1, tag2: embedding.tag2, @@ -142,6 +144,7 @@ async function executeVectorSearchOnIds( knowledgeBaseId: embedding.knowledgeBaseId, }) .from(embedding) + .innerJoin(document, eq(embedding.documentId, document.id)) .where( and( inArray(embedding.id, embeddingIds), @@ -173,6 +176,7 @@ export async function handleTagOnlySearch(params: SearchParams): Promise = { id: { type: 'string' }, content: { type: 'string' }, documentId: { type: 'string' }, + documentName: { type: 'string' }, chunkIndex: { type: 'number' }, similarity: { type: 'number' }, metadata: { type: 'object' }, diff --git a/apps/sim/tools/knowledge/types.ts b/apps/sim/tools/knowledge/types.ts index 3bf3234f8..60b00d2d8 100644 --- a/apps/sim/tools/knowledge/types.ts +++ b/apps/sim/tools/knowledge/types.ts @@ -2,6 +2,7 @@ export interface KnowledgeSearchResult { id: string content: string documentId: string + documentName: string chunkIndex: number metadata: Record similarity: number