improvement(knowledge): search returns document name (#1167)

This commit is contained in:
Emir Karabeg
2025-08-28 16:07:22 -07:00
committed by GitHub
parent 3f900947ce
commit 7cc4574913
5 changed files with 50 additions and 30 deletions

View File

@@ -77,14 +77,13 @@ export async function POST(request: NextRequest) {
? validatedData.knowledgeBaseIds
: [validatedData.knowledgeBaseIds]
// Check access permissions for each knowledge base using proper workspace-based permissions
const accessibleKbIds: string[] = []
for (const kbId of knowledgeBaseIds) {
const accessCheck = await checkKnowledgeBaseAccess(kbId, userId)
if (accessCheck.hasAccess) {
accessibleKbIds.push(kbId)
}
}
// Check access permissions in parallel for performance
const accessChecks = await Promise.all(
knowledgeBaseIds.map((kbId) => checkKnowledgeBaseAccess(kbId, userId))
)
const accessibleKbIds: string[] = knowledgeBaseIds.filter(
(_, idx) => accessChecks[idx]?.hasAccess
)
// Map display names to tag slots for filtering
let mappedFilters: Record<string, string> = {}
@@ -137,7 +136,10 @@ export async function POST(request: NextRequest) {
// Generate query embedding only if query is provided
const hasQuery = validatedData.query && validatedData.query.trim().length > 0
const queryEmbedding = hasQuery ? await generateSearchEmbedding(validatedData.query!) : null
// Start embedding generation early and await when needed
const queryEmbeddingPromise = hasQuery
? generateSearchEmbedding(validatedData.query!)
: Promise.resolve(null)
// Check if any requested knowledge bases were not accessible
const inaccessibleKbIds = knowledgeBaseIds.filter((id) => !accessibleKbIds.includes(id))
@@ -165,7 +167,7 @@ export async function POST(request: NextRequest) {
// Tag + Vector search
logger.debug(`[${requestId}] Executing tag + vector search with filters:`, mappedFilters)
const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
const queryVector = JSON.stringify(queryEmbedding)
const queryVector = JSON.stringify(await queryEmbeddingPromise)
results = await handleTagAndVectorSearch({
knowledgeBaseIds: accessibleKbIds,
@@ -178,7 +180,7 @@ export async function POST(request: NextRequest) {
// Vector-only search
logger.debug(`[${requestId}] Executing vector-only search`)
const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK)
const queryVector = JSON.stringify(queryEmbedding)
const queryVector = JSON.stringify(await queryEmbeddingPromise)
results = await handleVectorOnlySearch({
knowledgeBaseIds: accessibleKbIds,
@@ -213,24 +215,28 @@ export async function POST(request: NextRequest) {
}
// Fetch tag definitions for display name mapping (reuse the same fetch from filtering)
const tagDefsResults = await Promise.all(
accessibleKbIds.map(async (kbId) => {
try {
const tagDefs = await getDocumentTagDefinitions(kbId)
const map: Record<string, string> = {}
tagDefs.forEach((def) => {
map[def.tagSlot] = def.displayName
})
return { kbId, map }
} catch (error) {
logger.warn(
`[${requestId}] Failed to fetch tag definitions for display mapping:`,
error
)
return { kbId, map: {} as Record<string, string> }
}
})
)
const tagDefinitionsMap: Record<string, Record<string, string>> = {}
for (const kbId of accessibleKbIds) {
try {
const tagDefs = await getDocumentTagDefinitions(kbId)
tagDefinitionsMap[kbId] = {}
tagDefs.forEach((def) => {
tagDefinitionsMap[kbId][def.tagSlot] = def.displayName
})
logger.debug(
`[${requestId}] Display mapping - KB ${kbId} tag definitions:`,
tagDefinitionsMap[kbId]
)
} catch (error) {
logger.warn(`[${requestId}] Failed to fetch tag definitions for display mapping:`, error)
tagDefinitionsMap[kbId] = {}
}
}
tagDefsResults.forEach(({ kbId, map }) => {
tagDefinitionsMap[kbId] = map
})
return NextResponse.json({
success: true,
@@ -260,6 +266,7 @@ export async function POST(request: NextRequest) {
id: result.id,
content: result.content,
documentId: result.documentId,
documentName: (result as any).documentName || undefined,
chunkIndex: result.chunkIndex,
tags, // Clean display name mapped tags
similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches

View File

@@ -1,7 +1,7 @@
import { and, eq, inArray, sql } from 'drizzle-orm'
import { createLogger } from '@/lib/logs/console/logger'
import { db } from '@/db'
import { embedding } from '@/db/schema'
import { document, embedding } from '@/db/schema'
const logger = createLogger('KnowledgeSearchUtils')
@@ -9,6 +9,7 @@ export interface SearchResult {
id: string
content: string
documentId: string
documentName: string | null
chunkIndex: number
tag1: string | null
tag2: string | null
@@ -130,6 +131,7 @@ async function executeVectorSearchOnIds(
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -142,6 +144,7 @@ async function executeVectorSearchOnIds(
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.id, embeddingIds),
@@ -173,6 +176,7 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -185,6 +189,7 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
eq(embedding.knowledgeBaseId, kbId),
@@ -204,6 +209,7 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -216,6 +222,7 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
@@ -247,6 +254,7 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -259,6 +267,7 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
eq(embedding.knowledgeBaseId, kbId),
@@ -280,6 +289,7 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -292,6 +302,7 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),

View File

@@ -18,7 +18,7 @@ const logger = createLogger('CreateMenu')
// Timing constants for the create menu; `as const` keeps each value as a readonly literal type.
// NOTE(review): units are presumably milliseconds — confirm against the timer call sites.
const TIMERS = {
LONG_PRESS_DELAY: 500,
// NOTE(review): CLOSE_DELAY appears twice because this is a rendered diff hunk without +/- markers;
// presumably 150 is the value being removed and 300 is the value this commit introduces.
CLOSE_DELAY: 150,
CLOSE_DELAY: 300,
} as const
interface CreateMenuProps {

View File

@@ -113,6 +113,7 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
id: { type: 'string' },
content: { type: 'string' },
documentId: { type: 'string' },
documentName: { type: 'string' },
chunkIndex: { type: 'number' },
similarity: { type: 'number' },
metadata: { type: 'object' },

View File

@@ -2,6 +2,7 @@ export interface KnowledgeSearchResult {
id: string
content: string
documentId: string
documentName: string
chunkIndex: number
metadata: Record<string, any>
similarity: number