improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs (#1170)

* improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs

* cleanup

* add documentName to upload chunk op as well
This commit is contained in:
Waleed
2025-08-28 17:04:31 -07:00
committed by GitHub
parent 56543dafb4
commit fcf128f6db
8 changed files with 70 additions and 42 deletions

View File

@@ -187,6 +187,8 @@ export async function POST(
success: true,
data: {
...newChunk,
documentId,
documentName: doc.filename,
...(cost
? {
cost: {

View File

@@ -65,12 +65,14 @@ const mockHandleVectorOnlySearch = vi.fn()
const mockHandleTagAndVectorSearch = vi.fn()
const mockGetQueryStrategy = vi.fn()
const mockGenerateSearchEmbedding = vi.fn()
const mockGetDocumentNamesByIds = vi.fn()
vi.mock('./utils', () => ({
handleTagOnlySearch: mockHandleTagOnlySearch,
handleVectorOnlySearch: mockHandleVectorOnlySearch,
handleTagAndVectorSearch: mockHandleTagAndVectorSearch,
getQueryStrategy: mockGetQueryStrategy,
generateSearchEmbedding: mockGenerateSearchEmbedding,
getDocumentNamesByIds: mockGetDocumentNamesByIds,
APIError: class APIError extends Error {
public status: number
constructor(message: string, status: number) {
@@ -146,6 +148,10 @@ describe('Knowledge Search API Route', () => {
singleQueryOptimized: true,
})
mockGenerateSearchEmbedding.mockClear().mockResolvedValue([0.1, 0.2, 0.3, 0.4, 0.5])
mockGetDocumentNamesByIds.mockClear().mockResolvedValue({
doc1: 'Document 1',
doc2: 'Document 2',
})
vi.stubGlobal('crypto', {
randomUUID: vi.fn().mockReturnValue('mock-uuid-1234-5678'),

View File

@@ -9,6 +9,7 @@ import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
import { calculateCost } from '@/providers/utils'
import {
generateSearchEmbedding,
getDocumentNamesByIds,
getQueryStrategy,
handleTagAndVectorSearch,
handleTagOnlySearch,
@@ -238,6 +239,10 @@ export async function POST(request: NextRequest) {
tagDefinitionsMap[kbId] = map
})
// Fetch document names for the results
const documentIds = results.map((result) => result.documentId)
const documentNameMap = await getDocumentNamesByIds(documentIds)
return NextResponse.json({
success: true,
data: {
@@ -263,12 +268,11 @@ export async function POST(request: NextRequest) {
})
return {
id: result.id,
content: result.content,
documentId: result.documentId,
documentName: (result as any).documentName || undefined,
documentName: documentNameMap[result.documentId] || undefined,
content: result.content,
chunkIndex: result.chunkIndex,
tags, // Clean display name mapped tags
metadata: tags, // Clean display name mapped tags
similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches
}
}),

View File

@@ -5,11 +5,34 @@ import { document, embedding } from '@/db/schema'
const logger = createLogger('KnowledgeSearchUtils')
/**
 * Resolves a list of document IDs to their filenames using one query
 * against the `document` table.
 *
 * @param documentIds - IDs to resolve. Duplicates are allowed; they are
 *   collapsed before the query is issued.
 * @returns An object keyed by document ID whose values are the matching
 *   filenames. Only rows returned by the query get an entry, so an ID with
 *   no matching row is simply absent. An empty input yields an empty object.
 */
export async function getDocumentNamesByIds(
  documentIds: string[]
): Promise<Record<string, string>> {
  const namesById: Record<string, string> = {}

  // Nothing to resolve: return early without touching the database.
  if (documentIds.length === 0) {
    return namesById
  }

  // The same document can back many results; query each ID only once.
  const distinctIds = Array.from(new Set(documentIds))

  const rows = await db
    .select({ id: document.id, filename: document.filename })
    .from(document)
    .where(inArray(document.id, distinctIds))

  for (const row of rows) {
    namesById[row.id] = row.filename
  }

  return namesById
}
export interface SearchResult {
id: string
content: string
documentId: string
documentName: string | null
chunkIndex: number
tag1: string | null
tag2: string | null
@@ -131,7 +154,6 @@ async function executeVectorSearchOnIds(
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -144,7 +166,6 @@ async function executeVectorSearchOnIds(
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.id, embeddingIds),
@@ -176,7 +197,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -189,7 +209,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
eq(embedding.knowledgeBaseId, kbId),
@@ -209,7 +228,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -222,7 +240,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
@@ -254,7 +271,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -267,7 +283,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
eq(embedding.knowledgeBaseId, kbId),
@@ -289,7 +304,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
id: embedding.id,
content: embedding.content,
documentId: embedding.documentId,
documentName: document.filename,
chunkIndex: embedding.chunkIndex,
tag1: embedding.tag1,
tag2: embedding.tag2,
@@ -302,7 +316,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
knowledgeBaseId: embedding.knowledgeBaseId,
})
.from(embedding)
.innerJoin(document, eq(embedding.documentId, document.id))
.where(
and(
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),

View File

@@ -160,19 +160,19 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
return {
success: true,
output: {
data: {
id: firstDocument?.documentId || firstDocument?.id || '',
name: uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
type: 'document',
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
enabled: true,
},
message:
uploadCount > 1
? `Successfully created ${uploadCount} documents in knowledge base`
: `Successfully created document in knowledge base`,
documentId: firstDocument?.documentId || firstDocument?.id || '',
data: {
documentId: firstDocument?.documentId || firstDocument?.id || '',
documentName:
uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
type: 'document',
enabled: true,
createdAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
},
},
}
},
@@ -182,8 +182,8 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
type: 'object',
description: 'Information about the created document',
properties: {
id: { type: 'string', description: 'Document ID' },
name: { type: 'string', description: 'Document name' },
documentId: { type: 'string', description: 'Document ID' },
documentName: { type: 'string', description: 'Document name' },
type: { type: 'string', description: 'Document type' },
enabled: { type: 'boolean', description: 'Whether the document is enabled' },
createdAt: { type: 'string', description: 'Creation timestamp' },

View File

@@ -110,13 +110,12 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
items: {
type: 'object',
properties: {
id: { type: 'string' },
content: { type: 'string' },
documentId: { type: 'string' },
documentName: { type: 'string' },
chunkIndex: { type: 'number' },
similarity: { type: 'number' },
metadata: { type: 'object' },
documentId: { type: 'string', description: 'Document ID' },
documentName: { type: 'string', description: 'Document name' },
content: { type: 'string', description: 'Content of the result' },
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
similarity: { type: 'number', description: 'Similarity score of the result' },
metadata: { type: 'object', description: 'Metadata of the result, including tags' },
},
},
},

View File

@@ -1,8 +1,7 @@
export interface KnowledgeSearchResult {
id: string
content: string
documentId: string
documentName: string
content: string
chunkIndex: number
metadata: Record<string, any>
similarity: number
@@ -41,7 +40,7 @@ export interface KnowledgeSearchParams {
}
export interface KnowledgeUploadChunkResult {
id: string
chunkId: string
chunkIndex: number
content: string
contentLength: number
@@ -57,6 +56,7 @@ export interface KnowledgeUploadChunkResponse {
data: KnowledgeUploadChunkResult
message: string
documentId: string
documentName: string
cost?: {
input: number
output: number
@@ -84,8 +84,8 @@ export interface KnowledgeUploadChunkParams {
}
export interface KnowledgeCreateDocumentResult {
id: string
name: string
documentId: string
documentName: string
type: string
enabled: boolean
createdAt: string
@@ -97,7 +97,6 @@ export interface KnowledgeCreateDocumentResponse {
output: {
data: KnowledgeCreateDocumentResult
message: string
documentId: string
}
error?: string
}

View File

@@ -52,8 +52,9 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
return {
success: true,
output: {
message: `Successfully uploaded chunk to document`,
data: {
id: data.id,
chunkId: data.id,
chunkIndex: data.chunkIndex || 0,
content: data.content,
contentLength: data.contentLength || data.content?.length || 0,
@@ -62,8 +63,8 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
createdAt: data.createdAt,
updatedAt: data.updatedAt,
},
message: `Successfully uploaded chunk to document`,
documentId: data.documentId,
documentName: data.documentName,
cost: data.cost,
},
}
@@ -74,7 +75,7 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
type: 'object',
description: 'Information about the uploaded chunk',
properties: {
id: { type: 'string', description: 'Chunk ID' },
chunkId: { type: 'string', description: 'Chunk ID' },
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
content: { type: 'string', description: 'Content of the chunk' },
contentLength: { type: 'number', description: 'Length of the content in characters' },
@@ -92,6 +93,10 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
type: 'string',
description: 'ID of the document the chunk was added to',
},
documentName: {
type: 'string',
description: 'Name of the document the chunk was added to',
},
cost: {
type: 'object',
description: 'Cost information for the upload operation',