mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-06 03:00:16 -04:00
improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs (#1170)
* improvement(knowledge): remove innerJoin and add id identifiers to results, updated docs * cleanup * add documentName to upload chunk op as well
This commit is contained in:
@@ -187,6 +187,8 @@ export async function POST(
|
||||
success: true,
|
||||
data: {
|
||||
...newChunk,
|
||||
documentId,
|
||||
documentName: doc.filename,
|
||||
...(cost
|
||||
? {
|
||||
cost: {
|
||||
|
||||
@@ -65,12 +65,14 @@ const mockHandleVectorOnlySearch = vi.fn()
|
||||
const mockHandleTagAndVectorSearch = vi.fn()
|
||||
const mockGetQueryStrategy = vi.fn()
|
||||
const mockGenerateSearchEmbedding = vi.fn()
|
||||
const mockGetDocumentNamesByIds = vi.fn()
|
||||
vi.mock('./utils', () => ({
|
||||
handleTagOnlySearch: mockHandleTagOnlySearch,
|
||||
handleVectorOnlySearch: mockHandleVectorOnlySearch,
|
||||
handleTagAndVectorSearch: mockHandleTagAndVectorSearch,
|
||||
getQueryStrategy: mockGetQueryStrategy,
|
||||
generateSearchEmbedding: mockGenerateSearchEmbedding,
|
||||
getDocumentNamesByIds: mockGetDocumentNamesByIds,
|
||||
APIError: class APIError extends Error {
|
||||
public status: number
|
||||
constructor(message: string, status: number) {
|
||||
@@ -146,6 +148,10 @@ describe('Knowledge Search API Route', () => {
|
||||
singleQueryOptimized: true,
|
||||
})
|
||||
mockGenerateSearchEmbedding.mockClear().mockResolvedValue([0.1, 0.2, 0.3, 0.4, 0.5])
|
||||
mockGetDocumentNamesByIds.mockClear().mockResolvedValue({
|
||||
doc1: 'Document 1',
|
||||
doc2: 'Document 2',
|
||||
})
|
||||
|
||||
vi.stubGlobal('crypto', {
|
||||
randomUUID: vi.fn().mockReturnValue('mock-uuid-1234-5678'),
|
||||
|
||||
@@ -9,6 +9,7 @@ import { checkKnowledgeBaseAccess } from '@/app/api/knowledge/utils'
|
||||
import { calculateCost } from '@/providers/utils'
|
||||
import {
|
||||
generateSearchEmbedding,
|
||||
getDocumentNamesByIds,
|
||||
getQueryStrategy,
|
||||
handleTagAndVectorSearch,
|
||||
handleTagOnlySearch,
|
||||
@@ -238,6 +239,10 @@ export async function POST(request: NextRequest) {
|
||||
tagDefinitionsMap[kbId] = map
|
||||
})
|
||||
|
||||
// Fetch document names for the results
|
||||
const documentIds = results.map((result) => result.documentId)
|
||||
const documentNameMap = await getDocumentNamesByIds(documentIds)
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
data: {
|
||||
@@ -263,12 +268,11 @@ export async function POST(request: NextRequest) {
|
||||
})
|
||||
|
||||
return {
|
||||
id: result.id,
|
||||
content: result.content,
|
||||
documentId: result.documentId,
|
||||
documentName: (result as any).documentName || undefined,
|
||||
documentName: documentNameMap[result.documentId] || undefined,
|
||||
content: result.content,
|
||||
chunkIndex: result.chunkIndex,
|
||||
tags, // Clean display name mapped tags
|
||||
metadata: tags, // Clean display name mapped tags
|
||||
similarity: hasQuery ? 1 - result.distance : 1, // Perfect similarity for tag-only searches
|
||||
}
|
||||
}),
|
||||
|
||||
@@ -5,11 +5,34 @@ import { document, embedding } from '@/db/schema'
|
||||
|
||||
const logger = createLogger('KnowledgeSearchUtils')
|
||||
|
||||
/**
 * Resolves document IDs to their filenames.
 *
 * @param documentIds - IDs of the documents to look up; duplicates are
 *   collapsed before the query is issued.
 * @returns A map from document ID to filename with one entry per row the
 *   query returned. Resolves to an empty object, without querying, when
 *   `documentIds` is empty.
 */
export async function getDocumentNamesByIds(
  documentIds: string[]
): Promise<Record<string, string>> {
  const namesById: Record<string, string> = {}

  // Nothing to look up: skip the database round-trip entirely.
  if (documentIds.length === 0) {
    return namesById
  }

  const distinctIds = Array.from(new Set(documentIds))
  const rows = await db
    .select({ id: document.id, filename: document.filename })
    .from(document)
    .where(inArray(document.id, distinctIds))

  for (const row of rows) {
    namesById[row.id] = row.filename
  }

  return namesById
}
|
||||
|
||||
export interface SearchResult {
|
||||
id: string
|
||||
content: string
|
||||
documentId: string
|
||||
documentName: string | null
|
||||
chunkIndex: number
|
||||
tag1: string | null
|
||||
tag2: string | null
|
||||
@@ -131,7 +154,6 @@ async function executeVectorSearchOnIds(
|
||||
id: embedding.id,
|
||||
content: embedding.content,
|
||||
documentId: embedding.documentId,
|
||||
documentName: document.filename,
|
||||
chunkIndex: embedding.chunkIndex,
|
||||
tag1: embedding.tag1,
|
||||
tag2: embedding.tag2,
|
||||
@@ -144,7 +166,6 @@ async function executeVectorSearchOnIds(
|
||||
knowledgeBaseId: embedding.knowledgeBaseId,
|
||||
})
|
||||
.from(embedding)
|
||||
.innerJoin(document, eq(embedding.documentId, document.id))
|
||||
.where(
|
||||
and(
|
||||
inArray(embedding.id, embeddingIds),
|
||||
@@ -176,7 +197,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
|
||||
id: embedding.id,
|
||||
content: embedding.content,
|
||||
documentId: embedding.documentId,
|
||||
documentName: document.filename,
|
||||
chunkIndex: embedding.chunkIndex,
|
||||
tag1: embedding.tag1,
|
||||
tag2: embedding.tag2,
|
||||
@@ -189,7 +209,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
|
||||
knowledgeBaseId: embedding.knowledgeBaseId,
|
||||
})
|
||||
.from(embedding)
|
||||
.innerJoin(document, eq(embedding.documentId, document.id))
|
||||
.where(
|
||||
and(
|
||||
eq(embedding.knowledgeBaseId, kbId),
|
||||
@@ -209,7 +228,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
|
||||
id: embedding.id,
|
||||
content: embedding.content,
|
||||
documentId: embedding.documentId,
|
||||
documentName: document.filename,
|
||||
chunkIndex: embedding.chunkIndex,
|
||||
tag1: embedding.tag1,
|
||||
tag2: embedding.tag2,
|
||||
@@ -222,7 +240,6 @@ export async function handleTagOnlySearch(params: SearchParams): Promise<SearchR
|
||||
knowledgeBaseId: embedding.knowledgeBaseId,
|
||||
})
|
||||
.from(embedding)
|
||||
.innerJoin(document, eq(embedding.documentId, document.id))
|
||||
.where(
|
||||
and(
|
||||
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
|
||||
@@ -254,7 +271,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
|
||||
id: embedding.id,
|
||||
content: embedding.content,
|
||||
documentId: embedding.documentId,
|
||||
documentName: document.filename,
|
||||
chunkIndex: embedding.chunkIndex,
|
||||
tag1: embedding.tag1,
|
||||
tag2: embedding.tag2,
|
||||
@@ -267,7 +283,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
|
||||
knowledgeBaseId: embedding.knowledgeBaseId,
|
||||
})
|
||||
.from(embedding)
|
||||
.innerJoin(document, eq(embedding.documentId, document.id))
|
||||
.where(
|
||||
and(
|
||||
eq(embedding.knowledgeBaseId, kbId),
|
||||
@@ -289,7 +304,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
|
||||
id: embedding.id,
|
||||
content: embedding.content,
|
||||
documentId: embedding.documentId,
|
||||
documentName: document.filename,
|
||||
chunkIndex: embedding.chunkIndex,
|
||||
tag1: embedding.tag1,
|
||||
tag2: embedding.tag2,
|
||||
@@ -302,7 +316,6 @@ export async function handleVectorOnlySearch(params: SearchParams): Promise<Sear
|
||||
knowledgeBaseId: embedding.knowledgeBaseId,
|
||||
})
|
||||
.from(embedding)
|
||||
.innerJoin(document, eq(embedding.documentId, document.id))
|
||||
.where(
|
||||
and(
|
||||
inArray(embedding.knowledgeBaseId, knowledgeBaseIds),
|
||||
|
||||
@@ -160,19 +160,19 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
data: {
|
||||
id: firstDocument?.documentId || firstDocument?.id || '',
|
||||
name: uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
|
||||
type: 'document',
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
enabled: true,
|
||||
},
|
||||
message:
|
||||
uploadCount > 1
|
||||
? `Successfully created ${uploadCount} documents in knowledge base`
|
||||
: `Successfully created document in knowledge base`,
|
||||
documentId: firstDocument?.documentId || firstDocument?.id || '',
|
||||
data: {
|
||||
documentId: firstDocument?.documentId || firstDocument?.id || '',
|
||||
documentName:
|
||||
uploadCount > 1 ? `${uploadCount} documents` : firstDocument?.filename || 'Unknown',
|
||||
type: 'document',
|
||||
enabled: true,
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
@@ -182,8 +182,8 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
|
||||
type: 'object',
|
||||
description: 'Information about the created document',
|
||||
properties: {
|
||||
id: { type: 'string', description: 'Document ID' },
|
||||
name: { type: 'string', description: 'Document name' },
|
||||
documentId: { type: 'string', description: 'Document ID' },
|
||||
documentName: { type: 'string', description: 'Document name' },
|
||||
type: { type: 'string', description: 'Document type' },
|
||||
enabled: { type: 'boolean', description: 'Whether the document is enabled' },
|
||||
createdAt: { type: 'string', description: 'Creation timestamp' },
|
||||
|
||||
@@ -110,13 +110,12 @@ export const knowledgeSearchTool: ToolConfig<any, KnowledgeSearchResponse> = {
|
||||
items: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
id: { type: 'string' },
|
||||
content: { type: 'string' },
|
||||
documentId: { type: 'string' },
|
||||
documentName: { type: 'string' },
|
||||
chunkIndex: { type: 'number' },
|
||||
similarity: { type: 'number' },
|
||||
metadata: { type: 'object' },
|
||||
documentId: { type: 'string', description: 'Document ID' },
|
||||
documentName: { type: 'string', description: 'Document name' },
|
||||
content: { type: 'string', description: 'Content of the result' },
|
||||
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
|
||||
similarity: { type: 'number', description: 'Similarity score of the result' },
|
||||
metadata: { type: 'object', description: 'Metadata of the result, including tags' },
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
export interface KnowledgeSearchResult {
|
||||
id: string
|
||||
content: string
|
||||
documentId: string
|
||||
documentName: string
|
||||
content: string
|
||||
chunkIndex: number
|
||||
metadata: Record<string, any>
|
||||
similarity: number
|
||||
@@ -41,7 +40,7 @@ export interface KnowledgeSearchParams {
|
||||
}
|
||||
|
||||
export interface KnowledgeUploadChunkResult {
|
||||
id: string
|
||||
chunkId: string
|
||||
chunkIndex: number
|
||||
content: string
|
||||
contentLength: number
|
||||
@@ -57,6 +56,7 @@ export interface KnowledgeUploadChunkResponse {
|
||||
data: KnowledgeUploadChunkResult
|
||||
message: string
|
||||
documentId: string
|
||||
documentName: string
|
||||
cost?: {
|
||||
input: number
|
||||
output: number
|
||||
@@ -84,8 +84,8 @@ export interface KnowledgeUploadChunkParams {
|
||||
}
|
||||
|
||||
export interface KnowledgeCreateDocumentResult {
|
||||
id: string
|
||||
name: string
|
||||
documentId: string
|
||||
documentName: string
|
||||
type: string
|
||||
enabled: boolean
|
||||
createdAt: string
|
||||
@@ -97,7 +97,6 @@ export interface KnowledgeCreateDocumentResponse {
|
||||
output: {
|
||||
data: KnowledgeCreateDocumentResult
|
||||
message: string
|
||||
documentId: string
|
||||
}
|
||||
error?: string
|
||||
}
|
||||
|
||||
@@ -52,8 +52,9 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Successfully uploaded chunk to document`,
|
||||
data: {
|
||||
id: data.id,
|
||||
chunkId: data.id,
|
||||
chunkIndex: data.chunkIndex || 0,
|
||||
content: data.content,
|
||||
contentLength: data.contentLength || data.content?.length || 0,
|
||||
@@ -62,8 +63,8 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
|
||||
createdAt: data.createdAt,
|
||||
updatedAt: data.updatedAt,
|
||||
},
|
||||
message: `Successfully uploaded chunk to document`,
|
||||
documentId: data.documentId,
|
||||
documentName: data.documentName,
|
||||
cost: data.cost,
|
||||
},
|
||||
}
|
||||
@@ -74,7 +75,7 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
|
||||
type: 'object',
|
||||
description: 'Information about the uploaded chunk',
|
||||
properties: {
|
||||
id: { type: 'string', description: 'Chunk ID' },
|
||||
chunkId: { type: 'string', description: 'Chunk ID' },
|
||||
chunkIndex: { type: 'number', description: 'Index of the chunk within the document' },
|
||||
content: { type: 'string', description: 'Content of the chunk' },
|
||||
contentLength: { type: 'number', description: 'Length of the content in characters' },
|
||||
@@ -92,6 +93,10 @@ export const knowledgeUploadChunkTool: ToolConfig<any, KnowledgeUploadChunkRespo
|
||||
type: 'string',
|
||||
description: 'ID of the document the chunk was added to',
|
||||
},
|
||||
documentName: {
|
||||
type: 'string',
|
||||
description: 'Name of the document the chunk was added to',
|
||||
},
|
||||
cost: {
|
||||
type: 'object',
|
||||
description: 'Cost information for the upload operation',
|
||||
|
||||
Reference in New Issue
Block a user