mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-06 03:00:16 -04:00
fix(knowledge): infer MIME type from file extension in create/upsert tools
Both create_document and upsert_document forced a .txt extension and the text/plain MIME type regardless of the document name. Now the tools infer the correct MIME type from the file extension (html, md, csv, json, yaml, xml) and only default to .txt when no extension is given.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types'
|
||||
import {
|
||||
inferDocumentFileInfo,
|
||||
type KnowledgeCreateDocumentResponse,
|
||||
} from '@/tools/knowledge/types'
|
||||
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
|
||||
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
|
||||
import type { ToolConfig } from '@/tools/types'
|
||||
@@ -75,18 +78,18 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
|
||||
? Buffer.from(textContent, 'utf8').toString('base64')
|
||||
: btoa(String.fromCharCode(...utf8Bytes))
|
||||
|
||||
const dataUri = `data:text/plain;base64,${base64Content}`
|
||||
const { filename, mimeType } = inferDocumentFileInfo(documentName)
|
||||
const dataUri = `data:${mimeType};base64,${base64Content}`
|
||||
|
||||
// Parse document tags from various formats (object, array, JSON string)
|
||||
const parsedTags = parseDocumentTags(params.documentTags)
|
||||
const tagData = formatDocumentTagsForAPI(parsedTags)
|
||||
|
||||
const documents = [
|
||||
{
|
||||
filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
|
||||
filename,
|
||||
fileUrl: dataUri,
|
||||
fileSize: contentBytes,
|
||||
mimeType: 'text/plain',
|
||||
mimeType,
|
||||
...tagData,
|
||||
},
|
||||
]
|
||||
|
||||
@@ -1,3 +1,38 @@
|
||||
/**
 * Lower-case file extensions (without the leading dot) mapped to the MIME type
 * used when uploading a document with that extension.
 */
const EXTENSION_MIME_MAP: Readonly<Record<string, string>> = {
  html: 'text/html',
  htm: 'text/html',
  md: 'text/markdown',
  csv: 'text/csv',
  json: 'application/json',
  yaml: 'application/x-yaml',
  yml: 'application/x-yaml',
  xml: 'application/xml',
  txt: 'text/plain',
}

/**
 * Infers MIME type from a file extension. Returns `text/plain` for unknown extensions.
 *
 * @param ext - File extension without the leading dot; matched case-insensitively.
 * @returns The mapped MIME type, or `text/plain` when the extension is not in the map.
 */
export function getMimeTypeFromExtension(ext: string): string {
  const key = ext.toLowerCase()

  // Only own keys count. A bare index lookup would resolve inherited keys such as
  // `constructor` or `__proto__` to non-string values, bypassing the fallback.
  if (!Object.prototype.hasOwnProperty.call(EXTENSION_MIME_MAP, key)) {
    return 'text/plain'
  }
  return EXTENSION_MIME_MAP[key] ?? 'text/plain'
}
|
||||
|
||||
/**
 * Derives the upload filename and MIME type for a document from its name.
 *
 * The text after the last `.` is treated as a file extension only when the dot is
 * not the first character and that text is non-empty and purely alphanumeric. In
 * that case the name is kept as-is and the MIME type comes from
 * `getMimeTypeFromExtension`. Otherwise the name is considered to have no
 * extension: `.txt` is appended and `text/plain` is used.
 *
 * @param documentName - Document name as supplied by the caller.
 * @returns The filename to upload under and the MIME type to declare for it.
 */
export function inferDocumentFileInfo(documentName: string): {
  filename: string
  mimeType: string
} {
  const dotIndex = documentName.lastIndexOf('.')
  // `dotIndex > 0` keeps dot-leading names such as ".env" on the no-extension path.
  const ext = dotIndex > 0 ? documentName.slice(dotIndex + 1).toLowerCase() : ''

  // A trailing dot ("notes.") or a dot inside free text ("Dr. Smith notes") does not
  // introduce a real extension, so those names fall through to the `.txt` default
  // instead of being uploaded as `text/plain` under an extension-less filename.
  if (/^[a-z0-9]+$/.test(ext)) {
    return { filename: documentName, mimeType: getMimeTypeFromExtension(ext) }
  }

  return { filename: `${documentName}.txt`, mimeType: 'text/plain' }
}
|
||||
|
||||
export interface KnowledgeSearchResult {
|
||||
documentId: string
|
||||
documentName: string
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type {
|
||||
KnowledgeUpsertDocumentParams,
|
||||
KnowledgeUpsertDocumentResponse,
|
||||
import {
|
||||
inferDocumentFileInfo,
|
||||
type KnowledgeUpsertDocumentParams,
|
||||
type KnowledgeUpsertDocumentResponse,
|
||||
} from '@/tools/knowledge/types'
|
||||
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
|
||||
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
|
||||
@@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig<
|
||||
base64Content = btoa(binary)
|
||||
}
|
||||
|
||||
const dataUri = `data:text/plain;base64,${base64Content}`
|
||||
const { filename, mimeType } = inferDocumentFileInfo(documentName)
|
||||
const dataUri = `data:${mimeType};base64,${base64Content}`
|
||||
|
||||
const parsedTags = parseDocumentTags(params.documentTags)
|
||||
const tagData = formatDocumentTagsForAPI(parsedTags)
|
||||
|
||||
const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt`
|
||||
|
||||
const requestBody: Record<string, unknown> = {
|
||||
filename,
|
||||
fileUrl: dataUri,
|
||||
fileSize: contentBytes,
|
||||
mimeType: 'text/plain',
|
||||
mimeType,
|
||||
...tagData,
|
||||
processingOptions: {
|
||||
chunkSize: 1024,
|
||||
|
||||
Reference in New Issue
Block a user