fix(knowledge): infer MIME type from file extension in create/upsert tools

Both create_document and upsert_document forced a .txt extension and
the text/plain MIME type regardless of the document name. Now the tools
infer the MIME type from the file extension (html, htm, md, csv, json,
yaml, yml, xml, txt) and only append .txt when no extension is given.
Names with an unrecognized extension are kept as-is and sent as text/plain.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Waleed Latif
2026-03-18 03:53:19 -07:00
parent 44d9b743ab
commit 66dd56b614
3 changed files with 50 additions and 12 deletions

View File

@@ -1,4 +1,7 @@
import type { KnowledgeCreateDocumentResponse } from '@/tools/knowledge/types'
import {
inferDocumentFileInfo,
type KnowledgeCreateDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
import type { ToolConfig } from '@/tools/types'
@@ -75,18 +78,18 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
? Buffer.from(textContent, 'utf8').toString('base64')
: btoa(String.fromCharCode(...utf8Bytes))
const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`
// Parse document tags from various formats (object, array, JSON string)
const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)
const documents = [
{
filename: documentName.endsWith('.txt') ? documentName : `${documentName}.txt`,
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
},
]

View File

@@ -1,3 +1,38 @@
/**
 * Maps lowercase file extensions (without the leading dot) to the MIME type
 * reported for documents with that extension.
 */
const EXTENSION_MIME_MAP: Readonly<Record<string, string>> = {
  html: 'text/html',
  htm: 'text/html',
  md: 'text/markdown',
  csv: 'text/csv',
  json: 'application/json',
  yaml: 'application/x-yaml',
  yml: 'application/x-yaml',
  xml: 'application/xml',
  txt: 'text/plain',
}

/**
 * Infers MIME type from a file extension. Returns `text/plain` for unknown extensions.
 *
 * @param ext - File extension without the leading dot; matched case-insensitively.
 * @returns The mapped MIME type, or `text/plain` when the extension is not in the map.
 */
export function getMimeTypeFromExtension(ext: string): string {
  const key = ext.toLowerCase()
  // Only consult the map's own keys: a bare index lookup on a plain object also
  // resolves inherited members such as `constructor` or `__proto__`, which are
  // not strings and would leak into the returned "MIME type".
  if (!Object.prototype.hasOwnProperty.call(EXTENSION_MIME_MAP, key)) {
    return 'text/plain'
  }
  return EXTENSION_MIME_MAP[key] ?? 'text/plain'
}
/**
 * Derives the filename and MIME type to use for a document from its name.
 *
 * The extension is the text after the last `.` in the name, provided that dot
 * is not the very first character. When such a dot exists the name is returned
 * unchanged and the MIME type is resolved through `getMimeTypeFromExtension`.
 * Otherwise `.txt` is appended to the name and the MIME type is `text/plain`.
 *
 * @param documentName - Document name as supplied by the caller.
 * @returns The filename to store and the MIME type to report for it.
 */
export function inferDocumentFileInfo(documentName: string): {
  filename: string
  mimeType: string
} {
  const lastDot = documentName.lastIndexOf('.')
  const hasExtension = lastDot >= 1

  // No dot, or only a leading dot: treat the name as extension-less.
  if (!hasExtension) {
    return { filename: `${documentName}.txt`, mimeType: 'text/plain' }
  }

  const extension = documentName.substring(lastDot + 1).toLowerCase()
  const mimeType = getMimeTypeFromExtension(extension)
  return { filename: documentName, mimeType }
}
export interface KnowledgeSearchResult {
documentId: string
documentName: string

View File

@@ -1,6 +1,7 @@
import type {
KnowledgeUpsertDocumentParams,
KnowledgeUpsertDocumentResponse,
import {
inferDocumentFileInfo,
type KnowledgeUpsertDocumentParams,
type KnowledgeUpsertDocumentResponse,
} from '@/tools/knowledge/types'
import { enrichKBTagsSchema } from '@/tools/schema-enrichers'
import { formatDocumentTagsForAPI, parseDocumentTags } from '@/tools/shared/tags'
@@ -94,18 +95,17 @@ export const knowledgeUpsertDocumentTool: ToolConfig<
base64Content = btoa(binary)
}
const dataUri = `data:text/plain;base64,${base64Content}`
const { filename, mimeType } = inferDocumentFileInfo(documentName)
const dataUri = `data:${mimeType};base64,${base64Content}`
const parsedTags = parseDocumentTags(params.documentTags)
const tagData = formatDocumentTagsForAPI(parsedTags)
const filename = documentName.endsWith('.txt') ? documentName : `${documentName}.txt`
const requestBody: Record<string, unknown> = {
filename,
fileUrl: dataUri,
fileSize: contentBytes,
mimeType: 'text/plain',
mimeType,
...tagData,
processingOptions: {
chunkSize: 1024,