Files
sim/apps/sim/app/api/files/utils.ts
Theodore Li 8fc75a6e9d feat(hosted-key-services) Add hosted key for multiple services (#3461)
* feat(hosted keys): Implement serper hosted key

* Handle required fields correctly for hosted keys

* Add rate limiting (3 tries, exponential backoff)

* Add custom pricing, switch to exa as first hosted key

* Add telemetry

* Consolidate byok type definitions

* Add warning comment if default calculation is used

* Record usage to user stats table

* Fix unit tests, use cost property

* Include more metadata in cost output

* Fix disabled tests

* Fix spacing

* Fix lint

* Move knowledge cost restructuring away from generic block handler

* Migrate knowledge unit tests

* Lint

* Fix broken tests

* Add user based hosted key throttling

* Refactor hosted key handling. Add optimistic handling of throttling for custom throttle rules.

* Remove research as hosted key. Recommend BYOK if throttling occurs

* Make adding api keys adjustable via env vars

* Remove vestigial fields from research

* Make billing actor id required for throttling

* Switch to round robin for api key distribution

* Add helper method for adding hosted key cost

* Strip leading double underscores to avoid breaking change

* Lint fix

* Remove falsy check in favor of explicit null check

* Add more detailed metrics for different throttling types

* Fix _costDollars field

* Handle hosted agent tool calls

* Fail loudly if cost field isn't found

* Remove any type

* Fix type error

* Fix lint

* Fix usage log double logging data

* Fix test

* Add browseruse hosted key

* Add firecrawl and serper hosted keys

* feat(hosted key): Add exa hosted key (#3221)

* feat(hosted keys): Implement serper hosted key

* Handle required fields correctly for hosted keys

* Add rate limiting (3 tries, exponential backoff)

* Add custom pricing, switch to exa as first hosted key

* Add telemetry

* Consolidate byok type definitions

* Add warning comment if default calculation is used

* Record usage to user stats table

* Fix unit tests, use cost property

* Include more metadata in cost output

* Fix disabled tests

* Fix spacing

* Fix lint

* Move knowledge cost restructuring away from generic block handler

* Migrate knowledge unit tests

* Lint

* Fix broken tests

* Add user based hosted key throttling

* Refactor hosted key handling. Add optimistic handling of throttling for custom throttle rules.

* Remove research as hosted key. Recommend BYOK if throttling occurs

* Make adding api keys adjustable via env vars

* Remove vestigial fields from research

* Make billing actor id required for throttling

* Switch to round robin for api key distribution

* Add helper method for adding hosted key cost

* Strip leading double underscores to avoid breaking change

* Lint fix

* Remove falsy check in favor of explicit null check

* Add more detailed metrics for different throttling types

* Fix _costDollars field

* Handle hosted agent tool calls

* Fail loudly if cost field isn't found

* Remove any type

* Fix type error

* Fix lint

* Fix usage log double logging data

* Fix test

---------

Co-authored-by: Theodore Li <teddy@zenobiapay.com>

* Fail fast on cost data not being found

* Add hosted key for google services

* Add hosting configuration and pricing logic for ElevenLabs TTS tools

* Add linkup hosted key

* Add jina hosted key

* Add hugging face hosted key

* Add perplexity hosting

* Add broader metrics for throttling

* Add skill for adding hosted key

* Lint, remove vestigial hosted keys not implemented

* Revert agent changes

* fail fast

* Fix build issue

* Fix build issues

* Fix type error

* Remove byok types that aren't implemented

* Address feedback

* Use default model when model id isn't provided

* Fix cost default issues

* Remove firecrawl error suppression

* Restore original behavior for hugging face

* Add mistral hosted key

* Remove hugging face hosted key

* Fix pricing mismatch in mistral and perplexity

* Add hosted keys for parallel and brand fetch

* Add brandfetch hosted key

* Update types

* Change byok name to parallel_ai

* Add telemetry on unknown models

---------

Co-authored-by: Theodore Li <theo@sim.ai>
2026-03-09 22:56:45 -04:00

294 lines
7.8 KiB
TypeScript

import { existsSync } from 'fs'
import path from 'path'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'
import { UPLOAD_DIR } from '@/lib/uploads/config'
import { sanitizeFileKey } from '@/lib/uploads/utils/file-utils'
// Shared logger for this module, constructed with the name 'FilesUtils'; findLocalFile uses it to report errors.
const logger = createLogger('FilesUtils')
/**
 * Body of a successful JSON API response.
 *
 * `success` is always the literal `true`. Any further string-keyed properties
 * are allowed and are not type-checked.
 */
export interface ApiSuccessResponse {
  success: true
  [key: string]: any
}
/**
 * Body of a failed JSON API response.
 *
 * `error` is required; `message` is an optional, more detailed description.
 */
export interface ApiErrorResponse {
  error: string
  message?: string
}
/**
 * Description of a file to be sent back to the client by `createFileResponse`.
 */
export interface FileResponse {
  /** Raw file bytes; used as the response body. */
  buffer: Buffer
  /** MIME type of the file as determined by the caller. */
  contentType: string
  /** File name or storage key; drives the Content-Disposition header. */
  filename: string
  /** Optional Cache-Control header value; a default is applied when omitted. */
  cacheControl?: string
}
/**
 * Error signalling that a requested file does not exist.
 *
 * `createErrorResponse` translates instances of this class into HTTP 404.
 */
export class FileNotFoundError extends Error {
  constructor(message: string) {
    super(message)
    // Give the error a stable, class-specific name instead of the default "Error".
    this.name = 'FileNotFoundError'
  }
}
/**
 * Error signalling that the incoming request was malformed or not acceptable.
 *
 * `createErrorResponse` translates instances of this class into HTTP 400.
 */
export class InvalidRequestError extends Error {
  constructor(message: string) {
    super(message)
    // Give the error a stable, class-specific name instead of the default "Error".
    this.name = 'InvalidRequestError'
  }
}
/**
 * Lookup table from file extension (or Google Drive item kind) to MIME type.
 *
 * `getContentType` lower-cases the extension before looking it up, so the
 * camelCase Google entries (`googleDoc`, `googleSheet`, `googleFolder`) can
 * never be reached through it; presumably other callers index them directly.
 */
export const contentTypeMap: Record<string, string> = {
  // Text and source formats
  txt: 'text/plain',
  csv: 'text/csv',
  json: 'application/json',
  xml: 'application/xml',
  md: 'text/markdown',
  html: 'text/html',
  css: 'text/css',
  js: 'application/javascript',
  ts: 'application/typescript',

  // Documents
  pdf: 'application/pdf',
  googleDoc: 'application/vnd.google-apps.document',
  doc: 'application/msword',
  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',

  // Spreadsheets
  xls: 'application/vnd.ms-excel',
  xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  googleSheet: 'application/vnd.google-apps.spreadsheet',

  // Presentations
  ppt: 'application/vnd.ms-powerpoint',
  pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',

  // Images
  png: 'image/png',
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  gif: 'image/gif',

  // Archives and folders
  zip: 'application/zip',
  googleFolder: 'application/vnd.google-apps.folder',
}
/**
 * Lower-case file extensions listed as binary formats.
 *
 * How this list is consumed is not visible in this file; the element order is
 * preserved in case a caller depends on it.
 */
export const binaryExtensions = [
  // Office documents
  'doc',
  'docx',
  'xls',
  'xlsx',
  'ppt',
  'pptx',
  // Archives
  'zip',
  // Images
  'png',
  'jpg',
  'jpeg',
  'gif',
  // Portable documents
  'pdf',
]
/**
 * Resolve a MIME type from a file name's extension.
 *
 * The extension is the text after the last `.`, lower-cased. A name without any
 * `.` is treated as being the extension itself.
 *
 * @param filename - File name or path whose extension should be inspected.
 * @returns The matching entry of `contentTypeMap`, or `application/octet-stream`
 *   when the extension is unknown.
 */
export function getContentType(filename: string): string {
  const fallback = 'application/octet-stream'
  const extension = filename.split('.').pop()?.toLowerCase() || ''

  // Only consult the table's own keys. A bare index lookup also resolves members
  // inherited from Object.prototype, so a name such as "x.constructor" or
  // "x.__proto__" used to yield a function/object instead of a MIME string.
  if (!Object.prototype.hasOwnProperty.call(contentTypeMap, extension)) {
    return fallback
  }
  return contentTypeMap[extension] || fallback
}
/**
 * Derive a sanitized file name (or cloud storage key) from a request path.
 *
 * - Paths starting with `/api/files/serve/` contribute everything after that
 *   prefix; any other path contributes only its last `/`-separated segment.
 * - Every `..` sequence is removed.
 * - Values starting with `s3/` or `blob/` keep their slash-separated structure:
 *   each key segment loses one leading `.`, is trimmed, and empty segments are
 *   dropped.
 * - All other values have every `/` and `\` removed.
 *
 * @param path - Request path or raw file reference.
 * @returns The sanitized file name or `s3/...` / `blob/...` key.
 * @throws Error when nothing usable remains after sanitization, including a
 *   cloud prefix whose key is empty.
 */
export function extractFilename(path: string): string {
  const servePrefix = '/api/files/serve/'
  const emptyMessage = 'Invalid or empty filename after sanitization'

  const raw = path.startsWith(servePrefix)
    ? path.substring(servePrefix.length)
    : (path.split('/').pop() || path)

  // A single global pass leaves at most one dot per run of dots, so no "..",
  // "/.." or "../" can survive it; further traversal replaces would be no-ops.
  const withoutTraversal = raw.replace(/\.\./g, '')

  let filename: string
  if (withoutTraversal.startsWith('s3/') || withoutTraversal.startsWith('blob/')) {
    const [prefix, ...keyParts] = withoutTraversal.split('/')
    const sanitizedKeyParts = keyParts
      .map((part) => part.replace(/^\./g, '').trim())
      .filter((part) => part.length > 0)

    // A bare "s3/" or "blob/" is not a usable key; reject it like any other
    // empty result instead of returning the prefix alone.
    if (sanitizedKeyParts.length === 0) {
      throw new Error(emptyMessage)
    }
    filename = `${prefix}/${sanitizedKeyParts.join('/')}`
  } else {
    // Local file names must be a single segment: drop every path separator.
    filename = withoutTraversal.replace(/[/\\]/g, '')
  }

  if (!filename || filename.trim().length === 0) {
    throw new Error(emptyMessage)
  }
  return filename
}
/**
 * Validate and clean a slash-separated file key, returning it joined with the
 * platform path separator.
 *
 * Each segment has `..` sequences and backslashes removed, one leading `.`
 * stripped, and surrounding whitespace trimmed.
 *
 * NOTE(review): no caller of this helper is visible in this file
 * (`findLocalFile` uses the imported `sanitizeFileKey`); confirm it is still needed.
 *
 * @param filename - Key of the form `<context>/<name>`; must contain at least one `/`.
 * @returns The cleaned segments joined with `path.sep`.
 * @throws Error when the input is empty or not a string, lacks a `/`, has a
 *   segment equal to `.` or `..`, has a segment that is empty after cleaning,
 *   or has a segment containing `:`, `|`, `?`, `*` or a control character.
 */
function sanitizeFilename(filename: string): string {
  if (!filename || typeof filename !== 'string') {
    throw new Error('Invalid filename provided')
  }
  if (!filename.includes('/')) {
    throw new Error('File key must include a context prefix (e.g., kb/, workspace/, execution/)')
  }

  // Reserved punctuation plus every ASCII control character (NUL included).
  const forbiddenChars = /[:|?*\x00-\x1F\x7F]/
  const cleanedParts: string[] = []

  for (const rawPart of filename.split('/')) {
    if (rawPart === '.' || rawPart === '..') {
      throw new Error('Path traversal detected')
    }

    const cleaned = rawPart
      .replace(/\.\./g, '')
      .replace(/[\\]/g, '')
      .replace(/^\./g, '')
      .trim()

    if (!cleaned) {
      throw new Error('Invalid or empty path segment after sanitization')
    }
    if (forbiddenChars.test(cleaned)) {
      throw new Error('Path segment contains invalid characters')
    }
    cleanedParts.push(cleaned)
  }

  return cleanedParts.join(path.sep)
}
/**
 * Locate an uploaded file on the local disk.
 *
 * The key is first passed through `sanitizeFileKey`, then looked up under
 * `UPLOAD_DIR` and under `<cwd>/uploads`, in that order. A candidate is only
 * accepted when its resolved path lies strictly inside one of those two
 * directories and exists on disk.
 *
 * @param filename - File key to look up.
 * @returns Absolute path of the first matching candidate, or `null` when none
 *   matches or any error occurs (errors are logged, not rethrown).
 */
export function findLocalFile(filename: string): string | null {
  try {
    const safeKey = sanitizeFileKey(filename)

    // Nothing usable left: empty, whitespace only, or just separators and dots.
    const isBlank = !safeKey || !safeKey.trim()
    if (isBlank || /^[/\\.\s]+$/.test(safeKey)) {
      return null
    }

    const candidates = [
      path.join(UPLOAD_DIR, safeKey),
      path.join(process.cwd(), 'uploads', safeKey),
    ]
    const allowedRoots = [path.resolve(UPLOAD_DIR), path.resolve(process.cwd(), 'uploads')]

    for (const candidate of candidates) {
      const absolute = path.resolve(candidate)

      // Must sit below an allowed root, never be the root directory itself.
      const insideRoot = allowedRoots.some(
        (root) => absolute !== root && absolute.startsWith(root + path.sep)
      )
      if (insideRoot && existsSync(absolute)) {
        return absolute
      }
    }

    return null
  } catch (error) {
    logger.error('Error in findLocalFile:', error)
    return null
  }
}
/** Content types that may be rendered inline by the browser. */
const SAFE_INLINE_TYPES = new Set([
  'image/png',
  'image/jpeg',
  'image/jpg',
  'image/gif',
  'application/pdf',
  'text/plain',
  'text/csv',
  'application/json',
])

/** Extensions that are always served as an opaque download, whatever their declared type. */
const FORCE_ATTACHMENT_EXTENSIONS = new Set(['html', 'htm', 'svg', 'js', 'css', 'xml'])

/**
 * Choose the Content-Type and Content-Disposition kind for a served file.
 *
 * 1. A file whose extension is in `FORCE_ATTACHMENT_EXTENSIONS` is sent as
 *    `application/octet-stream` with `attachment`.
 * 2. Otherwise a declared type of exactly `text/html` or `image/svg+xml` is
 *    downgraded to `text/plain`.
 * 3. The disposition is `inline` only when the resulting type is in
 *    `SAFE_INLINE_TYPES`; anything else is an `attachment`.
 *
 * @param filename - File name or key; only the text after the last `.` is used.
 * @param originalContentType - Content type reported for the file.
 * @returns The content type and disposition to put on the response.
 */
function getSecureFileHeaders(
  filename: string,
  originalContentType: string
): { contentType: string; disposition: string } {
  const ext = filename.split('.').pop()?.toLowerCase() || ''
  if (FORCE_ATTACHMENT_EXTENSIONS.has(ext)) {
    return { contentType: 'application/octet-stream', disposition: 'attachment' }
  }

  const isActiveMarkup =
    originalContentType === 'text/html' || originalContentType === 'image/svg+xml'
  const contentType = isActiveMarkup ? 'text/plain' : originalContentType

  return {
    contentType,
    disposition: SAFE_INLINE_TYPES.has(contentType) ? 'inline' : 'attachment',
  }
}
/**
 * Build the `filename` (and, when needed, `filename*`) parameters of a
 * Content-Disposition header for a storage key.
 *
 * Only the last `/`-separated segment of the key is used; when that segment is
 * empty the whole key is used instead.
 *
 * - ASCII control characters (including CR and LF) are replaced with `_`, so
 *   the value can never break or inject a header line.
 * - Inside the quoted `filename="..."` form, `"` and `\` are backslash-escaped.
 * - Names containing non-ASCII characters get an ASCII fallback (`_` per
 *   non-ASCII code unit) plus a percent-encoded `filename*=UTF-8''...` form.
 *   `'`, `(`, `)` and `*` are percent-encoded there as well, since
 *   `encodeURIComponent` leaves them untouched.
 *
 * @param storageKey - Storage key or file name.
 * @returns The header parameter string, without the disposition type.
 */
function encodeFilenameForHeader(storageKey: string): string {
  const lastSegment = storageKey.split('/').pop() || storageKey
  const filename = lastSegment.replace(/[\x00-\x1F\x7F]/g, '_')

  // quoted-string form: escape the delimiter and the escape character itself.
  const quoted = (value: string): string => `filename="${value.replace(/["\\]/g, '\\$&')}"`

  const hasNonAscii = /[^\x00-\x7F]/.test(filename)
  if (!hasNonAscii) {
    return quoted(filename)
  }

  const asciiFallback = quoted(filename.replace(/[^\x00-\x7F]/g, '_'))

  let encodedFilename: string
  try {
    encodedFilename = encodeURIComponent(filename).replace(
      /['()*]/g,
      (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
    )
  } catch {
    // encodeURIComponent throws on lone surrogates; serve the ASCII fallback only.
    return asciiFallback
  }

  return `${asciiFallback}; filename*=UTF-8''${encodedFilename}`
}
/**
 * Build the HTTP 200 response that serves a file's bytes.
 *
 * Content type and disposition come from `getSecureFileHeaders`; the file name
 * parameters come from `encodeFilenameForHeader`. The response also carries
 * `X-Content-Type-Options: nosniff` and a sandboxing Content-Security-Policy.
 *
 * @param file - Bytes and metadata of the file to serve.
 * @returns A `NextResponse` whose body is `file.buffer`.
 */
export function createFileResponse(file: FileResponse): NextResponse {
  const secure = getSecureFileHeaders(file.filename, file.contentType)

  const headers = {
    'Content-Type': secure.contentType,
    'Content-Disposition': `${secure.disposition}; ${encodeFilenameForHeader(file.filename)}`,
    // Falls back to a one-year public cache when the caller supplies no policy.
    'Cache-Control': file.cacheControl || 'public, max-age=31536000',
    'X-Content-Type-Options': 'nosniff',
    'Content-Security-Policy': "default-src 'none'; style-src 'unsafe-inline'; sandbox;",
  }

  return new NextResponse(file.buffer as BodyInit, { status: 200, headers })
}
/**
 * Convert an error into a JSON error response.
 *
 * `FileNotFoundError` maps to 404 and `InvalidRequestError` to 400; every other
 * error uses the supplied `status`.
 *
 * @param error - The error to report; its `name` and `message` form the body.
 * @param status - Status code for errors of any other class (default 500).
 * @returns A JSON `NextResponse` of the shape `{ error, message }`.
 */
export function createErrorResponse(error: Error, status = 500): NextResponse {
  let statusCode = status
  if (error instanceof FileNotFoundError) {
    statusCode = 404
  } else if (error instanceof InvalidRequestError) {
    statusCode = 400
  }

  const body = {
    error: error.name,
    message: error.message,
  }
  return NextResponse.json(body, { status: statusCode })
}
/**
 * Wrap a success payload in a JSON response.
 *
 * @param data - Payload to serialise as the response body.
 * @returns The result of `NextResponse.json(data)`.
 */
export function createSuccessResponse(data: ApiSuccessResponse): NextResponse {
  const response = NextResponse.json(data)
  return response
}
/**
 * Build the reply to an OPTIONS request.
 *
 * @returns An empty 204 response that sets `Access-Control-Allow-Methods` and
 *   `Access-Control-Allow-Headers`.
 */
export function createOptionsResponse(): NextResponse {
  const corsHeaders = {
    'Access-Control-Allow-Methods': 'GET, POST, DELETE, OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type',
  }
  return new NextResponse(null, { status: 204, headers: corsHeaders })
}