mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-06 03:00:16 -04:00
feat(motheship): add docx support
This commit is contained in:
@@ -4,7 +4,11 @@ import { createLogger } from '@sim/logger'
|
||||
import type { NextRequest } from 'next/server'
|
||||
import { NextResponse } from 'next/server'
|
||||
import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid'
|
||||
import { generatePptxFromCode } from '@/lib/execution/pptx-vm'
|
||||
import {
|
||||
generateDocxFromCode,
|
||||
generatePdfFromCode,
|
||||
generatePptxFromCode,
|
||||
} from '@/lib/execution/doc-vm'
|
||||
import { CopilotFiles, isUsingCloudStorage } from '@/lib/uploads'
|
||||
import type { StorageContext } from '@/lib/uploads/config'
|
||||
import { parseWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
|
||||
@@ -22,47 +26,73 @@ import {
|
||||
const logger = createLogger('FilesServeAPI')
|
||||
|
||||
const ZIP_MAGIC = Buffer.from([0x50, 0x4b, 0x03, 0x04])
|
||||
const PDF_MAGIC = Buffer.from([0x25, 0x50, 0x44, 0x46, 0x2d]) // %PDF-
|
||||
|
||||
const MAX_COMPILED_PPTX_CACHE = 10
|
||||
const compiledPptxCache = new Map<string, Buffer>()
|
||||
|
||||
function compiledCacheSet(key: string, buffer: Buffer): void {
|
||||
if (compiledPptxCache.size >= MAX_COMPILED_PPTX_CACHE) {
|
||||
compiledPptxCache.delete(compiledPptxCache.keys().next().value as string)
|
||||
}
|
||||
compiledPptxCache.set(key, buffer)
|
||||
/**
 * Describes a document format whose stored bytes may be generator source code
 * that has to be compiled into the real binary before it is served.
 */
interface CompilableFormat {
  /** MIME type reported for the compiled output. */
  contentType: string
  /** Leading bytes that identify an already-compiled binary of this format. */
  magic: Buffer
  /** Turns generator source code into the binary document. */
  compile: (code: string, workspaceId: string) => Promise<Buffer>
}
|
||||
|
||||
async function compilePptxIfNeeded(
|
||||
/**
 * Lookup table of compilable document formats, keyed by file extension
 * (lowercase, including the leading dot).
 *
 * Each entry pairs the magic bytes of a finished binary with the generator
 * that produces one from source code and the MIME type to report for it.
 */
const COMPILABLE_FORMATS: Record<string, CompilableFormat> = {
  '.pptx': {
    contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    magic: ZIP_MAGIC,
    compile: generatePptxFromCode,
  },
  '.docx': {
    contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    magic: ZIP_MAGIC,
    compile: generateDocxFromCode,
  },
  '.pdf': {
    contentType: 'application/pdf',
    magic: PDF_MAGIC,
    compile: generatePdfFromCode,
  },
}
|
||||
|
||||
const MAX_COMPILED_DOC_CACHE = 10
|
||||
const compiledDocCache = new Map<string, Buffer>()
|
||||
|
||||
/**
 * Stores a compiled document in the module-level cache.
 *
 * When the cache already holds `MAX_COMPILED_DOC_CACHE` entries, the entry
 * that was inserted first is dropped before the new one is added.
 */
function compiledCacheSet(key: string, buffer: Buffer): void {
  const atCapacity = compiledDocCache.size >= MAX_COMPILED_DOC_CACHE
  if (atCapacity) {
    // A Map iterates in insertion order, so the first key is the oldest entry.
    const oldestKey = compiledDocCache.keys().next().value as string
    compiledDocCache.delete(oldestKey)
  }
  compiledDocCache.set(key, buffer)
}
|
||||
|
||||
async function compileDocumentIfNeeded(
|
||||
buffer: Buffer,
|
||||
filename: string,
|
||||
workspaceId?: string,
|
||||
raw?: boolean
|
||||
): Promise<{ buffer: Buffer; contentType: string }> {
|
||||
const isPptx = filename.toLowerCase().endsWith('.pptx')
|
||||
if (raw || !isPptx || buffer.subarray(0, 4).equals(ZIP_MAGIC)) {
|
||||
if (raw) return { buffer, contentType: getContentType(filename) }
|
||||
|
||||
const ext = filename.slice(filename.lastIndexOf('.')).toLowerCase()
|
||||
const format = COMPILABLE_FORMATS[ext]
|
||||
if (!format) return { buffer, contentType: getContentType(filename) }
|
||||
|
||||
const magicLen = format.magic.length
|
||||
if (buffer.length >= magicLen && buffer.subarray(0, magicLen).equals(format.magic)) {
|
||||
return { buffer, contentType: getContentType(filename) }
|
||||
}
|
||||
|
||||
const code = buffer.toString('utf-8')
|
||||
const cacheKey = createHash('sha256')
|
||||
.update(ext)
|
||||
.update(code)
|
||||
.update(workspaceId ?? '')
|
||||
.digest('hex')
|
||||
const cached = compiledPptxCache.get(cacheKey)
|
||||
const cached = compiledDocCache.get(cacheKey)
|
||||
if (cached) {
|
||||
return {
|
||||
buffer: cached,
|
||||
contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
}
|
||||
return { buffer: cached, contentType: format.contentType }
|
||||
}
|
||||
|
||||
const compiled = await generatePptxFromCode(code, workspaceId || '')
|
||||
const compiled = await format.compile(code, workspaceId || '')
|
||||
compiledCacheSet(cacheKey, compiled)
|
||||
return {
|
||||
buffer: compiled,
|
||||
contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
}
|
||||
return { buffer: compiled, contentType: format.contentType }
|
||||
}
|
||||
|
||||
const STORAGE_KEY_PREFIX_RE = /^\d{13}-[a-z0-9]{7}-/
|
||||
@@ -169,7 +199,7 @@ async function handleLocalFile(
|
||||
const segment = filename.split('/').pop() || filename
|
||||
const displayName = stripStorageKeyPrefix(segment)
|
||||
const workspaceId = getWorkspaceIdForCompile(filename)
|
||||
const { buffer: fileBuffer, contentType } = await compilePptxIfNeeded(
|
||||
const { buffer: fileBuffer, contentType } = await compileDocumentIfNeeded(
|
||||
rawBuffer,
|
||||
displayName,
|
||||
workspaceId,
|
||||
@@ -226,7 +256,7 @@ async function handleCloudProxy(
|
||||
const segment = cloudKey.split('/').pop() || 'download'
|
||||
const displayName = stripStorageKeyPrefix(segment)
|
||||
const workspaceId = getWorkspaceIdForCompile(cloudKey)
|
||||
const { buffer: fileBuffer, contentType } = await compilePptxIfNeeded(
|
||||
const { buffer: fileBuffer, contentType } = await compileDocumentIfNeeded(
|
||||
rawBuffer,
|
||||
displayName,
|
||||
workspaceId,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { getSession } from '@/lib/auth'
|
||||
import { generatePptxFromCode } from '@/lib/execution/pptx-vm'
|
||||
import { generatePptxFromCode } from '@/lib/execution/doc-vm'
|
||||
import { verifyWorkspaceMembership } from '@/app/api/workflows/utils'
|
||||
|
||||
export const dynamic = 'force-dynamic'
|
||||
|
||||
@@ -44,7 +44,7 @@ const TEXT_EDITABLE_EXTENSIONS = new Set([
|
||||
'svg',
|
||||
])
|
||||
|
||||
const IFRAME_PREVIEWABLE_MIME_TYPES = new Set(['application/pdf'])
|
||||
const IFRAME_PREVIEWABLE_MIME_TYPES = new Set(['application/pdf', 'text/x-pdflibjs'])
|
||||
const IFRAME_PREVIEWABLE_EXTENSIONS = new Set(['pdf'])
|
||||
|
||||
const IMAGE_PREVIEWABLE_MIME_TYPES = new Set(['image/png', 'image/jpeg', 'image/gif', 'image/webp'])
|
||||
@@ -52,26 +52,36 @@ const IMAGE_PREVIEWABLE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp
|
||||
|
||||
const PPTX_PREVIEWABLE_MIME_TYPES = new Set([
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'text/x-pptxgenjs',
|
||||
])
|
||||
const PPTX_PREVIEWABLE_EXTENSIONS = new Set(['pptx'])
|
||||
|
||||
const DOCX_PREVIEWABLE_MIME_TYPES = new Set([
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'text/x-docxjs',
|
||||
])
|
||||
const DOCX_PREVIEWABLE_EXTENSIONS = new Set(['docx'])
|
||||
|
||||
type FileCategory =
|
||||
| 'text-editable'
|
||||
| 'iframe-previewable'
|
||||
| 'image-previewable'
|
||||
| 'pptx-previewable'
|
||||
| 'docx-previewable'
|
||||
| 'unsupported'
|
||||
|
||||
function resolveFileCategory(mimeType: string | null, filename: string): FileCategory {
|
||||
if (mimeType && TEXT_EDITABLE_MIME_TYPES.has(mimeType)) return 'text-editable'
|
||||
if (mimeType && IFRAME_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'iframe-previewable'
|
||||
if (mimeType && IMAGE_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'image-previewable'
|
||||
if (mimeType && DOCX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'docx-previewable'
|
||||
if (mimeType && PPTX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'pptx-previewable'
|
||||
|
||||
const ext = getFileExtension(filename)
|
||||
if (TEXT_EDITABLE_EXTENSIONS.has(ext)) return 'text-editable'
|
||||
if (IFRAME_PREVIEWABLE_EXTENSIONS.has(ext)) return 'iframe-previewable'
|
||||
if (IMAGE_PREVIEWABLE_EXTENSIONS.has(ext)) return 'image-previewable'
|
||||
if (DOCX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'docx-previewable'
|
||||
if (PPTX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'pptx-previewable'
|
||||
|
||||
return 'unsupported'
|
||||
@@ -138,6 +148,10 @@ export function FileViewer({
|
||||
return <ImagePreview file={file} />
|
||||
}
|
||||
|
||||
if (category === 'docx-previewable') {
|
||||
return <DocxPreview file={file} workspaceId={workspaceId} />
|
||||
}
|
||||
|
||||
if (category === 'pptx-previewable') {
|
||||
return <PptxPreview file={file} workspaceId={workspaceId} streamingContent={streamingContent} />
|
||||
}
|
||||
@@ -181,7 +195,14 @@ function TextEditor({
|
||||
isLoading,
|
||||
error,
|
||||
dataUpdatedAt,
|
||||
} = useWorkspaceFileContent(workspaceId, file.id, file.key, file.type === 'text/x-pptxgenjs')
|
||||
} = useWorkspaceFileContent(
|
||||
workspaceId,
|
||||
file.id,
|
||||
file.key,
|
||||
file.type === 'text/x-pptxgenjs' ||
|
||||
file.type === 'text/x-docxjs' ||
|
||||
file.type === 'text/x-pdflibjs'
|
||||
)
|
||||
|
||||
const updateContent = useUpdateWorkspaceFileContent()
|
||||
const updateContentRef = useRef(updateContent)
|
||||
@@ -551,6 +572,71 @@ const ImagePreview = memo(function ImagePreview({ file }: { file: WorkspaceFileR
|
||||
)
|
||||
})
|
||||
|
||||
/**
 * Previews a .docx workspace file by rendering its binary content into a
 * container element with the lazily imported `docx-preview` library.
 *
 * Shows a skeleton while the binary is loading and an error panel when either
 * the fetch or the render fails.
 */
const DocxPreview = memo(function DocxPreview({
  file,
  workspaceId,
}: {
  file: WorkspaceFileRecord
  workspaceId: string
}) {
  const containerRef = useRef<HTMLDivElement>(null)
  const binaryQuery = useWorkspaceFileBinary(workspaceId, file.id, file.key)
  const fileData = binaryQuery.data
  const isLoading = binaryQuery.isLoading
  const fetchError = binaryQuery.error
  const [renderError, setRenderError] = useState<string | null>(null)

  useEffect(() => {
    if (!containerRef.current || !fileData) return

    // Flipped by the cleanup so a stale render never touches the DOM or state.
    let active = true

    const renderDocument = async (): Promise<void> => {
      try {
        const docxPreview = await import('docx-preview')
        const target = containerRef.current
        if (!active || !target) return

        // Clear whatever a previous render left behind before drawing again.
        target.innerHTML = ''
        await docxPreview.renderAsync(fileData, target, undefined, {
          inWrapper: true,
          ignoreWidth: false,
          ignoreHeight: false,
        })
      } catch (err) {
        if (!active) return
        const msg = err instanceof Error ? err.message : 'Failed to render document'
        logger.error('DOCX render failed', { error: msg })
        setRenderError(msg)
      }
    }

    renderDocument()

    return () => {
      active = false
    }
  }, [fileData])

  if (isLoading) {
    return (
      <div className='flex h-full items-center justify-center'>
        <Skeleton className='h-[200px] w-[80%]' />
      </div>
    )
  }

  const hasFailed = Boolean(fetchError || renderError)
  if (hasFailed) {
    return (
      <div className='flex h-full flex-col items-center justify-center gap-2 text-[var(--text-muted)]'>
        <p className='text-[13px]'>Failed to preview document</p>
        <p className='text-[11px]'>{renderError || 'Could not load file'}</p>
      </div>
    )
  }

  return <div ref={containerRef} className='h-full w-full overflow-auto bg-white' />
})
|
||||
|
||||
const pptxSlideCache = new Map<string, string[]>()
|
||||
|
||||
function pptxCacheKey(fileId: string, dataUpdatedAt: number, byteLength: number): string {
|
||||
|
||||
@@ -83,7 +83,12 @@ export const ResourceContent = memo(function ResourceContent({
|
||||
}, [streamingFile])
|
||||
const syntheticFile = useMemo(() => {
|
||||
const ext = getFileExtension(streamFileName)
|
||||
const type = ext === 'pptx' ? 'text/x-pptxgenjs' : getMimeTypeFromExtension(ext)
|
||||
const SOURCE_MIME_MAP: Record<string, string> = {
|
||||
pptx: 'text/x-pptxgenjs',
|
||||
docx: 'text/x-docxjs',
|
||||
pdf: 'text/x-pdflibjs',
|
||||
}
|
||||
const type = SOURCE_MIME_MAP[ext] ?? getMimeTypeFromExtension(ext)
|
||||
return {
|
||||
id: 'streaming-file',
|
||||
workspaceId,
|
||||
|
||||
@@ -6,7 +6,11 @@ import {
|
||||
type ServerToolContext,
|
||||
} from '@/lib/copilot/tools/server/base-tool'
|
||||
import type { WorkspaceFileArgs, WorkspaceFileResult } from '@/lib/copilot/tools/shared/schemas'
|
||||
import { generatePptxFromCode } from '@/lib/execution/pptx-vm'
|
||||
import {
|
||||
generateDocxFromCode,
|
||||
generatePdfFromCode,
|
||||
generatePptxFromCode,
|
||||
} from '@/lib/execution/doc-vm'
|
||||
import {
|
||||
deleteWorkspaceFile,
|
||||
downloadWorkspaceFile as downloadWsFile,
|
||||
@@ -19,7 +23,11 @@ import {
|
||||
const logger = createLogger('WorkspaceFileServerTool')
|
||||
|
||||
const PPTX_MIME = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
||||
const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
||||
const PDF_MIME = 'application/pdf'
|
||||
const PPTX_SOURCE_MIME = 'text/x-pptxgenjs'
|
||||
const DOCX_SOURCE_MIME = 'text/x-docxjs'
|
||||
const PDF_SOURCE_MIME = 'text/x-pdflibjs'
|
||||
|
||||
const EXT_TO_MIME: Record<string, string> = {
|
||||
'.txt': 'text/plain',
|
||||
@@ -28,6 +36,8 @@ const EXT_TO_MIME: Record<string, string> = {
|
||||
'.json': 'application/json',
|
||||
'.csv': 'text/csv',
|
||||
'.pptx': PPTX_MIME,
|
||||
'.docx': DOCX_MIME,
|
||||
'.pdf': PDF_MIME,
|
||||
}
|
||||
|
||||
function inferContentType(fileName: string, explicitType?: string): string {
|
||||
@@ -85,22 +95,35 @@ export const workspaceFileServerTool: BaseServerTool<WorkspaceFileArgs, Workspac
|
||||
return { success: false, message: fileNameValidationError }
|
||||
}
|
||||
|
||||
const isPptx = fileName.toLowerCase().endsWith('.pptx')
|
||||
const lowerName = fileName.toLowerCase()
|
||||
const isPptx = lowerName.endsWith('.pptx')
|
||||
const isDocx = lowerName.endsWith('.docx')
|
||||
const isPdf = lowerName.endsWith('.pdf')
|
||||
let contentType: string
|
||||
|
||||
if (isPptx) {
|
||||
// Validate the code compiles before storing
|
||||
if (isPptx || isDocx || isPdf) {
|
||||
const formatName = isPptx ? 'PPTX' : isDocx ? 'DOCX' : 'PDF'
|
||||
const generator = isPptx
|
||||
? generatePptxFromCode
|
||||
: isDocx
|
||||
? generateDocxFromCode
|
||||
: generatePdfFromCode
|
||||
const sourceMime = isPptx
|
||||
? PPTX_SOURCE_MIME
|
||||
: isDocx
|
||||
? DOCX_SOURCE_MIME
|
||||
: PDF_SOURCE_MIME
|
||||
try {
|
||||
await generatePptxFromCode(content, workspaceId)
|
||||
await generator(content, workspaceId)
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err)
|
||||
logger.error('PPTX code validation failed', { error: msg, fileName })
|
||||
logger.error(`${formatName} code validation failed`, { error: msg, fileName })
|
||||
return {
|
||||
success: false,
|
||||
message: `PPTX generation failed: ${msg}. Fix the pptxgenjs code and retry.`,
|
||||
message: `${formatName} generation failed: ${msg}. Fix the code and retry.`,
|
||||
}
|
||||
}
|
||||
contentType = PPTX_SOURCE_MIME
|
||||
contentType = sourceMime
|
||||
} else {
|
||||
contentType = inferContentType(fileName, explicitType)
|
||||
}
|
||||
@@ -153,19 +176,37 @@ export const workspaceFileServerTool: BaseServerTool<WorkspaceFileArgs, Workspac
|
||||
return { success: false, message: `File with ID "${fileId}" not found` }
|
||||
}
|
||||
|
||||
const isPptxUpdate = fileRecord.name?.toLowerCase().endsWith('.pptx')
|
||||
if (isPptxUpdate) {
|
||||
const updateLowerName = fileRecord.name?.toLowerCase() ?? ''
|
||||
const isPptxUpdate = updateLowerName.endsWith('.pptx')
|
||||
const isDocxUpdate = updateLowerName.endsWith('.docx')
|
||||
const isPdfUpdate = updateLowerName.endsWith('.pdf')
|
||||
const isDocUpdate = isPptxUpdate || isDocxUpdate || isPdfUpdate
|
||||
|
||||
if (isDocUpdate) {
|
||||
const formatName = isPptxUpdate ? 'PPTX' : isDocxUpdate ? 'DOCX' : 'PDF'
|
||||
const generator = isPptxUpdate
|
||||
? generatePptxFromCode
|
||||
: isDocxUpdate
|
||||
? generateDocxFromCode
|
||||
: generatePdfFromCode
|
||||
try {
|
||||
await generatePptxFromCode(content, workspaceId)
|
||||
await generator(content, workspaceId)
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err)
|
||||
return {
|
||||
success: false,
|
||||
message: `PPTX generation failed: ${msg}. Fix the pptxgenjs code and retry.`,
|
||||
message: `${formatName} generation failed: ${msg}. Fix the code and retry.`,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const updateSourceMime = isPptxUpdate
|
||||
? PPTX_SOURCE_MIME
|
||||
: isDocxUpdate
|
||||
? DOCX_SOURCE_MIME
|
||||
: isPdfUpdate
|
||||
? PDF_SOURCE_MIME
|
||||
: undefined
|
||||
const fileBuffer = Buffer.from(content, 'utf-8')
|
||||
|
||||
assertServerToolNotAborted(context)
|
||||
@@ -174,7 +215,7 @@ export const workspaceFileServerTool: BaseServerTool<WorkspaceFileArgs, Workspac
|
||||
fileId,
|
||||
context.userId,
|
||||
fileBuffer,
|
||||
isPptxUpdate ? PPTX_SOURCE_MIME : undefined
|
||||
updateSourceMime
|
||||
)
|
||||
|
||||
logger.info('Workspace file updated via copilot', {
|
||||
@@ -301,19 +342,37 @@ export const workspaceFileServerTool: BaseServerTool<WorkspaceFileArgs, Workspac
|
||||
content.slice(firstIdx + edit.search.length)
|
||||
}
|
||||
|
||||
const isPptxPatch = fileRecord.name?.toLowerCase().endsWith('.pptx')
|
||||
if (isPptxPatch) {
|
||||
const patchLowerName = fileRecord.name?.toLowerCase() ?? ''
|
||||
const isPptxPatch = patchLowerName.endsWith('.pptx')
|
||||
const isDocxPatch = patchLowerName.endsWith('.docx')
|
||||
const isPdfPatch = patchLowerName.endsWith('.pdf')
|
||||
const isDocPatch = isPptxPatch || isDocxPatch || isPdfPatch
|
||||
|
||||
if (isDocPatch) {
|
||||
const formatName = isPptxPatch ? 'PPTX' : isDocxPatch ? 'DOCX' : 'PDF'
|
||||
const generator = isPptxPatch
|
||||
? generatePptxFromCode
|
||||
: isDocxPatch
|
||||
? generateDocxFromCode
|
||||
: generatePdfFromCode
|
||||
try {
|
||||
await generatePptxFromCode(content, workspaceId)
|
||||
await generator(content, workspaceId)
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err)
|
||||
return {
|
||||
success: false,
|
||||
message: `Patched PPTX code failed to compile: ${msg}. Fix the edits and retry.`,
|
||||
message: `Patched ${formatName} code failed to compile: ${msg}. Fix the edits and retry.`,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const patchSourceMime = isPptxPatch
|
||||
? PPTX_SOURCE_MIME
|
||||
: isDocxPatch
|
||||
? DOCX_SOURCE_MIME
|
||||
: isPdfPatch
|
||||
? PDF_SOURCE_MIME
|
||||
: undefined
|
||||
const patchedBuffer = Buffer.from(content, 'utf-8')
|
||||
assertServerToolNotAborted(context)
|
||||
await updateWorkspaceFileContent(
|
||||
@@ -321,7 +380,7 @@ export const workspaceFileServerTool: BaseServerTool<WorkspaceFileArgs, Workspac
|
||||
fileId,
|
||||
context.userId,
|
||||
patchedBuffer,
|
||||
isPptxPatch ? PPTX_SOURCE_MIME : undefined
|
||||
patchSourceMime
|
||||
)
|
||||
|
||||
logger.info('Workspace file patched via copilot', {
|
||||
|
||||
211
apps/sim/lib/execution/doc-vm.ts
Normal file
211
apps/sim/lib/execution/doc-vm.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
/**
|
||||
* Sandboxed document generation via subprocess.
|
||||
*
|
||||
* Supports pptx (pptxgenjs), docx (docx), and pdf (pdf-lib).
|
||||
* User code runs in a separate Node.js child process. File access is brokered
|
||||
* via IPC -- the subprocess never touches the database directly.
|
||||
*/
|
||||
|
||||
import { type ChildProcess, spawn } from 'node:child_process'
|
||||
import fs from 'node:fs'
|
||||
import path from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import {
|
||||
downloadWorkspaceFile,
|
||||
getWorkspaceFile,
|
||||
} from '@/lib/uploads/contexts/workspace/workspace-file-manager'
|
||||
|
||||
const logger = createLogger('DocVMExecution')
|
||||
|
||||
export type DocumentFormat = 'pptx' | 'docx' | 'pdf'
|
||||
|
||||
const WORKER_STARTUP_TIMEOUT_MS = 10_000
|
||||
const GENERATION_TIMEOUT_MS = 60_000
|
||||
const MAX_STDERR = 4096
|
||||
|
||||
type WorkerMessage =
|
||||
| { type: 'ready' }
|
||||
| { type: 'result'; data: string }
|
||||
| { type: 'error'; message: string }
|
||||
| { type: 'getFile'; fileReqId: number; fileId: string }
|
||||
|
||||
const currentDir = path.dirname(fileURLToPath(import.meta.url))
|
||||
let cachedWorkerPath: string | undefined
|
||||
|
||||
/**
 * Resolves the on-disk location of `doc-worker.cjs`.
 *
 * Probes a fixed list of candidate locations relative to this module and to
 * the current working directory, in order, and returns the first one that
 * exists. The result is memoized in `cachedWorkerPath`.
 *
 * @returns Path of the worker script.
 * @throws Error listing every probed location when none of them exists.
 */
function getWorkerPath(): string {
  if (cachedWorkerPath) return cachedWorkerPath

  const workerFile = 'doc-worker.cjs'
  const cwd = process.cwd()
  const candidates = [
    path.join(currentDir, '..', '..', 'dist', workerFile),
    path.join(currentDir, workerFile),
    path.join(cwd, 'apps', 'sim', 'dist', workerFile),
    path.join(cwd, 'apps', 'sim', 'lib', 'execution', workerFile),
    path.join(cwd, 'dist', workerFile),
    path.join(cwd, 'lib', 'execution', workerFile),
  ]

  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      cachedWorkerPath = candidate
      return candidate
    }
  }

  throw new Error(`doc-worker.cjs not found at any of: ${candidates.join(', ')}`)
}
|
||||
|
||||
/**
 * Runs document-generating code in a freshly spawned worker process and
 * resolves with the bytes of the generated document.
 *
 * Protocol (IPC, JSON-serialized): the worker announces `ready`, the parent
 * replies with `generate`, and the worker answers with `result` (base64 data)
 * or `error`. While generating, the worker may send `getFile` requests, which
 * are answered through `handleFileRequest`.
 *
 * @param format - Document format to generate.
 * @param code - Source code executed by the worker.
 * @param workspaceId - Workspace used to resolve `getFile` requests.
 * @param signal - Optional abort signal; aborting rejects and kills the worker.
 * @returns The generated document.
 * @throws Error when the worker fails to start within the startup timeout,
 *   generation exceeds the generation timeout, the worker exits or errors
 *   unexpectedly, the worker reports an error, or the signal is aborted.
 */
export async function generateDocumentFromCode(
  format: DocumentFormat,
  code: string,
  workspaceId: string,
  signal?: AbortSignal
): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    let proc: ChildProcess | null = null
    let settled = false
    let startupTimer: ReturnType<typeof setTimeout> | null = null
    let generationTimer: ReturnType<typeof setTimeout> | null = null

    const onAbort = (): void => done(new Error(`${format.toUpperCase()} generation cancelled`))

    function done(err: Error): void
    function done(err: undefined, result: Buffer): void
    function done(err: Error | undefined, result?: Buffer): void {
      if (settled) return
      settled = true
      if (startupTimer) clearTimeout(startupTimer)
      if (generationTimer) clearTimeout(generationTimer)
      // Detach from the caller's signal so a long-lived signal does not keep
      // this closure (and the dead child handle) alive.
      signal?.removeEventListener('abort', onAbort)
      try {
        proc?.removeAllListeners()
        // A send() on a closed IPC channel is reported as an 'error' event on
        // the child handle. Keep a no-op listener so a late file reply cannot
        // surface as an unhandled 'error' event after we have settled.
        proc?.on('error', () => {})
        proc?.kill()
      } catch {
        // Ignore -- process may have already exited
      }
      if (err) reject(err)
      else resolve(result as Buffer)
    }

    if (signal?.aborted) {
      reject(new Error(`${format.toUpperCase()} generation cancelled`))
      return
    }

    signal?.addEventListener('abort', onAbort, { once: true })

    try {
      proc = spawn('node', [getWorkerPath()], {
        stdio: ['ignore', 'pipe', 'pipe', 'ipc'],
        serialization: 'json',
        // Pass only PATH so the worker does not inherit the parent's environment.
        env: { PATH: process.env.PATH ?? '' } as unknown as NodeJS.ProcessEnv,
      })
    } catch (err) {
      done(err instanceof Error ? err : new Error(String(err)))
      return
    }

    // stdout is piped but its content is not needed; drain it so the worker
    // can never block on a full pipe buffer.
    proc.stdout?.resume()

    // Keep at most MAX_STDERR characters of stderr for diagnostics.
    let stderrData = ''
    proc.stderr?.on('data', (chunk: Buffer) => {
      if (stderrData.length < MAX_STDERR) {
        stderrData += chunk.toString()
        if (stderrData.length > MAX_STDERR) stderrData = stderrData.slice(0, MAX_STDERR)
      }
    })

    startupTimer = setTimeout(() => {
      logger.error(`${format} worker failed to start within timeout`)
      done(new Error(`${format.toUpperCase()} worker failed to start`))
    }, WORKER_STARTUP_TIMEOUT_MS)

    proc.on('exit', (code) => {
      if (!settled) {
        logger.error(`${format} worker exited unexpectedly`, {
          code,
          stderr: stderrData.slice(0, 500),
        })
        done(new Error(`${format.toUpperCase()} worker exited unexpectedly (code ${code})`))
      }
    })

    proc.on('error', (err) => {
      logger.error(`${format} worker process error`, { error: err.message })
      done(err)
    })

    proc.on('message', (rawMsg: unknown) => {
      // Ignore anything that is not an object message from the worker.
      if (typeof rawMsg !== 'object' || rawMsg === null) return
      const msg = rawMsg as WorkerMessage

      if (msg.type === 'ready') {
        if (startupTimer) {
          clearTimeout(startupTimer)
          startupTimer = null
        }
        // The generation clock starts only once the worker is up.
        generationTimer = setTimeout(() => {
          logger.error(`${format} generation timed out`)
          done(new Error(`${format.toUpperCase()} generation timed out`))
        }, GENERATION_TIMEOUT_MS)
        proc!.send({ type: 'generate', format, code })
        return
      }

      if (msg.type === 'result') {
        done(undefined, Buffer.from(msg.data, 'base64'))
        return
      }

      if (msg.type === 'error') {
        done(new Error(msg.message))
        return
      }

      if (msg.type === 'getFile') {
        handleFileRequest(proc!, workspaceId, msg).catch((err) => {
          logger.error(`Failed to handle file request from ${format} worker`, {
            fileId: msg.fileId,
            error: err instanceof Error ? err.message : String(err),
          })
          // Tell the worker the request failed so it is not left waiting.
          if (proc && !settled) {
            try {
              proc.send({
                type: 'fileResult',
                fileReqId: msg.fileReqId,
                error: err instanceof Error ? err.message : 'File fetch failed',
              })
            } catch {
              // Ignore -- process may have died
            }
          }
        })
      }
    })
  })
}
|
||||
|
||||
/**
 * Answers a worker's `getFile` request by loading the workspace file and
 * sending it back over IPC as a base64 data URI.
 *
 * Replies with an `error` field instead when the file record does not exist.
 * The reply is skipped when the worker's IPC channel has already closed (for
 * example because the worker was killed while the file was downloading):
 * sending on a closed channel would raise an `'error'` event on the child
 * handle.
 *
 * @param proc - Worker process that issued the request.
 * @param workspaceId - Workspace the file is looked up in.
 * @param msg - The `getFile` request received from the worker.
 */
async function handleFileRequest(
  proc: ChildProcess,
  workspaceId: string,
  msg: Extract<WorkerMessage, { type: 'getFile' }>
): Promise<void> {
  const { fileId, fileReqId } = msg

  const reply = (payload: { data: string } | { error: string }): void => {
    if (!proc.connected) return
    proc.send({ type: 'fileResult', fileReqId, ...payload })
  }

  const record = await getWorkspaceFile(workspaceId, fileId)
  if (!record) {
    reply({ error: `File not found: ${fileId}` })
    return
  }

  const buffer = await downloadWorkspaceFile(record)
  // Records without a stored type are labelled as PNG in the data URI.
  const mime = record.type || 'image/png'
  reply({ data: `data:${mime};base64,${buffer.toString('base64')}` })
}
|
||||
|
||||
/** Generates a document from code using the `pptx` format. */
export const generatePptxFromCode = (
  code: string,
  workspaceId: string,
  signal?: AbortSignal
): Promise<Buffer> => {
  return generateDocumentFromCode('pptx', code, workspaceId, signal)
}

/** Generates a document from code using the `docx` format. */
export const generateDocxFromCode = (
  code: string,
  workspaceId: string,
  signal?: AbortSignal
): Promise<Buffer> => {
  return generateDocumentFromCode('docx', code, workspaceId, signal)
}

/** Generates a document from code using the `pdf` format. */
export const generatePdfFromCode = (
  code: string,
  workspaceId: string,
  signal?: AbortSignal
): Promise<Buffer> => {
  return generateDocumentFromCode('pdf', code, workspaceId, signal)
}
|
||||
146
apps/sim/lib/execution/doc-worker.cjs
Normal file
146
apps/sim/lib/execution/doc-worker.cjs
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* Generic document generation worker.
|
||||
* Runs in a separate Node.js process, communicates with parent via IPC.
|
||||
* Supports pptx (pptxgenjs), docx (docx), and pdf (pdf-lib).
|
||||
*/
|
||||
|
||||
'use strict'
|
||||
|
||||
const vm = require('node:vm')
|
||||
|
||||
const EXECUTION_TIMEOUT_MS = 30_000
|
||||
const FILE_REQUEST_TIMEOUT_MS = 30_000
|
||||
|
||||
/**
 * Per-format adapters used by the generate handler.
 *
 * `setup()` loads the format's library and returns `{ globals, ... }`, where
 * `globals` are the names exposed to the user code. `serialize(ctx)` turns
 * the state left behind by the user code into the final document bytes.
 */
const FORMATS = {
  pptx: {
    setup: () => {
      const PptxGenJS = require('pptxgenjs')
      const presentation = new PptxGenJS()
      return { globals: { pptx: presentation }, pptx: presentation }
    },
    serialize: async (ctx) => {
      const written = await ctx.pptx.write({ outputType: 'nodebuffer' })
      return Buffer.from(written)
    },
  },
  docx: {
    setup: () => {
      const docxLib = require('docx')
      return { globals: { docx: docxLib }, docx: docxLib }
    },
    serialize: async (ctx) => {
      // The user code is expected to leave its document in the `doc` global.
      const { doc } = ctx.globals
      if (doc) return ctx.docx.Packer.toBuffer(doc)
      throw new Error('No document created. Set doc = new docx.Document({...}) in your code.')
    },
  },
  pdf: {
    setup: async () => {
      const PDFLib = require('pdf-lib')
      const blankPdf = await PDFLib.PDFDocument.create()
      return { globals: { PDFLib, pdf: blankPdf }, pdf: blankPdf }
    },
    serialize: async (ctx) => {
      // Read from globals rather than ctx.pdf so a reassigned `pdf` is honoured.
      const { pdf } = ctx.globals
      if (!pdf) {
        throw new Error(
          'No PDF document. Use the injected pdf object or load one with PDFLib.PDFDocument.load().'
        )
      }
      const saved = await pdf.save()
      return Buffer.from(saved)
    },
  },
}
|
||||
|
||||
const pendingFileRequests = new Map()
|
||||
let fileRequestCounter = 0
|
||||
|
||||
/**
 * Sends a message to the parent process over IPC.
 *
 * @param msg - Message to deliver.
 * @returns `true` when the message was handed to `process.send`, `false` when
 *   there is no IPC channel or it is no longer connected.
 */
function sendToParent(msg) {
  const channelOpen = Boolean(process.send && process.connected)
  if (!channelOpen) return false

  process.send(msg)
  return true
}
|
||||
|
||||
// Dispatch messages from the parent: a 'generate' request starts a run, a
// 'fileResult' settles a pending file request. Other types are ignored.
process.on('message', async (msg) => {
  switch (msg.type) {
    case 'generate':
      await handleGenerate(msg)
      break
    case 'fileResult':
      handleFileResult(msg)
      break
    default:
      break
  }
})
|
||||
|
||||
/**
 * Asks the parent process for a file and resolves with the `data` field of
 * its `fileResult` reply. Exposed to user code as `getFileBase64`.
 *
 * Rejects when `fileId` is not a non-empty string, when the parent is
 * disconnected, when the parent replies with an error, or when no reply
 * arrives within `FILE_REQUEST_TIMEOUT_MS`.
 */
function requestFileBase64(fileId) {
  return new Promise((resolve, reject) => {
    if (typeof fileId !== 'string' || fileId.length === 0) {
      reject(new Error('fileId must be a non-empty string'))
      return
    }

    const fileReqId = ++fileRequestCounter
    const timeout = setTimeout(() => {
      if (pendingFileRequests.has(fileReqId)) {
        pendingFileRequests.delete(fileReqId)
        reject(new Error(`File request timed out for fileId: ${fileId}`))
      }
    }, FILE_REQUEST_TIMEOUT_MS)

    // Register before sending so the reply can always find its entry.
    pendingFileRequests.set(fileReqId, { resolve, reject, timeout })

    if (!sendToParent({ type: 'getFile', fileReqId, fileId })) {
      clearTimeout(timeout)
      pendingFileRequests.delete(fileReqId)
      reject(new Error('Parent process disconnected'))
    }
  })
}

/**
 * Handles a `generate` message: runs the user code in a vm context seeded
 * with the format's globals, serializes the resulting document and sends it
 * to the parent as a base64 `result` message. Any failure is reported to the
 * parent as an `error` message instead of being thrown.
 *
 * @param msg - `{ code, format }` as sent by the parent.
 */
async function handleGenerate(msg) {
  const { code, format } = msg

  try {
    // Own-property check so inherited keys such as "constructor" are rejected
    // as unknown formats instead of failing later with a confusing TypeError.
    const formatConfig = Object.prototype.hasOwnProperty.call(FORMATS, format)
      ? FORMATS[format]
      : undefined
    if (!formatConfig) throw new Error(`Unknown document format: ${format}`)

    const ctx = await formatConfig.setup()

    // NOTE(review): node:vm is documented as not being a security mechanism;
    // isolation presumably relies on this being a separate process -- confirm.
    const sandbox = Object.create(null)
    Object.assign(sandbox, ctx.globals)
    sandbox.getFileBase64 = requestFileBase64

    vm.createContext(sandbox)

    // Put the user code on its own lines: with a single-line wrapper, code
    // ending in a `//` comment would comment out the closing `})()` and fail
    // with a SyntaxError. lineOffset keeps reported line numbers aligned with
    // the user's code despite the extra first line.
    const wrapped = `(async () => {\n${code}\n})()`
    await vm.runInContext(wrapped, sandbox, {
      timeout: EXECUTION_TIMEOUT_MS,
      filename: `${format}-code.js`,
      lineOffset: -1,
    })

    // Let serialize() see globals the user code created or reassigned.
    ctx.globals = sandbox

    const output = await formatConfig.serialize(ctx)
    const base64 = Buffer.from(output).toString('base64')
    sendToParent({ type: 'result', data: base64 })
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    sendToParent({ type: 'error', message })
  }
}
|
||||
|
||||
/**
 * Settles the pending file request that matches a `fileResult` message from
 * the parent. Replies for unknown (e.g. already timed-out) requests are
 * ignored.
 *
 * @param msg - `{ fileReqId, data, error }` as sent by the parent.
 */
function handleFileResult(msg) {
  const { fileReqId, data, error } = msg

  const entry = pendingFileRequests.get(fileReqId)
  if (!entry) return

  // The reply arrived, so the timeout must not fire any more.
  clearTimeout(entry.timeout)
  pendingFileRequests.delete(fileReqId)

  if (error) {
    entry.reject(new Error(error))
    return
  }
  entry.resolve(data)
}
|
||||
|
||||
sendToParent({ type: 'ready' })
|
||||
@@ -146,6 +146,8 @@
|
||||
"onedollarstats": "0.0.10",
|
||||
"openai": "^4.91.1",
|
||||
"papaparse": "5.5.3",
|
||||
"docx": "^9.6.1",
|
||||
"docx-preview": "^0.3.7",
|
||||
"pdf-lib": "1.17.1",
|
||||
"postgres": "^3.4.5",
|
||||
"posthog-js": "1.334.1",
|
||||
|
||||
@@ -15,13 +15,17 @@ export default defineConfig({
|
||||
},
|
||||
dirs: ['./background'],
|
||||
build: {
|
||||
external: ['isolated-vm', 'pptxgenjs'],
|
||||
external: ['isolated-vm', 'pptxgenjs', 'docx', 'pdf-lib'],
|
||||
extensions: [
|
||||
additionalFiles({
|
||||
files: ['./lib/execution/isolated-vm-worker.cjs', './lib/execution/pptx-worker.cjs'],
|
||||
files: [
|
||||
'./lib/execution/isolated-vm-worker.cjs',
|
||||
'./lib/execution/pptx-worker.cjs',
|
||||
'./lib/execution/doc-worker.cjs',
|
||||
],
|
||||
}),
|
||||
additionalPackages({
|
||||
packages: ['unpdf', 'pdf-lib', 'isolated-vm', 'pptxgenjs'],
|
||||
packages: ['unpdf', 'pdf-lib', 'isolated-vm', 'pptxgenjs', 'docx'],
|
||||
}),
|
||||
],
|
||||
},
|
||||
|
||||
12
bun.lock
12
bun.lock
@@ -128,6 +128,8 @@
|
||||
"csv-parse": "6.1.0",
|
||||
"date-fns": "4.1.0",
|
||||
"decimal.js": "10.6.0",
|
||||
"docx": "^9.6.1",
|
||||
"docx-preview": "^0.3.7",
|
||||
"drizzle-orm": "^0.44.5",
|
||||
"encoding": "0.1.13",
|
||||
"entities": "6.0.1",
|
||||
@@ -2094,6 +2096,10 @@
|
||||
|
||||
"docs": ["docs@workspace:apps/docs"],
|
||||
|
||||
"docx": ["docx@9.6.1", "", { "dependencies": { "@types/node": "^25.2.3", "hash.js": "^1.1.7", "jszip": "^3.10.1", "nanoid": "^5.1.3", "xml": "^1.0.1", "xml-js": "^1.6.8" } }, "sha512-ZJja9/KBUuFC109sCMzovoq2GR2wCG/AuxivjA+OHj/q0TEgJIm3S7yrlUxIy3B+bV8YDj/BiHfWyrRFmyWpDQ=="],
|
||||
|
||||
"docx-preview": ["docx-preview@0.3.7", "", { "dependencies": { "jszip": ">=3.0.0" } }, "sha512-Lav69CTA/IYZPJTsKH7oYeoZjyg96N0wEJMNslGJnZJ+dMUZK85Lt5ASC79yUlD48ecWjuv+rkcmFt6EVPV0Xg=="],
|
||||
|
||||
"dom-accessibility-api": ["dom-accessibility-api@0.6.3", "", {}, "sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w=="],
|
||||
|
||||
"dom-serializer": ["dom-serializer@2.0.0", "", { "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.2", "entities": "^4.2.0" } }, "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg=="],
|
||||
@@ -2392,6 +2398,8 @@
|
||||
|
||||
"has-tostringtag": ["has-tostringtag@1.0.2", "", { "dependencies": { "has-symbols": "^1.0.3" } }, "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw=="],
|
||||
|
||||
"hash.js": ["hash.js@1.1.7", "", { "dependencies": { "inherits": "^2.0.3", "minimalistic-assert": "^1.0.1" } }, "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA=="],
|
||||
|
||||
"hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="],
|
||||
|
||||
"hast-util-from-parse5": ["hast-util-from-parse5@8.0.3", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/unist": "^3.0.0", "devlop": "^1.0.0", "hastscript": "^9.0.0", "property-information": "^7.0.0", "vfile": "^6.0.0", "vfile-location": "^5.0.0", "web-namespaces": "^2.0.0" } }, "sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg=="],
|
||||
@@ -4216,6 +4224,8 @@
|
||||
|
||||
"cross-spawn/which": ["which@2.0.2", "", { "dependencies": { "isexe": "^2.0.0" }, "bin": { "node-which": "./bin/node-which" } }, "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA=="],
|
||||
|
||||
"docx/@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="],
|
||||
|
||||
"dom-serializer/entities": ["entities@4.5.0", "", {}, "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw=="],
|
||||
|
||||
"e2b/glob": ["glob@11.1.0", "", { "dependencies": { "foreground-child": "^3.3.1", "jackspeak": "^4.1.1", "minimatch": "^10.1.1", "minipass": "^7.1.2", "package-json-from-dist": "^1.0.0", "path-scurry": "^2.0.0" }, "bin": { "glob": "dist/esm/bin.mjs" } }, "sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw=="],
|
||||
@@ -4702,6 +4712,8 @@
|
||||
|
||||
"critters/chalk/supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="],
|
||||
|
||||
"docx/@types/node/undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
|
||||
|
||||
"engine.io/@types/node/undici-types": ["undici-types@7.10.0", "", {}, "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag=="],
|
||||
|
||||
"fetch-cookie/tough-cookie/tldts": ["tldts@7.0.27", "", { "dependencies": { "tldts-core": "^7.0.27" }, "bin": { "tldts": "bin/cli.js" } }, "sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg=="],
|
||||
|
||||
Reference in New Issue
Block a user