From b6e1df4ffdb20d1c73d7390edb41945b5bd99a92 Mon Sep 17 00:00:00 2001 From: Siddharth Ganesan Date: Sat, 4 Apr 2026 11:41:27 -0700 Subject: [PATCH] feat(motheship): add docx support --- .../app/api/files/serve/[...path]/route.ts | 78 +++++-- .../api/workspaces/[id]/pptx/preview/route.ts | 2 +- .../components/file-viewer/file-viewer.tsx | 148 ++++++------ .../resource-content/resource-content.tsx | 7 +- .../tools/server/files/workspace-file.ts | 95 ++++++-- apps/sim/lib/execution/doc-vm.ts | 211 ++++++++++++++++++ apps/sim/lib/execution/doc-worker.cjs | 146 ++++++++++++ apps/sim/package.json | 2 + apps/sim/trigger.config.ts | 9 +- bun.lock | 12 + 10 files changed, 585 insertions(+), 125 deletions(-) create mode 100644 apps/sim/lib/execution/doc-vm.ts create mode 100644 apps/sim/lib/execution/doc-worker.cjs diff --git a/apps/sim/app/api/files/serve/[...path]/route.ts b/apps/sim/app/api/files/serve/[...path]/route.ts index bc14086395..0f15198c32 100644 --- a/apps/sim/app/api/files/serve/[...path]/route.ts +++ b/apps/sim/app/api/files/serve/[...path]/route.ts @@ -4,7 +4,11 @@ import { createLogger } from '@sim/logger' import type { NextRequest } from 'next/server' import { NextResponse } from 'next/server' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' -import { generatePptxFromCode } from '@/lib/execution/pptx-vm' +import { + generateDocxFromCode, + generatePdfFromCode, + generatePptxFromCode, +} from '@/lib/execution/doc-vm' import { CopilotFiles, isUsingCloudStorage } from '@/lib/uploads' import type { StorageContext } from '@/lib/uploads/config' import { parseWorkspaceFileKey } from '@/lib/uploads/contexts/workspace/workspace-file-manager' @@ -22,47 +26,73 @@ import { const logger = createLogger('FilesServeAPI') const ZIP_MAGIC = Buffer.from([0x50, 0x4b, 0x03, 0x04]) +const PDF_MAGIC = Buffer.from([0x25, 0x50, 0x44, 0x46, 0x2d]) // %PDF- -const MAX_COMPILED_PPTX_CACHE = 10 -const compiledPptxCache = new Map() - -function compiledCacheSet(key: string, buffer: Buffer): void { - if (compiledPptxCache.size >= MAX_COMPILED_PPTX_CACHE) { - compiledPptxCache.delete(compiledPptxCache.keys().next().value as string) - } - compiledPptxCache.set(key, buffer) +interface CompilableFormat { + magic: Buffer + compile: (code: string, workspaceId: string) => Promise + contentType: string } -async function compilePptxIfNeeded( +const COMPILABLE_FORMATS: Record = { + '.pptx': { + magic: ZIP_MAGIC, + compile: generatePptxFromCode, + contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + }, + '.docx': { + magic: ZIP_MAGIC, + compile: generateDocxFromCode, + contentType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + }, + '.pdf': { + magic: PDF_MAGIC, + compile: generatePdfFromCode, + contentType: 'application/pdf', + }, +} + +const MAX_COMPILED_DOC_CACHE = 10 +const compiledDocCache = new Map() + +function compiledCacheSet(key: string, buffer: Buffer): void { + if (compiledDocCache.size >= MAX_COMPILED_DOC_CACHE) { + compiledDocCache.delete(compiledDocCache.keys().next().value as string) + } + compiledDocCache.set(key, buffer) +} + +async function compileDocumentIfNeeded( buffer: Buffer, filename: string, workspaceId?: string, raw?: boolean ): Promise<{ buffer: Buffer; contentType: string }> { - const isPptx = filename.toLowerCase().endsWith('.pptx') - if (raw || !isPptx || buffer.subarray(0, 4).equals(ZIP_MAGIC)) { + if (raw) return { buffer, contentType: getContentType(filename) } + + const ext = filename.slice(filename.lastIndexOf('.')).toLowerCase() + const format = COMPILABLE_FORMATS[ext] + if (!format) return { buffer, contentType: getContentType(filename) } + + const magicLen = format.magic.length + if (buffer.length >= magicLen && buffer.subarray(0, magicLen).equals(format.magic)) { return { buffer, contentType: getContentType(filename) } } const code = buffer.toString('utf-8') const cacheKey = createHash('sha256') + .update(ext) .update(code) .update(workspaceId ?? '') .digest('hex') - const cached = compiledPptxCache.get(cacheKey) + const cached = compiledDocCache.get(cacheKey) if (cached) { - return { - buffer: cached, - contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - } + return { buffer: cached, contentType: format.contentType } } - const compiled = await generatePptxFromCode(code, workspaceId || '') + const compiled = await format.compile(code, workspaceId || '') compiledCacheSet(cacheKey, compiled) - return { - buffer: compiled, - contentType: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - } + return { buffer: compiled, contentType: format.contentType } } const STORAGE_KEY_PREFIX_RE = /^\d{13}-[a-z0-9]{7}-/ @@ -169,7 +199,7 @@ async function handleLocalFile( const segment = filename.split('/').pop() || filename const displayName = stripStorageKeyPrefix(segment) const workspaceId = getWorkspaceIdForCompile(filename) - const { buffer: fileBuffer, contentType } = await compilePptxIfNeeded( + const { buffer: fileBuffer, contentType } = await compileDocumentIfNeeded( rawBuffer, displayName, workspaceId, @@ -226,7 +256,7 @@ async function handleCloudProxy( const segment = cloudKey.split('/').pop() || 'download' const displayName = stripStorageKeyPrefix(segment) const workspaceId = getWorkspaceIdForCompile(cloudKey) - const { buffer: fileBuffer, contentType } = await compilePptxIfNeeded( + const { buffer: fileBuffer, contentType } = await compileDocumentIfNeeded( rawBuffer, displayName, workspaceId, diff --git a/apps/sim/app/api/workspaces/[id]/pptx/preview/route.ts b/apps/sim/app/api/workspaces/[id]/pptx/preview/route.ts index 6554304941..6de747246d 100644 --- a/apps/sim/app/api/workspaces/[id]/pptx/preview/route.ts +++ b/apps/sim/app/api/workspaces/[id]/pptx/preview/route.ts @@ -1,7 +1,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { getSession } from '@/lib/auth' -import { generatePptxFromCode } from '@/lib/execution/pptx-vm' +import { generatePptxFromCode } from '@/lib/execution/doc-vm' import { verifyWorkspaceMembership } from '@/app/api/workflows/utils' export const dynamic = 'force-dynamic' diff --git a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx index f62caa1f51..fcca3eab13 100644 --- a/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx +++ b/apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/file-viewer.tsx @@ -57,7 +57,7 @@ const TEXT_EDITABLE_EXTENSIONS = new Set([ ...SUPPORTED_CODE_EXTENSIONS, ]) -const IFRAME_PREVIEWABLE_MIME_TYPES = new Set(['application/pdf']) +const IFRAME_PREVIEWABLE_MIME_TYPES = new Set(['application/pdf', 'text/x-pdflibjs']) const IFRAME_PREVIEWABLE_EXTENSIONS = new Set(['pdf']) const IMAGE_PREVIEWABLE_MIME_TYPES = new Set(['image/png', 'image/jpeg', 'image/gif', 'image/webp']) @@ -65,11 +65,13 @@ const IMAGE_PREVIEWABLE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp const PPTX_PREVIEWABLE_MIME_TYPES = new Set([ 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'text/x-pptxgenjs', ]) const PPTX_PREVIEWABLE_EXTENSIONS = new Set(['pptx']) const DOCX_PREVIEWABLE_MIME_TYPES = new Set([ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'text/x-docxjs', ]) const DOCX_PREVIEWABLE_EXTENSIONS = new Set(['docx']) @@ -91,8 +93,8 @@ function resolveFileCategory(mimeType: string | null, filename: string): FileCat if (mimeType && TEXT_EDITABLE_MIME_TYPES.has(mimeType)) return 'text-editable' if (mimeType && IFRAME_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'iframe-previewable' if (mimeType && IMAGE_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'image-previewable' - if (mimeType && PPTX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'pptx-previewable' if (mimeType && DOCX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'docx-previewable' + if (mimeType && PPTX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'pptx-previewable' if (mimeType && XLSX_PREVIEWABLE_MIME_TYPES.has(mimeType)) return 'xlsx-previewable' const ext = getFileExtension(filename) @@ -100,8 +102,8 @@ function resolveFileCategory(mimeType: string | null, filename: string): FileCat if (TEXT_EDITABLE_EXTENSIONS.has(nameKey)) return 'text-editable' if (IFRAME_PREVIEWABLE_EXTENSIONS.has(ext)) return 'iframe-previewable' if (IMAGE_PREVIEWABLE_EXTENSIONS.has(ext)) return 'image-previewable' - if (PPTX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'pptx-previewable' if (DOCX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'docx-previewable' + if (PPTX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'pptx-previewable' if (XLSX_PREVIEWABLE_EXTENSIONS.has(ext)) return 'xlsx-previewable' return 'unsupported' @@ -168,14 +170,14 @@ export function FileViewer({ return } - if (category === 'pptx-previewable') { - return - } - if (category === 'docx-previewable') { return } + if (category === 'pptx-previewable') { + return + } + if (category === 'xlsx-previewable') { return } @@ -219,7 +221,14 @@ function TextEditor({ isLoading, error, dataUpdatedAt, - } = useWorkspaceFileContent(workspaceId, file.id, file.key, file.type === 'text/x-pptxgenjs') + } = useWorkspaceFileContent( + workspaceId, + file.id, + file.key, + file.type === 'text/x-pptxgenjs' || + file.type === 'text/x-docxjs' || + file.type === 'text/x-pdflibjs' + ) const updateContent = useUpdateWorkspaceFileContent() const updateContentRef = useRef(updateContent) @@ -603,6 +612,58 @@ const DOCUMENT_SKELETON = ( ) +const DocxPreview = memo(function DocxPreview({ + file, + workspaceId, +}: { + file: WorkspaceFileRecord + workspaceId: string +}) { + const containerRef = useRef(null) + const { + data: fileData, + isLoading, + error: fetchError, + } = useWorkspaceFileBinary(workspaceId, file.id, file.key) + const [renderError, setRenderError] = useState(null) + + useEffect(() => { + if (!containerRef.current || !fileData) return + + let cancelled = false + + async function render() { + try { + const { renderAsync } = await import('docx-preview') + if (cancelled || !containerRef.current) return + containerRef.current.innerHTML = '' + await renderAsync(fileData, containerRef.current, undefined, { + inWrapper: true, + ignoreWidth: false, + ignoreHeight: false, + }) + } catch (err) { + if (!cancelled) { + const msg = err instanceof Error ? err.message : 'Failed to render document' + logger.error('DOCX render failed', { error: msg }) + setRenderError(msg) + } + } + } + + render() + return () => { + cancelled = true + } + }, [fileData]) + + const error = resolvePreviewError(fetchError, renderError) + if (error) return + if (isLoading) return DOCUMENT_SKELETON + + return
+}) + const pptxSlideCache = new Map() function pptxCacheKey(fileId: string, dataUpdatedAt: number, byteLength: number): string { @@ -865,77 +926,6 @@ function toggleMarkdownCheckbox(markdown: string, targetIndex: number, checked: }) } -const DocxPreview = memo(function DocxPreview({ - file, - workspaceId, -}: { - file: WorkspaceFileRecord - workspaceId: string -}) { - const { - data: fileData, - isLoading, - error: fetchError, - } = useWorkspaceFileBinary(workspaceId, file.id, file.key) - - const [html, setHtml] = useState(null) - const [renderError, setRenderError] = useState(null) - - useEffect(() => { - if (!fileData) return - const data = fileData - - let cancelled = false - - async function convert() { - try { - setRenderError(null) - const mammoth = await import('mammoth') - const result = await mammoth.convertToHtml({ arrayBuffer: data }) - if (!cancelled) setHtml(result.value) - } catch (err) { - if (!cancelled) { - const msg = err instanceof Error ? err.message : 'Failed to render document' - logger.error('DOCX render failed', { error: msg }) - setRenderError(msg) - } - } - } - - convert() - return () => { - cancelled = true - } - }, [fileData]) - - const error = resolvePreviewError(fetchError, renderError) - if (error) return - if (isLoading || html === null) return DOCUMENT_SKELETON - - return ( -
-