diff --git a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts index d6667c96de..e886e1a28c 100644 --- a/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts +++ b/autogpt_platform/frontend/src/components/contextual/Chat/components/ToolResponseMessage/helpers.ts @@ -48,10 +48,23 @@ function isWorkspaceRef(value: unknown): value is string { * Check if a workspace reference appears to be an image based on common patterns. * Since workspace refs don't have extensions, we check the context or assume image * for certain block types. + * + * TODO: Replace keyword matching with MIME type encoded in workspace ref. + * e.g., workspace://abc123#image/png or workspace://abc123#video/mp4 + * This would let frontend render correctly without fragile keyword matching. */ function isLikelyImageRef(value: string, outputKey?: string): boolean { if (!isWorkspaceRef(value)) return false; + // Check output key name for video-related hints (these are NOT images) + const videoKeywords = ["video", "mp4", "mov", "avi", "webm", "movie", "clip"]; + if (outputKey) { + const lowerKey = outputKey.toLowerCase(); + if (videoKeywords.some((kw) => lowerKey.includes(kw))) { + return false; + } + } + // Check output key name for image-related hints const imageKeywords = [ "image", @@ -62,9 +75,6 @@ function isLikelyImageRef(value: string, outputKey?: string): boolean { "avatar", "icon", "screenshot", - "output", - "result", - "generated", ]; if (outputKey) { const lowerKey = outputKey.toLowerCase();