Siddharth Ganesan
2026-01-29 18:10:47 -08:00
parent 4ab3e23cf7
commit 5add92a613
8 changed files with 597 additions and 524 deletions

View File

@@ -32,6 +32,7 @@ import type { WandControlHandlers } from '@/app/workspace/[workspaceId]/w/[workf
import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
import { useWand } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand'
import type { SubBlockConfig } from '@/blocks/types'
import { supportsVision } from '@/providers/utils'
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
@@ -50,13 +51,13 @@ const MAX_TEXTAREA_HEIGHT_PX = 320
/** Pattern to match complete message objects in JSON */
const COMPLETE_MESSAGE_PATTERN =
/"role"\s*:\s*"(system|user|assistant|media)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g
/"role"\s*:\s*"(system|user|assistant|attachment)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g
/** Pattern to match incomplete content at end of buffer */
const INCOMPLETE_CONTENT_PATTERN = /"content"\s*:\s*"((?:[^"\\]|\\.)*)$/
/** Pattern to match role before content */
const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|media)"[^{]*$/
const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|attachment)"[^{]*$/
/**
* Unescapes JSON string content
@@ -65,9 +66,9 @@ const unescapeContent = (str: string): string =>
str.replace(/\\n/g, '\n').replace(/\\"/g, '"').replace(/\\\\/g, '\\')
/**
* Media content for multimodal messages
* Attachment content (files, images, documents)
*/
interface MediaContent {
interface AttachmentContent {
/** Source type: how the data was provided */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data */
@@ -84,9 +85,9 @@ interface MediaContent {
* Interface for individual message in the messages array
*/
interface Message {
role: 'system' | 'user' | 'assistant' | 'media'
role: 'system' | 'user' | 'assistant' | 'attachment'
content: string
media?: MediaContent
attachment?: AttachmentContent
}
/**
@@ -122,8 +123,8 @@ export function MessagesInput({
const [openPopoverIndex, setOpenPopoverIndex] = useState<number | null>(null)
const { activeWorkflowId } = useWorkflowRegistry()
// Local media mode state - basic = FileUpload, advanced = URL/base64 textarea
const [mediaMode, setMediaMode] = useState<'basic' | 'advanced'>('basic')
// Local attachment mode state - basic = FileUpload, advanced = URL/base64 textarea
const [attachmentMode, setAttachmentMode] = useState<'basic' | 'advanced'>('basic')
// Workspace files for wand context
const [workspaceFiles, setWorkspaceFiles] = useState<WorkspaceFile[]>([])
@@ -166,22 +167,49 @@ export function MessagesInput({
.join('\n')
if (!filesList) {
return 'No media files in workspace. The user can upload files manually after generation.'
return 'No files in workspace. The user can upload files manually after generation.'
}
return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the media object. If not selecting a file, omit the fileId field.`
return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the attachment object. If not selecting a file, omit the fileId field.`
}, [workspaceFiles])
// Get indices of media messages for subscription
const mediaIndices = useMemo(
// Get indices of attachment messages for subscription
const attachmentIndices = useMemo(
() =>
localMessages
.map((msg, index) => (msg.role === 'media' ? index : -1))
.map((msg, index) => (msg.role === 'attachment' ? index : -1))
.filter((i) => i !== -1),
[localMessages]
)
// Subscribe to file upload values for all media messages
// Subscribe to model value to check vision capability
const modelSupportsVision = useSubBlockStore(
useCallback(
(state) => {
if (!activeWorkflowId) return true // Default to allowing attachments
const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {}
const modelValue = blockValues.model as string | undefined
if (!modelValue) return true // No model selected, allow attachments
return supportsVision(modelValue)
},
[activeWorkflowId, blockId]
)
)
// Determine available roles based on model capabilities
const availableRoles = useMemo(() => {
const baseRoles: Array<'system' | 'user' | 'assistant' | 'attachment'> = [
'system',
'user',
'assistant',
]
if (modelSupportsVision) {
baseRoles.push('attachment')
}
return baseRoles
}, [modelSupportsVision])
// Subscribe to file upload values for all attachment messages
const fileUploadValues = useSubBlockStore(
useCallback(
(state) => {
@@ -189,8 +217,8 @@ export function MessagesInput({
const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {}
const result: Record<number, { name: string; path: string; type: string; size: number }> =
{}
for (const index of mediaIndices) {
const fileUploadKey = `${subBlockId}-media-${index}`
for (const index of attachmentIndices) {
const fileUploadKey = `${subBlockId}-attachment-${index}`
const fileValue = blockValues[fileUploadKey]
if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) {
result[index] = fileValue as { name: string; path: string; type: string; size: number }
@@ -198,21 +226,21 @@ export function MessagesInput({
}
return result
},
[activeWorkflowId, blockId, subBlockId, mediaIndices]
[activeWorkflowId, blockId, subBlockId, attachmentIndices]
)
)
// Effect to sync FileUpload values to message media objects
// Effect to sync FileUpload values to message attachment objects
useEffect(() => {
if (!activeWorkflowId || isPreview) return
let hasChanges = false
const updatedMessages = localMessages.map((msg, index) => {
if (msg.role !== 'media') return msg
if (msg.role !== 'attachment') return msg
const uploadedFile = fileUploadValues[index]
if (uploadedFile) {
const newMedia: MediaContent = {
const newAttachment: AttachmentContent = {
sourceType: 'file',
data: uploadedFile.path,
mimeType: uploadedFile.type,
@@ -221,16 +249,16 @@ export function MessagesInput({
// Only update if different
if (
msg.media?.data !== newMedia.data ||
msg.media?.sourceType !== newMedia.sourceType ||
msg.media?.mimeType !== newMedia.mimeType ||
msg.media?.fileName !== newMedia.fileName
msg.attachment?.data !== newAttachment.data ||
msg.attachment?.sourceType !== newAttachment.sourceType ||
msg.attachment?.mimeType !== newAttachment.mimeType ||
msg.attachment?.fileName !== newAttachment.fileName
) {
hasChanges = true
return {
...msg,
content: uploadedFile.name || msg.content,
media: newMedia,
attachment: newAttachment,
}
}
}
@@ -267,20 +295,22 @@ export function MessagesInput({
if (Array.isArray(parsed)) {
const validMessages: Message[] = parsed
.filter(
(m): m is { role: string; content: string; media?: MediaContent } =>
(m): m is { role: string; content: string; attachment?: AttachmentContent } =>
typeof m === 'object' &&
m !== null &&
typeof m.role === 'string' &&
typeof m.content === 'string'
)
.map((m) => {
const role = ['system', 'user', 'assistant', 'media'].includes(m.role) ? m.role : 'user'
const role = ['system', 'user', 'assistant', 'attachment'].includes(m.role)
? m.role
: 'user'
const message: Message = {
role: role as Message['role'],
content: m.content,
}
if (m.media) {
message.media = m.media
if (m.attachment) {
message.attachment = m.attachment
}
return message
})
@@ -344,14 +374,14 @@ export function MessagesInput({
onGeneratedContent: (content) => {
const validMessages = parseMessages(content)
if (validMessages) {
// Process media messages - only allow fileId to set files, sanitize other attempts
// Process attachment messages - only allow fileId to set files, sanitize other attempts
validMessages.forEach((msg, index) => {
if (msg.role === 'media') {
if (msg.role === 'attachment') {
// Check if this is an existing file with valid data (preserve it)
const hasExistingFile =
msg.media?.sourceType === 'file' &&
msg.media?.data?.startsWith('/api/') &&
msg.media?.fileName
msg.attachment?.sourceType === 'file' &&
msg.attachment?.data?.startsWith('/api/') &&
msg.attachment?.fileName
if (hasExistingFile) {
// Preserve existing file data as-is
@@ -359,11 +389,11 @@ export function MessagesInput({
}
// Check if wand provided a fileId to select a workspace file
if (msg.media?.fileId) {
const file = workspaceFiles.find((f) => f.id === msg.media?.fileId)
if (msg.attachment?.fileId) {
const file = workspaceFiles.find((f) => f.id === msg.attachment?.fileId)
if (file) {
// Set the file value in SubBlockStore so FileUpload picks it up
const fileUploadKey = `${subBlockId}-media-${index}`
const fileUploadKey = `${subBlockId}-attachment-${index}`
const uploadedFile = {
name: file.name,
path: file.path,
@@ -372,16 +402,16 @@ export function MessagesInput({
}
useSubBlockStore.getState().setValue(blockId, fileUploadKey, uploadedFile)
// Clear the media object - the FileUpload will sync the file data via useEffect
// DON'T set media.data here as it would appear in the ShortInput (advanced mode)
msg.media = undefined
// Clear the attachment object - the FileUpload will sync the file data via useEffect
// DON'T set attachment.data here as it would appear in the ShortInput (advanced mode)
msg.attachment = undefined
return
}
}
// Sanitize: clear any media object that isn't a valid existing file or fileId match
// Sanitize: clear any attachment object that isn't a valid existing file or fileId match
// This prevents the LLM from setting arbitrary data/variable references
msg.media = undefined
msg.attachment = undefined
}
})
@@ -458,22 +488,22 @@ export function MessagesInput({
)
const updateMessageRole = useCallback(
(index: number, role: 'system' | 'user' | 'assistant' | 'media') => {
(index: number, role: 'system' | 'user' | 'assistant' | 'attachment') => {
if (isPreview || disabled) return
const updatedMessages = [...localMessages]
if (role === 'media') {
if (role === 'attachment') {
updatedMessages[index] = {
...updatedMessages[index],
role,
content: updatedMessages[index].content || '',
media: updatedMessages[index].media || {
attachment: updatedMessages[index].attachment || {
sourceType: 'file',
data: '',
},
}
} else {
const { media: _, ...rest } = updatedMessages[index]
const { attachment: _, ...rest } = updatedMessages[index]
updatedMessages[index] = {
...rest,
role,
@@ -761,7 +791,7 @@ export function MessagesInput({
</PopoverTrigger>
<PopoverContent minWidth={140} align='start'>
<div className='flex flex-col gap-[2px]'>
{(['system', 'user', 'assistant', 'media'] as const).map((role) => (
{availableRoles.map((role) => (
<PopoverItem
key={role}
active={message.role === role}
@@ -820,20 +850,20 @@ export function MessagesInput({
</Button>
</>
)}
{/* Mode toggle for media messages */}
{message.role === 'media' && (
{/* Mode toggle for attachment messages */}
{message.role === 'attachment' && (
<Tooltip.Root>
<Tooltip.Trigger asChild>
<Button
variant='ghost'
onClick={(e: React.MouseEvent) => {
e.stopPropagation()
setMediaMode((m) => (m === 'basic' ? 'advanced' : 'basic'))
setAttachmentMode((m) => (m === 'basic' ? 'advanced' : 'basic'))
}}
disabled={disabled}
className='-my-1 -mr-1 h-6 w-6 p-0'
aria-label={
mediaMode === 'advanced'
attachmentMode === 'advanced'
? 'Switch to file upload'
: 'Switch to URL/text input'
}
@@ -841,7 +871,7 @@ export function MessagesInput({
<ArrowLeftRight
className={cn(
'h-3 w-3',
mediaMode === 'advanced'
attachmentMode === 'advanced'
? 'text-[var(--text-primary)]'
: 'text-[var(--text-secondary)]'
)}
@@ -850,7 +880,7 @@ export function MessagesInput({
</Tooltip.Trigger>
<Tooltip.Content side='top'>
<p>
{mediaMode === 'advanced'
{attachmentMode === 'advanced'
? 'Switch to file upload'
: 'Switch to URL/text input'}
</p>
@@ -873,13 +903,13 @@ export function MessagesInput({
)}
</div>
{/* Content Input - different for media vs text messages */}
{message.role === 'media' ? (
{/* Content Input - different for attachment vs text messages */}
{message.role === 'attachment' ? (
<div className='relative w-full px-[8px] py-[8px]'>
{mediaMode === 'basic' ? (
{attachmentMode === 'basic' ? (
<FileUpload
blockId={blockId}
subBlockId={`${subBlockId}-media-${index}`}
subBlockId={`${subBlockId}-attachment-${index}`}
acceptedTypes='image/*,audio/*,video/*,application/pdf,.doc,.docx,.txt'
multiple={false}
isPreview={isPreview}
@@ -888,19 +918,21 @@ export function MessagesInput({
) : (
<ShortInput
blockId={blockId}
subBlockId={`${subBlockId}-media-ref-${index}`}
subBlockId={`${subBlockId}-attachment-ref-${index}`}
placeholder='Reference file from previous block...'
config={{
id: `${subBlockId}-media-ref-${index}`,
id: `${subBlockId}-attachment-ref-${index}`,
type: 'short-input',
}}
value={
// Only show value for variable references, not file uploads
message.media?.sourceType === 'file' ? '' : message.media?.data || ''
message.attachment?.sourceType === 'file'
? ''
: message.attachment?.data || ''
}
onChange={(newValue: string) => {
const updatedMessages = [...localMessages]
if (updatedMessages[index].role === 'media') {
if (updatedMessages[index].role === 'attachment') {
// Determine sourceType based on content
let sourceType: 'url' | 'base64' = 'url'
if (newValue.startsWith('data:') || newValue.includes(';base64,')) {
@@ -909,8 +941,8 @@ export function MessagesInput({
updatedMessages[index] = {
...updatedMessages[index],
content: newValue.substring(0, 50),
media: {
...updatedMessages[index].media,
attachment: {
...updatedMessages[index].attachment,
sourceType,
data: newValue,
},
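
For reference, a minimal sketch (not part of the commit) of the value this sub-block ends up storing once the FileUpload sync effect runs, assuming the Message and AttachmentContent interfaces defined above; the file name and path are illustrative, with only the '/api/' prefix taken from the sanitization check in this file:

const messages: Message[] = [
  { role: 'system', content: 'You are an expert image analyst.' },
  {
    role: 'attachment',
    content: 'invoice.png', // mirrors the uploaded file name
    attachment: {
      sourceType: 'file', // set by the FileUpload sync effect
      data: '/api/files/serve/invoice.png', // illustrative workspace file path
      mimeType: 'image/png',
      fileName: 'invoice.png',
    },
  },
  { role: 'user', content: 'Extract the total amount.' },
]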

View File

@@ -100,7 +100,7 @@ Current messages: {context}
RULES:
1. Generate ONLY a valid JSON array - no markdown, no explanations
2. Each message object must have "role" and "content" properties
3. Valid roles are: "system", "user", "assistant", "media"
3. Valid roles are: "system", "user", "assistant", "attachment"
4. Content can be as long as necessary - don't truncate
5. If editing existing messages, preserve structure unless asked to change it
6. For new agents, create DETAILED, PROFESSIONAL system prompts that include:
@@ -110,15 +110,15 @@ RULES:
- Critical thinking or quality guidelines
- How to handle edge cases and uncertainty
MEDIA MESSAGES:
- Use role "media" to include images, audio, video, or documents in a multimodal conversation
- IMPORTANT: If a media message in the current context has a "media" object with file data, ALWAYS preserve that entire "media" object exactly as-is
- When creating NEW media messages, you can either:
1. Just set role to "media" with descriptive content - user will upload the file manually
2. Select a file from the available workspace files by including "fileId" in the media object (optional)
ATTACHMENTS:
- Use role "attachment" to include images, audio, video, or documents in a multimodal conversation
- IMPORTANT: If an attachment message in the current context has an "attachment" object with file data, ALWAYS preserve that entire "attachment" object exactly as-is
- When creating NEW attachment messages, you can either:
1. Just set role to "attachment" with descriptive content - user will upload the file manually
2. Select a file from the available workspace files by including "fileId" in the attachment object (optional)
- You do NOT have to select a file - it's completely optional
- Example without file: {"role": "media", "content": "Analyze this image for text and objects"}
- Example with file selection: {"role": "media", "content": "Analyze this image", "media": {"fileId": "abc123"}}
- Example without file: {"role": "attachment", "content": "Analyze this image for text and objects"}
- Example with file selection: {"role": "attachment", "content": "Analyze this image", "attachment": {"fileId": "abc123"}}
EXAMPLES:
@@ -129,7 +129,7 @@ Code reviewer:
[{"role": "system", "content": "You are a Senior Code Reviewer with expertise in software architecture, security, and best practices. Your role is to provide thorough, constructive code reviews that improve code quality and help developers grow.\\n\\n## Review Methodology\\n\\n1. **Security First**: Check for vulnerabilities including injection attacks, authentication flaws, data exposure, and insecure dependencies.\\n\\n2. **Code Quality**: Evaluate readability, maintainability, adherence to DRY/SOLID principles, and appropriate abstraction levels.\\n\\n3. **Performance**: Identify potential bottlenecks, unnecessary computations, memory leaks, and optimization opportunities.\\n\\n4. **Testing**: Assess test coverage, edge case handling, and testability of the code structure.\\n\\n## Output Format\\n\\n### Summary\\nBrief overview of the code's purpose and overall assessment.\\n\\n### Critical Issues\\nSecurity vulnerabilities or bugs that must be fixed before merging.\\n\\n### Improvements\\nSuggested enhancements with clear explanations of why and how.\\n\\n### Positive Aspects\\nHighlight well-written code to reinforce good practices.\\n\\nBe specific with line references. Provide code examples for suggested changes. Balance critique with encouragement."}, {"role": "user", "content": "<start.input>"}]
Image analysis agent:
[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "media", "content": "Analyze this image"}]
[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "attachment", "content": "Analyze this image"}]
Return ONLY the JSON array.`,
placeholder: 'Describe what you want to create or change...',

View File

@@ -3,8 +3,6 @@ import { account, mcpServers } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq, inArray, isNull } from 'drizzle-orm'
import { createMcpToolId } from '@/lib/mcp/utils'
import { bufferToBase64 } from '@/lib/uploads/utils/file-utils'
import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server'
import { refreshTokenIfNeeded } from '@/app/api/auth/oauth/utils'
import { getAllBlocks } from '@/blocks'
import type { BlockOutput } from '@/blocks/types'
@@ -27,6 +25,8 @@ import {
validateModelProvider,
} from '@/executor/utils/permission-check'
import { executeProviderRequest } from '@/providers'
import { transformAttachmentMessages } from '@/providers/attachment'
import type { ProviderId } from '@/providers/types'
import { getProviderFromModel, transformBlockTool } from '@/providers/utils'
import type { SerializedBlock } from '@/serializer/types'
import { executeTool } from '@/tools'
@@ -62,9 +62,12 @@ export class AgentBlockHandler implements BlockHandler {
const streamingConfig = this.getStreamingConfig(ctx, block)
const rawMessages = await this.buildMessages(ctx, filteredInputs)
// Transform media messages to provider-specific format (async for file fetching)
// Transform attachment messages to provider-specific format (async for file fetching)
const messages = rawMessages
? await this.transformMediaMessages(rawMessages, providerId, ctx)
? await transformAttachmentMessages(rawMessages, {
providerId: providerId as ProviderId,
model,
})
: undefined
const providerRequest = this.buildProviderRequest({
@@ -848,445 +851,11 @@ export class AgentBlockHandler implements BlockHandler {
'role' in m &&
'content' in m &&
typeof m.role === 'string' &&
['system', 'user', 'assistant', 'media'].includes(m.role)
['system', 'user', 'assistant', 'attachment'].includes(m.role)
)
})
}
/**
* Transforms messages with 'media' role into provider-compatible format.
* Media messages are merged with the preceding or following user message,
* or converted to a user message with multimodal content.
*/
private async transformMediaMessages(
messages: Message[],
providerId: string,
ctx: ExecutionContext
): Promise<Message[]> {
const result: Message[] = []
for (let i = 0; i < messages.length; i++) {
const msg = messages[i]
if (msg.role !== 'media') {
result.push(msg)
continue
}
// Media message - transform based on provider (async for file fetching)
const mediaContent = await this.createProviderMediaContent(msg, providerId, ctx)
if (!mediaContent) {
logger.warn('Could not create media content for message', { msg })
continue
}
// Check if we should merge with the previous user message
const lastMessage = result[result.length - 1]
if (lastMessage && lastMessage.role === 'user') {
// Merge media into the previous user message's content array
const existingContent = this.ensureContentArray(lastMessage, providerId)
existingContent.push(mediaContent)
lastMessage.content = existingContent as any
} else {
// Create a new user message with the media content
result.push({
role: 'user',
content: [mediaContent] as any,
})
}
}
// Post-process: ensure all user messages have consistent content format
return result.map((msg) => {
if (msg.role === 'user' && typeof msg.content === 'string') {
// Convert string content to provider-specific text format (wrapped in array for multimodal)
return {
...msg,
content: [this.createTextContent(msg.content, providerId)] as any,
}
}
return msg
})
}
/**
* Ensures a user message has content as an array for multimodal support
*/
private ensureContentArray(msg: Message, providerId: string): any[] {
if (Array.isArray(msg.content)) {
return msg.content
}
if (typeof msg.content === 'string' && msg.content) {
return [this.createTextContent(msg.content, providerId)]
}
return []
}
/**
* Creates provider-specific text content block
*/
private createTextContent(text: string, providerId: string): any {
switch (providerId) {
case 'google':
case 'vertex':
return { text }
case 'anthropic':
return { type: 'text', text }
default:
// OpenAI format (used by most providers)
return { type: 'text', text }
}
}
/**
* Creates provider-specific media content from a media message
*/
private async createProviderMediaContent(
msg: Message,
providerId: string,
ctx: ExecutionContext
): Promise<any> {
const media = msg.media
if (!media) return null
const { sourceType, data, mimeType } = media
// Validate data is not empty
if (!data || !data.trim()) {
logger.warn('Empty media data, skipping media content')
return null
}
// Validate URL format if sourceType is URL
if (sourceType === 'url' || sourceType === 'file') {
const trimmedData = data.trim()
// Must start with http://, https://, or / (relative path for workspace files)
if (
!trimmedData.startsWith('http://') &&
!trimmedData.startsWith('https://') &&
!trimmedData.startsWith('/')
) {
logger.warn('Invalid URL format for media content', { data: trimmedData.substring(0, 50) })
// Try to salvage by treating as text
return { type: 'text', text: `[Invalid media URL: ${trimmedData.substring(0, 30)}...]` }
}
}
// Validate base64 format
if (sourceType === 'base64') {
const trimmedData = data.trim()
// Should be a data URL or raw base64
if (
!trimmedData.startsWith('data:') &&
!/^[A-Za-z0-9+/]+=*$/.test(trimmedData.replace(/\s/g, ''))
) {
logger.warn('Invalid base64 format for media content', {
data: trimmedData.substring(0, 50),
})
return { type: 'text', text: `[Invalid base64 data]` }
}
}
switch (providerId) {
case 'anthropic':
return this.createAnthropicMediaContent(sourceType, data, mimeType, ctx)
case 'google':
case 'vertex':
return this.createGeminiMediaContent(sourceType, data, mimeType)
case 'mistral':
return this.createMistralMediaContent(sourceType, data, mimeType)
case 'bedrock':
return this.createBedrockMediaContent(sourceType, data, mimeType)
default:
// OpenAI format (used by OpenAI, Azure, xAI, Groq, etc.)
return this.createOpenAIMediaContent(sourceType, data, mimeType)
}
}
/**
* Creates OpenAI-compatible media content
*/
private createOpenAIMediaContent(sourceType: string, data: string, mimeType?: string): any {
const isImage = mimeType?.startsWith('image/')
const isAudio = mimeType?.startsWith('audio/')
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isImage) {
return {
type: 'image_url',
image_url: { url: data, detail: 'auto' },
}
}
if (isAudio) {
const base64Data = data.includes(',') ? data.split(',')[1] : data
return {
type: 'input_audio',
input_audio: {
data: base64Data,
format: mimeType === 'audio/wav' ? 'wav' : 'mp3',
},
}
}
// For documents/files, include as URL
if (sourceType === 'url') {
return {
type: 'file',
file: { url: data },
}
}
// Base64 file - some providers may not support this directly
logger.warn('Base64 file content may not be supported by this provider')
return {
type: 'text',
text: `[File: ${mimeType || 'unknown type'}]`,
}
}
/**
* Creates Anthropic-compatible media content
* Anthropic requires base64 for internal/relative URLs since they can't fetch them
*/
private async createAnthropicMediaContent(
sourceType: string,
data: string,
mimeType?: string,
ctx?: ExecutionContext
): Promise<any> {
const isImage = mimeType?.startsWith('image/')
const isPdf = mimeType === 'application/pdf'
const isInternalUrl = data.startsWith('/')
const isExternalHttps = data.startsWith('https://')
// For internal URLs (workspace files), fetch and convert to base64
// Anthropic only supports external HTTPS URLs, not relative paths
if ((sourceType === 'url' || sourceType === 'file') && isInternalUrl) {
try {
logger.info('Fetching internal file for Anthropic base64 conversion', {
path: data.substring(0, 50),
})
const buffer = await downloadFileFromUrl(data)
const base64Data = bufferToBase64(buffer)
if (isImage) {
return {
type: 'image',
source: {
type: 'base64',
media_type: mimeType || 'image/png',
data: base64Data,
},
}
}
if (isPdf) {
return {
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: base64Data,
},
}
}
// Other file types - return as text fallback
return {
type: 'text',
text: `[File: ${mimeType || 'unknown type'}]`,
}
} catch (error) {
logger.error('Failed to fetch file for Anthropic', { error, path: data.substring(0, 50) })
return {
type: 'text',
text: `[Failed to load file: ${mimeType || 'unknown type'}]`,
}
}
}
// For external HTTPS URLs, Anthropic can fetch them directly
if ((sourceType === 'url' || sourceType === 'file') && isExternalHttps) {
if (isImage) {
return {
type: 'image',
source: { type: 'url', url: data },
}
}
if (isPdf) {
return {
type: 'document',
source: { type: 'url', url: data },
}
}
}
// Already base64 encoded
if (sourceType === 'base64') {
const base64Data = data.includes(',') ? data.split(',')[1] : data
if (isImage) {
return {
type: 'image',
source: {
type: 'base64',
media_type: mimeType || 'image/png',
data: base64Data,
},
}
}
if (isPdf) {
return {
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: base64Data,
},
}
}
}
// Fallback for unsupported types
return {
type: 'text',
text: `[File: ${mimeType || 'unknown type'}]`,
}
}
/**
* Creates Google Gemini-compatible media content
*/
private createGeminiMediaContent(sourceType: string, data: string, mimeType?: string): any {
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isUrl) {
return {
fileData: {
mimeType: mimeType || 'application/octet-stream',
fileUri: data,
},
}
}
// base64
const base64Data = data.includes(',') ? data.split(',')[1] : data
return {
inlineData: {
mimeType: mimeType || 'application/octet-stream',
data: base64Data,
},
}
}
/**
* Creates Mistral-compatible media content
* Note: Mistral uses a simplified format where image_url is a direct string,
* NOT a nested object like OpenAI
*/
private createMistralMediaContent(sourceType: string, data: string, mimeType?: string): any {
const isImage = mimeType?.startsWith('image/')
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isImage) {
if (isUrl) {
// Mistral uses direct string for image_url, not nested object
return {
type: 'image_url',
image_url: data,
}
}
// Base64 - Mistral accepts data URLs directly
const base64Data = data.includes(',')
? data
: `data:${mimeType || 'image/png'};base64,${data}`
return {
type: 'image_url',
image_url: base64Data,
}
}
// Fallback for non-image types
return {
type: 'text',
text: `[File: ${mimeType || 'unknown type'}]`,
}
}
/**
* Creates AWS Bedrock Converse API-compatible media content
* Bedrock uses a different structure: { image: { format, source: { bytes } } }
* Note: The actual bytes conversion happens in the provider layer
*/
private createBedrockMediaContent(sourceType: string, data: string, mimeType?: string): any {
const isImage = mimeType?.startsWith('image/')
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
// Determine format from mimeType
const getFormat = (mime?: string): string => {
if (!mime) return 'png'
if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpeg'
if (mime.includes('png')) return 'png'
if (mime.includes('gif')) return 'gif'
if (mime.includes('webp')) return 'webp'
return 'png'
}
if (isImage) {
if (isUrl) {
// For URLs, Bedrock needs S3 URIs or we need to fetch and convert
// Mark this for the provider layer to handle
return {
type: 'bedrock_image',
format: getFormat(mimeType),
sourceType: 'url',
url: data,
}
}
// Base64 - extract raw base64 data
const base64Data = data.includes(',') ? data.split(',')[1] : data
return {
type: 'bedrock_image',
format: getFormat(mimeType),
sourceType: 'base64',
data: base64Data,
}
}
// Documents (PDFs) - Bedrock supports document content type
if (mimeType === 'application/pdf') {
if (isUrl) {
return {
type: 'bedrock_document',
format: 'pdf',
sourceType: 'url',
url: data,
}
}
const base64Data = data.includes(',') ? data.split(',')[1] : data
return {
type: 'bedrock_document',
format: 'pdf',
sourceType: 'base64',
data: base64Data,
}
}
// Fallback for unsupported types
return {
type: 'text',
text: `[File: ${mimeType || 'unknown type'}]`,
}
}
private processMemories(memories: any): Message[] {
if (!memories) return []
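
The per-provider helpers removed above are superseded by the shared transformer imported at the top of this file; a rough sketch (not part of the commit) of the intended call and result, assuming a vision-capable model and the OpenAI-style default format, with all values hypothetical:

const raw: Message[] = [
  { role: 'user', content: 'Describe this image.' },
  {
    role: 'attachment',
    content: 'photo.jpg',
    attachment: { sourceType: 'base64', data: 'data:image/jpeg;base64,/9j/...', mimeType: 'image/jpeg' },
  },
]
const messages = await transformAttachmentMessages(raw, { providerId: 'openai' as ProviderId, model })
// Expected shape: the attachment is merged into the preceding user message as multimodal content:
// [{ role: 'user', content: [
//     { type: 'text', text: 'Describe this image.' },
//     { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,/9j/...', detail: 'auto' } },
//   ] }]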

View File

@@ -43,9 +43,9 @@ export interface ToolInput {
}
/**
* Media content for multimodal messages
* Attachment content (files, images, documents)
*/
export interface MediaContent {
export interface AttachmentContent {
/** Source type: how the data was provided */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data */
@@ -57,10 +57,10 @@ export interface MediaContent {
}
export interface Message {
role: 'system' | 'user' | 'assistant' | 'media'
role: 'system' | 'user' | 'assistant' | 'attachment'
content: string
/** Media content for 'media' role messages */
media?: MediaContent
/** Attachment content for 'attachment' role messages */
attachment?: AttachmentContent
executionId?: string
function_call?: any
tool_calls?: any[]

View File

@@ -109,7 +109,7 @@ export const anthropicProvider: ProviderConfig = {
],
})
} else {
// Handle content that's already in array format (from transformMediaMessages)
// Handle content that's already in array format (from transformAttachmentMessages)
const content = Array.isArray(msg.content)
? msg.content
: msg.content

View File

@@ -0,0 +1,397 @@
/**
* Centralized attachment content transformation for all providers.
*
* Strategy: Always normalize to base64 first, then create provider-specific formats.
* This eliminates URL accessibility issues and simplifies provider handling.
*/
import { createLogger } from '@sim/logger'
import { bufferToBase64 } from '@/lib/uploads/utils/file-utils'
import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server'
import { supportsVision } from '@/providers/models'
import type { ProviderId } from '@/providers/types'
const logger = createLogger('AttachmentTransformer')
/**
* Generic message type for attachment transformation.
*/
interface TransformableMessage {
role: string
content: string | any[] | null
attachment?: AttachmentContent
[key: string]: any
}
/**
* Attachment content (files, images, documents)
*/
export interface AttachmentContent {
sourceType: 'url' | 'base64' | 'file'
data: string
mimeType?: string
fileName?: string
}
/**
* Normalized attachment data (always base64)
*/
interface NormalizedAttachment {
base64: string
mimeType: string
}
/**
* Configuration for attachment transformation
*/
interface AttachmentTransformConfig {
providerId: ProviderId
model: string
}
/**
* Checks if a model supports attachments (vision/multimodal content).
*/
export function modelSupportsAttachments(model: string): boolean {
return supportsVision(model)
}
/**
* Transforms messages with 'attachment' role into provider-compatible format.
*/
export async function transformAttachmentMessages<T extends TransformableMessage>(
messages: T[],
config: AttachmentTransformConfig
): Promise<T[]> {
const { providerId, model } = config
const supportsAttachments = modelSupportsAttachments(model)
if (!supportsAttachments) {
return transformAttachmentsToText(messages) as T[]
}
const result: T[] = []
for (const msg of messages) {
if (msg.role !== 'attachment') {
result.push(msg)
continue
}
const attachmentContent = await createProviderAttachmentContent(msg, providerId)
if (!attachmentContent) {
logger.warn('Could not create attachment content for message', { msg })
continue
}
// Merge with previous user message or create new one
const lastMessage = result[result.length - 1]
if (lastMessage && lastMessage.role === 'user') {
const existingContent = ensureContentArray(lastMessage, providerId)
existingContent.push(attachmentContent)
lastMessage.content = existingContent as any
} else {
result.push({
role: 'user',
content: [attachmentContent] as any,
} as T)
}
}
// Ensure all user messages have consistent content format
return result.map((msg) => {
if (msg.role === 'user' && typeof msg.content === 'string') {
return {
...msg,
content: [createTextContent(msg.content, providerId)] as any,
}
}
return msg
})
}
/**
* Transforms attachment messages to text placeholders for non-vision models
*/
function transformAttachmentsToText<T extends TransformableMessage>(messages: T[]): T[] {
const result: T[] = []
for (const msg of messages) {
if (msg.role !== 'attachment') {
result.push(msg)
continue
}
const attachment = msg.attachment
const mimeType = attachment?.mimeType || 'unknown type'
const fileName = attachment?.fileName || 'file'
const lastMessage = result[result.length - 1]
if (lastMessage && lastMessage.role === 'user') {
const currentContent = typeof lastMessage.content === 'string' ? lastMessage.content : ''
lastMessage.content = `${currentContent}\n[Attached file: ${fileName} (${mimeType}) - Note: This model does not support file/image inputs]`
} else {
result.push({
role: 'user',
content: `[Attached file: ${fileName} (${mimeType}) - Note: This model does not support file/image inputs]`,
} as T)
}
}
return result
}
/**
* Ensures a user message has content as an array for multimodal support
*/
function ensureContentArray(msg: TransformableMessage, providerId: ProviderId): any[] {
if (Array.isArray(msg.content)) {
return msg.content
}
if (typeof msg.content === 'string' && msg.content) {
return [createTextContent(msg.content, providerId)]
}
return []
}
/**
* Creates provider-specific text content block
*/
export function createTextContent(text: string, providerId: ProviderId): any {
switch (providerId) {
case 'google':
case 'vertex':
return { text }
default:
return { type: 'text', text }
}
}
/**
* Normalizes attachment data to base64.
* Fetches URLs and converts to base64, extracts base64 from data URLs.
*/
async function normalizeToBase64(
attachment: AttachmentContent
): Promise<NormalizedAttachment | null> {
const { sourceType, data, mimeType } = attachment
if (!data || !data.trim()) {
logger.warn('Empty attachment data')
return null
}
const trimmedData = data.trim()
// Already base64
if (sourceType === 'base64') {
// Handle data URL format: data:mime;base64,xxx
if (trimmedData.startsWith('data:')) {
const match = trimmedData.match(/^data:([^;]+);base64,(.+)$/)
if (match) {
return { base64: match[2], mimeType: match[1] }
}
}
// Raw base64
return { base64: trimmedData, mimeType: mimeType || 'application/octet-stream' }
}
// URL or file path - need to fetch
if (sourceType === 'url' || sourceType === 'file') {
try {
logger.info('Fetching attachment for base64 conversion', {
url: trimmedData.substring(0, 50),
})
const buffer = await downloadFileFromUrl(trimmedData)
const base64 = bufferToBase64(buffer)
return { base64, mimeType: mimeType || 'application/octet-stream' }
} catch (error) {
logger.error('Failed to fetch attachment', { error, url: trimmedData.substring(0, 50) })
return null
}
}
return null
}
/**
* Creates provider-specific attachment content from an attachment message.
* First normalizes to base64, then creates the provider format.
*/
async function createProviderAttachmentContent(
msg: TransformableMessage,
providerId: ProviderId
): Promise<any> {
const attachment = msg.attachment
if (!attachment) return null
// Normalize to base64 first
const normalized = await normalizeToBase64(attachment)
if (!normalized) {
return createTextContent('[Failed to load attachment]', providerId)
}
const { base64, mimeType } = normalized
switch (providerId) {
case 'anthropic':
return createAnthropicContent(base64, mimeType)
case 'google':
case 'vertex':
return createGeminiContent(base64, mimeType)
case 'mistral':
return createMistralContent(base64, mimeType)
case 'bedrock':
return createBedrockContent(base64, mimeType)
default:
// OpenAI format (OpenAI, Azure, xAI, DeepSeek, Cerebras, Groq, OpenRouter, Ollama, vLLM)
return createOpenAIContent(base64, mimeType)
}
}
/**
 * OpenAI-compatible content (images and audio via base64; other types fall back to a text placeholder)
*/
function createOpenAIContent(base64: string, mimeType: string): any {
const isImage = mimeType.startsWith('image/')
const isAudio = mimeType.startsWith('audio/')
if (isImage) {
return {
type: 'image_url',
image_url: {
url: `data:${mimeType};base64,${base64}`,
detail: 'auto',
},
}
}
if (isAudio) {
return {
type: 'input_audio',
input_audio: {
data: base64,
format: mimeType === 'audio/wav' ? 'wav' : 'mp3',
},
}
}
// OpenAI Chat API doesn't support other file types directly
// For PDFs/docs, return a text placeholder
logger.warn(`OpenAI does not support ${mimeType} attachments in Chat API`)
return {
type: 'text',
text: `[Attached file: ${mimeType} - OpenAI Chat API only supports images and audio]`,
}
}
/**
* Anthropic-compatible content (images and PDFs)
*/
function createAnthropicContent(base64: string, mimeType: string): any {
const isImage = mimeType.startsWith('image/')
const isPdf = mimeType === 'application/pdf'
if (isImage) {
return {
type: 'image',
source: {
type: 'base64',
media_type: mimeType,
data: base64,
},
}
}
if (isPdf) {
return {
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: base64,
},
}
}
return {
type: 'text',
text: `[Attached file: ${mimeType} - Anthropic supports images and PDFs only]`,
}
}
/**
* Google Gemini-compatible content (inlineData format)
*/
function createGeminiContent(base64: string, mimeType: string): any {
// Gemini supports a wide range of file types via inlineData
return {
inlineData: {
mimeType,
data: base64,
},
}
}
/**
* Mistral-compatible content (images only, data URL format)
*/
function createMistralContent(base64: string, mimeType: string): any {
const isImage = mimeType.startsWith('image/')
if (isImage) {
// Mistral uses direct string for image_url, not nested object
return {
type: 'image_url',
image_url: `data:${mimeType};base64,${base64}`,
}
}
return {
type: 'text',
text: `[Attached file: ${mimeType} - Mistral supports images only]`,
}
}
/**
* AWS Bedrock-compatible content (images and PDFs)
*/
function createBedrockContent(base64: string, mimeType: string): any {
const isImage = mimeType.startsWith('image/')
const isPdf = mimeType === 'application/pdf'
// Determine image format from mimeType
const getImageFormat = (mime: string): string => {
if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpeg'
if (mime.includes('png')) return 'png'
if (mime.includes('gif')) return 'gif'
if (mime.includes('webp')) return 'webp'
return 'png'
}
if (isImage) {
// Return a marker object that the Bedrock provider will convert to proper format
return {
type: 'bedrock_image',
format: getImageFormat(mimeType),
data: base64,
}
}
if (isPdf) {
return {
type: 'bedrock_document',
format: 'pdf',
data: base64,
}
}
return {
type: 'text',
text: `[Attached file: ${mimeType} - Bedrock supports images and PDFs only]`,
}
}
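
A quick illustrative walk-through (not part of the commit) of the normalize-then-format flow for an Anthropic PDF attachment, with the path and data hypothetical:

const attachment: AttachmentContent = {
  sourceType: 'file',
  data: '/api/files/serve/report.pdf', // illustrative workspace file path
  mimeType: 'application/pdf',
}
// normalizeToBase64 downloads the file and returns roughly:
//   { base64: 'JVBERi0...', mimeType: 'application/pdf' }
// createAnthropicContent then wraps it as:
//   { type: 'document', source: { type: 'base64', media_type: 'application/pdf', data: 'JVBERi0...' } }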

View File

@@ -72,6 +72,75 @@ export function cleanSchemaForGemini(schema: SchemaUnion): SchemaUnion {
return cleanedSchema
}
/**
* Converts an array of content items to Gemini-compatible Part array.
* Handles various formats from the attachment transformer.
*/
function convertContentArrayToGeminiParts(contentArray: any[]): Part[] {
const parts: Part[] = []
for (const item of contentArray) {
if (!item) continue
// Gemini-native text format: { text: "..." }
if (typeof item.text === 'string') {
parts.push({ text: item.text })
continue
}
// OpenAI-style text: { type: 'text', text: '...' }
if (item.type === 'text' && typeof item.text === 'string') {
parts.push({ text: item.text })
continue
}
// Gemini-native inlineData format (from attachment transformer)
if (item.inlineData) {
parts.push({ inlineData: item.inlineData })
continue
}
// Gemini-native fileData format (from attachment transformer)
if (item.fileData) {
parts.push({ fileData: item.fileData })
continue
}
// OpenAI-style image_url - convert to Gemini format
if (item.type === 'image_url' && item.image_url) {
const url = typeof item.image_url === 'string' ? item.image_url : item.image_url?.url
if (url) {
// Check if it's a data URL (base64)
if (url.startsWith('data:')) {
const match = url.match(/^data:([^;]+);base64,(.+)$/)
if (match) {
parts.push({
inlineData: {
mimeType: match[1],
data: match[2],
},
})
}
} else {
// External URL
parts.push({
fileData: {
mimeType: 'image/jpeg', // Default, Gemini will detect actual type
fileUri: url,
},
})
}
}
continue
}
// Unknown type - log warning
logger.warn('Unknown content item type in Gemini conversion:', { type: item.type })
}
return parts
}
/**
* Extracts text content from a Gemini response candidate.
* Filters out thought parts (model reasoning) from the output.
@@ -180,7 +249,13 @@ export function convertToGeminiFormat(request: ProviderRequest): {
} else if (message.role === 'user' || message.role === 'assistant') {
const geminiRole = message.role === 'user' ? 'user' : 'model'
if (message.content) {
// Handle multimodal content (arrays with text/image/file parts)
if (Array.isArray(message.content)) {
const parts: Part[] = convertContentArrayToGeminiParts(message.content)
if (parts.length > 0) {
contents.push({ role: geminiRole, parts })
}
} else if (message.content) {
contents.push({ role: geminiRole, parts: [{ text: message.content }] })
}
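
Illustrative input and output for the new conversion helper (not part of the commit; values hypothetical):

const parts = convertContentArrayToGeminiParts([
  { type: 'text', text: 'Describe this image.' },
  { type: 'image_url', image_url: { url: 'data:image/png;base64,iVBORw0...' } },
])
// Expected result:
// [ { text: 'Describe this image.' },
//   { inlineData: { mimeType: 'image/png', data: 'iVBORw0...' } } ]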

View File

@@ -112,9 +112,9 @@ export interface ProviderToolConfig {
}
/**
* Media content for multimodal messages
* Attachment content (files, images, documents)
*/
export interface MediaContent {
export interface AttachmentContent {
/** Source type: how the data was provided */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data */
@@ -126,10 +126,10 @@ export interface MediaContent {
}
export interface Message {
role: 'system' | 'user' | 'assistant' | 'function' | 'tool' | 'media'
role: 'system' | 'user' | 'assistant' | 'function' | 'tool' | 'attachment'
content: string | null
/** Media content for 'media' role messages */
media?: MediaContent
/** Attachment content for 'attachment' role messages */
attachment?: AttachmentContent
name?: string
function_call?: {
name: string