Use b64

2026-02-04 03:35:04 -05:00 · 2026-01-29 18:10:47 -08:00
parent 4ab3e23cf7
commit 5add92a613
8 changed files with 597 additions and 524 deletions
--- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx
@@ -32,6 +32,7 @@ import type { WandControlHandlers } from '@/app/workspace/[workspaceId]/w/[workf
 import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
 import { useWand } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand'
 import type { SubBlockConfig } from '@/blocks/types'
+import { supportsVision } from '@/providers/utils'
 import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
 import { useSubBlockStore } from '@/stores/workflows/subblock/store'

@@ -50,13 +51,13 @@ const MAX_TEXTAREA_HEIGHT_PX = 320

 /** Pattern to match complete message objects in JSON */
 const COMPLETE_MESSAGE_PATTERN =
-  /"role"\s*:\s*"(system|user|assistant|media)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g
+  /"role"\s*:\s*"(system|user|assistant|attachment)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g

 /** Pattern to match incomplete content at end of buffer */
 const INCOMPLETE_CONTENT_PATTERN = /"content"\s*:\s*"((?:[^"\\]|\\.)*)$/

 /** Pattern to match role before content */
-const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|media)"[^{]*$/
+const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|attachment)"[^{]*$/

 /**
 * Unescapes JSON string content
@@ -65,9 +66,9 @@ const unescapeContent = (str: string): string =>
  str.replace(/\\n/g, '\n').replace(/\\"/g, '"').replace(/\\\\/g, '\\')

 /**
- * Media content for multimodal messages
+ * Attachment content (files, images, documents)
 */
-interface MediaContent {
+interface AttachmentContent {
  /** Source type: how the data was provided */
  sourceType: 'url' | 'base64' | 'file'
  /** The URL or base64 data */
@@ -84,9 +85,9 @@ interface MediaContent {
 * Interface for individual message in the messages array
 */
 interface Message {
-  role: 'system' | 'user' | 'assistant' | 'media'
+  role: 'system' | 'user' | 'assistant' | 'attachment'
  content: string
-  media?: MediaContent
+  attachment?: AttachmentContent
 }

 /**
@@ -122,8 +123,8 @@ export function MessagesInput({
  const [openPopoverIndex, setOpenPopoverIndex] = useState<number | null>(null)
  const { activeWorkflowId } = useWorkflowRegistry()

-  // Local media mode state - basic = FileUpload, advanced = URL/base64 textarea
-  const [mediaMode, setMediaMode] = useState<'basic' | 'advanced'>('basic')
+  // Local attachment mode state - basic = FileUpload, advanced = URL/base64/reference ShortInput
+  const [attachmentMode, setAttachmentMode] = useState<'basic' | 'advanced'>('basic')

  // Workspace files for wand context
  const [workspaceFiles, setWorkspaceFiles] = useState<WorkspaceFile[]>([])
@@ -166,22 +167,49 @@ export function MessagesInput({
      .join('\n')

    if (!filesList) {
-      return 'No media files in workspace. The user can upload files manually after generation.'
+      return 'No files in workspace. The user can upload files manually after generation.'
    }

-    return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the media object. If not selecting a file, omit the fileId field.`
+    return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the attachment object. If not selecting a file, omit the fileId field.`
  }, [workspaceFiles])

-  // Get indices of media messages for subscription
-  const mediaIndices = useMemo(
+  // Get indices of attachment messages for subscription
+  const attachmentIndices = useMemo(
    () =>
      localMessages
-        .map((msg, index) => (msg.role === 'media' ? index : -1))
+        .map((msg, index) => (msg.role === 'attachment' ? index : -1))
        .filter((i) => i !== -1),
    [localMessages]
  )

-  // Subscribe to file upload values for all media messages
+  // Subscribe to model value to check vision capability
+  const modelSupportsVision = useSubBlockStore(
+    useCallback(
+      (state) => {
+        if (!activeWorkflowId) return true // Default to allowing attachments
+        const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {}
+        const modelValue = blockValues.model as string | undefined
+        if (!modelValue) return true // No model selected, allow attachments
+        return supportsVision(modelValue)
+      },
+      [activeWorkflowId, blockId]
+    )
+  )
+
+  // Determine available roles based on model capabilities
+  const availableRoles = useMemo(() => {
+    const baseRoles: Array<'system' | 'user' | 'assistant' | 'attachment'> = [
+      'system',
+      'user',
+      'assistant',
+    ]
+    if (modelSupportsVision) {
+      baseRoles.push('attachment')
+    }
+    return baseRoles
+  }, [modelSupportsVision])
+
+  // Subscribe to file upload values for all attachment messages
  const fileUploadValues = useSubBlockStore(
    useCallback(
      (state) => {
@@ -189,8 +217,8 @@ export function MessagesInput({
        const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {}
        const result: Record<number, { name: string; path: string; type: string; size: number }> =
          {}
-        for (const index of mediaIndices) {
-          const fileUploadKey = `${subBlockId}-media-${index}`
+        for (const index of attachmentIndices) {
+          const fileUploadKey = `${subBlockId}-attachment-${index}`
          const fileValue = blockValues[fileUploadKey]
          if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) {
            result[index] = fileValue as { name: string; path: string; type: string; size: number }
@@ -198,21 +226,21 @@ export function MessagesInput({
        }
        return result
      },
-      [activeWorkflowId, blockId, subBlockId, mediaIndices]
+      [activeWorkflowId, blockId, subBlockId, attachmentIndices]
    )
  )

-  // Effect to sync FileUpload values to message media objects
+  // Effect to sync FileUpload values to message attachment objects
  useEffect(() => {
    if (!activeWorkflowId || isPreview) return

    let hasChanges = false
    const updatedMessages = localMessages.map((msg, index) => {
-      if (msg.role !== 'media') return msg
+      if (msg.role !== 'attachment') return msg

      const uploadedFile = fileUploadValues[index]
      if (uploadedFile) {
-        const newMedia: MediaContent = {
+        const newAttachment: AttachmentContent = {
          sourceType: 'file',
          data: uploadedFile.path,
          mimeType: uploadedFile.type,
@@ -221,16 +249,16 @@ export function MessagesInput({

        // Only update if different
        if (
-          msg.media?.data !== newMedia.data ||
-          msg.media?.sourceType !== newMedia.sourceType ||
-          msg.media?.mimeType !== newMedia.mimeType ||
-          msg.media?.fileName !== newMedia.fileName
+          msg.attachment?.data !== newAttachment.data ||
+          msg.attachment?.sourceType !== newAttachment.sourceType ||
+          msg.attachment?.mimeType !== newAttachment.mimeType ||
+          msg.attachment?.fileName !== newAttachment.fileName
        ) {
          hasChanges = true
          return {
            ...msg,
            content: uploadedFile.name || msg.content,
-            media: newMedia,
+            attachment: newAttachment,
          }
        }
      }
@@ -267,20 +295,22 @@ export function MessagesInput({
      if (Array.isArray(parsed)) {
        const validMessages: Message[] = parsed
          .filter(
-            (m): m is { role: string; content: string; media?: MediaContent } =>
+            (m): m is { role: string; content: string; attachment?: AttachmentContent } =>
              typeof m === 'object' &&
              m !== null &&
              typeof m.role === 'string' &&
              typeof m.content === 'string'
          )
          .map((m) => {
-            const role = ['system', 'user', 'assistant', 'media'].includes(m.role) ? m.role : 'user'
+            const role = ['system', 'user', 'assistant', 'attachment'].includes(m.role)
+              ? m.role
+              : 'user'
            const message: Message = {
              role: role as Message['role'],
              content: m.content,
            }
-            if (m.media) {
-              message.media = m.media
+            if (m.attachment) {
+              message.attachment = m.attachment
            }
            return message
          })
@@ -344,14 +374,14 @@ export function MessagesInput({
    onGeneratedContent: (content) => {
      const validMessages = parseMessages(content)
      if (validMessages) {
-        // Process media messages - only allow fileId to set files, sanitize other attempts
+        // Process attachment messages - only allow fileId to set files, sanitize other attempts
        validMessages.forEach((msg, index) => {
-          if (msg.role === 'media') {
+          if (msg.role === 'attachment') {
            // Check if this is an existing file with valid data (preserve it)
            const hasExistingFile =
-              msg.media?.sourceType === 'file' &&
-              msg.media?.data?.startsWith('/api/') &&
-              msg.media?.fileName
+              msg.attachment?.sourceType === 'file' &&
+              msg.attachment?.data?.startsWith('/api/') &&
+              msg.attachment?.fileName

            if (hasExistingFile) {
              // Preserve existing file data as-is
@@ -359,11 +389,11 @@ export function MessagesInput({
            }

            // Check if wand provided a fileId to select a workspace file
-            if (msg.media?.fileId) {
-              const file = workspaceFiles.find((f) => f.id === msg.media?.fileId)
+            if (msg.attachment?.fileId) {
+              const file = workspaceFiles.find((f) => f.id === msg.attachment?.fileId)
              if (file) {
                // Set the file value in SubBlockStore so FileUpload picks it up
-                const fileUploadKey = `${subBlockId}-media-${index}`
+                const fileUploadKey = `${subBlockId}-attachment-${index}`
                const uploadedFile = {
                  name: file.name,
                  path: file.path,
@@ -372,16 +402,16 @@ export function MessagesInput({
                }
                useSubBlockStore.getState().setValue(blockId, fileUploadKey, uploadedFile)

-                // Clear the media object - the FileUpload will sync the file data via useEffect
-                // DON'T set media.data here as it would appear in the ShortInput (advanced mode)
-                msg.media = undefined
+                // Clear the attachment object - the FileUpload will sync the file data via useEffect
+                // DON'T set attachment.data here as it would appear in the ShortInput (advanced mode)
+                msg.attachment = undefined
                return
              }
            }

-            // Sanitize: clear any media object that isn't a valid existing file or fileId match
+            // Sanitize: clear any attachment object that isn't a valid existing file or fileId match
            // This prevents the LLM from setting arbitrary data/variable references
-            msg.media = undefined
+            msg.attachment = undefined
          }
        })

@@ -458,22 +488,22 @@ export function MessagesInput({
  )

  const updateMessageRole = useCallback(
-    (index: number, role: 'system' | 'user' | 'assistant' | 'media') => {
+    (index: number, role: 'system' | 'user' | 'assistant' | 'attachment') => {
      if (isPreview || disabled) return

      const updatedMessages = [...localMessages]
-      if (role === 'media') {
+      if (role === 'attachment') {
        updatedMessages[index] = {
          ...updatedMessages[index],
          role,
          content: updatedMessages[index].content || '',
-          media: updatedMessages[index].media || {
+          attachment: updatedMessages[index].attachment || {
            sourceType: 'file',
            data: '',
          },
        }
      } else {
-        const { media: _, ...rest } = updatedMessages[index]
+        const { attachment: _, ...rest } = updatedMessages[index]
        updatedMessages[index] = {
          ...rest,
          role,
@@ -761,7 +791,7 @@ export function MessagesInput({
                      </PopoverTrigger>
                      <PopoverContent minWidth={140} align='start'>
                        <div className='flex flex-col gap-[2px]'>
-                          {(['system', 'user', 'assistant', 'media'] as const).map((role) => (
+                          {availableRoles.map((role) => (
                            <PopoverItem
                              key={role}
                              active={message.role === role}
@@ -820,20 +850,20 @@ export function MessagesInput({
                          </Button>
                        </>
                      )}
-                      {/* Mode toggle for media messages */}
-                      {message.role === 'media' && (
+                      {/* Mode toggle for attachment messages */}
+                      {message.role === 'attachment' && (
                        <Tooltip.Root>
                          <Tooltip.Trigger asChild>
                            <Button
                              variant='ghost'
                              onClick={(e: React.MouseEvent) => {
                                e.stopPropagation()
-                                setMediaMode((m) => (m === 'basic' ? 'advanced' : 'basic'))
+                                setAttachmentMode((m) => (m === 'basic' ? 'advanced' : 'basic'))
                              }}
                              disabled={disabled}
                              className='-my-1 -mr-1 h-6 w-6 p-0'
                              aria-label={
-                                mediaMode === 'advanced'
+                                attachmentMode === 'advanced'
                                  ? 'Switch to file upload'
                                  : 'Switch to URL/text input'
                              }
@@ -841,7 +871,7 @@ export function MessagesInput({
                              <ArrowLeftRight
                                className={cn(
                                  'h-3 w-3',
-                                  mediaMode === 'advanced'
+                                  attachmentMode === 'advanced'
                                    ? 'text-[var(--text-primary)]'
                                    : 'text-[var(--text-secondary)]'
                                )}
@@ -850,7 +880,7 @@ export function MessagesInput({
                          </Tooltip.Trigger>
                          <Tooltip.Content side='top'>
                            <p>
-                              {mediaMode === 'advanced'
+                              {attachmentMode === 'advanced'
                                ? 'Switch to file upload'
                                : 'Switch to URL/text input'}
                            </p>
@@ -873,13 +903,13 @@ export function MessagesInput({
                  )}
                </div>

-                {/* Content Input - different for media vs text messages */}
-                {message.role === 'media' ? (
+                {/* Content Input - different for attachment vs text messages */}
+                {message.role === 'attachment' ? (
                  <div className='relative w-full px-[8px] py-[8px]'>
-                    {mediaMode === 'basic' ? (
+                    {attachmentMode === 'basic' ? (
                      <FileUpload
                        blockId={blockId}
-                        subBlockId={`${subBlockId}-media-${index}`}
+                        subBlockId={`${subBlockId}-attachment-${index}`}
                        acceptedTypes='image/*,audio/*,video/*,application/pdf,.doc,.docx,.txt'
                        multiple={false}
                        isPreview={isPreview}
@@ -888,19 +918,21 @@ export function MessagesInput({
                    ) : (
                      <ShortInput
                        blockId={blockId}
-                        subBlockId={`${subBlockId}-media-ref-${index}`}
+                        subBlockId={`${subBlockId}-attachment-ref-${index}`}
                        placeholder='Reference file from previous block...'
                        config={{
-                          id: `${subBlockId}-media-ref-${index}`,
+                          id: `${subBlockId}-attachment-ref-${index}`,
                          type: 'short-input',
                        }}
                        value={
                          // Only show value for variable references, not file uploads
-                          message.media?.sourceType === 'file' ? '' : message.media?.data || ''
+                          message.attachment?.sourceType === 'file'
+                            ? ''
+                            : message.attachment?.data || ''
                        }
                        onChange={(newValue: string) => {
                          const updatedMessages = [...localMessages]
-                          if (updatedMessages[index].role === 'media') {
+                          if (updatedMessages[index].role === 'attachment') {
                            // Determine sourceType based on content
                            let sourceType: 'url' | 'base64' = 'url'
                            if (newValue.startsWith('data:') || newValue.includes(';base64,')) {
@@ -909,8 +941,8 @@ export function MessagesInput({
                            updatedMessages[index] = {
                              ...updatedMessages[index],
                              content: newValue.substring(0, 50),
-                              media: {
-                                ...updatedMessages[index].media,
+                              attachment: {
+                                ...updatedMessages[index].attachment,
                                sourceType,
                                data: newValue,
                              },
--- a/apps/sim/blocks/blocks/agent.ts
+++ b/apps/sim/blocks/blocks/agent.ts
@@ -100,7 +100,7 @@ Current messages: {context}
 RULES:
 1. Generate ONLY a valid JSON array - no markdown, no explanations
 2. Each message object must have "role" and "content" properties
-3. Valid roles are: "system", "user", "assistant", "media"
+3. Valid roles are: "system", "user", "assistant", "attachment"
 4. Content can be as long as necessary - don't truncate
 5. If editing existing messages, preserve structure unless asked to change it
 6. For new agents, create DETAILED, PROFESSIONAL system prompts that include:
@@ -110,15 +110,15 @@ RULES:
   - Critical thinking or quality guidelines
   - How to handle edge cases and uncertainty

-MEDIA MESSAGES:
- Use role "media" to include images, audio, video, or documents in a multimodal conversation
- IMPORTANT: If a media message in the current context has a "media" object with file data, ALWAYS preserve that entire "media" object exactly as-is
- When creating NEW media messages, you can either:
-  1. Just set role to "media" with descriptive content - user will upload the file manually
-  2. Select a file from the available workspace files by including "fileId" in the media object (optional)
+ATTACHMENTS:
+- Use role "attachment" to include images, audio, video, or documents in a multimodal conversation
+- IMPORTANT: If an attachment message in the current context has an "attachment" object with file data, ALWAYS preserve that entire "attachment" object exactly as-is
+- When creating NEW attachment messages, you can either:
+  1. Just set role to "attachment" with descriptive content - user will upload the file manually
+  2. Select a file from the available workspace files by including "fileId" in the attachment object (optional)
 - You do NOT have to select a file - it's completely optional
- Example without file: {"role": "media", "content": "Analyze this image for text and objects"}
- Example with file selection: {"role": "media", "content": "Analyze this image", "media": {"fileId": "abc123"}}
+- Example without file: {"role": "attachment", "content": "Analyze this image for text and objects"}
+- Example with file selection: {"role": "attachment", "content": "Analyze this image", "attachment": {"fileId": "abc123"}}

 EXAMPLES:

@@ -129,7 +129,7 @@ Code reviewer:
 [{"role": "system", "content": "You are a Senior Code Reviewer with expertise in software architecture, security, and best practices. Your role is to provide thorough, constructive code reviews that improve code quality and help developers grow.\\n\\n## Review Methodology\\n\\n1. **Security First**: Check for vulnerabilities including injection attacks, authentication flaws, data exposure, and insecure dependencies.\\n\\n2. **Code Quality**: Evaluate readability, maintainability, adherence to DRY/SOLID principles, and appropriate abstraction levels.\\n\\n3. **Performance**: Identify potential bottlenecks, unnecessary computations, memory leaks, and optimization opportunities.\\n\\n4. **Testing**: Assess test coverage, edge case handling, and testability of the code structure.\\n\\n## Output Format\\n\\n### Summary\\nBrief overview of the code's purpose and overall assessment.\\n\\n### Critical Issues\\nSecurity vulnerabilities or bugs that must be fixed before merging.\\n\\n### Improvements\\nSuggested enhancements with clear explanations of why and how.\\n\\n### Positive Aspects\\nHighlight well-written code to reinforce good practices.\\n\\nBe specific with line references. Provide code examples for suggested changes. Balance critique with encouragement."}, {"role": "user", "content": "<start.input>"}]

 Image analysis agent:
-[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "media", "content": "Analyze this image"}]
+[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "attachment", "content": "Analyze this image"}]

 Return ONLY the JSON array.`,
        placeholder: 'Describe what you want to create or change...',
--- a/apps/sim/executor/handlers/agent/agent-handler.ts
+++ b/apps/sim/executor/handlers/agent/agent-handler.ts
@@ -3,8 +3,6 @@ import { account, mcpServers } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { and, eq, inArray, isNull } from 'drizzle-orm'
 import { createMcpToolId } from '@/lib/mcp/utils'
-import { bufferToBase64 } from '@/lib/uploads/utils/file-utils'
-import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server'
 import { refreshTokenIfNeeded } from '@/app/api/auth/oauth/utils'
 import { getAllBlocks } from '@/blocks'
 import type { BlockOutput } from '@/blocks/types'
@@ -27,6 +25,8 @@ import {
  validateModelProvider,
 } from '@/executor/utils/permission-check'
 import { executeProviderRequest } from '@/providers'
+import { transformAttachmentMessages } from '@/providers/attachment'
+import type { ProviderId } from '@/providers/types'
 import { getProviderFromModel, transformBlockTool } from '@/providers/utils'
 import type { SerializedBlock } from '@/serializer/types'
 import { executeTool } from '@/tools'
@@ -62,9 +62,12 @@ export class AgentBlockHandler implements BlockHandler {
    const streamingConfig = this.getStreamingConfig(ctx, block)
    const rawMessages = await this.buildMessages(ctx, filteredInputs)

-    // Transform media messages to provider-specific format (async for file fetching)
+    // Transform attachment messages to provider-specific format (async for file fetching)
    const messages = rawMessages
-      ? await this.transformMediaMessages(rawMessages, providerId, ctx)
+      ? await transformAttachmentMessages(rawMessages, {
+          providerId: providerId as ProviderId,
+          model,
+        })
      : undefined

    const providerRequest = this.buildProviderRequest({
@@ -848,445 +851,11 @@ export class AgentBlockHandler implements BlockHandler {
        'role' in m &&
        'content' in m &&
        typeof m.role === 'string' &&
-        ['system', 'user', 'assistant', 'media'].includes(m.role)
+        ['system', 'user', 'assistant', 'attachment'].includes(m.role)
      )
    })
  }

-  /**
-   * Transforms messages with 'media' role into provider-compatible format.
-   * Media messages are merged with the preceding or following user message,
-   * or converted to a user message with multimodal content.
-   */
-  private async transformMediaMessages(
-    messages: Message[],
-    providerId: string,
-    ctx: ExecutionContext
-  ): Promise<Message[]> {
-    const result: Message[] = []
-
-    for (let i = 0; i < messages.length; i++) {
-      const msg = messages[i]
-
-      if (msg.role !== 'media') {
-        result.push(msg)
-        continue
-      }
-
-      // Media message - transform based on provider (async for file fetching)
-      const mediaContent = await this.createProviderMediaContent(msg, providerId, ctx)
-      if (!mediaContent) {
-        logger.warn('Could not create media content for message', { msg })
-        continue
-      }
-
-      // Check if we should merge with the previous user message
-      const lastMessage = result[result.length - 1]
-      if (lastMessage && lastMessage.role === 'user') {
-        // Merge media into the previous user message's content array
-        const existingContent = this.ensureContentArray(lastMessage, providerId)
-        existingContent.push(mediaContent)
-        lastMessage.content = existingContent as any
-      } else {
-        // Create a new user message with the media content
-        result.push({
-          role: 'user',
-          content: [mediaContent] as any,
-        })
-      }
-    }
-
-    // Post-process: ensure all user messages have consistent content format
-    return result.map((msg) => {
-      if (msg.role === 'user' && typeof msg.content === 'string') {
-        // Convert string content to provider-specific text format (wrapped in array for multimodal)
-        return {
-          ...msg,
-          content: [this.createTextContent(msg.content, providerId)] as any,
-        }
-      }
-      return msg
-    })
-  }
-
-  /**
-   * Ensures a user message has content as an array for multimodal support
-   */
-  private ensureContentArray(msg: Message, providerId: string): any[] {
-    if (Array.isArray(msg.content)) {
-      return msg.content
-    }
-    if (typeof msg.content === 'string' && msg.content) {
-      return [this.createTextContent(msg.content, providerId)]
-    }
-    return []
-  }
-
-  /**
-   * Creates provider-specific text content block
-   */
-  private createTextContent(text: string, providerId: string): any {
-    switch (providerId) {
-      case 'google':
-      case 'vertex':
-        return { text }
-      case 'anthropic':
-        return { type: 'text', text }
-      default:
-        // OpenAI format (used by most providers)
-        return { type: 'text', text }
-    }
-  }
-
-  /**
-   * Creates provider-specific media content from a media message
-   */
-  private async createProviderMediaContent(
-    msg: Message,
-    providerId: string,
-    ctx: ExecutionContext
-  ): Promise<any> {
-    const media = msg.media
-    if (!media) return null
-
-    const { sourceType, data, mimeType } = media
-
-    // Validate data is not empty
-    if (!data || !data.trim()) {
-      logger.warn('Empty media data, skipping media content')
-      return null
-    }
-
-    // Validate URL format if sourceType is URL
-    if (sourceType === 'url' || sourceType === 'file') {
-      const trimmedData = data.trim()
-      // Must start with http://, https://, or / (relative path for workspace files)
-      if (
-        !trimmedData.startsWith('http://') &&
-        !trimmedData.startsWith('https://') &&
-        !trimmedData.startsWith('/')
-      ) {
-        logger.warn('Invalid URL format for media content', { data: trimmedData.substring(0, 50) })
-        // Try to salvage by treating as text
-        return { type: 'text', text: `[Invalid media URL: ${trimmedData.substring(0, 30)}...]` }
-      }
-    }
-
-    // Validate base64 format
-    if (sourceType === 'base64') {
-      const trimmedData = data.trim()
-      // Should be a data URL or raw base64
-      if (
-        !trimmedData.startsWith('data:') &&
-        !/^[A-Za-z0-9+/]+=*$/.test(trimmedData.replace(/\s/g, ''))
-      ) {
-        logger.warn('Invalid base64 format for media content', {
-          data: trimmedData.substring(0, 50),
-        })
-        return { type: 'text', text: `[Invalid base64 data]` }
-      }
-    }
-
-    switch (providerId) {
-      case 'anthropic':
-        return this.createAnthropicMediaContent(sourceType, data, mimeType, ctx)
-
-      case 'google':
-      case 'vertex':
-        return this.createGeminiMediaContent(sourceType, data, mimeType)
-
-      case 'mistral':
-        return this.createMistralMediaContent(sourceType, data, mimeType)
-
-      case 'bedrock':
-        return this.createBedrockMediaContent(sourceType, data, mimeType)
-
-      default:
-        // OpenAI format (used by OpenAI, Azure, xAI, Groq, etc.)
-        return this.createOpenAIMediaContent(sourceType, data, mimeType)
-    }
-  }
-
-  /**
-   * Creates OpenAI-compatible media content
-   */
-  private createOpenAIMediaContent(sourceType: string, data: string, mimeType?: string): any {
-    const isImage = mimeType?.startsWith('image/')
-    const isAudio = mimeType?.startsWith('audio/')
-    // Treat 'file' as 'url' since workspace files are served via URL
-    const isUrl = sourceType === 'url' || sourceType === 'file'
-
-    if (isImage) {
-      return {
-        type: 'image_url',
-        image_url: { url: data, detail: 'auto' },
-      }
-    }
-
-    if (isAudio) {
-      const base64Data = data.includes(',') ? data.split(',')[1] : data
-      return {
-        type: 'input_audio',
-        input_audio: {
-          data: base64Data,
-          format: mimeType === 'audio/wav' ? 'wav' : 'mp3',
-        },
-      }
-    }
-
-    // For documents/files, include as URL
-    if (sourceType === 'url') {
-      return {
-        type: 'file',
-        file: { url: data },
-      }
-    }
-
-    // Base64 file - some providers may not support this directly
-    logger.warn('Base64 file content may not be supported by this provider')
-    return {
-      type: 'text',
-      text: `[File: ${mimeType || 'unknown type'}]`,
-    }
-  }
-
-  /**
-   * Creates Anthropic-compatible media content
-   * Anthropic requires base64 for internal/relative URLs since they can't fetch them
-   */
-  private async createAnthropicMediaContent(
-    sourceType: string,
-    data: string,
-    mimeType?: string,
-    ctx?: ExecutionContext
-  ): Promise<any> {
-    const isImage = mimeType?.startsWith('image/')
-    const isPdf = mimeType === 'application/pdf'
-    const isInternalUrl = data.startsWith('/')
-    const isExternalHttps = data.startsWith('https://')
-
-    // For internal URLs (workspace files), fetch and convert to base64
-    // Anthropic only supports external HTTPS URLs, not relative paths
-    if ((sourceType === 'url' || sourceType === 'file') && isInternalUrl) {
-      try {
-        logger.info('Fetching internal file for Anthropic base64 conversion', {
-          path: data.substring(0, 50),
-        })
-        const buffer = await downloadFileFromUrl(data)
-        const base64Data = bufferToBase64(buffer)
-
-        if (isImage) {
-          return {
-            type: 'image',
-            source: {
-              type: 'base64',
-              media_type: mimeType || 'image/png',
-              data: base64Data,
-            },
-          }
-        }
-
-        if (isPdf) {
-          return {
-            type: 'document',
-            source: {
-              type: 'base64',
-              media_type: 'application/pdf',
-              data: base64Data,
-            },
-          }
-        }
-
-        // Other file types - return as text fallback
-        return {
-          type: 'text',
-          text: `[File: ${mimeType || 'unknown type'}]`,
-        }
-      } catch (error) {
-        logger.error('Failed to fetch file for Anthropic', { error, path: data.substring(0, 50) })
-        return {
-          type: 'text',
-          text: `[Failed to load file: ${mimeType || 'unknown type'}]`,
-        }
-      }
-    }
-
-    // For external HTTPS URLs, Anthropic can fetch them directly
-    if ((sourceType === 'url' || sourceType === 'file') && isExternalHttps) {
-      if (isImage) {
-        return {
-          type: 'image',
-          source: { type: 'url', url: data },
-        }
-      }
-      if (isPdf) {
-        return {
-          type: 'document',
-          source: { type: 'url', url: data },
-        }
-      }
-    }
-
-    // Already base64 encoded
-    if (sourceType === 'base64') {
-      const base64Data = data.includes(',') ? data.split(',')[1] : data
-      if (isImage) {
-        return {
-          type: 'image',
-          source: {
-            type: 'base64',
-            media_type: mimeType || 'image/png',
-            data: base64Data,
-          },
-        }
-      }
-      if (isPdf) {
-        return {
-          type: 'document',
-          source: {
-            type: 'base64',
-            media_type: 'application/pdf',
-            data: base64Data,
-          },
-        }
-      }
-    }
-
-    // Fallback for unsupported types
-    return {
-      type: 'text',
-      text: `[File: ${mimeType || 'unknown type'}]`,
-    }
-  }
-
-  /**
-   * Creates Google Gemini-compatible media content
-   */
-  private createGeminiMediaContent(sourceType: string, data: string, mimeType?: string): any {
-    // Treat 'file' as 'url' since workspace files are served via URL
-    const isUrl = sourceType === 'url' || sourceType === 'file'
-
-    if (isUrl) {
-      return {
-        fileData: {
-          mimeType: mimeType || 'application/octet-stream',
-          fileUri: data,
-        },
-      }
-    }
-
-    // base64
-    const base64Data = data.includes(',') ? data.split(',')[1] : data
-    return {
-      inlineData: {
-        mimeType: mimeType || 'application/octet-stream',
-        data: base64Data,
-      },
-    }
-  }
-
-  /**
-   * Creates Mistral-compatible media content
-   * Note: Mistral uses a simplified format where image_url is a direct string,
-   * NOT a nested object like OpenAI
-   */
-  private createMistralMediaContent(sourceType: string, data: string, mimeType?: string): any {
-    const isImage = mimeType?.startsWith('image/')
-    // Treat 'file' as 'url' since workspace files are served via URL
-    const isUrl = sourceType === 'url' || sourceType === 'file'
-
-    if (isImage) {
-      if (isUrl) {
-        // Mistral uses direct string for image_url, not nested object
-        return {
-          type: 'image_url',
-          image_url: data,
-        }
-      }
-      // Base64 - Mistral accepts data URLs directly
-      const base64Data = data.includes(',')
-        ? data
-        : `data:${mimeType || 'image/png'};base64,${data}`
-      return {
-        type: 'image_url',
-        image_url: base64Data,
-      }
-    }
-
-    // Fallback for non-image types
-    return {
-      type: 'text',
-      text: `[File: ${mimeType || 'unknown type'}]`,
-    }
-  }
-
-  /**
-   * Creates AWS Bedrock Converse API-compatible media content
-   * Bedrock uses a different structure: { image: { format, source: { bytes } } }
-   * Note: The actual bytes conversion happens in the provider layer
-   */
-  private createBedrockMediaContent(sourceType: string, data: string, mimeType?: string): any {
-    const isImage = mimeType?.startsWith('image/')
-    // Treat 'file' as 'url' since workspace files are served via URL
-    const isUrl = sourceType === 'url' || sourceType === 'file'
-
-    // Determine format from mimeType
-    const getFormat = (mime?: string): string => {
-      if (!mime) return 'png'
-      if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpeg'
-      if (mime.includes('png')) return 'png'
-      if (mime.includes('gif')) return 'gif'
-      if (mime.includes('webp')) return 'webp'
-      return 'png'
-    }
-
-    if (isImage) {
-      if (isUrl) {
-        // For URLs, Bedrock needs S3 URIs or we need to fetch and convert
-        // Mark this for the provider layer to handle
-        return {
-          type: 'bedrock_image',
-          format: getFormat(mimeType),
-          sourceType: 'url',
-          url: data,
-        }
-      }
-      // Base64 - extract raw base64 data
-      const base64Data = data.includes(',') ? data.split(',')[1] : data
-      return {
-        type: 'bedrock_image',
-        format: getFormat(mimeType),
-        sourceType: 'base64',
-        data: base64Data,
-      }
-    }
-
-    // Documents (PDFs) - Bedrock supports document content type
-    if (mimeType === 'application/pdf') {
-      if (isUrl) {
-        return {
-          type: 'bedrock_document',
-          format: 'pdf',
-          sourceType: 'url',
-          url: data,
-        }
-      }
-      const base64Data = data.includes(',') ? data.split(',')[1] : data
-      return {
-        type: 'bedrock_document',
-        format: 'pdf',
-        sourceType: 'base64',
-        data: base64Data,
-      }
-    }
-
-    // Fallback for unsupported types
-    return {
-      type: 'text',
-      text: `[File: ${mimeType || 'unknown type'}]`,
-    }
-  }
-
  private processMemories(memories: any): Message[] {
    if (!memories) return []

--- a/apps/sim/executor/handlers/agent/types.ts
+++ b/apps/sim/executor/handlers/agent/types.ts
@@ -43,9 +43,9 @@ export interface ToolInput {
 }

 /**
- * Media content for multimodal messages
+ * Attachment content (files, images, documents)
 */
-export interface MediaContent {
+export interface AttachmentContent {
  /** Source type: how the data was provided */
  sourceType: 'url' | 'base64' | 'file'
  /** The URL or base64 data */
@@ -57,10 +57,10 @@ export interface MediaContent {
 }

 export interface Message {
-  role: 'system' | 'user' | 'assistant' | 'media'
+  role: 'system' | 'user' | 'assistant' | 'attachment'
  content: string
-  /** Media content for 'media' role messages */
-  media?: MediaContent
+  /** Attachment content for 'attachment' role messages */
+  attachment?: AttachmentContent
  executionId?: string
  function_call?: any
  tool_calls?: any[]
--- a/apps/sim/providers/anthropic/index.ts
+++ b/apps/sim/providers/anthropic/index.ts
@@ -109,7 +109,7 @@ export const anthropicProvider: ProviderConfig = {
            ],
          })
        } else {
-          // Handle content that's already in array format (from transformMediaMessages)
+          // Handle content that's already in array format (from transformAttachmentMessages)
          const content = Array.isArray(msg.content)
            ? msg.content
            : msg.content
--- a/apps/sim/providers/attachment.ts
+++ b/apps/sim/providers/attachment.ts
@@ -0,0 +1,397 @@
+/**
+ * Centralized attachment content transformation for all providers.
+ *
+ * Strategy: Always normalize to base64 first, then create provider-specific formats.
+ * This eliminates URL accessibility issues and simplifies provider handling.
+ */
+
+import { createLogger } from '@sim/logger'
+import { bufferToBase64 } from '@/lib/uploads/utils/file-utils'
+import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server'
+import { supportsVision } from '@/providers/models'
+import type { ProviderId } from '@/providers/types'
+
+const logger = createLogger('AttachmentTransformer')
+
+/**
+ * Minimal message shape the attachment transformer works on; other fields are carried along.
+ */
+interface TransformableMessage {
+  role: string // messages whose role is 'attachment' are the ones that get transformed
+  content: string | any[] | null // plain text, or an array of provider-specific content parts
+  attachment?: AttachmentContent // payload read for 'attachment' role messages
+  [key: string]: any // allows additional provider-specific message fields
+}
+
+/**
+ * Attachment payload (files, images, documents) carried by an 'attachment' role message.
+ */
+export interface AttachmentContent {
+  sourceType: 'url' | 'base64' | 'file' // 'url'/'file' are downloaded; 'base64' is inline
+  data: string // URL or file path to fetch, or base64 text (raw or as a data: URL)
+  mimeType?: string // 'application/octet-stream' is assumed when omitted
+  fileName?: string // shown in the text placeholder used for models without attachment support
+}
+
+/**
+ * Attachment after normalization: the bytes are always available as base64 text.
+ */
+interface NormalizedAttachment {
+  base64: string // base64 payload (a recognized data: URL header is stripped)
+  mimeType: string // resolved MIME type; 'application/octet-stream' when none was provided
+}
+
+/**
+ * Options for transformAttachmentMessages.
+ */
+interface AttachmentTransformConfig {
+  providerId: ProviderId // selects the provider-specific content part format
+  model: string // checked for attachment support before any transformation is attempted
+}
+
+/**
+ * Whether `model` accepts attachments; delegates entirely to `supportsVision(model)`.
+ */
+export function modelSupportsAttachments(model: string): boolean {
+  return supportsVision(model)
+}
+
+/**
+ * Converts 'attachment' role messages into provider-specific content parts.
+ *
+ * Each attachment is appended to the immediately preceding user message, or becomes a new user
+ * message when there is none. When merging, the preceding message is replaced by a copy, so a
+ * caller-owned message object and its content array are left untouched on this path.
+ *
+ * @param messages - Conversation messages, possibly including 'attachment' role entries
+ * @param config - Target provider and model; unsupported models get text placeholders instead
+ * @returns The transformed message list (no 'attachment' role entries remain)
+ */
+export async function transformAttachmentMessages<T extends TransformableMessage>(
+  messages: T[],
+  config: AttachmentTransformConfig
+): Promise<T[]> {
+  const { providerId, model } = config
+
+  if (!modelSupportsAttachments(model)) {
+    return transformAttachmentsToText(messages) as T[]
+  }
+
+  const result: T[] = []
+
+  for (const msg of messages) {
+    if (msg.role !== 'attachment') {
+      result.push(msg)
+      continue
+    }
+
+    const attachmentContent = await createProviderAttachmentContent(msg, providerId)
+    if (!attachmentContent) {
+      logger.warn('Could not create attachment content for message', { msg })
+      continue
+    }
+
+    const lastIndex = result.length - 1
+    const lastMessage = result[lastIndex]
+    if (lastMessage && lastMessage.role === 'user') {
+      // Swap in a copy: lastMessage may be the caller's own object, which must stay untouched
+      const parts = [...ensureContentArray(lastMessage, providerId), attachmentContent]
+      result[lastIndex] = { ...lastMessage, content: parts as any }
+    } else {
+      result.push({ role: 'user', content: [attachmentContent] as any } as T)
+    }
+  }
+
+  // Ensure all user messages have consistent content format
+  return result.map((msg) =>
+    msg.role === 'user' && typeof msg.content === 'string'
+      ? { ...msg, content: [createTextContent(msg.content, providerId)] as any }
+      : msg
+  )
+}
+
+/**
+ * Replaces 'attachment' role messages with text placeholders for models that cannot take files.
+ * The note goes onto a copy of the preceding user message (array content is kept) or a new one.
+ */
+function transformAttachmentsToText<T extends TransformableMessage>(messages: T[]): T[] {
+  const result: T[] = []
+
+  for (const msg of messages) {
+    if (msg.role !== 'attachment') {
+      result.push(msg)
+      continue
+    }
+
+    const mimeType = msg.attachment?.mimeType || 'unknown type'
+    const fileName = msg.attachment?.fileName || 'file'
+    const note = `[Attached file: ${fileName} (${mimeType}) - Note: This model does not support file/image inputs]`
+    const lastIndex = result.length - 1
+    const lastMessage = result[lastIndex]
+    if (lastMessage && lastMessage.role === 'user') {
+      const prior = lastMessage.content
+      const merged = Array.isArray(prior) ? [...prior, { type: 'text', text: note }] : null
+      const content = merged ?? (prior ? `${prior}\n${note}` : note)
+      result[lastIndex] = { ...lastMessage, content }
+    } else {
+      result.push({ role: 'user', content: note } as T)
+    }
+  }
+
+  return result
+}
+
+/**
+ * Returns the message content as an array of content parts. An existing array is returned as-is
+ * (same reference); a non-empty string is wrapped in one text part; anything else yields [].
+ */
+function ensureContentArray(msg: TransformableMessage, providerId: ProviderId): any[] {
+  const { content } = msg
+  if (Array.isArray(content)) return content
+
+  // null and '' both mean "no existing parts"
+  if (typeof content !== 'string' || content === '') return []
+  return [createTextContent(content, providerId)]
+}
+
+/**
+ * Builds a text content part in the target provider's shape.
+ *
+ * @param text - The text to wrap
+ * @param providerId - 'google' and 'vertex' get `{ text }`; every other provider `{ type, text }`
+ * @returns The provider-specific text part
+ */
+export function createTextContent(text: string, providerId: ProviderId): any {
+  const usesGeminiParts = providerId === 'google' || providerId === 'vertex'
+  if (usesGeminiParts) return { text }
+  return { type: 'text', text }
+}
+
+/**
+ * Normalizes attachment data to base64. URLs and files are downloaded; a `data:` URL is reduced
+ * to the payload after its first comma (its header MIME type wins over `attachment.mimeType`).
+ * @returns The normalized attachment, or null when the data is empty or cannot be obtained
+ */
+async function normalizeToBase64(
+  attachment: AttachmentContent
+): Promise<NormalizedAttachment | null> {
+  const { sourceType, data, mimeType } = attachment
+  const trimmedData = data?.trim()
+  const fallbackMimeType = mimeType || 'application/octet-stream'
+
+  if (!trimmedData) {
+    logger.warn('Empty attachment data')
+    return null
+  }
+
+  if (sourceType === 'base64') {
+    if (!trimmedData.startsWith('data:')) {
+      return { base64: trimmedData, mimeType: fallbackMimeType }
+    }
+    // Take everything after the first comma so an unusual header never leaks into the payload
+    const commaIndex = trimmedData.indexOf(',')
+    const base64 = commaIndex === -1 ? '' : trimmedData.slice(commaIndex + 1)
+    if (!base64) {
+      logger.warn('Malformed data URL attachment')
+      return null
+    }
+    const headerMimeType = trimmedData.slice('data:'.length, commaIndex).split(';')[0]
+    return { base64, mimeType: headerMimeType || fallbackMimeType }
+  }
+
+  if (sourceType === 'url' || sourceType === 'file') {
+    const urlPreview = trimmedData.substring(0, 50)
+    try {
+      logger.info('Fetching attachment for base64 conversion', { url: urlPreview })
+      const buffer = await downloadFileFromUrl(trimmedData)
+      return { base64: bufferToBase64(buffer), mimeType: fallbackMimeType }
+    } catch (error) {
+      logger.error('Failed to fetch attachment', { error, url: urlPreview })
+      return null
+    }
+  }
+
+  return null
+}
+
+/**
+ * Builds the provider-specific content part for one 'attachment' role message.
+ *
+ * The attachment is normalized to base64 and then handed to the builder for `providerId`.
+ *
+ * @param msg - Message whose `attachment` field carries the payload
+ * @param providerId - Provider whose content format should be produced
+ * @returns The content part; a "[Failed to load attachment]" text part when normalization
+ *   fails; or null when the message has no attachment
+ */
+async function createProviderAttachmentContent(
+  msg: TransformableMessage,
+  providerId: ProviderId
+): Promise<any> {
+  if (!msg.attachment) return null
+
+  const normalized = await normalizeToBase64(msg.attachment)
+  if (!normalized) {
+    return createTextContent('[Failed to load attachment]', providerId)
+  }
+
+  const { base64, mimeType } = normalized
+  if (providerId === 'anthropic') {
+    return createAnthropicContent(base64, mimeType)
+  }
+  if (providerId === 'google' || providerId === 'vertex') {
+    return createGeminiContent(base64, mimeType)
+  }
+  if (providerId === 'mistral') {
+    return createMistralContent(base64, mimeType)
+  }
+  if (providerId === 'bedrock') {
+    return createBedrockContent(base64, mimeType)
+  }
+
+  // Every remaining provider receives the OpenAI-style content format
+  return createOpenAIContent(base64, mimeType)
+}
+
+/**
+ * OpenAI-compatible content: images as base64 data URLs, WAV/MP3 audio as `input_audio`.
+ * Everything else, including audio in other formats, becomes a text placeholder, because
+ * `input_audio.format` only accepts 'wav' or 'mp3' and other audio must not be labeled 'mp3'.
+ */
+function createOpenAIContent(base64: string, mimeType: string): any {
+  if (mimeType.startsWith('image/')) {
+    return {
+      type: 'image_url',
+      image_url: { url: `data:${mimeType};base64,${base64}`, detail: 'auto' },
+    }
+  }
+
+  // Match on the bare MIME type so casing or parameters ("audio/mpeg; ...") do not matter
+  const bareType = mimeType.toLowerCase().split(';')[0].trim()
+  const audioFormat = ['audio/wav', 'audio/x-wav', 'audio/wave'].includes(bareType)
+    ? 'wav'
+    : ['audio/mpeg', 'audio/mp3'].includes(bareType)
+      ? 'mp3'
+      : null
+
+  if (audioFormat) {
+    return {
+      type: 'input_audio',
+      input_audio: { data: base64, format: audioFormat },
+    }
+  }
+
+  // OpenAI Chat API doesn't support other file types directly; return a text placeholder
+  logger.warn(`OpenAI does not support ${mimeType} attachments in Chat API`)
+  return {
+    type: 'text',
+    text: `[Attached file: ${mimeType} - OpenAI Chat API only supports images and WAV/MP3 audio]`,
+  }
+}
+
+/**
+ * Builds an Anthropic content block from base64 attachment data.
+ *
+ * Images and PDFs are both sent as base64 sources ('image' and 'document' blocks respectively);
+ * every other MIME type is replaced by a text placeholder.
+ * @param base64 - Attachment bytes encoded as base64
+ * @param mimeType - MIME type of the attachment
+ * @returns An 'image', 'document', or 'text' content block
+ */
+function createAnthropicContent(base64: string, mimeType: string): any {
+  let blockType: 'image' | 'document' | null = null
+  if (mimeType.startsWith('image/')) {
+    blockType = 'image'
+  } else if (mimeType === 'application/pdf') {
+    blockType = 'document'
+  }
+
+  if (blockType === null) {
+    return {
+      type: 'text',
+      text: `[Attached file: ${mimeType} - Anthropic supports images and PDFs only]`,
+    }
+  }
+
+  // media_type is the MIME type itself in both cases ('application/pdf' for documents)
+  return {
+    type: blockType,
+    source: {
+      type: 'base64',
+      media_type: mimeType,
+      data: base64,
+    },
+  }
+}
+
+/**
+ * Builds a Gemini `inlineData` part from base64 attachment data.
+ * No MIME type filtering happens here: every attachment is passed through as inline data.
+ *
+ * @param base64 - Attachment bytes encoded as base64
+ * @param mimeType - MIME type of the attachment
+ * @returns A part of the form `{ inlineData: { mimeType, data } }`
+ */
+function createGeminiContent(base64: string, mimeType: string): any {
+  const inlineData = { mimeType, data: base64 }
+  return { inlineData }
+}
+
+/**
+ * Builds a Mistral content part from base64 attachment data.
+ * Images become an `image_url` part whose value is a data URL string; any other MIME type
+ * is replaced by a text placeholder.
+ *
+ * @param base64 - Attachment bytes encoded as base64
+ * @param mimeType - MIME type of the attachment
+ * @returns An 'image_url' or 'text' content part
+ */
+function createMistralContent(base64: string, mimeType: string): any {
+  if (!mimeType.startsWith('image/')) {
+    const text = `[Attached file: ${mimeType} - Mistral supports images only]`
+    return { type: 'text', text }
+  }
+
+  // image_url is a plain string here, not the nested { url } object of the OpenAI format
+  const dataUrl = `data:${mimeType};base64,${base64}`
+  return { type: 'image_url', image_url: dataUrl }
+}
+
+/**
+ * AWS Bedrock-compatible content (PNG/JPEG/GIF/WebP images and PDFs).
+ *
+ * Returns marker objects ('bedrock_image' / 'bedrock_document') intended for the Bedrock
+ * provider to convert to the proper format. Image types outside the four formats above get
+ * the text placeholder instead of being labeled 'png', which would misdescribe the bytes.
+ *
+ * @param base64 - Attachment bytes encoded as base64
+ * @param mimeType - MIME type of the attachment
+ * @returns A 'bedrock_image', 'bedrock_document', or 'text' content object
+ */
+function createBedrockContent(base64: string, mimeType: string): any {
+  // Resolve the image format from the MIME type; null means "not a supported image"
+  const getImageFormat = (mime: string): string | null => {
+    if (!mime.startsWith('image/')) return null
+    if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpeg'
+    return ['png', 'gif', 'webp'].find((format) => mime.includes(format)) ?? null
+  }
+
+  const imageFormat = getImageFormat(mimeType)
+  if (imageFormat) {
+    return {
+      type: 'bedrock_image',
+      format: imageFormat,
+      data: base64,
+    }
+  }
+
+  // PDF is the only document type handled here
+  if (mimeType === 'application/pdf') {
+    return { type: 'bedrock_document', format: 'pdf', data: base64 }
+  }
+
+  return {
+    type: 'text',
+    text: `[Attached file: ${mimeType} - Bedrock supports images and PDFs only]`,
+  }
+}
--- a/apps/sim/providers/google/utils.ts
+++ b/apps/sim/providers/google/utils.ts
@@ -72,6 +72,75 @@ export function cleanSchemaForGemini(schema: SchemaUnion): SchemaUnion {
  return cleanedSchema
 }

+/**
+ * Converts an array of content items to a Gemini-compatible Part array.
+ *
+ * Recognized items: anything with a string `text` (Gemini `{ text }` and OpenAI-style
+ * `{ type: 'text', text }` alike), `inlineData`, `fileData`, and OpenAI-style `image_url`
+ * (data URLs become `inlineData`, other URLs become `fileData`). Falsy items are skipped;
+ * unrecognized items and unparseable data URLs are dropped with a warning.
+ */
+function convertContentArrayToGeminiParts(contentArray: any[]): Part[] {
+  const parts: Part[] = []
+
+  for (const item of contentArray) {
+    if (!item) continue
+
+    // Text in either Gemini-native or OpenAI style: both carry a string `text` field
+    if (typeof item.text === 'string') {
+      parts.push({ text: item.text })
+      continue
+    }
+
+    // Gemini-native inlineData format (from attachment transformer)
+    if (item.inlineData) {
+      parts.push({ inlineData: item.inlineData })
+      continue
+    }
+
+    // Gemini-native fileData format
+    if (item.fileData) {
+      parts.push({ fileData: item.fileData })
+      continue
+    }
+
+    // OpenAI-style image_url - convert to Gemini format
+    if (item.type === 'image_url' && item.image_url) {
+      const url = typeof item.image_url === 'string' ? item.image_url : item.image_url?.url
+      if (typeof url === 'string' && url) {
+        if (url.startsWith('data:')) {
+          const match = url.match(/^data:([^;]+);base64,(.+)$/)
+          if (match) {
+            parts.push({
+              inlineData: {
+                mimeType: match[1],
+                data: match[2],
+              },
+            })
+          } else {
+            // Report the drop instead of silently losing the image
+            logger.warn('Skipping image_url with unparseable data URL in Gemini conversion')
+          }
+        } else {
+          // External URL
+          parts.push({
+            fileData: {
+              mimeType: 'image/jpeg', // Default, Gemini will detect actual type
+              fileUri: url,
+            },
+          })
+        }
+      }
+      continue
+    }
+
+    // Unknown type - log warning
+    logger.warn('Unknown content item type in Gemini conversion:', { type: item.type })
+  }
+
+  return parts
+}
+
 /**
 * Extracts text content from a Gemini response candidate.
 * Filters out thought parts (model reasoning) from the output.
@@ -180,7 +249,13 @@ export function convertToGeminiFormat(request: ProviderRequest): {
      } else if (message.role === 'user' || message.role === 'assistant') {
        const geminiRole = message.role === 'user' ? 'user' : 'model'

-        if (message.content) {
+        // Handle multimodal content (arrays with text/image/file parts)
+        if (Array.isArray(message.content)) {
+          const parts: Part[] = convertContentArrayToGeminiParts(message.content)
+          if (parts.length > 0) {
+            contents.push({ role: geminiRole, parts })
+          }
+        } else if (message.content) {
          contents.push({ role: geminiRole, parts: [{ text: message.content }] })
        }

--- a/apps/sim/providers/types.ts
+++ b/apps/sim/providers/types.ts
@@ -112,9 +112,9 @@ export interface ProviderToolConfig {
 }

 /**
- * Media content for multimodal messages
+ * Attachment content (files, images, documents)
 */
-export interface MediaContent {
+export interface AttachmentContent {
  /** Source type: how the data was provided */
  sourceType: 'url' | 'base64' | 'file'
  /** The URL or base64 data */
@@ -126,10 +126,10 @@ export interface MediaContent {
 }

 export interface Message {
-  role: 'system' | 'user' | 'assistant' | 'function' | 'tool' | 'media'
+  role: 'system' | 'user' | 'assistant' | 'function' | 'tool' | 'attachment'
  content: string | null
-  /** Media content for 'media' role messages */
-  media?: MediaContent
+  /** Attachment content for 'attachment' role messages */
+  attachment?: AttachmentContent
  name?: string
  function_call?: {
    name: string