Use b64

Works
Fix media
2026-01-30 09:18:01 -05:00 · 2026-01-29 18:10:47 -08:00 · 2026-01-29 17:35:34 -08:00 · 2026-01-29 17:20:38 -08:00 · 2026-01-29 17:19:29 -08:00 · 2026-01-29 13:19:48 -08:00
14 changed files with 1404 additions and 245 deletions
--- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx
@@ -7,13 +7,24 @@ import {
  useRef,
  useState,
 } from 'react'
+import { createLogger } from '@sim/logger'
 import { isEqual } from 'lodash'
-import { ChevronDown, ChevronsUpDown, ChevronUp, Plus } from 'lucide-react'
-import { Button, Popover, PopoverContent, PopoverItem, PopoverTrigger } from '@/components/emcn'
+import { ArrowLeftRight, ChevronDown, ChevronsUpDown, ChevronUp, Plus } from 'lucide-react'
+import { useParams } from 'next/navigation'
+import {
+  Button,
+  Popover,
+  PopoverContent,
+  PopoverItem,
+  PopoverTrigger,
+  Tooltip,
+} from '@/components/emcn'
 import { Trash } from '@/components/emcn/icons/trash'
 import { cn } from '@/lib/core/utils/cn'
 import { EnvVarDropdown } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/env-var-dropdown'
+import { FileUpload } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/file-upload/file-upload'
 import { formatDisplayText } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/formatted-text'
+import { ShortInput } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input'
 import { TagDropdown } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/tag-dropdown/tag-dropdown'
 import { useSubBlockInput } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/hooks/use-sub-block-input'
 import { useSubBlockValue } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/hooks/use-sub-block-value'
@@ -21,19 +32,32 @@ import type { WandControlHandlers } from '@/app/workspace/[workspaceId]/w/[workf
 import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
 import { useWand } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand'
 import type { SubBlockConfig } from '@/blocks/types'
+import { supportsVision } from '@/providers/utils'
+import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
+import { useSubBlockStore } from '@/stores/workflows/subblock/store'
+
+const logger = createLogger('MessagesInput')

 const MIN_TEXTAREA_HEIGHT_PX = 80
+
+/** Workspace file record from API */
+interface WorkspaceFile {
+  id: string
+  name: string
+  path: string
+  type: string
+}
 const MAX_TEXTAREA_HEIGHT_PX = 320

 /** Pattern to match complete message objects in JSON */
 const COMPLETE_MESSAGE_PATTERN =
-  /"role"\s*:\s*"(system|user|assistant)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g
+  /"role"\s*:\s*"(system|user|assistant|attachment)"[^}]*"content"\s*:\s*"((?:[^"\\]|\\.)*)"/g

 /** Pattern to match incomplete content at end of buffer */
 const INCOMPLETE_CONTENT_PATTERN = /"content"\s*:\s*"((?:[^"\\]|\\.)*)$/

 /** Pattern to match role before content */
-const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant)"[^{]*$/
+const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|attachment)"[^{]*$/

 /**
 * Unescapes JSON string content
@@ -41,41 +65,46 @@ const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant)"[^{]*
 // Decodes the JSON escapes \n, \" and \\ in a single left-to-right pass.
 // Chained replace() calls would first turn the tail of an escaped backslash followed
 // by "n" (\\n) into a newline; scanning once keeps that sequence a literal backslash + "n".
 // Any other backslash sequence (e.g. \t) is left untouched, as before.
 const unescapeContent = (str: string): string =>
   str.replace(/\\([n"\\])/g, (_match, ch: string) => (ch === 'n' ? '\n' : ch))

+/**
+ * Attachment content (files, images, documents) carried in a Message's optional `attachment` field
+ */
+interface AttachmentContent {
+  /** Source type: how the data was provided (a URL, inline base64, or an uploaded file) */
+  sourceType: 'url' | 'base64' | 'file'
+  /** The URL or base64 data; for 'file' the FileUpload sync effect stores the uploaded file's path here */
+  data: string
+  /** MIME type (e.g., 'image/png', 'application/pdf', 'audio/mp3') */
+  mimeType?: string
+  /** Optional filename for file uploads; copied from the uploaded file's name by the sync effect */
+  fileName?: string
+  /** Optional workspace file ID; wand output uses it to pick a file from the loaded workspace file list */
+  fileId?: string
+}
+
 /**
 * Interface for individual message in the messages array
 */
 interface Message {
-  role: 'system' | 'user' | 'assistant'
+  role: 'system' | 'user' | 'assistant' | 'attachment' // 'attachment' marks a file/media entry
   content: string
+  attachment?: AttachmentContent // file/media payload; stripped when the role is changed to a non-attachment role
 }

 /**
 * Props for the MessagesInput component
 */
 interface MessagesInputProps {
-  /** Unique identifier for the block */
  blockId: string
-  /** Unique identifier for the sub-block */
  subBlockId: string
-  /** Configuration object for the sub-block */
  config: SubBlockConfig
-  /** Whether component is in preview mode */
  isPreview?: boolean
-  /** Value to display in preview mode */
  previewValue?: Message[] | null
-  /** Whether the input is disabled */
  disabled?: boolean
-  /** Ref to expose wand control handlers to parent */
  wandControlRef?: React.MutableRefObject<WandControlHandlers | null>
 }

 /**
 * MessagesInput component for managing LLM message history
- *
- * @remarks
- * - Manages an array of messages with role and content
- * - Each message can be edited, removed, or reordered
- * - Stores data in LLM-compatible format: [{ role, content }]
 */
 export function MessagesInput({
  blockId,
@@ -86,10 +115,163 @@ export function MessagesInput({
  disabled = false,
  wandControlRef,
 }: MessagesInputProps) {
+  const params = useParams()
+  const workspaceId = params?.workspaceId as string
  const [messages, setMessages] = useSubBlockValue<Message[]>(blockId, subBlockId, false)
  const [localMessages, setLocalMessages] = useState<Message[]>([{ role: 'user', content: '' }])
  const accessiblePrefixes = useAccessibleReferencePrefixes(blockId)
  const [openPopoverIndex, setOpenPopoverIndex] = useState<number | null>(null)
+  const { activeWorkflowId } = useWorkflowRegistry() // workflow whose sub-block values are read from useSubBlockStore below
+
+  // Attachment input mode: basic = FileUpload, advanced = URL/base64 text entry. One flag for the whole component, so the toggle on any attachment message switches all of them.
+  const [attachmentMode, setAttachmentMode] = useState<'basic' | 'advanced'>('basic')
+
+  // Workspace files fetched from the API: listed to the wand as selectable files and used to resolve a fileId in its output
+  const [workspaceFiles, setWorkspaceFiles] = useState<WorkspaceFile[]>([])
+
+  /**
+   * Fetches the workspace's files so the wand can be told which ones it may attach.
+   * Does nothing in preview mode or when no workspace id is available. Best effort:
+   * failures are logged and the current list is left untouched.
+   */
+  const loadWorkspaceFiles = useCallback(async () => {
+    if (!workspaceId || isPreview) return
+
+    try {
+      const response = await fetch(`/api/workspaces/${workspaceId}/files`)
+      // fetch() resolves on HTTP error statuses too; stop before parsing an error body as JSON
+      if (!response.ok) {
+        logger.error('Error loading workspace files:', { status: response.status })
+        return
+      }
+      const data = await response.json()
+      if (data?.success) {
+        // Guard against a malformed payload so later .filter/.find calls always see an array
+        setWorkspaceFiles(Array.isArray(data.files) ? data.files : [])
+      }
+    } catch (error) {
+      logger.error('Error loading workspace files:', error)
+    }
+  }, [workspaceId, isPreview])
+
+  // Load on mount, and again whenever the workspace id or preview flag changes
+  useEffect(() => {
+    void loadWorkspaceFiles()
+  }, [loadWorkspaceFiles])
+
+  /**
+   * Text passed to the wand (as `sources`) describing which workspace files it may attach.
+   * Only images, audio, video and PDFs are listed.
+   */
+  const sourcesInfo = useMemo(() => {
+    if (workspaceFiles.length === 0) {
+      return 'No workspace files available. The user can upload files manually after generation.'
+    }
+
+    // The API payload is not validated at runtime, so tolerate records without a string type
+    const isAttachable = (f: WorkspaceFile): boolean =>
+      typeof f.type === 'string' &&
+      (f.type.startsWith('image/') ||
+        f.type.startsWith('audio/') ||
+        f.type.startsWith('video/') ||
+        f.type === 'application/pdf')
+
+    // JSON.stringify both quotes and escapes each value, so a file name containing quotes or
+    // newlines cannot break out of its entry in the prompt. Plain values render exactly as before.
+    const filesList = workspaceFiles
+      .filter(isAttachable)
+      .map(
+        (f) =>
+          `  - id: ${JSON.stringify(f.id)}, name: ${JSON.stringify(f.name)}, type: ${JSON.stringify(f.type)}`
+      )
+      .join('\n')
+
+    // Reached when the workspace has files but none of a supported media type
+    if (!filesList) {
+      return 'No files in workspace. The user can upload files manually after generation.'
+    }
+
+    return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the attachment object. If not selecting a file, omit the fileId field.`
+  }, [workspaceFiles])
+
+  // Positions of every attachment-role message; the file-upload selector keys off these
+  const attachmentIndices = useMemo(() => {
+    const positions: number[] = []
+    localMessages.forEach((entry, position) => {
+      if (entry.role === 'attachment') positions.push(position)
+    })
+    return positions
+  }, [localMessages])
+
+  // Whether the block's selected model passes supportsVision(); true when that cannot be determined
+  const modelSupportsVision = useSubBlockStore(
+    useCallback(
+      (store) => {
+        // No active workflow: default to allowing attachments
+        if (!activeWorkflowId) return true
+        const valuesForBlock = store.workflowValues[activeWorkflowId]?.[blockId] ?? {}
+        const selectedModel = valuesForBlock.model as string | undefined
+        // No model selected yet: allow attachments as well
+        return selectedModel ? supportsVision(selectedModel) : true
+      },
+      [activeWorkflowId, blockId]
+    )
+  )
+
+  // Roles offered in the role picker; 'attachment' is included only when the model supports vision
+  const availableRoles = useMemo(() => {
+    const textRoles: Array<'system' | 'user' | 'assistant' | 'attachment'> = [
+      'system',
+      'user',
+      'assistant',
+    ]
+    return modelSupportsVision ? [...textRoles, 'attachment' as const] : textRoles
+  }, [modelSupportsVision])
+
+  // Uploaded-file values read from the sub-block store for each attachment message, keyed by message index
+  const fileUploadValues = useSubBlockStore(
+    useCallback(
+      (state) => {
+        if (!activeWorkflowId) return {} // NOTE(review): a fresh object is returned on every call (here and `result` below); if the store compares selector results by reference this re-renders on every store update — confirm the store's equality function
+        const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {}
+        const result: Record<number, { name: string; path: string; type: string; size: number }> =
+          {}
+        for (const index of attachmentIndices) {
+          const fileUploadKey = `${subBlockId}-attachment-${index}` // positional key: presumably the FileUpload rendered for message N writes under this id — TODO confirm; entries do not follow a message when it is moved or deleted
+          const fileValue = blockValues[fileUploadKey]
+          if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) { // only object values with a `path` property count as an uploaded file
+            result[index] = fileValue as { name: string; path: string; type: string; size: number }
+          }
+        }
+        return result
+      },
+      [activeWorkflowId, blockId, subBlockId, attachmentIndices]
+    )
+  )
+
+  // Mirror files present in fileUploadValues into the attachment message at the same index
+  useEffect(() => {
+    if (isPreview || !activeWorkflowId) return
+
+    const synced = localMessages.map((entry, position) => {
+      const file = entry.role === 'attachment' ? fileUploadValues[position] : undefined
+      if (!file) return entry
+
+      const current = entry.attachment
+      const alreadySynced =
+        current?.data === file.path &&
+        current?.sourceType === 'file' &&
+        current?.mimeType === file.type &&
+        current?.fileName === file.name
+      // Returning the same reference marks this message as untouched
+      if (alreadySynced) return entry
+
+      const attachment: AttachmentContent = {
+        sourceType: 'file',
+        data: file.path,
+        mimeType: file.type,
+        fileName: file.name,
+      }
+      // The file name becomes the message content unless it is empty
+      return { ...entry, content: file.name || entry.content, attachment }
+    })
+
+    // Write back only when at least one message object was replaced
+    if (synced.some((entry, position) => entry !== localMessages[position])) {
+      setLocalMessages(synced)
+      setMessages(synced)
+    }
+  }, [activeWorkflowId, localMessages, isPreview, setMessages, fileUploadValues])
+
  const subBlockInput = useSubBlockInput({
    blockId,
    subBlockId,
@@ -98,43 +280,40 @@ export function MessagesInput({
    disabled,
  })

-  /**
-   * Gets the current messages as JSON string for wand context
-   */
  const getMessagesJson = useCallback((): string => {
    if (localMessages.length === 0) return ''
-    // Filter out empty messages for cleaner context
    const nonEmptyMessages = localMessages.filter((m) => m.content.trim() !== '')
    if (nonEmptyMessages.length === 0) return ''
    return JSON.stringify(nonEmptyMessages, null, 2)
  }, [localMessages])

-  /**
-   * Streaming buffer for accumulating JSON content
-   */
  const streamBufferRef = useRef<string>('')

-  /**
-   * Parses and validates messages from JSON content
-   */
  const parseMessages = useCallback((content: string): Message[] | null => {
    try {
      const parsed = JSON.parse(content)
      if (Array.isArray(parsed)) {
        const validMessages: Message[] = parsed
          .filter(
-            (m): m is { role: string; content: string } =>
+            (m): m is { role: string; content: string; attachment?: AttachmentContent } =>
              typeof m === 'object' &&
              m !== null &&
              typeof m.role === 'string' &&
              typeof m.content === 'string'
          )
-          .map((m) => ({
-            role: (['system', 'user', 'assistant'].includes(m.role)
+          .map((m) => {
+            const role = ['system', 'user', 'assistant', 'attachment'].includes(m.role)
              ? m.role
-              : 'user') as Message['role'],
-            content: m.content,
-          }))
+              : 'user'
+            const message: Message = {
+              role: role as Message['role'],
+              content: m.content,
+            }
+            if (m.attachment) {
+              message.attachment = m.attachment
+            }
+            return message
+          })
        return validMessages.length > 0 ? validMessages : null
      }
    } catch {
@@ -143,26 +322,19 @@ export function MessagesInput({
    return null
  }, [])

-  /**
-   * Extracts messages from streaming JSON buffer
-   * Uses simple pattern matching for efficiency
-   */
  const extractStreamingMessages = useCallback(
    (buffer: string): Message[] => {
-      // Try complete JSON parse first
      const complete = parseMessages(buffer)
      if (complete) return complete

      const result: Message[] = []

-      // Reset regex lastIndex for global pattern
      COMPLETE_MESSAGE_PATTERN.lastIndex = 0
      let match
      while ((match = COMPLETE_MESSAGE_PATTERN.exec(buffer)) !== null) {
        result.push({ role: match[1] as Message['role'], content: unescapeContent(match[2]) })
      }

-      // Check for incomplete message at end (content still streaming)
      const lastContentIdx = buffer.lastIndexOf('"content"')
      if (lastContentIdx !== -1) {
        const tail = buffer.slice(lastContentIdx)
@@ -172,7 +344,6 @@ export function MessagesInput({
          const roleMatch = head.match(ROLE_BEFORE_CONTENT_PATTERN)
          if (roleMatch) {
            const content = unescapeContent(incomplete[1])
-            // Only add if not duplicate of last complete message
            if (result.length === 0 || result[result.length - 1].content !== content) {
              result.push({ role: roleMatch[1] as Message['role'], content })
            }
@@ -185,12 +356,10 @@ export function MessagesInput({
    [parseMessages]
  )

-  /**
-   * Wand hook for AI-assisted content generation
-   */
  const wandHook = useWand({
    wandConfig: config.wandConfig,
    currentValue: getMessagesJson(),
+    sources: sourcesInfo,
    onStreamStart: () => {
      streamBufferRef.current = ''
      setLocalMessages([{ role: 'system', content: '' }])
@@ -205,10 +374,50 @@ export function MessagesInput({
    onGeneratedContent: (content) => {
      const validMessages = parseMessages(content)
      if (validMessages) {
+        // Attachment messages come from generated output, so `attachment` is treated as untrusted:
+        // a file is only attached by matching `fileId` against the loaded workspace files.
+        validMessages.forEach((msg, index) => {
+          if (msg.role !== 'attachment') return
+
+          // parseMessages does not validate the attachment's shape, so check the runtime type
+          // before calling string methods (a non-string `data` would otherwise throw here)
+          const candidate = msg.attachment
+          const hasExistingFile =
+            candidate?.sourceType === 'file' &&
+            typeof candidate.data === 'string' &&
+            candidate.data.startsWith('/api/') &&
+            Boolean(candidate.fileName)
+
+          if (hasExistingFile) {
+            // Preserve existing file data as-is
+            // NOTE(review): any '/api/' path echoed in the output passes this check; consider
+            // comparing against the attachments already present in the current messages
+            return
+          }
+
+          // Check if wand provided a fileId to select a workspace file
+          const requestedFileId = candidate?.fileId
+          const file = requestedFileId
+            ? workspaceFiles.find((f) => f.id === requestedFileId)
+            : undefined
+          if (file) {
+            // Set the file value in SubBlockStore so FileUpload picks it up
+            const fileUploadKey = `${subBlockId}-attachment-${index}`
+            useSubBlockStore.getState().setValue(blockId, fileUploadKey, {
+              name: file.name,
+              path: file.path,
+              type: file.type,
+              size: 0, // Size not available from workspace files list
+            })
+          }
+
+          // Always drop the generated attachment object. For a matched fileId the FileUpload value
+          // is synced back into the message by the effect above (setting attachment.data here
+          // would make it appear in the ShortInput in advanced mode); for anything else this
+          // prevents the LLM from setting arbitrary data/variable references.
+          msg.attachment = undefined
+        })
+
        setLocalMessages(validMessages)
        setMessages(validMessages)
      } else {
-        // Fallback: treat as raw system prompt
        const trimmed = content.trim()
        if (trimmed) {
          const fallback: Message[] = [{ role: 'system', content: trimmed }]
@@ -219,9 +428,6 @@ export function MessagesInput({
    },
  })

-  /**
-   * Expose wand control handlers to parent via ref
-   */
  useImperativeHandle(
    wandControlRef,
    () => ({
@@ -249,9 +455,6 @@ export function MessagesInput({
    }
  }, [isPreview, previewValue, messages])

-  /**
-   * Gets the current messages array
-   */
  const currentMessages = useMemo<Message[]>(() => {
    if (isPreview && previewValue && Array.isArray(previewValue)) {
      return previewValue
@@ -269,9 +472,6 @@ export function MessagesInput({
    startHeight: number
  } | null>(null)

-  /**
-   * Updates a specific message's content
-   */
  const updateMessageContent = useCallback(
    (index: number, content: string) => {
      if (isPreview || disabled) return
@@ -287,17 +487,27 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Updates a specific message's role
-   */
  const updateMessageRole = useCallback(
-    (index: number, role: 'system' | 'user' | 'assistant') => {
+    (index: number, role: 'system' | 'user' | 'assistant' | 'attachment') => {
      if (isPreview || disabled) return

      const updatedMessages = [...localMessages]
-      updatedMessages[index] = {
-        ...updatedMessages[index],
-        role,
+      if (role === 'attachment') {
+        updatedMessages[index] = {
+          ...updatedMessages[index],
+          role,
+          content: updatedMessages[index].content || '',
+          attachment: updatedMessages[index].attachment || {
+            sourceType: 'file',
+            data: '',
+          },
+        }
+      } else {
+        const { attachment: _, ...rest } = updatedMessages[index]
+        updatedMessages[index] = {
+          ...rest,
+          role,
+        }
      }
      setLocalMessages(updatedMessages)
      setMessages(updatedMessages)
@@ -305,9 +515,6 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Adds a message after the specified index
-   */
  const addMessageAfter = useCallback(
    (index: number) => {
      if (isPreview || disabled) return
@@ -320,9 +527,6 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Deletes a message at the specified index
-   */
  const deleteMessage = useCallback(
    (index: number) => {
      if (isPreview || disabled) return
@@ -335,9 +539,6 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Moves a message up in the list
-   */
  const moveMessageUp = useCallback(
    (index: number) => {
      if (isPreview || disabled || index === 0) return
@@ -352,9 +553,6 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Moves a message down in the list
-   */
  const moveMessageDown = useCallback(
    (index: number) => {
      if (isPreview || disabled || index === localMessages.length - 1) return
@@ -369,18 +567,11 @@ export function MessagesInput({
    [localMessages, setMessages, isPreview, disabled]
  )

-  /**
-   * Capitalizes the first letter of the role
-   */
  const formatRole = (role: string): string => {
    return role.charAt(0).toUpperCase() + role.slice(1)
  }

-  /**
-   * Handles header click to focus the textarea
-   */
  const handleHeaderClick = useCallback((index: number, e: React.MouseEvent) => {
-    // Don't focus if clicking on interactive elements
    const target = e.target as HTMLElement
    if (target.closest('button') || target.closest('[data-radix-popper-content-wrapper]')) {
      return
@@ -570,50 +761,52 @@ export function MessagesInput({
                  className='flex cursor-pointer items-center justify-between px-[8px] pt-[6px]'
                  onClick={(e) => handleHeaderClick(index, e)}
                >
-                  <Popover
-                    open={openPopoverIndex === index}
-                    onOpenChange={(open) => setOpenPopoverIndex(open ? index : null)}
-                  >
-                    <PopoverTrigger asChild>
-                      <button
-                        type='button'
-                        disabled={isPreview || disabled}
-                        className={cn(
-                          'group -ml-1.5 -my-1 flex items-center gap-1 rounded px-1.5 py-1 font-medium text-[13px] text-[var(--text-primary)] leading-none transition-colors hover:bg-[var(--surface-5)] hover:text-[var(--text-secondary)]',
-                          (isPreview || disabled) &&
-                            'cursor-default hover:bg-transparent hover:text-[var(--text-primary)]'
-                        )}
-                        onClick={(e) => e.stopPropagation()}
-                        aria-label='Select message role'
-                      >
-                        {formatRole(message.role)}
-                        {!isPreview && !disabled && (
-                          <ChevronDown
-                            className={cn(
-                              'h-3 w-3 flex-shrink-0 transition-transform duration-100',
-                              openPopoverIndex === index && 'rotate-180'
-                            )}
-                          />
-                        )}
-                      </button>
-                    </PopoverTrigger>
-                    <PopoverContent minWidth={140} align='start'>
-                      <div className='flex flex-col gap-[2px]'>
-                        {(['system', 'user', 'assistant'] as const).map((role) => (
-                          <PopoverItem
-                            key={role}
-                            active={message.role === role}
-                            onClick={() => {
-                              updateMessageRole(index, role)
-                              setOpenPopoverIndex(null)
-                            }}
-                          >
-                            <span>{formatRole(role)}</span>
-                          </PopoverItem>
-                        ))}
-                      </div>
-                    </PopoverContent>
-                  </Popover>
+                  <div className='flex items-center'>
+                    <Popover
+                      open={openPopoverIndex === index}
+                      onOpenChange={(open) => setOpenPopoverIndex(open ? index : null)}
+                    >
+                      <PopoverTrigger asChild>
+                        <button
+                          type='button'
+                          disabled={isPreview || disabled}
+                          className={cn(
+                            'group -ml-1.5 -my-1 flex items-center gap-1 rounded px-1.5 py-1 font-medium text-[13px] text-[var(--text-primary)] leading-none transition-colors hover:bg-[var(--surface-5)] hover:text-[var(--text-secondary)]',
+                            (isPreview || disabled) &&
+                              'cursor-default hover:bg-transparent hover:text-[var(--text-primary)]'
+                          )}
+                          onClick={(e) => e.stopPropagation()}
+                          aria-label='Select message role'
+                        >
+                          {formatRole(message.role)}
+                          {!isPreview && !disabled && (
+                            <ChevronDown
+                              className={cn(
+                                'h-3 w-3 flex-shrink-0 transition-transform duration-100',
+                                openPopoverIndex === index && 'rotate-180'
+                              )}
+                            />
+                          )}
+                        </button>
+                      </PopoverTrigger>
+                      <PopoverContent minWidth={140} align='start'>
+                        <div className='flex flex-col gap-[2px]'>
+                          {availableRoles.map((role) => (
+                            <PopoverItem
+                              key={role}
+                              active={message.role === role}
+                              onClick={() => {
+                                updateMessageRole(index, role)
+                                setOpenPopoverIndex(null)
+                              }}
+                            >
+                              <span>{formatRole(role)}</span>
+                            </PopoverItem>
+                          ))}
+                        </div>
+                      </PopoverContent>
+                    </Popover>
+                  </div>

                  {!isPreview && !disabled && (
                    <div className='flex items-center'>
@@ -657,6 +850,43 @@ export function MessagesInput({
                          </Button>
                        </>
                      )}
+                      {/* Mode toggle for attachment messages */}
+                      {message.role === 'attachment' && (
+                        <Tooltip.Root>
+                          <Tooltip.Trigger asChild>
+                            <Button
+                              variant='ghost'
+                              onClick={(e: React.MouseEvent) => {
+                                e.stopPropagation()
+                                setAttachmentMode((m) => (m === 'basic' ? 'advanced' : 'basic'))
+                              }}
+                              disabled={disabled}
+                              className='-my-1 -mr-1 h-6 w-6 p-0'
+                              aria-label={
+                                attachmentMode === 'advanced'
+                                  ? 'Switch to file upload'
+                                  : 'Switch to URL/text input'
+                              }
+                            >
+                              <ArrowLeftRight
+                                className={cn(
+                                  'h-3 w-3',
+                                  attachmentMode === 'advanced'
+                                    ? 'text-[var(--text-primary)]'
+                                    : 'text-[var(--text-secondary)]'
+                                )}
+                              />
+                            </Button>
+                          </Tooltip.Trigger>
+                          <Tooltip.Content side='top'>
+                            <p>
+                              {attachmentMode === 'advanced'
+                                ? 'Switch to file upload'
+                                : 'Switch to URL/text input'}
+                            </p>
+                          </Tooltip.Content>
+                        </Tooltip.Root>
+                      )}
                      <Button
                        variant='ghost'
                        onClick={(e: React.MouseEvent) => {
@@ -673,98 +903,152 @@ export function MessagesInput({
                  )}
                </div>

-                {/* Content Input with overlay for variable highlighting */}
-                <div className='relative w-full overflow-hidden'>
-                  <textarea
-                    ref={(el) => {
-                      textareaRefs.current[fieldId] = el
-                    }}
-                    className='relative z-[2] m-0 box-border h-auto min-h-[80px] w-full resize-none overflow-y-auto overflow-x-hidden whitespace-pre-wrap break-words border-none bg-transparent px-[8px] py-[8px] font-medium font-sans text-sm text-transparent leading-[1.5] caret-[var(--text-primary)] outline-none [-ms-overflow-style:none] [scrollbar-width:none] placeholder:text-[var(--text-muted)] focus:outline-none focus-visible:outline-none disabled:cursor-not-allowed [&::-webkit-scrollbar]:hidden'
-                    placeholder='Enter message content...'
-                    value={message.content}
-                    onChange={fieldHandlers.onChange}
-                    onKeyDown={(e) => {
-                      if (e.key === 'Tab' && !isPreview && !disabled) {
-                        e.preventDefault()
-                        const direction = e.shiftKey ? -1 : 1
-                        const nextIndex = index + direction
-
-                        if (nextIndex >= 0 && nextIndex < currentMessages.length) {
-                          const nextFieldId = `message-${nextIndex}`
-                          const nextTextarea = textareaRefs.current[nextFieldId]
-                          if (nextTextarea) {
-                            nextTextarea.focus()
-                            nextTextarea.selectionStart = nextTextarea.value.length
-                            nextTextarea.selectionEnd = nextTextarea.value.length
-                          }
+                {/* Content Input - different for attachment vs text messages */}
+                {message.role === 'attachment' ? (
+                  <div className='relative w-full px-[8px] py-[8px]'>
+                    {attachmentMode === 'basic' ? (
+                      <FileUpload
+                        blockId={blockId}
+                        subBlockId={`${subBlockId}-attachment-${index}`}
+                        acceptedTypes='image/*,audio/*,video/*,application/pdf,.doc,.docx,.txt'
+                        multiple={false}
+                        isPreview={isPreview}
+                        disabled={disabled}
+                      />
+                    ) : (
+                      <ShortInput
+                        blockId={blockId}
+                        subBlockId={`${subBlockId}-attachment-ref-${index}`}
+                        placeholder='Reference file from previous block...'
+                        config={{
+                          id: `${subBlockId}-attachment-ref-${index}`,
+                          type: 'short-input',
+                        }}
+                        value={
+                          // Only show value for variable references, not file uploads
+                          message.attachment?.sourceType === 'file'
+                            ? ''
+                            : message.attachment?.data || ''
                        }
-                        return
-                      }
-
-                      fieldHandlers.onKeyDown(e)
-                    }}
-                    onDrop={fieldHandlers.onDrop}
-                    onDragOver={fieldHandlers.onDragOver}
-                    onFocus={fieldHandlers.onFocus}
-                    onScroll={(e) => {
-                      const overlay = overlayRefs.current[fieldId]
-                      if (overlay) {
-                        overlay.scrollTop = e.currentTarget.scrollTop
-                        overlay.scrollLeft = e.currentTarget.scrollLeft
-                      }
-                    }}
-                    disabled={isPreview || disabled}
-                  />
-                  <div
-                    ref={(el) => {
-                      overlayRefs.current[fieldId] = el
-                    }}
-                    className='pointer-events-none absolute top-0 left-0 z-[1] m-0 box-border w-full overflow-y-auto overflow-x-hidden whitespace-pre-wrap break-words border-none bg-transparent px-[8px] py-[8px] font-medium font-sans text-[var(--text-primary)] text-sm leading-[1.5] [-ms-overflow-style:none] [scrollbar-width:none] [&::-webkit-scrollbar]:hidden'
-                  >
-                    {formatDisplayText(message.content, {
-                      accessiblePrefixes,
-                      highlightAll: !accessiblePrefixes,
-                    })}
-                    {message.content.endsWith('\n') && '\u200B'}
+                        onChange={(newValue: string) => {
+                          const updatedMessages = [...localMessages]
+                          if (updatedMessages[index].role === 'attachment') {
+                            // Determine sourceType based on content
+                            let sourceType: 'url' | 'base64' = 'url'
+                            if (newValue.startsWith('data:') || newValue.includes(';base64,')) {
+                              sourceType = 'base64'
+                            }
+                            updatedMessages[index] = {
+                              ...updatedMessages[index],
+                              content: newValue.substring(0, 50),
+                              attachment: {
+                                ...updatedMessages[index].attachment,
+                                sourceType,
+                                data: newValue,
+                              },
+                            }
+                            setLocalMessages(updatedMessages)
+                            setMessages(updatedMessages)
+                          }
+                        }}
+                        isPreview={isPreview}
+                        disabled={disabled}
+                      />
+                    )}
                  </div>
-
-                  {/* Env var dropdown for this message */}
-                  <EnvVarDropdown
-                    visible={fieldState.showEnvVars && !isPreview && !disabled}
-                    onSelect={handleEnvSelect}
-                    searchTerm={fieldState.searchTerm}
-                    inputValue={message.content}
-                    cursorPosition={fieldState.cursorPosition}
-                    onClose={() => subBlockInput.fieldHelpers.hideFieldDropdowns(fieldId)}
-                    workspaceId={subBlockInput.workspaceId}
-                    maxHeight='192px'
-                    inputRef={textareaRefObject}
-                  />
-
-                  {/* Tag dropdown for this message */}
-                  <TagDropdown
-                    visible={fieldState.showTags && !isPreview && !disabled}
-                    onSelect={handleTagSelect}
-                    blockId={blockId}
-                    activeSourceBlockId={fieldState.activeSourceBlockId}
-                    inputValue={message.content}
-                    cursorPosition={fieldState.cursorPosition}
-                    onClose={() => subBlockInput.fieldHelpers.hideFieldDropdowns(fieldId)}
-                    inputRef={textareaRefObject}
-                  />
-
-                  {!isPreview && !disabled && (
-                    <div
-                      className='absolute right-1 bottom-1 z-[3] flex h-4 w-4 cursor-ns-resize items-center justify-center rounded-[4px] border border-[var(--border-1)] bg-[var(--surface-5)] dark:bg-[var(--surface-5)]'
-                      onMouseDown={(e) => handleResizeStart(fieldId, e)}
-                      onDragStart={(e) => {
-                        e.preventDefault()
+                ) : (
+                  <div className='relative w-full overflow-hidden'>
+                    <textarea
+                      ref={(el) => {
+                        textareaRefs.current[fieldId] = el
                      }}
+                      className='relative z-[2] m-0 box-border h-auto min-h-[80px] w-full resize-none overflow-y-auto overflow-x-hidden whitespace-pre-wrap break-words border-none bg-transparent px-[8px] py-[8px] font-medium font-sans text-sm text-transparent leading-[1.5] caret-[var(--text-primary)] outline-none [-ms-overflow-style:none] [scrollbar-width:none] placeholder:text-[var(--text-muted)] focus:outline-none focus-visible:outline-none disabled:cursor-not-allowed [&::-webkit-scrollbar]:hidden'
+                      placeholder='Enter message content...'
+                      value={message.content}
+                      onChange={fieldHandlers.onChange}
+                      onKeyDown={(e) => {
+                        if (e.key === 'Tab' && !isPreview && !disabled) {
+                          e.preventDefault()
+                          const direction = e.shiftKey ? -1 : 1
+                          const nextIndex = index + direction
+
+                          if (nextIndex >= 0 && nextIndex < currentMessages.length) {
+                            const nextFieldId = `message-${nextIndex}`
+                            const nextTextarea = textareaRefs.current[nextFieldId]
+                            if (nextTextarea) {
+                              nextTextarea.focus()
+                              nextTextarea.selectionStart = nextTextarea.value.length
+                              nextTextarea.selectionEnd = nextTextarea.value.length
+                            }
+                          }
+                          return
+                        }
+
+                        fieldHandlers.onKeyDown(e)
+                      }}
+                      onDrop={fieldHandlers.onDrop}
+                      onDragOver={fieldHandlers.onDragOver}
+                      onFocus={fieldHandlers.onFocus}
+                      onScroll={(e) => {
+                        const overlay = overlayRefs.current[fieldId]
+                        if (overlay) {
+                          overlay.scrollTop = e.currentTarget.scrollTop
+                          overlay.scrollLeft = e.currentTarget.scrollLeft
+                        }
+                      }}
+                      disabled={isPreview || disabled}
+                    />
+                    <div
+                      ref={(el) => {
+                        overlayRefs.current[fieldId] = el
+                      }}
+                      className='pointer-events-none absolute top-0 left-0 z-[1] m-0 box-border w-full overflow-y-auto overflow-x-hidden whitespace-pre-wrap break-words border-none bg-transparent px-[8px] py-[8px] font-medium font-sans text-[var(--text-primary)] text-sm leading-[1.5] [-ms-overflow-style:none] [scrollbar-width:none] [&::-webkit-scrollbar]:hidden'
                    >
-                      <ChevronsUpDown className='h-3 w-3 text-[var(--text-muted)]' />
+                      {formatDisplayText(message.content, {
+                        accessiblePrefixes,
+                        highlightAll: !accessiblePrefixes,
+                      })}
+                      {message.content.endsWith('\n') && '\u200B'}
                    </div>
-                  )}
-                </div>
+
+                    {/* Env var dropdown for this message */}
+                    <EnvVarDropdown
+                      visible={fieldState.showEnvVars && !isPreview && !disabled}
+                      onSelect={handleEnvSelect}
+                      searchTerm={fieldState.searchTerm}
+                      inputValue={message.content}
+                      cursorPosition={fieldState.cursorPosition}
+                      onClose={() => subBlockInput.fieldHelpers.hideFieldDropdowns(fieldId)}
+                      workspaceId={subBlockInput.workspaceId}
+                      maxHeight='192px'
+                      inputRef={textareaRefObject}
+                    />
+
+                    {/* Tag dropdown for this message */}
+                    <TagDropdown
+                      visible={fieldState.showTags && !isPreview && !disabled}
+                      onSelect={handleTagSelect}
+                      blockId={blockId}
+                      activeSourceBlockId={fieldState.activeSourceBlockId}
+                      inputValue={message.content}
+                      cursorPosition={fieldState.cursorPosition}
+                      onClose={() => subBlockInput.fieldHelpers.hideFieldDropdowns(fieldId)}
+                      inputRef={textareaRefObject}
+                    />
+
+                    {!isPreview && !disabled && (
+                      <div
+                        className='absolute right-1 bottom-1 z-[3] flex h-4 w-4 cursor-ns-resize items-center justify-center rounded-[4px] border border-[var(--border-1)] bg-[var(--surface-5)] dark:bg-[var(--surface-5)]'
+                        onMouseDown={(e) => handleResizeStart(fieldId, e)}
+                        onDragStart={(e) => {
+                          e.preventDefault()
+                        }}
+                      >
+                        <ChevronsUpDown className='h-3 w-3 text-[var(--text-muted)]' />
+                      </div>
+                    )}
+                  </div>
+                )}
              </>
            )
          })()}
--- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand.ts
+++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand.ts
@@ -63,6 +63,8 @@ export interface WandConfig {
 interface UseWandProps {
  wandConfig?: WandConfig
  currentValue?: string
+  /** Additional context about available sources/references for the prompt */
+  sources?: string
  onGeneratedContent: (content: string) => void
  onStreamChunk?: (chunk: string) => void
  onStreamStart?: () => void
@@ -72,6 +74,7 @@ interface UseWandProps {
 export function useWand({
  wandConfig,
  currentValue,
+  sources,
  onGeneratedContent,
  onStreamChunk,
  onStreamStart,
@@ -154,6 +157,12 @@ export function useWand({
        if (systemPrompt.includes('{context}')) {
          systemPrompt = systemPrompt.replace('{context}', contextInfo)
        }
+        if (systemPrompt.includes('{sources}')) {
+          systemPrompt = systemPrompt.replace(
+            '{sources}',
+            sources || 'No upstream sources available'
+          )
+        }

        const userMessage = prompt

--- a/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-workflow/components/block/block.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-workflow/components/block/block.tsx
@@ -461,14 +461,12 @@ function WorkflowPreviewBlockInner({ data }: NodeProps<WorkflowPreviewBlockData>
        className={`flex items-center justify-between p-[8px] ${hasContentBelowHeader ? 'border-[var(--border-1)] border-b' : ''}`}
      >
        <div className='relative z-10 flex min-w-0 flex-1 items-center gap-[10px]'>
-          {!isNoteBlock && (
-            <div
-              className='flex h-[24px] w-[24px] flex-shrink-0 items-center justify-center rounded-[6px]'
-              style={{ background: enabled ? blockConfig.bgColor : 'gray' }}
-            >
-              <IconComponent className='h-[16px] w-[16px] text-white' />
-            </div>
-          )}
+          <div
+            className='flex h-[24px] w-[24px] flex-shrink-0 items-center justify-center rounded-[6px]'
+            style={{ background: enabled ? blockConfig.bgColor : 'gray' }}
+          >
+            <IconComponent className='h-[16px] w-[16px] text-white' />
+          </div>
          <span
            className={`truncate font-medium text-[16px] ${!enabled ? 'text-[var(--text-muted)]' : ''}`}
            title={name}
--- a/apps/sim/blocks/blocks/agent.ts
+++ b/apps/sim/blocks/blocks/agent.ts
@@ -85,7 +85,9 @@ export const AgentBlock: BlockConfig<AgentResponse> = {
      id: 'messages',
      title: 'Messages',
      type: 'messages-input',
+      canonicalParamId: 'messages',
      placeholder: 'Enter messages...',
+      mode: 'basic',
      wandConfig: {
        enabled: true,
        maintainHistory: true,
@@ -93,10 +95,12 @@ export const AgentBlock: BlockConfig<AgentResponse> = {

 Current messages: {context}

+{sources}
+
 RULES:
 1. Generate ONLY a valid JSON array - no markdown, no explanations
-2. Each message object must have "role" (system/user/assistant) and "content" (string)
-3. You can generate any number of messages as needed
+2. Each message object must have "role" and "content" properties
+3. Valid roles are: "system", "user", "assistant", "attachment"
 4. Content can be as long as necessary - don't truncate
 5. If editing existing messages, preserve structure unless asked to change it
 6. For new agents, create DETAILED, PROFESSIONAL system prompts that include:
@@ -106,6 +110,16 @@ RULES:
   - Critical thinking or quality guidelines
   - How to handle edge cases and uncertainty

+ATTACHMENTS:
+- Use role "attachment" to include images, audio, video, or documents in a multimodal conversation
+- IMPORTANT: If an attachment message in the current context has an "attachment" object with file data, ALWAYS preserve that entire "attachment" object exactly as-is
+- When creating NEW attachment messages, you can either:
+  1. Just set role to "attachment" with descriptive content - user will upload the file manually
+  2. Select a file from the available workspace files by including "fileId" in the attachment object (optional)
+- You do NOT have to select a file - it's completely optional
+- Example without file: {"role": "attachment", "content": "Analyze this image for text and objects"}
+- Example with file selection: {"role": "attachment", "content": "Analyze this image", "attachment": {"fileId": "abc123"}}
+
 EXAMPLES:

 Research agent:
@@ -114,14 +128,23 @@ Research agent:
 Code reviewer:
 [{"role": "system", "content": "You are a Senior Code Reviewer with expertise in software architecture, security, and best practices. Your role is to provide thorough, constructive code reviews that improve code quality and help developers grow.\\n\\n## Review Methodology\\n\\n1. **Security First**: Check for vulnerabilities including injection attacks, authentication flaws, data exposure, and insecure dependencies.\\n\\n2. **Code Quality**: Evaluate readability, maintainability, adherence to DRY/SOLID principles, and appropriate abstraction levels.\\n\\n3. **Performance**: Identify potential bottlenecks, unnecessary computations, memory leaks, and optimization opportunities.\\n\\n4. **Testing**: Assess test coverage, edge case handling, and testability of the code structure.\\n\\n## Output Format\\n\\n### Summary\\nBrief overview of the code's purpose and overall assessment.\\n\\n### Critical Issues\\nSecurity vulnerabilities or bugs that must be fixed before merging.\\n\\n### Improvements\\nSuggested enhancements with clear explanations of why and how.\\n\\n### Positive Aspects\\nHighlight well-written code to reinforce good practices.\\n\\nBe specific with line references. Provide code examples for suggested changes. Balance critique with encouragement."}, {"role": "user", "content": "<start.input>"}]

-Writing assistant:
-[{"role": "system", "content": "You are a skilled Writing Editor and Coach. Your role is to help users improve their writing through constructive feedback, editing suggestions, and guidance on style, clarity, and structure.\\n\\n## Editing Approach\\n\\n1. **Clarity**: Ensure ideas are expressed clearly and concisely. Eliminate jargon unless appropriate for the audience.\\n\\n2. **Structure**: Evaluate logical flow, paragraph organization, and transitions between ideas.\\n\\n3. **Voice & Tone**: Maintain consistency and appropriateness for the intended audience and purpose.\\n\\n4. **Grammar & Style**: Correct errors while respecting the author's voice.\\n\\n## Output Format\\n\\n### Overall Impression\\nBrief assessment of the piece's strengths and areas for improvement.\\n\\n### Structural Feedback\\nComments on organization, flow, and logical progression.\\n\\n### Line-Level Edits\\nSpecific suggestions with explanations, not just corrections.\\n\\n### Revised Version\\nWhen appropriate, provide an edited version demonstrating improvements.\\n\\nBe encouraging while honest. Explain the reasoning behind suggestions to help the writer improve."}, {"role": "user", "content": "<start.input>"}]
+Image analysis agent:
+[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "attachment", "content": "Analyze this image"}]

 Return ONLY the JSON array.`,
        placeholder: 'Describe what you want to create or change...',
        generationType: 'json-object',
      },
    },
+    {
+      id: 'messagesRaw',
+      title: 'Messages',
+      type: 'code',
+      canonicalParamId: 'messages',
+      placeholder: '[{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]',
+      language: 'json',
+      mode: 'advanced',
+    },
    {
      id: 'model',
      title: 'Model',
--- a/apps/sim/executor/handlers/agent/agent-handler.ts
+++ b/apps/sim/executor/handlers/agent/agent-handler.ts
@@ -25,6 +25,8 @@ import {
  validateModelProvider,
 } from '@/executor/utils/permission-check'
 import { executeProviderRequest } from '@/providers'
+import { transformAttachmentMessages } from '@/providers/attachment'
+import type { ProviderId } from '@/providers/types'
 import { getProviderFromModel, transformBlockTool } from '@/providers/utils'
 import type { SerializedBlock } from '@/serializer/types'
 import { executeTool } from '@/tools'
@@ -58,7 +60,15 @@ export class AgentBlockHandler implements BlockHandler {
    const providerId = getProviderFromModel(model)
    const formattedTools = await this.formatTools(ctx, filteredInputs.tools || [])
    const streamingConfig = this.getStreamingConfig(ctx, block)
-    const messages = await this.buildMessages(ctx, filteredInputs)
+    const rawMessages = await this.buildMessages(ctx, filteredInputs)
+
+    // Transform attachment messages to provider-specific format (async for file fetching)
+    const messages = rawMessages
+      ? await transformAttachmentMessages(rawMessages, {
+          providerId: providerId as ProviderId,
+          model,
+        })
+      : undefined

    const providerRequest = this.buildProviderRequest({
      ctx,
@@ -806,17 +816,44 @@ export class AgentBlockHandler implements BlockHandler {
    return messages.length > 0 ? messages : undefined
  }

-  private extractValidMessages(messages?: Message[]): Message[] {
-    if (!messages || !Array.isArray(messages)) return []
+  /**
+   * Normalizes the `messages` input into a list of structurally valid messages.
+   *
+   * Accepts either a message array or a raw JSON string (advanced mode). A string
+   * is trimmed and parsed; blank input yields an empty list, and invalid JSON or
+   * JSON that is not an array is logged as a warning and also yields an empty list.
+   *
+   * @param messages - Message array, or a JSON string encoding one
+   * @returns The entries that are objects with a `content` key and a string `role`
+   *   of 'system', 'user', 'assistant' or 'attachment'; all other entries are dropped
+   */
+  private extractValidMessages(messages?: Message[] | string): Message[] {
+    if (!messages) return []
 
-    return messages.filter(
-      (msg): msg is Message =>
-        msg &&
-        typeof msg === 'object' &&
-        'role' in msg &&
-        'content' in msg &&
-        ['system', 'user', 'assistant'].includes(msg.role)
-    )
+    // Handle raw JSON string input (from advanced mode)
+    let messageArray: unknown[]
+    if (typeof messages === 'string') {
+      const trimmed = messages.trim()
+      if (!trimmed) return []
+      try {
+        const parsed = JSON.parse(trimmed)
+        if (!Array.isArray(parsed)) {
+          logger.warn('Parsed messages JSON is not an array', { parsed })
+          return []
+        }
+        messageArray = parsed
+      } catch (error) {
+        // Only the first 100 characters of the input are logged
+        logger.warn('Failed to parse messages JSON string', {
+          error,
+          messages: trimmed.substring(0, 100),
+        })
+        return []
+      }
+    } else if (Array.isArray(messages)) {
+      messageArray = messages
+    } else {
+      return []
+    }
+
+    // Structural check only: `content` must be present, but its type is not validated here
+    return messageArray.filter((msg): msg is Message => {
+      if (!msg || typeof msg !== 'object') return false
+      const m = msg as Record<string, unknown>
+      return (
+        'role' in m &&
+        'content' in m &&
+        typeof m.role === 'string' &&
+        ['system', 'user', 'assistant', 'attachment'].includes(m.role)
+      )
+    })
   }

  private processMemories(memories: any): Message[] {
--- a/apps/sim/executor/handlers/agent/types.ts
+++ b/apps/sim/executor/handlers/agent/types.ts
@@ -6,8 +6,8 @@ export interface AgentInputs {
  systemPrompt?: string
  userPrompt?: string | object
  memories?: any // Legacy memory block output
-  // New message array input (from messages-input subblock)
-  messages?: Message[]
+  // New message array input (from messages-input subblock or raw JSON from advanced mode)
+  messages?: Message[] | string
  // Memory configuration
  memoryType?: 'none' | 'conversation' | 'sliding_window' | 'sliding_window_tokens'
  conversationId?: string // Required for all non-none memory types
@@ -42,9 +42,25 @@ export interface ToolInput {
  customToolId?: string
 }

+/**
+ * Attachment payload (image, audio, video, or document) carried by a message
+ * whose role is 'attachment'.
+ */
+export interface AttachmentContent {
+  /** Source type: how the data was provided (a URL, inline base64, or an uploaded file) */
+  sourceType: 'url' | 'base64' | 'file'
+  /** The URL or base64 data. NOTE(review): for 'file' this is presumably the uploaded file's location — confirm */
+  data: string
+  /** MIME type (e.g., 'image/png', 'application/pdf', 'audio/mpeg') */
+  mimeType?: string
+  /** Optional filename for file uploads */
+  fileName?: string
+}
+
 export interface Message {
-  role: 'system' | 'user' | 'assistant'
+  role: 'system' | 'user' | 'assistant' | 'attachment'
  content: string
+  /** Attachment content for 'attachment' role messages */
+  attachment?: AttachmentContent
  executionId?: string
  function_call?: any
  tool_calls?: any[]
--- a/apps/sim/providers/anthropic/index.ts
+++ b/apps/sim/providers/anthropic/index.ts
@@ -109,9 +109,15 @@ export const anthropicProvider: ProviderConfig = {
            ],
          })
        } else {
+          // Handle content that's already in array format (from transformAttachmentMessages)
+          const content = Array.isArray(msg.content)
+            ? msg.content
+            : msg.content
+              ? [{ type: 'text', text: msg.content }]
+              : []
          messages.push({
            role: msg.role === 'assistant' ? 'assistant' : 'user',
-            content: msg.content ? [{ type: 'text', text: msg.content }] : [],
+            content,
          })
        }
      })
--- a/apps/sim/providers/attachment.ts
+++ b/apps/sim/providers/attachment.ts
@@ -0,0 +1,397 @@
+/**
+ * Centralized attachment content transformation for all providers.
+ *
+ * Strategy: Always normalize to base64 first, then create provider-specific formats.
+ * This eliminates URL accessibility issues and simplifies provider handling.
+ */
+
+import { createLogger } from '@sim/logger'
+import { bufferToBase64 } from '@/lib/uploads/utils/file-utils'
+import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server'
+import { supportsVision } from '@/providers/models'
+import type { ProviderId } from '@/providers/types'
+
+const logger = createLogger('AttachmentTransformer')
+
+/**
+ * Generic message type for attachment transformation.
+ *
+ * The transformer reads `role`, `content` and `attachment`; the index signature
+ * lets any other message fields be carried along.
+ */
+interface TransformableMessage {
+  /** Message role; 'attachment' marks a message that needs conversion */
+  role: string
+  /** Plain text, an array of content parts, or null */
+  content: string | any[] | null
+  /** Payload to convert when `role` is 'attachment' */
+  attachment?: AttachmentContent
+  /** Any additional message fields */
+  [key: string]: any
+}
+
+/**
+ * Attachment content (files, images, documents)
+ */
+export interface AttachmentContent {
+  /** How `data` was supplied: 'base64' is used as inline data, 'url' and 'file' are downloaded */
+  sourceType: 'url' | 'base64' | 'file'
+  /** A data URL or raw base64 for 'base64'; otherwise the location handed to the downloader */
+  data: string
+  /** MIME type used when `data` does not carry one; defaults to 'application/octet-stream' */
+  mimeType?: string
+  /** File name shown in the text placeholder for models without attachment support */
+  fileName?: string
+}
+
+/**
+ * Normalized attachment data (always base64)
+ */
+interface NormalizedAttachment {
+  /** Base64 payload without a `data:` URL prefix */
+  base64: string
+  /** MIME type taken from the data URL, the attachment, or the 'application/octet-stream' fallback */
+  mimeType: string
+}
+
+/**
+ * Configuration for attachment transformation
+ */
+interface AttachmentTransformConfig {
+  /** Provider whose content-part format should be produced */
+  providerId: ProviderId
+  /** Model name, used to decide whether attachments are supported at all */
+  model: string
+}
+
+/**
+ * Reports whether `model` accepts attachment (vision/multimodal) content.
+ * The answer is delegated entirely to `supportsVision`.
+ */
+export function modelSupportsAttachments(model: string): boolean {
+  const visionCapable = supportsVision(model)
+  return visionCapable
+}
+
+/**
+ * Transforms messages with 'attachment' role into provider-compatible format.
+ *
+ * Models without attachment support are delegated to `transformAttachmentsToText`.
+ * Otherwise each attachment is converted into a provider-specific content part and
+ * appended to the immediately preceding user message, or emitted as a new user
+ * message when there is none. Attachment messages that yield no content part are
+ * skipped with a warning. Finally, every user message whose content is a plain
+ * string is wrapped into a content array.
+ *
+ * The merging done here works on copies: the message objects and content arrays
+ * passed in are never modified, so re-running on the same input cannot
+ * accumulate attachment parts.
+ *
+ * @param messages - Conversation messages in order
+ * @param config - Target provider and model
+ * @returns A new message list that contains no 'attachment' role entries
+ */
+export async function transformAttachmentMessages<T extends TransformableMessage>(
+  messages: T[],
+  config: AttachmentTransformConfig
+): Promise<T[]> {
+  const { providerId, model } = config
+
+  if (!modelSupportsAttachments(model)) {
+    return transformAttachmentsToText(messages) as T[]
+  }
+
+  const result: T[] = []
+
+  for (const msg of messages) {
+    if (msg.role !== 'attachment') {
+      result.push(msg)
+      continue
+    }
+
+    const attachmentContent = await createProviderAttachmentContent(msg, providerId)
+    if (!attachmentContent) {
+      // Log metadata only: the attachment data itself may be a very large base64 string
+      logger.warn('Could not create attachment content for message', {
+        hasAttachment: Boolean(msg.attachment),
+        sourceType: msg.attachment?.sourceType,
+        mimeType: msg.attachment?.mimeType,
+      })
+      continue
+    }
+
+    // Merge with previous user message or create new one
+    const lastIndex = result.length - 1
+    const lastMessage = result[lastIndex]
+    if (lastMessage && lastMessage.role === 'user') {
+      // Replace the entry with a copy instead of mutating the caller's message/array
+      result[lastIndex] = {
+        ...lastMessage,
+        content: [...ensureContentArray(lastMessage, providerId), attachmentContent] as any,
+      }
+    } else {
+      result.push({
+        role: 'user',
+        content: [attachmentContent] as any,
+      } as T)
+    }
+  }
+
+  // Ensure all user messages have consistent content format
+  return result.map((msg) => {
+    if (msg.role === 'user' && typeof msg.content === 'string') {
+      return {
+        ...msg,
+        content: [createTextContent(msg.content, providerId)] as any,
+      }
+    }
+    return msg
+  })
+}
+
+/**
+ * Transforms attachment messages to text placeholders for non-vision models.
+ *
+ * Each 'attachment' message becomes a one-line note naming the file and MIME type.
+ * The note is appended to the immediately preceding user message when that message
+ * has string (or no) content; otherwise it is emitted as a separate user message so
+ * existing array content is never overwritten. Input messages are not mutated.
+ *
+ * @param messages - Conversation messages in order
+ * @returns A new message list that contains no 'attachment' role entries
+ */
+function transformAttachmentsToText<T extends TransformableMessage>(messages: T[]): T[] {
+  const result: T[] = []
+
+  for (const msg of messages) {
+    if (msg.role !== 'attachment') {
+      result.push(msg)
+      continue
+    }
+
+    const mimeType = msg.attachment?.mimeType || 'unknown type'
+    const fileName = msg.attachment?.fileName || 'file'
+    const placeholder = `[Attached file: ${fileName} (${mimeType}) - Note: This model does not support file/image inputs]`
+
+    const lastIndex = result.length - 1
+    const lastMessage = result[lastIndex]
+    const previousContent = lastMessage?.content
+
+    if (
+      lastMessage &&
+      lastMessage.role === 'user' &&
+      (typeof previousContent === 'string' || previousContent == null)
+    ) {
+      // Write a copy back so the caller's message object stays untouched;
+      // skip the separator when there is no existing text to separate from.
+      result[lastIndex] = {
+        ...lastMessage,
+        content: previousContent ? `${previousContent}\n${placeholder}` : placeholder,
+      }
+    } else {
+      result.push({
+        role: 'user',
+        content: placeholder,
+      } as T)
+    }
+  }
+
+  return result
+}
+
+/**
+ * Returns the content of a message as an array of content parts.
+ *
+ * Array content is returned as-is (same reference), a non-empty string becomes a
+ * single provider-specific text part, and anything else yields an empty array.
+ */
+function ensureContentArray(msg: TransformableMessage, providerId: ProviderId): any[] {
+  const { content } = msg
+
+  if (Array.isArray(content)) return content
+  if (typeof content !== 'string' || content === '') return []
+
+  return [createTextContent(content, providerId)]
+}
+
+/**
+ * Builds a text content part in the shape the given provider expects:
+ * `{ text }` for 'google' and 'vertex', `{ type: 'text', text }` for all others.
+ */
+export function createTextContent(text: string, providerId: ProviderId): any {
+  const usesBareTextPart = providerId === 'google' || providerId === 'vertex'
+  return usesBareTextPart ? { text } : { type: 'text', text }
+}
+
+/**
+ * Normalizes attachment data to base64.
+ *
+ * - 'base64': a `data:` URL is split into its media type and payload; it must be
+ *   flagged `;base64` (extra parameters such as `;charset=...` are tolerated).
+ *   Anything else is treated as raw base64. Whitespace inside the payload is removed.
+ * - 'url' / 'file': the data is downloaded and the bytes are base64-encoded.
+ *
+ * @param attachment - Attachment to normalize
+ * @returns The base64 payload with its MIME type, or null when the data is missing,
+ *   malformed, of an unknown source type, or could not be downloaded
+ */
+async function normalizeToBase64(
+  attachment: AttachmentContent
+): Promise<NormalizedAttachment | null> {
+  const { sourceType, data, mimeType } = attachment
+
+  // Attachments can originate from hand-written JSON, so do not trust the declared type
+  if (typeof data !== 'string' || !data.trim()) {
+    logger.warn('Empty attachment data')
+    return null
+  }
+
+  const trimmedData = data.trim()
+  const fallbackMimeType = mimeType || 'application/octet-stream'
+
+  // Already base64
+  if (sourceType === 'base64') {
+    if (!/^data:/i.test(trimmedData)) {
+      // Raw base64 (may be line-wrapped)
+      return { base64: trimmedData.replace(/\s+/g, ''), mimeType: fallbackMimeType }
+    }
+
+    // Data URL format: data:[<mediatype>][;param=value]*;base64,<payload>
+    const commaIndex = trimmedData.indexOf(',')
+    const header = commaIndex === -1 ? '' : trimmedData.slice('data:'.length, commaIndex)
+    const payload = commaIndex === -1 ? '' : trimmedData.slice(commaIndex + 1).replace(/\s+/g, '')
+    const [declaredType = '', ...params] = header.split(';')
+    const isBase64 = params.some((param) => param.trim().toLowerCase() === 'base64')
+
+    if (!isBase64 || !payload) {
+      // Never pass the whole data URL on as if it were raw base64
+      logger.warn('Malformed or non-base64 data URL in attachment')
+      return null
+    }
+
+    return { base64: payload, mimeType: declaredType.trim() || fallbackMimeType }
+  }
+
+  // URL or file path - need to fetch
+  if (sourceType === 'url' || sourceType === 'file') {
+    try {
+      logger.info('Fetching attachment for base64 conversion', {
+        url: trimmedData.substring(0, 50),
+      })
+      const buffer = await downloadFileFromUrl(trimmedData)
+      const base64 = bufferToBase64(buffer)
+      return { base64, mimeType: fallbackMimeType }
+    } catch (error) {
+      logger.error('Failed to fetch attachment', { error, url: trimmedData.substring(0, 50) })
+      return null
+    }
+  }
+
+  logger.warn('Unsupported attachment source type', { sourceType })
+  return null
+}
+
+/**
+ * Builds the provider-specific content part for a message's attachment.
+ *
+ * The attachment is first normalized to base64, then handed to the builder for
+ * the target provider. Returns `null` when the message has no attachment, and a
+ * text placeholder part when the attachment could not be loaded.
+ */
+async function createProviderAttachmentContent(
+  msg: TransformableMessage,
+  providerId: ProviderId
+): Promise<any> {
+  if (!msg.attachment) return null
+
+  const loaded = await normalizeToBase64(msg.attachment)
+  if (!loaded) {
+    return createTextContent('[Failed to load attachment]', providerId)
+  }
+
+  if (providerId === 'anthropic') {
+    return createAnthropicContent(loaded.base64, loaded.mimeType)
+  }
+  if (providerId === 'google' || providerId === 'vertex') {
+    return createGeminiContent(loaded.base64, loaded.mimeType)
+  }
+  if (providerId === 'mistral') {
+    return createMistralContent(loaded.base64, loaded.mimeType)
+  }
+  if (providerId === 'bedrock') {
+    return createBedrockContent(loaded.base64, loaded.mimeType)
+  }
+
+  // Everything else uses the OpenAI format
+  // (OpenAI, Azure, xAI, DeepSeek, Cerebras, Groq, OpenRouter, Ollama, vLLM)
+  return createOpenAIContent(loaded.base64, loaded.mimeType)
+}
+
+/**
+ * OpenAI-compatible content: images as a base64 data URL, WAV/MP3 audio as `input_audio`.
+ *
+ * Any other MIME type (including audio formats other than WAV and MP3) becomes a text
+ * placeholder, because the Chat API cannot accept it as an attachment.
+ *
+ * @param base64 - Base64-encoded file contents (no data-URL prefix).
+ * @param mimeType - MIME type of the file.
+ * @returns An `image_url`, `input_audio`, or `text` content part.
+ */
+function createOpenAIContent(base64: string, mimeType: string): any {
+  // Compare on the bare media type: lower-cased and without parameters such as "; codecs=1".
+  const mediaType = mimeType
+    .toLowerCase()
+    .replace(/;[\s\S]*$/, '')
+    .trim()
+
+  if (mediaType.startsWith('image/')) {
+    return {
+      type: 'image_url',
+      image_url: {
+        url: `data:${mimeType};base64,${base64}`,
+        detail: 'auto',
+      },
+    }
+  }
+
+  if (mediaType.startsWith('audio/')) {
+    // `input_audio.format` only accepts "wav" or "mp3"; never label another codec as mp3.
+    const wavTypes = ['audio/wav', 'audio/x-wav', 'audio/wave', 'audio/vnd.wave']
+    const mp3Types = ['audio/mpeg', 'audio/mp3', 'audio/mpeg3', 'audio/x-mpeg-3']
+    const format = wavTypes.includes(mediaType)
+      ? 'wav'
+      : mp3Types.includes(mediaType)
+        ? 'mp3'
+        : undefined
+
+    if (format) {
+      return {
+        type: 'input_audio',
+        input_audio: {
+          data: base64,
+          format,
+        },
+      }
+    }
+
+    logger.warn(`OpenAI does not support ${mimeType} audio in Chat API`)
+    return {
+      type: 'text',
+      text: `[Attached file: ${mimeType} - OpenAI Chat API only supports WAV and MP3 audio]`,
+    }
+  }
+
+  // OpenAI Chat API doesn't support other file types directly
+  // For PDFs/docs, return a text placeholder
+  logger.warn(`OpenAI does not support ${mimeType} attachments in Chat API`)
+  return {
+    type: 'text',
+    text: `[Attached file: ${mimeType} - OpenAI Chat API only supports images and audio]`,
+  }
+}
+
+/**
+ * Anthropic-compatible content (JPEG/PNG/GIF/WebP images and PDFs).
+ *
+ * Only these four image media types are forwarded as `image` blocks; other image types
+ * become a text placeholder instead of being sent in a request. The non-standard
+ * `image/jpg` is normalized to `image/jpeg`.
+ *
+ * @param base64 - Base64-encoded file contents (no data-URL prefix).
+ * @param mimeType - MIME type of the file.
+ * @returns An `image`, `document`, or `text` content block.
+ */
+function createAnthropicContent(base64: string, mimeType: string): any {
+  // Compare on the bare media type: lower-cased and without parameters.
+  const bareType = mimeType
+    .toLowerCase()
+    .replace(/;[\s\S]*$/, '')
+    .trim()
+  const mediaType = bareType === 'image/jpg' ? 'image/jpeg' : bareType
+
+  if (mediaType.startsWith('image/')) {
+    const supportedImageTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp']
+    if (!supportedImageTypes.includes(mediaType)) {
+      return {
+        type: 'text',
+        text: `[Attached file: ${mimeType} - Anthropic supports JPEG, PNG, GIF, and WebP images and PDFs only]`,
+      }
+    }
+    return {
+      type: 'image',
+      source: {
+        type: 'base64',
+        media_type: mediaType,
+        data: base64,
+      },
+    }
+  }
+
+  if (mediaType === 'application/pdf') {
+    return {
+      type: 'document',
+      source: {
+        type: 'base64',
+        media_type: 'application/pdf',
+        data: base64,
+      },
+    }
+  }
+
+  return {
+    type: 'text',
+    text: `[Attached file: ${mimeType} - Anthropic supports images and PDFs only]`,
+  }
+}
+
+/**
+ * Google Gemini-compatible content.
+ *
+ * Wraps the payload in an `inlineData` part; no MIME-type filtering is applied
+ * because Gemini accepts a wide range of file types this way.
+ */
+function createGeminiContent(base64: string, mimeType: string): any {
+  const inlineData = { mimeType, data: base64 }
+  return { inlineData }
+}
+
+/**
+ * Mistral-compatible content.
+ *
+ * Images become an `image_url` part holding a base64 data URL; any other
+ * MIME type becomes a text placeholder.
+ */
+function createMistralContent(base64: string, mimeType: string): any {
+  if (!mimeType.startsWith('image/')) {
+    return {
+      type: 'text',
+      text: `[Attached file: ${mimeType} - Mistral supports images only]`,
+    }
+  }
+
+  // Mistral takes the data URL as a plain string, not nested under `{ url }`.
+  const dataUrl = `data:${mimeType};base64,${base64}`
+  return { type: 'image_url', image_url: dataUrl }
+}
+
+/**
+ * AWS Bedrock-compatible content (JPEG/PNG/GIF/WebP images and PDFs).
+ *
+ * Returns an intermediate marker object (`bedrock_image` / `bedrock_document`) rather than a
+ * Bedrock ContentBlock; the Bedrock provider converts the marker to the proper format.
+ * The marker carries `sourceType: 'base64'` because the Bedrock-side converters only read
+ * `data` when that field is set. Unsupported files become a text placeholder.
+ *
+ * @param base64 - Base64-encoded file contents (no data-URL prefix).
+ * @param mimeType - MIME type of the file.
+ * @returns A `bedrock_image` marker, a `bedrock_document` marker, or a `text` part.
+ */
+function createBedrockContent(base64: string, mimeType: string): any {
+  const mediaType = mimeType.toLowerCase()
+
+  if (mediaType.startsWith('image/')) {
+    // Substring matching keeps variants such as image/jpg or image/x-png working.
+    const format =
+      mediaType.includes('jpeg') || mediaType.includes('jpg')
+        ? 'jpeg'
+        : ['png', 'gif', 'webp'].find((candidate) => mediaType.includes(candidate))
+
+    if (!format) {
+      // Do not guess a format for image types Bedrock cannot decode.
+      return {
+        type: 'text',
+        text: `[Attached file: ${mimeType} - Bedrock supports JPEG, PNG, GIF, and WebP images and PDFs only]`,
+      }
+    }
+
+    return {
+      type: 'bedrock_image',
+      format,
+      sourceType: 'base64',
+      data: base64,
+    }
+  }
+
+  if (mediaType === 'application/pdf') {
+    return {
+      type: 'bedrock_document',
+      format: 'pdf',
+      sourceType: 'base64',
+      data: base64,
+    }
+  }
+
+  return {
+    type: 'text',
+    text: `[Attached file: ${mimeType} - Bedrock supports images and PDFs only]`,
+  }
+}
--- a/apps/sim/providers/bedrock/index.ts
+++ b/apps/sim/providers/bedrock/index.ts
@@ -16,6 +16,7 @@ import type { StreamingExecution } from '@/executor/types'
 import { MAX_TOOL_ITERATIONS } from '@/providers'
 import {
  checkForForcedToolUsage,
+  convertToBedrockContentBlocks,
  createReadableStreamFromBedrockStream,
  generateToolUseId,
  getBedrockInferenceProfileId,
@@ -116,9 +117,11 @@ export const bedrockProvider: ProviderConfig = {
          }
        } else {
          const role: ConversationRole = msg.role === 'assistant' ? 'assistant' : 'user'
+          // Handle multimodal content arrays
+          const contentBlocks = convertToBedrockContentBlocks(msg.content || '')
          messages.push({
            role,
-            content: [{ text: msg.content || '' }],
+            content: contentBlocks,
          })
        }
      }
--- a/apps/sim/providers/bedrock/utils.ts
+++ b/apps/sim/providers/bedrock/utils.ts
@@ -1,9 +1,199 @@
-import type { ConverseStreamOutput } from '@aws-sdk/client-bedrock-runtime'
+import type {
+  ContentBlock,
+  ConverseStreamOutput,
+  ImageFormat,
+} from '@aws-sdk/client-bedrock-runtime'
 import { createLogger } from '@sim/logger'
 import { trackForcedToolUsage } from '@/providers/utils'

 const logger = createLogger('BedrockUtils')

+/**
+ * Converts message content into an array of Bedrock ContentBlocks.
+ *
+ * Strings (and any non-array value, via `String()`) become a single text block.
+ * Array content is converted part by part: text parts (OpenAI- or Gemini-style),
+ * `bedrock_image` / `bedrock_document` markers, and OpenAI-style `image_url` parts
+ * are recognized; anything else is logged and skipped. The result always contains
+ * at least one block (an empty text block when nothing could be converted).
+ */
+export function convertToBedrockContentBlocks(content: string | any[]): ContentBlock[] {
+  if (typeof content === 'string') {
+    return [{ text: content || '' }]
+  }
+  if (!Array.isArray(content)) {
+    return [{ text: String(content) || '' }]
+  }
+
+  const converted: ContentBlock[] = []
+  // Helper converters signal "nothing to add" with null.
+  const append = (block: ContentBlock | null): void => {
+    if (block) converted.push(block)
+  }
+
+  content.forEach((part) => {
+    if (!part) return
+
+    const kind = part.type
+    const isOpenAIText = kind === 'text' && part.text
+    const isGeminiText = typeof part.text === 'string' && !kind
+
+    if (isOpenAIText || isGeminiText) {
+      converted.push({ text: part.text })
+    } else if (kind === 'bedrock_image') {
+      // Marker produced by the agent handler
+      append(createBedrockImageBlock(part))
+    } else if (kind === 'bedrock_document') {
+      // Marker produced by the agent handler
+      append(createBedrockDocumentBlock(part))
+    } else if (kind === 'image_url' && part.image_url) {
+      // OpenAI-style image part; image_url is either a string or `{ url }`
+      const source = part.image_url
+      const url = typeof source === 'string' ? source : source?.url
+      if (url) {
+        append(createBedrockImageBlockFromUrl(url))
+      }
+    } else {
+      logger.warn('Unknown content block type in Bedrock conversion:', { type: part.type })
+    }
+  })
+
+  if (converted.length === 0) {
+    converted.push({ text: '' })
+  }
+
+  return converted
+}
+
+/**
+ * Creates a Bedrock image ContentBlock from a `bedrock_image` marker item.
+ *
+ * @param item - Marker with the image `format` and either base64 `data` or a `url`.
+ *   `sourceType` is optional: when a marker omits it, it is inferred from whichever
+ *   payload field is present (`data` means base64, otherwise `url`).
+ * @returns An image block for base64 data, a text placeholder for URL sources
+ *   (Bedrock cannot fetch URLs), or `null` when the item carries no usable payload.
+ */
+function createBedrockImageBlock(item: {
+  format: string
+  sourceType?: string
+  data?: string
+  url?: string
+}): ContentBlock | null {
+  const format = (item.format || 'png') as ImageFormat
+  // Without this inference a marker that only carries `data` would be silently dropped.
+  const sourceType = item.sourceType || (item.data ? 'base64' : item.url ? 'url' : undefined)
+
+  if (sourceType === 'base64' && item.data) {
+    // Convert base64 to Uint8Array
+    const bytes = base64ToUint8Array(item.data)
+    return {
+      image: {
+        format,
+        source: { bytes },
+      },
+    }
+  }
+
+  if (sourceType === 'url' && item.url) {
+    // Bedrock doesn't support URL sources directly, and nothing here fetches the image,
+    // so fall back to a text placeholder.
+    logger.warn('Bedrock does not support image URLs directly. Image will be skipped.', {
+      url: item.url,
+    })
+    return { text: `[Image from URL: ${item.url}]` }
+  }
+
+  return null
+}
+
+/**
+ * Creates a Bedrock document ContentBlock from a `bedrock_document` marker item.
+ *
+ * @param item - Marker with either base64 `data` or a `url`. `sourceType` is optional:
+ *   when a marker omits it, it is inferred from whichever payload field is present
+ *   (`data` means base64, otherwise `url`). The block is always emitted as a PDF
+ *   named "document".
+ * @returns A document block for base64 data, a text placeholder for URL sources
+ *   (Bedrock cannot fetch URLs), or `null` when the item carries no usable payload.
+ */
+function createBedrockDocumentBlock(item: {
+  format: string
+  sourceType?: string
+  data?: string
+  url?: string
+}): ContentBlock | null {
+  // Without this inference a marker that only carries `data` would be silently dropped.
+  const sourceType = item.sourceType || (item.data ? 'base64' : item.url ? 'url' : undefined)
+
+  if (sourceType === 'base64' && item.data) {
+    const bytes = base64ToUint8Array(item.data)
+    return {
+      document: {
+        format: 'pdf',
+        name: 'document',
+        source: { bytes },
+      },
+    }
+  }
+
+  if (sourceType === 'url' && item.url) {
+    logger.warn('Bedrock does not support document URLs directly. Document will be skipped.', {
+      url: item.url,
+    })
+    return { text: `[Document from URL: ${item.url}]` }
+  }
+
+  return null
+}
+
+/**
+ * Creates a Bedrock image ContentBlock from a data URL or a regular URL.
+ *
+ * - A base64 data URL with a supported image format (png, jpeg/jpg, gif, webp) becomes an
+ *   image block. Extra media-type parameters (e.g. `;charset=...`) are tolerated.
+ * - Any other data URL becomes a short text placeholder that names only the media type,
+ *   so the encoded payload is never logged or sent to the model as text.
+ * - A regular URL becomes a text placeholder, because Bedrock cannot fetch URLs.
+ *
+ * @param url - Data URL or remote URL of the image.
+ * @returns An image block or a text placeholder block.
+ */
+function createBedrockImageBlockFromUrl(url: string): ContentBlock | null {
+  if (/^data:/i.test(url)) {
+    const match = url.match(/^data:image\/([\w.+-]+)(?:;[^;,]*)*?;base64,([\s\S]+)$/i)
+    if (match) {
+      const subtype = match[1].toLowerCase()
+      // Normalize jpg to jpeg
+      const format = subtype === 'jpg' ? 'jpeg' : subtype
+      if (['png', 'jpeg', 'gif', 'webp'].includes(format)) {
+        return {
+          image: {
+            format: format as ImageFormat,
+            source: { bytes: base64ToUint8Array(match[2]) },
+          },
+        }
+      }
+    }
+
+    // Only look at the short header before the first ";" or "," - never echo the payload.
+    const mediaType = url.slice(5, 69).split(/[;,]/)[0] || 'unknown type'
+    logger.warn('Unsupported or malformed image data URL for Bedrock. Image will be skipped.', {
+      mediaType,
+    })
+    return { text: `[Unsupported inline image: ${mediaType}]` }
+  }
+
+  // Regular URL - Bedrock doesn't support this directly
+  logger.warn('Bedrock does not support image URLs directly. Image will be skipped.', { url })
+  return { text: `[Image from URL: ${url}]` }
+}
+
+/**
+ * Decodes a base64 string into bytes.
+ *
+ * Uses `Buffer` when it exists (Node.js) and falls back to `atob` otherwise (browsers).
+ */
+function base64ToUint8Array(base64: string): Uint8Array {
+  const hasNodeBuffer = typeof Buffer !== 'undefined'
+
+  if (!hasNodeBuffer) {
+    // atob yields one character per byte, so each char code is the byte value.
+    const decoded = atob(base64)
+    return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0))
+  }
+
+  return Buffer.from(base64, 'base64')
+}
+
 export interface BedrockStreamUsage {
  inputTokens: number
  outputTokens: number
--- a/apps/sim/providers/google/utils.ts
+++ b/apps/sim/providers/google/utils.ts
@@ -72,6 +72,75 @@ export function cleanSchemaForGemini(schema: SchemaUnion): SchemaUnion {
  return cleanedSchema
 }

+/**
+ * Converts an array of content items to a Gemini-compatible Part array.
+ *
+ * Recognized items:
+ * - text in either shape (`{ text }` or `{ type: 'text', text }`)
+ * - Gemini-native `inlineData` / `fileData` parts, passed through
+ * - OpenAI-style `image_url` parts: base64 data URLs become `inlineData`,
+ *   other URLs become `fileData`
+ *
+ * Falsy items are ignored. Unknown items and data URLs that are not base64-encoded
+ * are skipped with a warning.
+ *
+ * @param contentArray - Content items in any of the shapes above.
+ * @returns The converted parts, in input order (possibly empty).
+ */
+function convertContentArrayToGeminiParts(contentArray: any[]): Part[] {
+  const parts: Part[] = []
+
+  for (const item of contentArray) {
+    if (!item) continue
+
+    // One check covers both text shapes: Gemini `{ text }` and OpenAI `{ type: 'text', text }`.
+    if (typeof item.text === 'string') {
+      parts.push({ text: item.text })
+      continue
+    }
+
+    // Gemini-native inlineData format (from attachment transformer)
+    if (item.inlineData) {
+      parts.push({ inlineData: item.inlineData })
+      continue
+    }
+
+    // Gemini-native fileData format (from attachment transformer)
+    if (item.fileData) {
+      parts.push({ fileData: item.fileData })
+      continue
+    }
+
+    // OpenAI-style image_url - convert to Gemini format
+    if (item.type === 'image_url' && item.image_url) {
+      const url = typeof item.image_url === 'string' ? item.image_url : item.image_url?.url
+      if (typeof url === 'string' && url) {
+        if (/^data:/i.test(url)) {
+          // Tolerate extra media-type parameters (";charset=...") and line-wrapped payloads.
+          const match = url.match(/^data:([^;,]+)(?:;[^;,]*)*?;base64,([\s\S]+)$/i)
+          if (match) {
+            parts.push({
+              inlineData: {
+                mimeType: match[1],
+                data: match[2].replace(/\s+/g, ''),
+              },
+            })
+          } else {
+            // Report the drop, but never log the payload itself.
+            logger.warn('Skipping data URL in Gemini conversion: expected a base64-encoded payload')
+          }
+        } else {
+          // External URL
+          parts.push({
+            fileData: {
+              mimeType: 'image/jpeg', // Default, Gemini will detect actual type
+              fileUri: url,
+            },
+          })
+        }
+      }
+      continue
+    }
+
+    // Unknown type - log warning
+    logger.warn('Unknown content item type in Gemini conversion:', { type: item.type })
+  }
+
+  return parts
+}
+
 /**
 * Extracts text content from a Gemini response candidate.
 * Filters out thought parts (model reasoning) from the output.
@@ -180,7 +249,13 @@ export function convertToGeminiFormat(request: ProviderRequest): {
      } else if (message.role === 'user' || message.role === 'assistant') {
        const geminiRole = message.role === 'user' ? 'user' : 'model'

-        if (message.content) {
+        // Handle multimodal content (arrays with text/image/file parts)
+        if (Array.isArray(message.content)) {
+          const parts: Part[] = convertContentArrayToGeminiParts(message.content)
+          if (parts.length > 0) {
+            contents.push({ role: geminiRole, parts })
+          }
+        } else if (message.content) {
          contents.push({ role: geminiRole, parts: [{ text: message.content }] })
        }

--- a/apps/sim/providers/models.ts
+++ b/apps/sim/providers/models.ts
@@ -34,6 +34,8 @@ export interface ModelCapabilities {
  toolUsageControl?: boolean
  computerUse?: boolean
  nativeStructuredOutputs?: boolean
+  /** Whether the model supports vision/multimodal inputs (images, audio, video, PDFs) */
+  vision?: boolean
  maxOutputTokens?: {
    /** Maximum tokens for streaming requests */
    max: number
@@ -120,6 +122,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 2 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -132,6 +135,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-12-11',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
          },
@@ -150,6 +154,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-11-14',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'low', 'medium', 'high'],
          },
@@ -222,6 +227,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -240,6 +246,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -258,6 +265,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -287,6 +295,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-06-17',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['low', 'medium', 'high'],
          },
@@ -302,6 +311,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-06-17',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['low', 'medium', 'high'],
          },
@@ -317,6 +327,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-06-17',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['low', 'medium', 'high'],
          },
@@ -333,6 +344,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 2 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -346,6 +358,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 2 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -359,6 +372,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 2 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -385,6 +399,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 2 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -397,6 +412,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-12-11',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
          },
@@ -415,6 +431,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-11-14',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'low', 'medium', 'high'],
          },
@@ -433,6 +450,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-11-14',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'low', 'medium', 'high'],
          },
@@ -451,6 +469,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-11-14',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'low', 'medium', 'high'],
          },
@@ -469,6 +488,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-11-14',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['none', 'medium', 'high'],
          },
@@ -487,6 +507,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -505,6 +526,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -523,6 +545,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-08-07',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['minimal', 'low', 'medium', 'high'],
          },
@@ -552,6 +575,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-06-15',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['low', 'medium', 'high'],
          },
@@ -567,6 +591,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          updatedAt: '2025-06-15',
        },
        capabilities: {
+          vision: true,
          reasoningEffort: {
            values: ['low', 'medium', 'high'],
          },
@@ -581,7 +606,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          output: 8.0,
          updatedAt: '2025-06-15',
        },
-        capabilities: {},
+        capabilities: {
+          vision: true,
+        },
        contextWindow: 1000000,
      },
      {
@@ -620,6 +647,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -635,6 +663,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -649,6 +678,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        capabilities: {
          temperature: { min: 0, max: 1 },
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -664,6 +694,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -679,6 +710,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -693,6 +725,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        capabilities: {
          temperature: { min: 0, max: 1 },
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -708,6 +741,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          computerUse: true,
          maxOutputTokens: { max: 8192, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -723,6 +757,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          computerUse: true,
          maxOutputTokens: { max: 8192, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -736,6 +771,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
    modelPatterns: [/^gemini/],
    capabilities: {
      toolUsageControl: true,
+      vision: true,
    },
    icon: GeminiIcon,
    models: [
@@ -847,6 +883,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
    icon: VertexIcon,
    capabilities: {
      toolUsageControl: true,
+      vision: true,
    },
    models: [
      {
@@ -1005,6 +1042,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
    icon: xAIIcon,
    capabilities: {
      toolUsageControl: true,
+      vision: true,
    },
    models: [
      {
@@ -1277,7 +1315,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          output: 0.34,
          updatedAt: '2026-01-27',
        },
-        capabilities: {},
+        capabilities: {
+          vision: true,
+        },
        contextWindow: 131072,
      },
      {
@@ -1287,7 +1327,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          output: 0.6,
          updatedAt: '2026-01-27',
        },
-        capabilities: {},
+        capabilities: {
+          vision: true,
+        },
        contextWindow: 131072,
      },
      {
@@ -1369,6 +1411,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1381,6 +1424,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1453,6 +1497,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1465,6 +1510,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1489,6 +1535,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1501,6 +1548,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1549,6 +1597,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1561,6 +1610,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1585,6 +1635,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1597,6 +1648,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1609,6 +1661,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1621,6 +1674,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1645,6 +1699,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1657,6 +1712,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 256000,
      },
@@ -1710,6 +1766,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -1724,6 +1781,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -1738,6 +1796,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -1752,6 +1811,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
          temperature: { min: 0, max: 1 },
          nativeStructuredOutputs: true,
          maxOutputTokens: { max: 64000, default: 8192 },
+          vision: true,
        },
        contextWindow: 200000,
      },
@@ -1764,6 +1824,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -1776,6 +1837,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -1788,6 +1850,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -1800,6 +1863,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 300000,
      },
@@ -1812,6 +1876,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 300000,
      },
@@ -1836,6 +1901,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 1000000,
      },
@@ -1848,6 +1914,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 3500000,
      },
@@ -1872,6 +1939,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1884,6 +1952,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1956,6 +2025,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -1992,6 +2062,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -2016,6 +2087,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -2028,6 +2100,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -2040,6 +2113,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
        },
        capabilities: {
          temperature: { min: 0, max: 1 },
+          vision: true,
        },
        contextWindow: 128000,
      },
@@ -2211,6 +2285,32 @@ export function getMaxTemperature(modelId: string): number | undefined {
  return capabilities?.temperature?.max
 }

+/**
+ * Reports whether the given model accepts vision/multimodal inputs (images, audio, video, PDFs).
+ */
+export function supportsVision(modelId: string): boolean {
+  const visionFlag = getModelCapabilities(modelId)?.vision
+  return Boolean(visionFlag)
+}
+
+/**
+ * Collects the ids of every model flagged as vision-capable, either directly
+ * on the model or through its provider's capabilities.
+ */
+export function getVisionModels(): string[] {
+  const visionModelIds: string[] = []
+  for (const definition of Object.values(PROVIDER_DEFINITIONS)) {
+    // A provider-level flag marks all of that provider's models as vision-capable
+    const inheritedVision = definition.capabilities?.vision
+    for (const { id, capabilities } of definition.models) {
+      // Skip models that have neither their own flag nor an inherited one
+      if (!capabilities.vision && !inheritedVision) continue
+      visionModelIds.push(id)
+    }
+  }
+  return visionModelIds
+}
+
 export function supportsToolUsageControl(providerId: string): boolean {
  return getProvidersWithToolUsageControl().includes(providerId)
 }
--- a/apps/sim/providers/types.ts
+++ b/apps/sim/providers/types.ts
@@ -111,9 +111,25 @@ export interface ProviderToolConfig {
  usageControl?: ToolUsageControl
 }

+/**
+ * Attachment content (files, images, documents) carried by a message's optional `attachment` field
+ */
+export interface AttachmentContent {
+  /** Source type: how the data was provided (as a URL, as base64, or as a file) */
+  sourceType: 'url' | 'base64' | 'file'
+  /** The URL or base64 data. NOTE(review): what this holds for the 'file' source type is not stated here; confirm */
+  data: string
+  /** Optional MIME type (e.g., 'image/png', 'application/pdf', 'audio/mp3') */
+  mimeType?: string
+  /** Optional filename for file uploads */
+  fileName?: string
+}
+
 export interface Message {
-  role: 'system' | 'user' | 'assistant' | 'function' | 'tool'
+  role: 'system' | 'user' | 'assistant' | 'function' | 'tool' | 'attachment'
  content: string | null
+  /** Attachment content for 'attachment' role messages */
+  attachment?: AttachmentContent
  name?: string
  function_call?: {
    name: string
--- a/apps/sim/providers/utils.ts
+++ b/apps/sim/providers/utils.ts
@@ -23,9 +23,11 @@ import {
  getReasoningEffortValuesForModel as getReasoningEffortValuesForModelFromDefinitions,
  getThinkingLevelsForModel as getThinkingLevelsForModelFromDefinitions,
  getVerbosityValuesForModel as getVerbosityValuesForModelFromDefinitions,
+  getVisionModels,
  PROVIDER_DEFINITIONS,
  supportsTemperature as supportsTemperatureFromDefinitions,
  supportsToolUsageControl as supportsToolUsageControlFromDefinitions,
+  supportsVision,
  updateOllamaModels as updateOllamaModelsInDefinitions,
 } from '@/providers/models'
 import type { ProviderId, ProviderToolConfig } from '@/providers/types'
@@ -1152,3 +1154,6 @@ export function checkForForcedToolUsageOpenAI(

  return { hasUsedForcedTool, usedForcedTools: updatedUsedForcedTools }
 }
+
+// Re-export the vision capability helpers imported from '@/providers/models' so they are also available from this module
+export { supportsVision, getVisionModels }
Author	SHA1	Message	Date
Siddharth Ganesan	5add92a613	Use b64	2026-01-29 18:10:47 -08:00
Siddharth Ganesan	4ab3e23cf7	Works	2026-01-29 17:35:34 -08:00
Siddharth Ganesan	aa893d56d8	Fix media	2026-01-29 17:20:38 -08:00
Siddharth Ganesan	599ffb77e6	v1	2026-01-29 17:19:29 -08:00
Siddharth Ganesan	86c3b82339	Add anvanced mode to messages	2026-01-29 13:19:48 -08:00
Siddharth Ganesan	d44c75f486	Add toggle, haven't tested	2026-01-29 13:17:27 -08:00