diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx index fface70b0..73054d273 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/messages-input/messages-input.tsx @@ -172,24 +172,46 @@ export function MessagesInput({ return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "" in the media object. If not selecting a file, omit the fileId field.` }, [workspaceFiles]) + // Get indices of media messages for subscription + const mediaIndices = useMemo( + () => + localMessages + .map((msg, index) => (msg.role === 'media' ? index : -1)) + .filter((i) => i !== -1), + [localMessages] + ) + + // Subscribe to file upload values for all media messages + const fileUploadValues = useSubBlockStore( + useCallback( + (state) => { + if (!activeWorkflowId) return {} + const blockValues = state.workflowValues[activeWorkflowId]?.[blockId] ?? {} + const result: Record = + {} + for (const index of mediaIndices) { + const fileUploadKey = `${subBlockId}-media-${index}` + const fileValue = blockValues[fileUploadKey] + if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) { + result[index] = fileValue as { name: string; path: string; type: string; size: number } + } + } + return result + }, + [activeWorkflowId, blockId, subBlockId, mediaIndices] + ) + ) + // Effect to sync FileUpload values to message media objects useEffect(() => { if (!activeWorkflowId || isPreview) return - // Get all subblock values for this workflow - const workflowValues = useSubBlockStore.getState().workflowValues[activeWorkflowId] - if (!workflowValues?.[blockId]) return - let hasChanges = false const updatedMessages = localMessages.map((msg, index) => { if (msg.role !== 'media') return msg - // Check if there's a FileUpload value for this media message - const fileUploadKey = `${subBlockId}-media-${index}` - const fileValue = workflowValues[blockId][fileUploadKey] - - if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) { - const uploadedFile = fileValue as { name: string; path: string; type: string; size: number } + const uploadedFile = fileUploadValues[index] + if (uploadedFile) { const newMedia: MediaContent = { sourceType: 'file', data: uploadedFile.path, @@ -220,7 +242,7 @@ export function MessagesInput({ setLocalMessages(updatedMessages) setMessages(updatedMessages) } - }, [activeWorkflowId, blockId, subBlockId, localMessages, isPreview, setMessages]) + }, [activeWorkflowId, localMessages, isPreview, setMessages, fileUploadValues]) const subBlockInput = useSubBlockInput({ blockId, diff --git a/apps/sim/executor/handlers/agent/agent-handler.ts b/apps/sim/executor/handlers/agent/agent-handler.ts index 3090d1748..612312e25 100644 --- a/apps/sim/executor/handlers/agent/agent-handler.ts +++ b/apps/sim/executor/handlers/agent/agent-handler.ts @@ -3,6 +3,8 @@ import { account, mcpServers } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { and, eq, inArray, isNull } from 'drizzle-orm' import { createMcpToolId } from '@/lib/mcp/utils' +import { bufferToBase64 } from '@/lib/uploads/utils/file-utils' +import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' import { refreshTokenIfNeeded } from '@/app/api/auth/oauth/utils' import { getAllBlocks } from '@/blocks' import type { BlockOutput } from '@/blocks/types' @@ -60,8 +62,10 @@ export class AgentBlockHandler implements BlockHandler { const streamingConfig = this.getStreamingConfig(ctx, block) const rawMessages = await this.buildMessages(ctx, filteredInputs) - // Transform media messages to provider-specific format - const messages = rawMessages ? this.transformMediaMessages(rawMessages, providerId) : undefined + // Transform media messages to provider-specific format (async for file fetching) + const messages = rawMessages + ? await this.transformMediaMessages(rawMessages, providerId, ctx) + : undefined const providerRequest = this.buildProviderRequest({ ctx, @@ -854,7 +858,11 @@ export class AgentBlockHandler implements BlockHandler { * Media messages are merged with the preceding or following user message, * or converted to a user message with multimodal content. */ - private transformMediaMessages(messages: Message[], providerId: string): Message[] { + private async transformMediaMessages( + messages: Message[], + providerId: string, + ctx: ExecutionContext + ): Promise { const result: Message[] = [] for (let i = 0; i < messages.length; i++) { @@ -865,8 +873,8 @@ export class AgentBlockHandler implements BlockHandler { continue } - // Media message - transform based on provider - const mediaContent = this.createProviderMediaContent(msg, providerId) + // Media message - transform based on provider (async for file fetching) + const mediaContent = await this.createProviderMediaContent(msg, providerId, ctx) if (!mediaContent) { logger.warn('Could not create media content for message', { msg }) continue @@ -891,10 +899,10 @@ export class AgentBlockHandler implements BlockHandler { // Post-process: ensure all user messages have consistent content format return result.map((msg) => { if (msg.role === 'user' && typeof msg.content === 'string') { - // Convert string content to provider-specific text format + // Convert string content to provider-specific text format (wrapped in array for multimodal) return { ...msg, - content: this.createTextContent(msg.content, providerId) as any, + content: [this.createTextContent(msg.content, providerId)] as any, } } return msg @@ -933,7 +941,11 @@ export class AgentBlockHandler implements BlockHandler { /** * Creates provider-specific media content from a media message */ - private createProviderMediaContent(msg: Message, providerId: string): any { + private async createProviderMediaContent( + msg: Message, + providerId: string, + ctx: ExecutionContext + ): Promise { const media = msg.media if (!media) return null @@ -977,7 +989,7 @@ export class AgentBlockHandler implements BlockHandler { switch (providerId) { case 'anthropic': - return this.createAnthropicMediaContent(sourceType, data, mimeType) + return this.createAnthropicMediaContent(sourceType, data, mimeType, ctx) case 'google': case 'vertex': @@ -1040,51 +1052,107 @@ export class AgentBlockHandler implements BlockHandler { /** * Creates Anthropic-compatible media content + * Anthropic requires base64 for internal/relative URLs since they can't fetch them */ - private createAnthropicMediaContent(sourceType: string, data: string, mimeType?: string): any { + private async createAnthropicMediaContent( + sourceType: string, + data: string, + mimeType?: string, + ctx?: ExecutionContext + ): Promise { const isImage = mimeType?.startsWith('image/') const isPdf = mimeType === 'application/pdf' - // Treat 'file' as 'url' since workspace files are served via URL - const isUrl = sourceType === 'url' || sourceType === 'file' + const isInternalUrl = data.startsWith('/') + const isExternalHttps = data.startsWith('https://') - if (isImage) { - if (isUrl) { + // For internal URLs (workspace files), fetch and convert to base64 + // Anthropic only supports external HTTPS URLs, not relative paths + if ((sourceType === 'url' || sourceType === 'file') && isInternalUrl) { + try { + logger.info('Fetching internal file for Anthropic base64 conversion', { + path: data.substring(0, 50), + }) + const buffer = await downloadFileFromUrl(data) + const base64Data = bufferToBase64(buffer) + + if (isImage) { + return { + type: 'image', + source: { + type: 'base64', + media_type: mimeType || 'image/png', + data: base64Data, + }, + } + } + + if (isPdf) { + return { + type: 'document', + source: { + type: 'base64', + media_type: 'application/pdf', + data: base64Data, + }, + } + } + + // Other file types - return as text fallback + return { + type: 'text', + text: `[File: ${mimeType || 'unknown type'}]`, + } + } catch (error) { + logger.error('Failed to fetch file for Anthropic', { error, path: data.substring(0, 50) }) + return { + type: 'text', + text: `[Failed to load file: ${mimeType || 'unknown type'}]`, + } + } + } + + // For external HTTPS URLs, Anthropic can fetch them directly + if ((sourceType === 'url' || sourceType === 'file') && isExternalHttps) { + if (isImage) { return { type: 'image', source: { type: 'url', url: data }, } } - // base64 - const base64Data = data.includes(',') ? data.split(',')[1] : data - return { - type: 'image', - source: { - type: 'base64', - media_type: mimeType || 'image/png', - data: base64Data, - }, - } - } - - if (isPdf) { - if (isUrl) { + if (isPdf) { return { type: 'document', source: { type: 'url', url: data }, } } + } + + // Already base64 encoded + if (sourceType === 'base64') { const base64Data = data.includes(',') ? data.split(',')[1] : data - return { - type: 'document', - source: { - type: 'base64', - media_type: 'application/pdf', - data: base64Data, - }, + if (isImage) { + return { + type: 'image', + source: { + type: 'base64', + media_type: mimeType || 'image/png', + data: base64Data, + }, + } + } + if (isPdf) { + return { + type: 'document', + source: { + type: 'base64', + media_type: 'application/pdf', + data: base64Data, + }, + } } } - // Fallback for other types + // Fallback for unsupported types return { type: 'text', text: `[File: ${mimeType || 'unknown type'}]`, diff --git a/apps/sim/providers/anthropic/index.ts b/apps/sim/providers/anthropic/index.ts index 29e277825..27372c743 100644 --- a/apps/sim/providers/anthropic/index.ts +++ b/apps/sim/providers/anthropic/index.ts @@ -109,9 +109,15 @@ export const anthropicProvider: ProviderConfig = { ], }) } else { + // Handle content that's already in array format (from transformMediaMessages) + const content = Array.isArray(msg.content) + ? msg.content + : msg.content + ? [{ type: 'text', text: msg.content }] + : [] messages.push({ role: msg.role === 'assistant' ? 'assistant' : 'user', - content: msg.content ? [{ type: 'text', text: msg.content }] : [], + content, }) } })