This commit is contained in:
Siddharth Ganesan
2026-01-29 17:19:29 -08:00
parent 86c3b82339
commit 599ffb77e6
10 changed files with 698 additions and 49 deletions

View File

@@ -9,6 +9,8 @@ import {
} from 'react'
import { isEqual } from 'lodash'
import { ArrowLeftRight, ChevronDown, ChevronsUpDown, ChevronUp, Plus } from 'lucide-react'
import { useParams } from 'next/navigation'
import { createLogger } from '@sim/logger'
import {
Button,
Popover,
@@ -22,15 +24,28 @@ import { cn } from '@/lib/core/utils/cn'
import { EnvVarDropdown } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/env-var-dropdown'
import { FileUpload } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/file-upload/file-upload'
import { formatDisplayText } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/formatted-text'
import { ShortInput } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input'
import { TagDropdown } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/tag-dropdown/tag-dropdown'
import { useSubBlockInput } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/hooks/use-sub-block-input'
import { useSubBlockValue } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/hooks/use-sub-block-value'
import { useWorkflowRegistry } from '@/stores/workflows/registry/store'
import { useSubBlockStore } from '@/stores/workflows/subblock/store'
import type { WandControlHandlers } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/sub-block'
import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
import { useWand } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-wand'
import type { SubBlockConfig } from '@/blocks/types'
const logger = createLogger('MessagesInput')
const MIN_TEXTAREA_HEIGHT_PX = 80
/**
 * Workspace file record from API.
 * Instances are stored in component state from the `files` array of the
 * `/api/workspaces/{workspaceId}/files` response.
 */
interface WorkspaceFile {
/** Identifier compared against a media object's `fileId` when the wand selects a file */
id: string
/** File name; listed in the wand sources text and copied into the upload value */
name: string
/** Path copied into the upload value when the wand selects this file */
path: string
/** Type string; tested for `image/`, `audio/`, `video/` prefixes and `application/pdf` when filtering media files */
type: string
}
const MAX_TEXTAREA_HEIGHT_PX = 320
/** Pattern to match complete message objects in JSON */
@@ -49,14 +64,20 @@ const ROLE_BEFORE_CONTENT_PATTERN = /"role"\s*:\s*"(system|user|assistant|media)
const unescapeContent = (str: string): string =>
str.replace(/\\n/g, '\n').replace(/\\"/g, '"').replace(/\\\\/g, '\\')
/**
 * Media content for multimodal messages.
 * Attached to a message whose role is 'media'.
 */
interface MediaContent {
/**
 * Source type: how the data was provided.
 * 'file' is written when the value comes from a FileUpload sub-block;
 * 'url' / 'base64' are derived from text entered in the reference input.
 */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data; for 'file' this holds the uploaded file's path */
data: string
/** MIME type (e.g., 'image/png', 'application/pdf', 'audio/mp3') */
mimeType?: string
/** Optional filename for file uploads */
fileName?: string
/** Optional workspace file ID (used by wand to select existing files) */
fileId?: string
}
/**
@@ -93,14 +114,109 @@ export function MessagesInput({
disabled = false,
wandControlRef,
}: MessagesInputProps) {
const params = useParams()
const workspaceId = params?.workspaceId as string
const [messages, setMessages] = useSubBlockValue<Message[]>(blockId, subBlockId, false)
const [localMessages, setLocalMessages] = useState<Message[]>([{ role: 'user', content: '' }])
const accessiblePrefixes = useAccessibleReferencePrefixes(blockId)
const [openPopoverIndex, setOpenPopoverIndex] = useState<number | null>(null)
const { activeWorkflowId } = useWorkflowRegistry()
// Local media mode state - basic = FileUpload, advanced = URL/base64 textarea
const [mediaMode, setMediaMode] = useState<'basic' | 'advanced'>('basic')
// Workspace files for wand context
const [workspaceFiles, setWorkspaceFiles] = useState<WorkspaceFile[]>([])
// Retrieves the workspace's file list and stores it so the wand can reference it.
// Does nothing without a workspace id or while rendering a preview.
const loadWorkspaceFiles = useCallback(async () => {
  const canLoad = Boolean(workspaceId) && !isPreview
  if (!canLoad) return

  try {
    const response = await fetch(`/api/workspaces/${workspaceId}/files`)
    const payload = await response.json()
    // Only a response flagged as successful replaces the stored list
    if (!payload.success) return
    setWorkspaceFiles(payload.files || [])
  } catch (error) {
    logger.error('Error loading workspace files:', error)
  }
}, [workspaceId, isPreview])

// Kick off the load on mount and whenever the loader callback is re-created
useEffect(() => {
  void loadWorkspaceFiles()
}, [loadWorkspaceFiles])
// Text handed to the wand as `sources`: lists the workspace's media files
// (images, audio, video, PDFs) or explains that none are available.
const sourcesInfo = useMemo(() => {
  if (workspaceFiles.length === 0) {
    return 'No workspace files available. The user can upload files manually after generation.'
  }

  // A file counts as media when its type is image/audio/video or a PDF
  const isMediaType = (mime: string): boolean =>
    mime.startsWith('image/') ||
    mime.startsWith('audio/') ||
    mime.startsWith('video/') ||
    mime === 'application/pdf'

  const entries: string[] = []
  for (const file of workspaceFiles) {
    if (!isMediaType(file.type)) continue
    entries.push(` - id: "${file.id}", name: "${file.name}", type: "${file.type}"`)
  }
  const filesList = entries.join('\n')

  if (!filesList) {
    return 'No media files in workspace. The user can upload files manually after generation.'
  }

  return `AVAILABLE WORKSPACE FILES (optional - you don't have to select one):\n${filesList}\n\nTo use a file, include "fileId": "<id>" in the media object. If not selecting a file, omit the fileId field.`
}, [workspaceFiles])
// Effect to sync FileUpload values to message media objects.
// For every message with role 'media', looks up the sub-block value stored under
// `${subBlockId}-media-${index}` and, when it holds an object with a `path`, copies
// it into the message as a 'file' media object. Local and persisted messages are
// only written when at least one message actually changed.
// NOTE(review): the store is read through getState() and is not part of the
// dependency list, so from this block alone the effect appears to re-run only when
// the listed dependencies (e.g. localMessages) change — confirm that a new upload
// also triggers a re-run.
useEffect(() => {
// Nothing to sync without an active workflow, or while rendering a preview
if (!activeWorkflowId || isPreview) return
// Get all subblock values for this workflow
const workflowValues = useSubBlockStore.getState().workflowValues[activeWorkflowId]
// Bail out when this block has no stored sub-block values
if (!workflowValues?.[blockId]) return
// Set to true as soon as any media message receives new file data
let hasChanges = false
const updatedMessages = localMessages.map((msg, index) => {
// Non-media messages pass through untouched
if (msg.role !== 'media') return msg
// Check if there's a FileUpload value for this media message
const fileUploadKey = `${subBlockId}-media-${index}`
const fileValue = workflowValues[blockId][fileUploadKey]
// Only an object carrying a `path` is treated as an uploaded file
if (fileValue && typeof fileValue === 'object' && 'path' in fileValue) {
const uploadedFile = fileValue as { name: string; path: string; type: string; size: number }
const newMedia: MediaContent = {
sourceType: 'file',
data: uploadedFile.path,
mimeType: uploadedFile.type,
fileName: uploadedFile.name,
}
// Only update if different (field-by-field comparison keeps unchanged messages identical)
if (
msg.media?.data !== newMedia.data ||
msg.media?.sourceType !== newMedia.sourceType ||
msg.media?.mimeType !== newMedia.mimeType ||
msg.media?.fileName !== newMedia.fileName
) {
hasChanges = true
return {
...msg,
// The file name replaces the message content; an empty name keeps the existing content
content: uploadedFile.name || msg.content,
media: newMedia,
}
}
}
return msg
})
// Write both the local state and the persisted sub-block value in one go
if (hasChanges) {
setLocalMessages(updatedMessages)
setMessages(updatedMessages)
}
}, [activeWorkflowId, blockId, subBlockId, localMessages, isPreview, setMessages])
const subBlockInput = useSubBlockInput({
blockId,
subBlockId,
@@ -186,6 +302,7 @@ export function MessagesInput({
const wandHook = useWand({
wandConfig: config.wandConfig,
currentValue: getMessagesJson(),
sources: sourcesInfo,
onStreamStart: () => {
streamBufferRef.current = ''
setLocalMessages([{ role: 'system', content: '' }])
@@ -200,6 +317,46 @@ export function MessagesInput({
onGeneratedContent: (content) => {
const validMessages = parseMessages(content)
if (validMessages) {
// Process media messages - only allow fileId to set files, sanitize other attempts
validMessages.forEach((msg, index) => {
if (msg.role === 'media') {
// Check if this is an existing file with valid data (preserve it)
const hasExistingFile = msg.media?.sourceType === 'file' &&
msg.media?.data?.startsWith('/api/') &&
msg.media?.fileName
if (hasExistingFile) {
// Preserve existing file data as-is
return
}
// Check if wand provided a fileId to select a workspace file
if (msg.media?.fileId) {
const file = workspaceFiles.find((f) => f.id === msg.media?.fileId)
if (file) {
// Set the file value in SubBlockStore so FileUpload picks it up
const fileUploadKey = `${subBlockId}-media-${index}`
const uploadedFile = {
name: file.name,
path: file.path,
type: file.type,
size: 0, // Size not available from workspace files list
}
useSubBlockStore.getState().setValue(blockId, fileUploadKey, uploadedFile)
// Clear the media object - the FileUpload will sync the file data via useEffect
// DON'T set media.data here as it would appear in the ShortInput (advanced mode)
msg.media = undefined
return
}
}
// Sanitize: clear any media object that isn't a valid existing file or fileId match
// This prevents the LLM from setting arbitrary data/variable references
msg.media = undefined
}
})
setLocalMessages(validMessages)
setMessages(validMessages)
} else {
@@ -283,6 +440,7 @@ export function MessagesInput({
role,
content: updatedMessages[index].content || '',
media: updatedMessages[index].media || {
sourceType: 'file',
data: '',
},
}
@@ -700,29 +858,41 @@ export function MessagesInput({
disabled={disabled}
/>
) : (
<textarea
ref={(el) => {
textareaRefs.current[fieldId] = el
<ShortInput
blockId={blockId}
subBlockId={`${subBlockId}-media-ref-${index}`}
placeholder='Reference file from previous block...'
config={{
id: `${subBlockId}-media-ref-${index}`,
type: 'short-input',
}}
className='relative z-[2] m-0 box-border h-auto min-h-[60px] w-full resize-none overflow-y-auto overflow-x-hidden whitespace-pre-wrap break-words rounded-[4px] border border-[var(--border-1)] bg-transparent px-[8px] py-[6px] font-medium font-sans text-[var(--text-primary)] text-sm leading-[1.5] outline-none transition-colors [-ms-overflow-style:none] [scrollbar-width:none] placeholder:text-[var(--text-muted)] hover:border-[var(--border-2)] focus:border-[var(--border-2)] focus:outline-none focus-visible:outline-none disabled:cursor-not-allowed [&::-webkit-scrollbar]:hidden'
placeholder='Enter URL or paste base64 data...'
value={message.media?.data || ''}
onChange={(e) => {
value={
// Only show value for variable references, not file uploads
message.media?.sourceType === 'file' ? '' : (message.media?.data || '')
}
onChange={(newValue: string) => {
const updatedMessages = [...localMessages]
if (updatedMessages[index].role === 'media') {
// Determine sourceType based on content
let sourceType: 'url' | 'base64' = 'url'
if (newValue.startsWith('data:') || newValue.includes(';base64,')) {
sourceType = 'base64'
}
updatedMessages[index] = {
...updatedMessages[index],
content: e.target.value.substring(0, 50),
content: newValue.substring(0, 50),
media: {
...updatedMessages[index].media,
data: e.target.value,
sourceType,
data: newValue,
},
}
setLocalMessages(updatedMessages)
setMessages(updatedMessages)
}
setLocalMessages(updatedMessages)
setMessages(updatedMessages)
}}
disabled={isPreview || disabled}
isPreview={isPreview}
disabled={disabled}
/>
)}
</div>

View File

@@ -63,6 +63,8 @@ export interface WandConfig {
interface UseWandProps {
wandConfig?: WandConfig
currentValue?: string
/** Additional context about available sources/references for the prompt */
sources?: string
onGeneratedContent: (content: string) => void
onStreamChunk?: (chunk: string) => void
onStreamStart?: () => void
@@ -72,6 +74,7 @@ interface UseWandProps {
export function useWand({
wandConfig,
currentValue,
sources,
onGeneratedContent,
onStreamChunk,
onStreamStart,
@@ -154,6 +157,9 @@ export function useWand({
if (systemPrompt.includes('{context}')) {
systemPrompt = systemPrompt.replace('{context}', contextInfo)
}
if (systemPrompt.includes('{sources}')) {
systemPrompt = systemPrompt.replace('{sources}', sources || 'No upstream sources available')
}
const userMessage = prompt

View File

@@ -95,10 +95,12 @@ export const AgentBlock: BlockConfig<AgentResponse> = {
Current messages: {context}
{sources}
RULES:
1. Generate ONLY a valid JSON array - no markdown, no explanations
2. Each message object must have "role" (system/user/assistant) and "content" (string)
3. You can generate any number of messages as needed
2. Each message object must have "role" and "content" properties
3. Valid roles are: "system", "user", "assistant", "media"
4. Content can be as long as necessary - don't truncate
5. If editing existing messages, preserve structure unless asked to change it
6. For new agents, create DETAILED, PROFESSIONAL system prompts that include:
@@ -108,6 +110,16 @@ RULES:
- Critical thinking or quality guidelines
- How to handle edge cases and uncertainty
MEDIA MESSAGES:
- Use role "media" to include images, audio, video, or documents in a multimodal conversation
- IMPORTANT: If a media message in the current context has a "media" object with file data, ALWAYS preserve that entire "media" object exactly as-is
- When creating NEW media messages, you can either:
1. Just set role to "media" with descriptive content - user will upload the file manually
2. Select a file from the available workspace files by including "fileId" in the media object (optional)
- You do NOT have to select a file - it's completely optional
- Example without file: {"role": "media", "content": "Analyze this image for text and objects"}
- Example with file selection: {"role": "media", "content": "Analyze this image", "media": {"fileId": "abc123"}}
EXAMPLES:
Research agent:
@@ -116,8 +128,8 @@ Research agent:
Code reviewer:
[{"role": "system", "content": "You are a Senior Code Reviewer with expertise in software architecture, security, and best practices. Your role is to provide thorough, constructive code reviews that improve code quality and help developers grow.\\n\\n## Review Methodology\\n\\n1. **Security First**: Check for vulnerabilities including injection attacks, authentication flaws, data exposure, and insecure dependencies.\\n\\n2. **Code Quality**: Evaluate readability, maintainability, adherence to DRY/SOLID principles, and appropriate abstraction levels.\\n\\n3. **Performance**: Identify potential bottlenecks, unnecessary computations, memory leaks, and optimization opportunities.\\n\\n4. **Testing**: Assess test coverage, edge case handling, and testability of the code structure.\\n\\n## Output Format\\n\\n### Summary\\nBrief overview of the code's purpose and overall assessment.\\n\\n### Critical Issues\\nSecurity vulnerabilities or bugs that must be fixed before merging.\\n\\n### Improvements\\nSuggested enhancements with clear explanations of why and how.\\n\\n### Positive Aspects\\nHighlight well-written code to reinforce good practices.\\n\\nBe specific with line references. Provide code examples for suggested changes. Balance critique with encouragement."}, {"role": "user", "content": "<start.input>"}]
Writing assistant:
[{"role": "system", "content": "You are a skilled Writing Editor and Coach. Your role is to help users improve their writing through constructive feedback, editing suggestions, and guidance on style, clarity, and structure.\\n\\n## Editing Approach\\n\\n1. **Clarity**: Ensure ideas are expressed clearly and concisely. Eliminate jargon unless appropriate for the audience.\\n\\n2. **Structure**: Evaluate logical flow, paragraph organization, and transitions between ideas.\\n\\n3. **Voice & Tone**: Maintain consistency and appropriateness for the intended audience and purpose.\\n\\n4. **Grammar & Style**: Correct errors while respecting the author's voice.\\n\\n## Output Format\\n\\n### Overall Impression\\nBrief assessment of the piece's strengths and areas for improvement.\\n\\n### Structural Feedback\\nComments on organization, flow, and logical progression.\\n\\n### Line-Level Edits\\nSpecific suggestions with explanations, not just corrections.\\n\\n### Revised Version\\nWhen appropriate, provide an edited version demonstrating improvements.\\n\\nBe encouraging while honest. Explain the reasoning behind suggestions to help the writer improve."}, {"role": "user", "content": "<start.input>"}]
Image analysis agent:
[{"role": "system", "content": "You are an expert image analyst. Describe images in detail, identify objects, text, and patterns. Provide structured analysis."}, {"role": "media", "content": "Analyze this image"}]
Return ONLY the JSON array.`,
placeholder: 'Describe what you want to create or change...',

View File

@@ -811,17 +811,41 @@ export class AgentBlockHandler implements BlockHandler {
return messages.length > 0 ? messages : undefined
}
private extractValidMessages(messages?: Message[]): Message[] {
if (!messages || !Array.isArray(messages)) return []
private extractValidMessages(messages?: Message[] | string): Message[] {
if (!messages) return []
return messages.filter(
(msg): msg is Message =>
msg &&
typeof msg === 'object' &&
'role' in msg &&
'content' in msg &&
['system', 'user', 'assistant', 'media'].includes(msg.role)
)
// Handle raw JSON string input (from advanced mode)
let messageArray: unknown[]
if (typeof messages === 'string') {
const trimmed = messages.trim()
if (!trimmed) return []
try {
const parsed = JSON.parse(trimmed)
if (!Array.isArray(parsed)) {
logger.warn('Parsed messages JSON is not an array', { parsed })
return []
}
messageArray = parsed
} catch (error) {
logger.warn('Failed to parse messages JSON string', { error, messages: trimmed.substring(0, 100) })
return []
}
} else if (Array.isArray(messages)) {
messageArray = messages
} else {
return []
}
return messageArray.filter((msg): msg is Message => {
if (!msg || typeof msg !== 'object') return false
const m = msg as Record<string, unknown>
return (
'role' in m &&
'content' in m &&
typeof m.role === 'string' &&
['system', 'user', 'assistant', 'media'].includes(m.role)
)
})
}
/**
@@ -914,6 +938,35 @@ export class AgentBlockHandler implements BlockHandler {
const { sourceType, data, mimeType } = media
// Validate data is not empty
if (!data || !data.trim()) {
logger.warn('Empty media data, skipping media content')
return null
}
// Validate URL format if sourceType is URL
if (sourceType === 'url' || sourceType === 'file') {
const trimmedData = data.trim()
// Must start with http://, https://, or / (relative path for workspace files)
if (!trimmedData.startsWith('http://') &&
!trimmedData.startsWith('https://') &&
!trimmedData.startsWith('/')) {
logger.warn('Invalid URL format for media content', { data: trimmedData.substring(0, 50) })
// Try to salvage by treating as text
return { type: 'text', text: `[Invalid media URL: ${trimmedData.substring(0, 30)}...]` }
}
}
// Validate base64 format
if (sourceType === 'base64') {
const trimmedData = data.trim()
// Should be a data URL or raw base64
if (!trimmedData.startsWith('data:') && !/^[A-Za-z0-9+/]+=*$/.test(trimmedData.replace(/\s/g, ''))) {
logger.warn('Invalid base64 format for media content', { data: trimmedData.substring(0, 50) })
return { type: 'text', text: `[Invalid base64 data]` }
}
}
switch (providerId) {
case 'anthropic':
return this.createAnthropicMediaContent(sourceType, data, mimeType)
@@ -922,8 +975,14 @@ export class AgentBlockHandler implements BlockHandler {
case 'vertex':
return this.createGeminiMediaContent(sourceType, data, mimeType)
case 'mistral':
return this.createMistralMediaContent(sourceType, data, mimeType)
case 'bedrock':
return this.createBedrockMediaContent(sourceType, data, mimeType)
default:
// OpenAI format (used by OpenAI, Azure, xAI, Mistral, etc.)
// OpenAI format (used by OpenAI, Azure, xAI, Groq, etc.)
return this.createOpenAIMediaContent(sourceType, data, mimeType)
}
}
@@ -938,15 +997,10 @@ export class AgentBlockHandler implements BlockHandler {
): any {
const isImage = mimeType?.startsWith('image/')
const isAudio = mimeType?.startsWith('audio/')
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isImage) {
if (sourceType === 'url') {
return {
type: 'image_url',
image_url: { url: data, detail: 'auto' },
}
}
// base64 or file (already converted to base64)
return {
type: 'image_url',
image_url: { url: data, detail: 'auto' },
@@ -990,9 +1044,11 @@ export class AgentBlockHandler implements BlockHandler {
): any {
const isImage = mimeType?.startsWith('image/')
const isPdf = mimeType === 'application/pdf'
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isImage) {
if (sourceType === 'url') {
if (isUrl) {
return {
type: 'image',
source: { type: 'url', url: data },
@@ -1011,7 +1067,7 @@ export class AgentBlockHandler implements BlockHandler {
}
if (isPdf) {
if (sourceType === 'url') {
if (isUrl) {
return {
type: 'document',
source: { type: 'url', url: data },
@@ -1043,7 +1099,10 @@ export class AgentBlockHandler implements BlockHandler {
data: string,
mimeType?: string
): any {
if (sourceType === 'url') {
// Treat 'file' as 'url' since workspace files are served via URL
const isUrl = sourceType === 'url' || sourceType === 'file'
if (isUrl) {
return {
fileData: {
mimeType: mimeType || 'application/octet-stream',
@@ -1062,6 +1121,114 @@ export class AgentBlockHandler implements BlockHandler {
}
}
/**
 * Builds the Mistral-format content part for a media attachment.
 *
 * Images are emitted with `image_url` as a plain string (not the nested
 * `{ url }` object used for the OpenAI format). Anything that is not an image
 * becomes a text placeholder naming the MIME type.
 *
 * @param sourceType - 'url', 'file' or 'base64'; 'file' is handled like 'url'
 * @param data - URL, data URL, or raw base64 payload
 * @param mimeType - MIME type of the media, if known
 * @returns An `image_url` part for images, otherwise a `text` part
 */
private createMistralMediaContent(
  sourceType: string,
  data: string,
  mimeType?: string
): any {
  // Fallback for non-image types (including a missing MIME type)
  if (!mimeType?.startsWith('image/')) {
    return {
      type: 'text',
      text: `[File: ${mimeType || 'unknown type'}]`,
    }
  }

  // Workspace files are served via URL, so 'file' is passed through like 'url'
  const servedByUrl = sourceType === 'url' || sourceType === 'file'

  let imageUrl: string
  if (servedByUrl || data.includes(',')) {
    // URLs and payloads that already contain a comma are used verbatim
    imageUrl = data
  } else {
    // Raw base64 is wrapped into a data URL
    imageUrl = `data:${mimeType || 'image/png'};base64,${data}`
  }

  return {
    type: 'image_url',
    image_url: imageUrl,
  }
}
/**
 * Builds an intermediate Bedrock media descriptor for a media attachment.
 *
 * The result is not a final Converse content block: images become
 * `bedrock_image` and PDFs become `bedrock_document` marker objects carrying
 * either a `url` or raw base64 `data`. Any other MIME type yields a text
 * placeholder.
 *
 * @param sourceType - 'url', 'file' or 'base64'; 'file' is handled like 'url'
 * @param data - URL, data URL, or raw base64 payload
 * @param mimeType - MIME type of the media, if known
 * @returns A `bedrock_image` / `bedrock_document` descriptor or a `text` part
 */
private createBedrockMediaContent(
  sourceType: string,
  data: string,
  mimeType?: string
): any {
  // Map a MIME type onto an image format name, defaulting to 'png'
  const resolveFormat = (mime?: string): string => {
    if (!mime) return 'png'
    if (mime.includes('jpeg') || mime.includes('jpg')) return 'jpeg'
    for (const candidate of ['png', 'gif', 'webp']) {
      if (mime.includes(candidate)) return candidate
    }
    return 'png'
  }

  const isImage = mimeType?.startsWith('image/')
  const isPdf = mimeType === 'application/pdf'

  // Fallback for unsupported types
  if (!isImage && !isPdf) {
    return {
      type: 'text',
      text: `[File: ${mimeType || 'unknown type'}]`,
    }
  }

  // Shared leading fields; images carry a derived format, PDFs are always 'pdf'
  const descriptor = isImage
    ? { type: 'bedrock_image', format: resolveFormat(mimeType) }
    : { type: 'bedrock_document', format: 'pdf' }

  // Workspace files are served via URL, so 'file' is passed through like 'url'
  const servedByUrl = sourceType === 'url' || sourceType === 'file'
  if (servedByUrl) {
    return {
      ...descriptor,
      sourceType: 'url',
      url: data,
    }
  }

  // Base64 - keep only the part after the first comma of a data URL
  const base64Data = data.includes(',') ? data.split(',')[1] : data
  return {
    ...descriptor,
    sourceType: 'base64',
    data: base64Data,
  }
}
private processMemories(memories: any): Message[] {
if (!memories) return []

View File

@@ -6,8 +6,8 @@ export interface AgentInputs {
systemPrompt?: string
userPrompt?: string | object
memories?: any // Legacy memory block output
// New message array input (from messages-input subblock)
messages?: Message[]
// New message array input (from messages-input subblock or raw JSON from advanced mode)
messages?: Message[] | string
// Memory configuration
memoryType?: 'none' | 'conversation' | 'sliding_window' | 'sliding_window_tokens'
conversationId?: string // Required for all non-none memory types
@@ -46,8 +46,8 @@ export interface ToolInput {
* Media content for multimodal messages
*/
export interface MediaContent {
/** Mode: basic (file upload) or advanced (URL/base64 text input) */
mode: 'basic' | 'advanced'
/** Source type: how the data was provided */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data */
data: string
/** MIME type (e.g., 'image/png', 'application/pdf', 'audio/mp3') */

View File

@@ -16,6 +16,7 @@ import type { StreamingExecution } from '@/executor/types'
import { MAX_TOOL_ITERATIONS } from '@/providers'
import {
checkForForcedToolUsage,
convertToBedrockContentBlocks,
createReadableStreamFromBedrockStream,
generateToolUseId,
getBedrockInferenceProfileId,
@@ -116,9 +117,11 @@ export const bedrockProvider: ProviderConfig = {
}
} else {
const role: ConversationRole = msg.role === 'assistant' ? 'assistant' : 'user'
// Handle multimodal content arrays
const contentBlocks = convertToBedrockContentBlocks(msg.content || '')
messages.push({
role,
content: [{ text: msg.content || '' }],
content: contentBlocks,
})
}
}

View File

@@ -1,9 +1,195 @@
import type { ConverseStreamOutput } from '@aws-sdk/client-bedrock-runtime'
import type { ContentBlock, ConverseStreamOutput, ImageFormat } from '@aws-sdk/client-bedrock-runtime'
import { createLogger } from '@sim/logger'
import { trackForcedToolUsage } from '@/providers/utils'
const logger = createLogger('BedrockUtils')
/**
 * Converts message content (string or array) to a Bedrock ContentBlock array.
 * Handles multimodal content including images and documents.
 *
 * Recognized array items:
 * - `{ type: 'text', text }` and untyped `{ text }` items become text blocks
 * - `{ type: 'bedrock_image' }` / `{ type: 'bedrock_document' }` descriptors are
 *   delegated to the image / document block builders
 * - `{ type: 'image_url', image_url }` accepts either a string or a `{ url }` object
 * - anything else is logged and skipped
 *
 * @param content - Plain text, or an array of content parts
 * @returns At least one block; a single empty text block when nothing usable was found
 */
export function convertToBedrockContentBlocks(content: string | any[]): ContentBlock[] {
  // Simple string content
  if (typeof content === 'string') {
    return [{ text: content }]
  }

  // Non-array content: stringify it, but never turn a missing value into the
  // literal text "null" / "undefined"
  if (!Array.isArray(content)) {
    return [{ text: content == null ? '' : String(content) }]
  }

  const blocks: ContentBlock[] = []

  for (const item of content) {
    if (!item) continue

    // Text content; an empty text part is dropped quietly instead of being
    // reported as an unknown block type
    if (item.type === 'text') {
      if (item.text) {
        blocks.push({ text: item.text })
      }
      continue
    }

    // Gemini-style text (just { text: "..." })
    if (typeof item.text === 'string' && !item.type) {
      blocks.push({ text: item.text })
      continue
    }

    // Bedrock image content (from agent handler)
    if (item.type === 'bedrock_image') {
      const imageBlock = createBedrockImageBlock(item)
      if (imageBlock) {
        blocks.push(imageBlock)
      }
      continue
    }

    // Bedrock document content (from agent handler)
    if (item.type === 'bedrock_document') {
      const docBlock = createBedrockDocumentBlock(item)
      if (docBlock) {
        blocks.push(docBlock)
      }
      continue
    }

    // OpenAI-style image_url (fallback for direct OpenAI format)
    if (item.type === 'image_url' && item.image_url) {
      const url = typeof item.image_url === 'string' ? item.image_url : item.image_url?.url
      if (url) {
        const imageBlock = createBedrockImageBlockFromUrl(url)
        if (imageBlock) {
          blocks.push(imageBlock)
        }
      }
      continue
    }

    // Unknown type - log warning and skip
    logger.warn('Unknown content block type in Bedrock conversion:', { type: item.type })
  }

  // Ensure at least one text block
  if (blocks.length === 0) {
    blocks.push({ text: '' })
  }

  return blocks
}
/**
 * Turns a `bedrock_image` descriptor into a Bedrock ContentBlock.
 *
 * - base64 descriptors with data become an image block holding the decoded bytes
 * - url descriptors with a url are logged and replaced by a text placeholder,
 *   because no fetch is performed here
 * - anything else yields `null`
 *
 * @param item - Descriptor with `format`, `sourceType` and either `data` or `url`
 * @returns The content block, or `null` when the descriptor carries no usable payload
 */
function createBedrockImageBlock(item: {
  format: string
  sourceType: string
  data?: string
  url?: string
}): ContentBlock | null {
  const { sourceType, data, url } = item
  // An empty format falls back to 'png'
  const format = (item.format || 'png') as ImageFormat

  switch (sourceType) {
    case 'base64': {
      if (!data) return null
      // Decode the base64 payload into raw bytes
      const bytes = base64ToUint8Array(data)
      return {
        image: {
          format,
          source: { bytes },
        },
      }
    }
    case 'url': {
      if (!url) return null
      // The URL is not fetched here; warn and substitute a text placeholder
      logger.warn('Bedrock does not support image URLs directly. Image will be skipped.', {
        url,
      })
      return { text: `[Image from URL: ${url}]` }
    }
    default:
      return null
  }
}
/**
 * Turns a `bedrock_document` descriptor into a Bedrock ContentBlock.
 *
 * The emitted document block always uses format 'pdf' and the name 'document';
 * the descriptor's own `format` is not consulted. URL descriptors are logged and
 * replaced by a text placeholder.
 *
 * @param item - Descriptor with `sourceType` and either `data` or `url`
 * @returns The content block, or `null` when the descriptor carries no usable payload
 */
function createBedrockDocumentBlock(item: {
  format: string
  sourceType: string
  data?: string
  url?: string
}): ContentBlock | null {
  const { sourceType, data, url } = item

  if (sourceType === 'url') {
    if (!url) return null
    // The URL is not fetched here; warn and substitute a text placeholder
    logger.warn('Bedrock does not support document URLs directly. Document will be skipped.', {
      url,
    })
    return { text: `[Document from URL: ${url}]` }
  }

  // Everything except a base64 descriptor with data is unusable
  if (sourceType !== 'base64' || !data) return null

  return {
    document: {
      format: 'pdf',
      name: 'document',
      source: { bytes: base64ToUint8Array(data) },
    },
  }
}
/**
 * Creates a Bedrock image ContentBlock from a data URL or regular URL.
 *
 * - `data:image/<format>;base64,<payload>` with a format Bedrock accepts
 *   (png, jpeg, gif, webp; `jpg` is normalized to `jpeg`, case-insensitively)
 *   becomes an image block holding the decoded bytes.
 * - A data URL with an unsupported image format, or any other `data:` URL,
 *   becomes a short text placeholder. The payload itself is never copied into
 *   the placeholder or the log entry, since it can be arbitrarily large.
 * - A regular URL is logged and replaced by a text placeholder naming the URL,
 *   because no fetch is performed here.
 *
 * @param url - Data URL or regular URL of the image
 * @returns An image block, or a text block standing in for the image
 */
function createBedrockImageBlockFromUrl(url: string): ContentBlock | null {
  // Check if it's a data URL (base64)
  if (url.startsWith('data:')) {
    const match = url.match(/^data:image\/(\w+);base64,(.+)$/)
    if (match) {
      // Normalize case and map jpg to jpeg
      const rawFormat = match[1].toLowerCase()
      const format = rawFormat === 'jpg' ? 'jpeg' : rawFormat

      if (['png', 'jpeg', 'gif', 'webp'].includes(format)) {
        const bytes = base64ToUint8Array(match[2])
        return {
          image: {
            format: format as ImageFormat,
            source: { bytes },
          },
        }
      }

      logger.warn('Unsupported image format for Bedrock. Image will be skipped.', { format })
      return { text: `[Unsupported image format: ${format}]` }
    }

    // Not an inline base64 image: log only a short prefix, never the payload
    logger.warn('Unrecognized data URL for Bedrock image. Image will be skipped.', {
      prefix: url.substring(0, 50),
    })
    return { text: '[Unsupported inline image data]' }
  }

  // Regular URL - Bedrock doesn't support this directly
  logger.warn('Bedrock does not support image URLs directly. Image will be skipped.', { url })
  return { text: `[Image from URL: ${url}]` }
}
/**
 * Decodes a base64 string into its raw bytes.
 *
 * Uses `Buffer` when it is defined (Node.js); otherwise decodes with `atob`
 * and copies each character code into a new `Uint8Array`.
 *
 * @param base64 - Base64-encoded payload without a data-URL prefix
 * @returns The decoded bytes
 */
function base64ToUint8Array(base64: string): Uint8Array {
  const hasBuffer = typeof Buffer !== 'undefined'
  if (hasBuffer) {
    return Buffer.from(base64, 'base64')
  }

  // Browser fallback: atob yields one character per decoded byte
  const decoded = atob(base64)
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0))
}
export interface BedrockStreamUsage {
inputTokens: number
outputTokens: number

View File

@@ -34,6 +34,8 @@ export interface ModelCapabilities {
toolUsageControl?: boolean
computerUse?: boolean
nativeStructuredOutputs?: boolean
/** Whether the model supports vision/multimodal inputs (images, audio, video, PDFs) */
vision?: boolean
maxOutputTokens?: {
/** Maximum tokens for streaming requests */
max: number
@@ -120,6 +122,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 2 },
vision: true,
},
contextWindow: 128000,
},
@@ -132,6 +135,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-12-11',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
},
@@ -150,6 +154,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-11-14',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'low', 'medium', 'high'],
},
@@ -222,6 +227,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -240,6 +246,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -258,6 +265,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -287,6 +295,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-06-17',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['low', 'medium', 'high'],
},
@@ -302,6 +311,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-06-17',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['low', 'medium', 'high'],
},
@@ -317,6 +327,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-06-17',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['low', 'medium', 'high'],
},
@@ -333,6 +344,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 2 },
vision: true,
},
contextWindow: 1000000,
},
@@ -346,6 +358,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 2 },
vision: true,
},
contextWindow: 1000000,
},
@@ -359,6 +372,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 2 },
vision: true,
},
contextWindow: 1000000,
},
@@ -385,6 +399,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 2 },
vision: true,
},
contextWindow: 128000,
},
@@ -397,6 +412,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-12-11',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
},
@@ -415,6 +431,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-11-14',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'low', 'medium', 'high'],
},
@@ -433,6 +450,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-11-14',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'low', 'medium', 'high'],
},
@@ -451,6 +469,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-11-14',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'low', 'medium', 'high'],
},
@@ -469,6 +488,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-11-14',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['none', 'medium', 'high'],
},
@@ -487,6 +507,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -505,6 +526,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -523,6 +545,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-08-07',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['minimal', 'low', 'medium', 'high'],
},
@@ -552,6 +575,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-06-15',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['low', 'medium', 'high'],
},
@@ -567,6 +591,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2025-06-15',
},
capabilities: {
vision: true,
reasoningEffort: {
values: ['low', 'medium', 'high'],
},
@@ -581,7 +606,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
output: 8.0,
updatedAt: '2025-06-15',
},
capabilities: {},
capabilities: {
vision: true,
},
contextWindow: 1000000,
},
{
@@ -620,6 +647,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -635,6 +663,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -649,6 +678,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -664,6 +694,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -679,6 +710,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -693,6 +725,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
temperature: { min: 0, max: 1 },
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -708,6 +741,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
computerUse: true,
maxOutputTokens: { max: 8192, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -723,6 +757,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
computerUse: true,
maxOutputTokens: { max: 8192, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -736,6 +771,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
modelPatterns: [/^gemini/],
capabilities: {
toolUsageControl: true,
vision: true,
},
icon: GeminiIcon,
models: [
@@ -847,6 +883,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
icon: VertexIcon,
capabilities: {
toolUsageControl: true,
vision: true,
},
models: [
{
@@ -1005,6 +1042,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
icon: xAIIcon,
capabilities: {
toolUsageControl: true,
vision: true,
},
models: [
{
@@ -1277,7 +1315,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
output: 0.34,
updatedAt: '2026-01-27',
},
capabilities: {},
capabilities: {
vision: true,
},
contextWindow: 131072,
},
{
@@ -1287,7 +1327,9 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
output: 0.6,
updatedAt: '2026-01-27',
},
capabilities: {},
capabilities: {
vision: true,
},
contextWindow: 131072,
},
{
@@ -1369,6 +1411,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1381,6 +1424,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1453,6 +1497,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1465,6 +1510,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1489,6 +1535,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1501,6 +1548,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1549,6 +1597,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1561,6 +1610,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1585,6 +1635,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1597,6 +1648,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1609,6 +1661,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1621,6 +1674,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1645,6 +1699,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1657,6 +1712,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 256000,
},
@@ -1710,6 +1766,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -1724,6 +1781,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -1738,6 +1796,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -1752,6 +1811,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: { max: 64000, default: 8192 },
vision: true,
},
contextWindow: 200000,
},
@@ -1764,6 +1824,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 1000000,
},
@@ -1776,6 +1837,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 1000000,
},
@@ -1788,6 +1850,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 1000000,
},
@@ -1800,6 +1863,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 300000,
},
@@ -1812,6 +1876,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 300000,
},
@@ -1836,6 +1901,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 1000000,
},
@@ -1848,6 +1914,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 3500000,
},
@@ -1872,6 +1939,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1884,6 +1952,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1956,6 +2025,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -1992,6 +2062,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -2016,6 +2087,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -2028,6 +2100,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -2040,6 +2113,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
},
capabilities: {
temperature: { min: 0, max: 1 },
vision: true,
},
contextWindow: 128000,
},
@@ -2211,6 +2285,32 @@ export function getMaxTemperature(modelId: string): number | undefined {
return capabilities?.temperature?.max
}
/**
 * Reports whether a model is flagged as accepting vision/multimodal inputs
 * (images, audio, video, PDFs).
 *
 * @param modelId - Identifier of the model to look up.
 * @returns `true` when the capabilities resolved for the model carry a truthy
 * `vision` flag; `false` when the flag is falsy or no capabilities are resolved.
 */
export function supportsVision(modelId: string): boolean {
  const visionFlag = getModelCapabilities(modelId)?.vision
  return Boolean(visionFlag)
}
/**
 * Collects the ids of every model considered vision-capable.
 *
 * A model qualifies when its own capabilities set `vision`, or when the
 * provider that lists it sets `vision` in its provider-level capabilities.
 *
 * @returns Model ids in provider iteration order, then in each provider's
 * own model order.
 */
export function getVisionModels(): string[] {
  return Object.values(PROVIDER_DEFINITIONS).flatMap((definition) => {
    // Provider-level flag applies to every model the provider lists
    const inheritedVision = definition.capabilities?.vision
    return definition.models
      .filter((entry) => entry.capabilities.vision || inheritedVision)
      .map((entry) => entry.id)
  })
}
/**
 * Reports whether the given provider id is among those returned by
 * `getProvidersWithToolUsageControl()`.
 *
 * @param providerId - Identifier of the provider to check.
 * @returns `true` when the provider id appears in that list.
 */
export function supportsToolUsageControl(providerId: string): boolean {
  const providersWithControl = getProvidersWithToolUsageControl()
  return providersWithControl.includes(providerId)
}

View File

@@ -115,8 +115,8 @@ export interface ProviderToolConfig {
* Media content for multimodal messages
*/
export interface MediaContent {
/** Mode: basic (file upload) or advanced (URL/base64 text input) */
mode: 'basic' | 'advanced'
/** Source type: how the data was provided */
sourceType: 'url' | 'base64' | 'file'
/** The URL or base64 data */
data: string
/** MIME type (e.g., 'image/png', 'application/pdf', 'audio/mp3') */

View File

@@ -23,9 +23,11 @@ import {
getReasoningEffortValuesForModel as getReasoningEffortValuesForModelFromDefinitions,
getThinkingLevelsForModel as getThinkingLevelsForModelFromDefinitions,
getVerbosityValuesForModel as getVerbosityValuesForModelFromDefinitions,
getVisionModels,
PROVIDER_DEFINITIONS,
supportsTemperature as supportsTemperatureFromDefinitions,
supportsToolUsageControl as supportsToolUsageControlFromDefinitions,
supportsVision,
updateOllamaModels as updateOllamaModelsInDefinitions,
} from '@/providers/models'
import type { ProviderId, ProviderToolConfig } from '@/providers/types'
@@ -1152,3 +1154,6 @@ export function checkForForcedToolUsageOpenAI(
return { hasUsedForcedTool, usedForcedTools: updatedUsedForcedTools }
}
// Re-export the vision capability helpers imported from '@/providers/models'
// so they are also available to consumers of this module.
export { supportsVision, getVisionModels }