// Mirror of https://github.com/simstudioai/sim.git
// Synced 2026-01-09 15:07:55 -05:00
// 137 lines, 3.3 KiB, TypeScript
import { ToolConfig, ToolResponse } from '../types'
/** Input parameters accepted by the vision analysis tool. */
export interface VisionParams {
  /** API key for the selected model provider. */
  apiKey: string
  /** Publicly accessible URL of the image to analyze. */
  imageUrl: string
  /** Vision model to use (e.g. `gpt-4o`, `claude-3-opus-20240229`). */
  model?: string
  /** Custom prompt for the image analysis. */
  prompt?: string
}

/** Normalized result produced by the vision tool's response transformer. */
export interface VisionResponse extends ToolResponse {
  output: {
    /** Text content extracted from the provider response. */
    content: string
    /** Model identifier taken from the provider response, when present. */
    model?: string
    /** Token count taken from the provider's usage data, when present. */
    tokens?: number
  }
}

/** Prompt sent with the image when the caller does not supply one. */
const DEFAULT_VISION_PROMPT = 'Please analyze this image and describe what you see in detail.'

/** OpenAI model used when the caller does not request a specific model. */
const DEFAULT_OPENAI_VISION_MODEL = 'gpt-4o'

/** Upper bound on generated tokens; the Anthropic Messages API rejects requests without it. */
const VISION_MAX_TOKENS = 1000

/** Returns true when the model id should be routed to the Anthropic Messages API. */
const isAnthropicModel = (model?: string): boolean => model?.startsWith('claude') ?? false

/** Sums Anthropic input/output token counts; undefined when no usage data was returned. */
const sumAnthropicTokens = (usage?: {
  input_tokens?: number
  output_tokens?: number
}): number | undefined => {
  if (!usage) return undefined
  return (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0)
}

/**
 * Tool definition that sends an image URL plus a text prompt to a vision model.
 *
 * Models whose id starts with `claude` are sent to the Anthropic Messages API;
 * every other model (or no model at all) is sent to OpenAI Chat Completions.
 */
export const visionTool: ToolConfig<VisionParams, VisionResponse> = {
  id: 'crewai_vision',
  name: 'Vision Analysis',
  description: 'Analyze images using vision models',
  version: '1.0.0',

  params: {
    apiKey: {
      type: 'string',
      required: true,
      requiredForToolCall: true,
      description: 'API key for the selected model provider'
    },
    imageUrl: {
      type: 'string',
      required: true,
      description: 'Publicly accessible image URL'
    },
    model: {
      type: 'string',
      required: false,
      description: 'Vision model to use (gpt-4o, claude-3-opus-20240229, etc)'
    },
    prompt: {
      type: 'string',
      required: false,
      description: 'Custom prompt for image analysis'
    }
  },

  request: {
    method: 'POST',
    // Endpoint is chosen from the requested model's provider.
    url: (params) => {
      if (isAnthropicModel(params.model)) {
        return 'https://api.anthropic.com/v1/messages'
      }
      return 'https://api.openai.com/v1/chat/completions'
    },
    // Anthropic authenticates with `x-api-key` + a version header; OpenAI uses a bearer token.
    headers: (params) => {
      const headers = {
        'Content-Type': 'application/json',
      }

      return isAnthropicModel(params.model)
        ? {
            ...headers,
            'x-api-key': params.apiKey,
            'anthropic-version': '2023-06-01'
          }
        : {
            ...headers,
            'Authorization': `Bearer ${params.apiKey}`
          }
    },
    body: (params) => {
      const prompt = params.prompt || DEFAULT_VISION_PROMPT

      if (isAnthropicModel(params.model)) {
        return {
          model: params.model,
          // Required by the Messages API; omitting it yields a 400 response.
          max_tokens: VISION_MAX_TOKENS,
          messages: [{
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              { type: 'image', source: { type: 'url', url: params.imageUrl } }
            ]
          }]
        }
      }

      return {
        // Honor the requested model; fall back to the default only when none is given.
        model: params.model || DEFAULT_OPENAI_VISION_MODEL,
        messages: [{
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            {
              type: 'image_url',
              image_url: {
                url: params.imageUrl
              }
            }
          ]
        }],
        max_tokens: VISION_MAX_TOKENS
      }
    }
  },

  /**
   * Normalizes either provider's JSON payload into a `VisionResponse`.
   *
   * @throws Error when the payload carries an `error` object or no text content.
   */
  transformResponse: async (response: Response) => {
    const data = await response.json()

    if (data.error) {
      throw new Error(data.error.message || 'Unknown error occurred')
    }

    // Anthropic returns `content[]`; OpenAI returns `choices[].message.content`.
    const result = data.content?.[0]?.text || data.choices?.[0]?.message?.content
    if (!result) {
      throw new Error('No output content in response')
    }

    return {
      success: true,
      output: {
        content: result,
        model: data.model,
        // Anthropic reports input/output separately; OpenAI reports a total.
        tokens: data.content
          ? sumAnthropicTokens(data.usage)
          : data.usage?.total_tokens
      }
    }
  },

  /** Formats an error as `message (code)`, omitting the suffix when no code is available. */
  transformError: (error) => {
    const message = error.error?.message || error.message || 'Unknown error occurred'
    const code = error.error?.type || error.code
    return code ? `${message} (${code})` : message
  }
}