Made CrewAI vision block functional

This commit is contained in:
Waleed Latif
2025-01-29 14:40:01 -08:00
parent 8058e36733
commit c5545397a9
3 changed files with 120 additions and 52 deletions

View File

@@ -4,8 +4,8 @@ import { CrewAIIcon } from '@/components/icons'
export const CrewAIVisionBlock: BlockConfig = {
type: 'crewaivision',
toolbar: {
title: 'CrewAI Vision',
description: 'Analyze images with CrewAI Vision API',
title: 'CrewAI Vision Tool',
description: 'Analyze images using vision models',
bgColor: '#C0392B',
icon: CrewAIIcon,
category: 'advanced'
@@ -16,9 +16,9 @@ export const CrewAIVisionBlock: BlockConfig = {
workflow: {
inputs: {
apiKey: { type: 'string', required: true },
imageUrl: { type: 'string', required: false },
base64Image: { type: 'string', required: false },
model: { type: 'string', required: false }
imageUrl: { type: 'string', required: true },
model: { type: 'string', required: false },
prompt: { type: 'string', required: false }
},
outputs: {
response: 'any'
@@ -29,7 +29,7 @@ export const CrewAIVisionBlock: BlockConfig = {
title: 'API Key',
type: 'short-input',
layout: 'full',
placeholder: 'Enter your CrewAI API key',
placeholder: 'Enter your API key',
password: true
},
{
@@ -37,21 +37,25 @@ export const CrewAIVisionBlock: BlockConfig = {
title: 'Image URL',
type: 'short-input',
layout: 'full',
placeholder: 'Enter image URL'
},
{
id: 'base64Image',
title: 'Base64 Image',
type: 'code',
layout: 'full',
placeholder: 'Paste base64-encoded data'
placeholder: 'Enter publicly accessible image URL'
},
{
id: 'model',
title: 'Vision Model',
type: 'dropdown',
layout: 'half',
options: ['vision-latest', 'vision-beta']
options: [
'gpt-4o',
'claude-3-opus-20240229',
'claude-3-sonnet-20240229'
]
},
{
id: 'prompt',
title: 'Custom Prompt',
type: 'long-input',
layout: 'full',
placeholder: 'Enter custom prompt for image analysis (optional)'
}
]
}

View File

@@ -90,32 +90,39 @@ export class Executor {
private resolveInputs(block: SerializedBlock, context: ExecutionContext): Record<string, any> {
const inputs = { ...block.config.params }
const blockNameMap = new Map(
this.workflow.blocks
.map(b => {
const name = b.metadata?.title?.toLowerCase().replace(' ', '') || ''
return name ? [name, b.id] as [string, string] : null
})
.filter((entry): entry is [string, string] => entry !== null)
)
this.workflow.blocks.map(b => {
const title = b.metadata?.title || '';
const normalizedName = title.toLowerCase().replace(/\s+/g, '');
return [normalizedName, b.id];
})
);
const blockStateMap = new Map(
Object.entries(this.initialBlockStates)
.filter(([_, state]) => state !== undefined)
)
const connectionPattern = /<([a-z0-9]+)\.(string|number|boolean|res|any)>/g
const connectionPattern = /<([^>]+)\.(string|number|boolean|res|any)>/g
return Object.entries(block.config.params || {}).reduce((acc, [key, value]) => {
if (typeof value === 'string') {
let resolvedValue = value
Array.from(value.matchAll(connectionPattern)).forEach(match => {
const [fullMatch, blockName, type] = match
const blockId = blockNameMap.get(blockName) || blockName
// Try both the original format and normalized format
const normalizedBlockName = blockName.toLowerCase().replace(/\s+/g, '');
const blockId = blockNameMap.get(normalizedBlockName);
if (!blockId) {
return;
}
const sourceOutput = context.blockStates.get(blockId) || blockStateMap.get(blockId)
if (sourceOutput) {
const replacementValue = type === 'res'
? (sourceOutput.response?.method || sourceOutput.response || sourceOutput)
? sourceOutput.response
: (sourceOutput.output || sourceOutput.response)
if (replacementValue !== undefined) {

View File

@@ -1,69 +1,126 @@
import { ToolConfig, ToolResponse } from '../types'
/**
 * Parameters accepted by the vision tool.
 *
 * Only a URL-based image is supported; the earlier optional
 * `imageUrl` / `base64Image` pair is gone, so `imageUrl` is mandatory.
 */
interface VisionParams {
  /** API key for the selected model provider. */
  apiKey: string
  /** Publicly accessible URL of the image to analyze. */
  imageUrl: string
  /** Vision model to use (e.g. gpt-4o, claude-3-opus-20240229). */
  model?: string
  /** Custom prompt for image analysis. */
  prompt?: string
}
/**
 * Result of the vision tool, extending the base ToolResponse.
 */
interface VisionResponse extends ToolResponse {
  /** Text content extracted from the provider's reply. */
  response: string
  /** Token usage taken from the provider's `usage` object, when present. */
  tokens?: number
  /** Model name as reported in the provider's reply, when present. */
  model?: string
}
export const visionTool: ToolConfig<CrewAIVisionParams, CrewAIVisionResponse> = {
export const visionTool: ToolConfig<VisionParams, VisionResponse> = {
id: 'crewai.vision',
name: 'CrewAI Vision',
description: 'Analyze images using CrewAI\'s Vision model',
name: 'Vision Analysis',
description: 'Analyze images using vision models',
version: '1.0.0',
params: {
apiKey: {
type: 'string',
required: true,
description: 'Your CrewAI API key'
description: 'API key for the selected model provider'
},
imageUrl: {
type: 'string',
required: false,
required: true,
description: 'Publicly accessible image URL'
},
base64Image: {
type: 'string',
required: false,
description: 'Base64-encoded image data'
},
model: {
type: 'string',
required: false,
default: 'vision-latest',
description: 'Model to use for image analysis'
description: 'Vision model to use (gpt-4o, claude-3-opus-20240229, etc)'
},
prompt: {
type: 'string',
required: false,
description: 'Custom prompt for image analysis'
}
},
request: {
url: 'https://api.crewai.com/v1/vision/analyze',
method: 'POST',
headers: (params) => ({
'Content-Type': 'application/json',
'Authorization': `Bearer ${params.apiKey}`
}),
// Choose the endpoint from the model name: Anthropic Messages API for
// "claude-3*" models, OpenAI Chat Completions for everything else
// (including when no model is set).
url: (params) => {
  const isClaude = params.model?.startsWith('claude-3')
  return isClaude
    ? 'https://api.anthropic.com/v1/messages'
    : 'https://api.openai.com/v1/chat/completions'
},
// Build provider-specific auth headers. Anthropic expects the key in
// `x-api-key` plus an `anthropic-version` header; the OpenAI endpoint
// gets a Bearer token instead.
headers: (params) => {
  if (params.model?.startsWith('claude-3')) {
    return {
      'Content-Type': 'application/json',
      'x-api-key': params.apiKey,
      'anthropic-version': '2023-06-01'
    }
  }
  return {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${params.apiKey}`
  }
},
// Build the JSON request payload for the selected provider.
//
// - "claude-3*" models: Anthropic Messages payload (text block + URL image block).
// - anything else: OpenAI Chat Completions payload (text part + image_url part).
//
// Falls back to a generic "describe the image" prompt when none is supplied
// (an empty string also triggers the fallback).
body: (params) => {
  const defaultPrompt = "Please analyze this image and describe what you see in detail."
  const prompt = params.prompt || defaultPrompt
  // Same output cap for both providers.
  const maxTokens = 1000

  if (params.model?.startsWith('claude-3')) {
    return {
      model: params.model,
      // `max_tokens` is a required field of the Anthropic Messages API;
      // without it the request is rejected.
      max_tokens: maxTokens,
      messages: [{
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image", source: { type: "url", url: params.imageUrl } }
        ]
      }]
    }
  }

  return {
    // NOTE(review): the OpenAI branch always sends 'gpt-4o' regardless of
    // params.model — kept as-is; confirm whether other OpenAI models should pass through.
    model: 'gpt-4o',
    messages: [{
      role: "user",
      content: [
        { type: "text", text: prompt },
        {
          type: "image_url",
          image_url: {
            url: params.imageUrl
          }
        }
      ]
    }],
    max_tokens: maxTokens
  }
}
},
// Normalize either provider's JSON reply into the tool's response shape.
//
// Reads the text from `content[0].text` (Anthropic shape) or
// `choices[0].message.content` (OpenAI shape).
// Throws if the payload carries an `error` object or contains no text.
transformResponse: async (response: Response) => {
  const data = await response.json()

  if (data.error) {
    throw new Error(data.error.message || 'Unknown error occurred')
  }

  const result = data.content?.[0]?.text || data.choices?.[0]?.message?.content
  if (!result) {
    throw new Error('No output content in response')
  }

  // A `content` array means the Anthropic shape, whose usage is split into
  // input/output counts; otherwise use the single `total_tokens` figure.
  // Only sum when both counts are numbers, so a missing `usage` yields
  // undefined rather than NaN.
  const usage = data.usage
  let tokens: number | undefined
  if (data.content) {
    const hasBothCounts =
      typeof usage?.input_tokens === 'number' && typeof usage?.output_tokens === 'number'
    tokens = hasBothCounts ? usage.input_tokens + usage.output_tokens : undefined
  } else {
    tokens = usage?.total_tokens
  }

  return {
    output: result,
    response: result,
    model: data.model,
    tokens
  }
},