Made CrewAI vision block functional

2026-01-08 22:48:14 -05:00 · 2025-01-29 14:40:01 -08:00
parent 8058e36733
commit c5545397a9
3 changed files with 120 additions and 52 deletions
--- a/blocks/blocks/crewai.ts
+++ b/blocks/blocks/crewai.ts
@@ -4,8 +4,8 @@ import { CrewAIIcon } from '@/components/icons'
 export const CrewAIVisionBlock: BlockConfig = {
  type: 'crewaivision',
  toolbar: {
-    title: 'CrewAI Vision',
-    description: 'Analyze images with CrewAI Vision API',
+    title: 'CrewAI Vision Tool',
+    description: 'Analyze images using vision models',
    bgColor: '#C0392B',
    icon: CrewAIIcon,
    category: 'advanced'
@@ -16,9 +16,9 @@ export const CrewAIVisionBlock: BlockConfig = {
  workflow: {
    inputs: {
      apiKey: { type: 'string', required: true },
-      imageUrl: { type: 'string', required: false },
-      base64Image: { type: 'string', required: false },
-      model: { type: 'string', required: false }
+      imageUrl: { type: 'string', required: true },
+      model: { type: 'string', required: false },
+      prompt: { type: 'string', required: false }
    },
    outputs: {
      response: 'any'
@@ -29,7 +29,7 @@ export const CrewAIVisionBlock: BlockConfig = {
        title: 'API Key',
        type: 'short-input',
        layout: 'full',
-        placeholder: 'Enter your CrewAI API key',
+        placeholder: 'Enter your API key',
        password: true
      },
      {
@@ -37,21 +37,25 @@ export const CrewAIVisionBlock: BlockConfig = {
        title: 'Image URL',
        type: 'short-input',
        layout: 'full',
-        placeholder: 'Enter image URL'
-      },
-      {
-        id: 'base64Image',
-        title: 'Base64 Image',
-        type: 'code',
-        layout: 'full',
-        placeholder: 'Paste base64-encoded data'
+        placeholder: 'Enter publicly accessible image URL'
      },
      {
        id: 'model',
        title: 'Vision Model',
        type: 'dropdown',
        layout: 'half',
-        options: ['vision-latest', 'vision-beta']
+        options: [
+          'gpt-4o',
+          'claude-3-opus-20240229',
+          'claude-3-sonnet-20240229'
+        ]
+      },
+      {
+        id: 'prompt',
+        title: 'Custom Prompt',
+        type: 'long-input',
+        layout: 'full',
+        placeholder: 'Enter custom prompt for image analysis (optional)'
      }
    ]
  }
--- a/executor/index.ts
+++ b/executor/index.ts
@@ -90,32 +90,39 @@ export class Executor {
  private resolveInputs(block: SerializedBlock, context: ExecutionContext): Record<string, any> {
    const inputs = { ...block.config.params }
    const blockNameMap = new Map(
-      this.workflow.blocks
-        .map(b => {
-          const name = b.metadata?.title?.toLowerCase().replace(' ', '') || ''
-          return name ? [name, b.id] as [string, string] : null
-        })
-        .filter((entry): entry is [string, string] => entry !== null)
-    )
+      this.workflow.blocks.map(b => {
+        const title = b.metadata?.title || '';
+        const normalizedName = title.toLowerCase().replace(/\s+/g, '');
+        return [normalizedName, b.id];
+      })
+    );

    const blockStateMap = new Map(
      Object.entries(this.initialBlockStates)
        .filter(([_, state]) => state !== undefined)
    )

-    const connectionPattern = /<([a-z0-9]+)\.(string|number|boolean|res|any)>/g
+    const connectionPattern = /<([^>]+)\.(string|number|boolean|res|any)>/g
    
    return Object.entries(block.config.params || {}).reduce((acc, [key, value]) => {
      if (typeof value === 'string') {
        let resolvedValue = value
        Array.from(value.matchAll(connectionPattern)).forEach(match => {
          const [fullMatch, blockName, type] = match
-          const blockId = blockNameMap.get(blockName) || blockName
+          
+          // Look up the block by its normalized name (lowercased, whitespace removed)
+          const normalizedBlockName = blockName.toLowerCase().replace(/\s+/g, '');
+          const blockId = blockNameMap.get(normalizedBlockName);
+          
+          if (!blockId) {
+            return;
+          }
+          
          const sourceOutput = context.blockStates.get(blockId) || blockStateMap.get(blockId)
          
          if (sourceOutput) {
            const replacementValue = type === 'res' 
-              ? (sourceOutput.response?.method || sourceOutput.response || sourceOutput)
+              ? sourceOutput.response
              : (sourceOutput.output || sourceOutput.response)
            
            if (replacementValue !== undefined) {
--- a/tools/crewai/vision.ts
+++ b/tools/crewai/vision.ts
@@ -1,69 +1,126 @@
 import { ToolConfig, ToolResponse } from '../types'

-interface CrewAIVisionParams {
+interface VisionParams {
  apiKey: string
-  imageUrl?: string
-  base64Image?: string
+  imageUrl: string
  model?: string
+  prompt?: string
 }

-interface CrewAIVisionResponse extends ToolResponse {
+interface VisionResponse extends ToolResponse {
+  response: string
  tokens?: number
  model?: string
 }

-export const visionTool: ToolConfig<CrewAIVisionParams, CrewAIVisionResponse> = {
+export const visionTool: ToolConfig<VisionParams, VisionResponse> = {
  id: 'crewai.vision',
-  name: 'CrewAI Vision',
-  description: 'Analyze images using CrewAI\'s Vision model',
+  name: 'Vision Analysis',
+  description: 'Analyze images using vision models',
  version: '1.0.0',

  params: {
    apiKey: {
      type: 'string',
      required: true,
-      description: 'Your CrewAI API key'
+      description: 'API key for the selected model provider'
    },
    imageUrl: {
      type: 'string',
-      required: false,
+      required: true,
      description: 'Publicly accessible image URL'
    },
-    base64Image: {
-      type: 'string',
-      required: false,
-      description: 'Base64-encoded image data'
-    },
    model: {
      type: 'string',
      required: false,
-      default: 'vision-latest',
-      description: 'Model to use for image analysis'
+      description: 'Vision model to use (gpt-4o, claude-3-opus-20240229, etc)'
+    },
+    prompt: {
+      type: 'string',
+      required: false,
+      description: 'Custom prompt for image analysis'
    }
  },

  request: {
-    url: 'https://api.crewai.com/v1/vision/analyze',
    method: 'POST',
-    headers: (params) => ({
-      'Content-Type': 'application/json',
-      'Authorization': `Bearer ${params.apiKey}`
-    }),
+    url: (params) => {
+      if (params.model?.startsWith('claude-3')) {
+        return 'https://api.anthropic.com/v1/messages'
+      }
+      return 'https://api.openai.com/v1/chat/completions'
+    },
+    headers: (params) => {
+      const headers = {
+        'Content-Type': 'application/json',
+      }
+
+      return params.model?.startsWith('claude-3')
+        ? {
+            ...headers,
+            'x-api-key': params.apiKey,
+            'anthropic-version': '2023-06-01'
+          }
+        : {
+            ...headers,
+            'Authorization': `Bearer ${params.apiKey}`
+          }
+    },
    body: (params) => {
+      const defaultPrompt = "Please analyze this image and describe what you see in detail."
+      const prompt = params.prompt || defaultPrompt
+
+      if (params.model?.startsWith('claude-3')) {
+        return {
+          model: params.model,
+          messages: [{
+            role: "user",
+            content: [
+              { type: "text", text: prompt },
+              { type: "image", source: { type: "url", url: params.imageUrl } }
+            ]
+          }]
+        }
+      }
+
      return {
-        model: params.model,
-        imageUrl: params.imageUrl,
-        base64: params.base64Image
+        model: 'gpt-4o',
+        messages: [{
+          role: "user",
+          content: [
+            { type: "text", text: prompt },
+            { 
+              type: "image_url", 
+              image_url: {
+                url: params.imageUrl
+              }
+            }
+          ]
+        }],
+        max_tokens: 1000
      }
    }
  },

  transformResponse: async (response: Response) => {
    const data = await response.json()
+    
+    if (data.error) {
+      throw new Error(data.error.message || 'Unknown error occurred')
+    }
+
+    const result = data.content?.[0]?.text || data.choices?.[0]?.message?.content
+    if (!result) {
+      throw new Error('No output content in response')
+    }
+
    return {
-      output: data.result,
-      tokens: data.usage?.total_tokens,
-      model: data.model
+      output: result,
+      response: result,
+      model: data.model,
+      tokens: data.content 
+        ? (data.usage?.input_tokens + data.usage?.output_tokens)
+        : data.usage?.total_tokens
    }
  },