From 0015dc93de2b41a9694a4fa7028b31aa39e0fe94 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Wed, 21 May 2025 15:12:51 -0700 Subject: [PATCH] feat(image-gen): added gpt-image-1 and safe storage for base64 data (#396) * added gpt-image-1 and safe storage for base64 data * acknowledged PR comments * updated README * update CONTRIBUTING.md --- .github/CONTRIBUTING.md | 118 +++++----- README.md | 4 +- .../content/docs/tools/image_generator.mdx | 29 +-- apps/sim/app/api/proxy/image/route.ts | 106 +++++---- apps/sim/app/api/proxy/route.ts | 1 - apps/sim/blocks/blocks/image_generator.ts | 85 +++++-- apps/sim/stores/panel/console/store.ts | 76 ++++++- apps/sim/tools/openai/dalle.ts | 193 ---------------- apps/sim/tools/openai/image.ts | 213 +++++++++--------- apps/sim/tools/openai/index.ts | 4 +- apps/sim/tools/openai/types.ts | 8 + apps/sim/tools/registry.ts | 4 +- 12 files changed, 404 insertions(+), 437 deletions(-) delete mode 100644 apps/sim/tools/openai/dalle.ts diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 606e08886..d3c78b77f 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -130,54 +130,69 @@ Using clear and consistent commit messages makes it easier for everyone to under To set up your local development environment: -### Option 1: Using Docker (Recommended) +### Option 1: Using NPM Package (Simplest) -Docker provides a consistent development environment with all dependencies pre-configured. +The easiest way to run Sim Studio locally is using our NPM package: -1. **Clone the Repository:** +```bash +npx simstudio +``` - ```bash - git clone https://github.com//sim.git - cd sim - ``` +After running this command, open [http://localhost:3000/](http://localhost:3000/) in your browser. -2. **Start the Docker Environment:** +#### Options - ```bash - docker compose up -d - ``` +- `-p, --port `: Specify the port to run Sim Studio on (default: 3000) +- `--no-pull`: Skip pulling the latest Docker images - Or use the convenience script which handles environment setup and migrations: +#### Requirements - ```bash - chmod +x scripts/start_simstudio_docker.sh - ./scripts/start_simstudio_docker.sh - ``` +- Docker must be installed and running on your machine - This will: +### Option 2: Using Docker Compose - - Start a PostgreSQL database container - - Build and run the Next.js application with hot-reloading - - Set up all necessary environment variables - - Apply database migrations automatically +```bash +# Clone the repository +git clone https://github.com//sim.git +cd sim -3. **View Logs:** +# Start Sim Studio +docker compose -f docker-compose.prod.yml up -d +``` - ```bash - docker compose logs -f simstudio - ``` +Access the application at [http://localhost:3000/](http://localhost:3000/) -4. **Make Your Changes:** - - Edit files in your local directory - - Changes will be automatically reflected thanks to hot-reloading +#### Using Local Models -### Option 2: Using VS Code / Cursor Dev Containers +To use local models with Sim Studio: + +1. Pull models using our helper script: + +```bash +./apps/sim/scripts/ollama_docker.sh pull +``` + +2. Start Sim Studio with local model support: + +```bash +# With NVIDIA GPU support +docker compose --profile local-gpu -f docker-compose.ollama.yml up -d + +# Without GPU (CPU only) +docker compose --profile local-cpu -f docker-compose.ollama.yml up -d + +# If hosting on a server, update the environment variables in the docker-compose.prod.yml file +# to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434) +docker compose -f docker-compose.prod.yml up -d +``` + +### Option 3: Using VS Code / Cursor Dev Containers Dev Containers provide a consistent and easy-to-use development environment: 1. **Prerequisites:** - - Visual Studio Code + - Visual Studio Code or Cursor - Docker Desktop - [Remote - Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension for VS Code @@ -188,58 +203,53 @@ Dev Containers provide a consistent and easy-to-use development environment: git clone https://github.com//sim.git cd sim ``` - - Open the project in VS Code + - Open the project in VS Code/Cursor - When prompted, click "Reopen in Container" (or press F1 and select "Remote-Containers: Reopen in Container") - Wait for the container to build and initialize - - The development environment will be set up in the `sim/` directory 3. **Start Developing:** + - Run `bun run dev` in the terminal or use the `sim-start` alias - All dependencies and configurations are automatically set up - - Use the provided aliases (like `sim-start`) to run common commands - Your changes will be automatically hot-reloaded 4. **GitHub Codespaces:** - This setup also works with GitHub Codespaces if you prefer development in the browser - Just click "Code" → "Codespaces" → "Create codespace on main" -### Option 3: Manual Setup +### Option 4: Manual Setup If you prefer not to use Docker or Dev Containers: 1. **Clone the Repository:** ```bash git clone https://github.com//sim.git - cd sim/sim + cd sim + bun install ``` -2. **Install Dependencies:** - - Using Bun: +2. **Set Up Environment:** + + - Navigate to the app directory: ```bash - bun install + cd apps/sim ``` - -3. **Set Up Environment:** - - Copy `.env.example` to `.env` - - Configure database connection and other required authentication variables + - Configure required variables (DATABASE_URL, BETTER_AUTH_SECRET, BETTER_AUTH_URL) -4. **Set Up Database:** +3. **Set Up Database:** - - You need a PostgreSQL instance running - - Run migrations: - ```bash - bun run db:push - ``` + ```bash + bunx drizzle-kit push + ``` -5. **Run the Development Server:** +4. **Run the Development Server:** - - With Bun: - ```bash - bun run dev - ``` + ```bash + bun run dev + ``` -6. **Make Your Changes and Test Locally.** +5. **Make Your Changes and Test Locally.** ### Email Template Development diff --git a/README.md b/README.md index 0ae792934..f9b1ad131 100644 --- a/README.md +++ b/README.md @@ -73,10 +73,10 @@ To use local models with Sim Studio: ```bash # With NVIDIA GPU support -docker compose -f docker-compose.ollama.yml up --profile local-gpu -d --build +docker compose --profile local-gpu -f docker-compose.ollama.yml up -d # Without GPU (CPU only) -docker compose -f docker-compose.ollama.yml up --profile local-cpu -d --build +docker compose --profile local-cpu -f docker-compose.ollama.yml up -d # If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434) docker compose -f docker-compose.prod.yml up -d diff --git a/apps/docs/content/docs/tools/image_generator.mdx b/apps/docs/content/docs/tools/image_generator.mdx index 1acad482a..bd6ed778a 100644 --- a/apps/docs/content/docs/tools/image_generator.mdx +++ b/apps/docs/content/docs/tools/image_generator.mdx @@ -45,25 +45,26 @@ In Sim Studio, the DALL-E integration enables your agents to generate images pro ## Usage Instructions -Create high-quality images using DALL-E. Configure resolution, quality, style, and other parameters to get exactly the image you need. +Create high-quality images using OpenAI ## Tools -### `openai_dalle` +### `openai_image` Generate images using OpenAI #### Input -| Parameter | Type | Required | Description | -| --------- | ------ | -------- | ----------------------------------------------------------------------- | -| `prompt` | string | Yes | A text description of the desired image\(s\) | -| `model` | string | Yes | The DALL-E model to use \(dall-e-2 or dall-e-3\) | -| `size` | string | No | The size of the generated images \(1024x1024, 1024x1792, or 1792x1024\) | -| `quality` | string | No | The quality of the image \(standard or hd\) | -| `style` | string | No | The style of the image \(vivid or natural\) | -| `n` | number | No | The number of images to generate \(1-10\) | -| `apiKey` | string | Yes | Your OpenAI API key | +| Parameter | Type | Required | Description | +| ------------ | ------ | -------- | ----------------------------------------------------------------------- | +| `prompt` | string | Yes | A text description of the desired image | +| `model` | string | Yes | The model to use \(gpt-image-1 or dall-e-3\) | +| `size` | string | Yes | The size of the generated images \(1024x1024, 1024x1792, or 1792x1024\) | +| `quality` | string | No | The quality of the image \(standard or hd\) | +| `style` | string | No | The style of the image \(vivid or natural\) | +| `background` | string | No | The background color, only for gpt-image-1 | +| `n` | number | No | The number of images to generate \(1-10\) | +| `apiKey` | string | Yes | Your OpenAI API key | #### Output @@ -77,9 +78,9 @@ Generate images using OpenAI ### Input -| Parameter | Type | Required | Description | -| ---------- | ------ | -------- | ----------- | -| `provider` | string | Yes | Provider | +| Parameter | Type | Required | Description | +| --------- | ------ | -------- | ----------- | +| `prompt` | string | Yes | | ### Outputs diff --git a/apps/sim/app/api/proxy/image/route.ts b/apps/sim/app/api/proxy/image/route.ts index 8342ae563..2017be225 100644 --- a/apps/sim/app/api/proxy/image/route.ts +++ b/apps/sim/app/api/proxy/image/route.ts @@ -1,65 +1,87 @@ -import { NextResponse } from 'next/server' +import { NextRequest, NextResponse } from 'next/server' import { createLogger } from '@/lib/logs/console-logger' -const logger = createLogger('ProxyImageAPI') +const logger = createLogger('ImageProxyAPI') + +/** + * Proxy for fetching images + * This allows client-side requests to fetch images from various sources while avoiding CORS issues + */ +export async function GET(request: NextRequest) { + const url = new URL(request.url) + const imageUrl = url.searchParams.get('url') + const requestId = crypto.randomUUID().slice(0, 8) + + if (!imageUrl) { + logger.error(`[${requestId}] Missing 'url' parameter`) + return new NextResponse('Missing URL parameter', { status: 400 }) + } + + logger.info(`[${requestId}] Proxying image request for: ${imageUrl}`) -export async function GET(request: Request) { try { - const { searchParams } = new URL(request.url) - const imageUrl = searchParams.get('url') - - if (!imageUrl) { - logger.error('Missing URL parameter in proxy image request') - return new NextResponse('Missing URL parameter', { status: 400 }) - } - - logger.info('Proxying image from:', imageUrl) - - // Add appropriate headers for fetching images - const response = await fetch(imageUrl, { + // Use fetch with custom headers that appear more browser-like + const imageResponse = await fetch(imageUrl, { headers: { - Accept: 'image/*, */*', - 'User-Agent': 'Mozilla/5.0 (compatible; ImageProxyBot/1.0)', + 'User-Agent': + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + Accept: 'image/webp,image/avif,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Accept-Encoding': 'gzip, deflate, br', + Referer: 'https://simstudio.ai/', + 'Sec-Fetch-Dest': 'image', + 'Sec-Fetch-Mode': 'no-cors', + 'Sec-Fetch-Site': 'cross-site', }, - // Set a reasonable timeout - signal: AbortSignal.timeout(15000), }) - if (!response.ok) { - console.error(`Failed to fetch image from ${imageUrl}:`, response.status, response.statusText) - return new NextResponse(`Failed to fetch image: ${response.status} ${response.statusText}`, { - status: response.status, + if (!imageResponse.ok) { + logger.error(`[${requestId}] Image fetch failed:`, { + status: imageResponse.status, + statusText: imageResponse.statusText, + }) + return new NextResponse(`Failed to fetch image: ${imageResponse.statusText}`, { + status: imageResponse.status, }) } - const contentType = response.headers.get('content-type') - console.log('Image content-type:', contentType) + // Get image content type from response headers + const contentType = imageResponse.headers.get('content-type') || 'image/jpeg' - const blob = await response.blob() - console.log('Image size:', blob.size, 'bytes') + // Get the image as a blob + const imageBlob = await imageResponse.blob() - if (blob.size === 0) { - console.error('Empty image received from source URL') - return new NextResponse('Empty image received from source', { status: 422 }) + if (imageBlob.size === 0) { + logger.error(`[${requestId}] Empty image blob received`) + return new NextResponse('Empty image received', { status: 404 }) } // Return the image with appropriate headers - return new NextResponse(blob, { + return new NextResponse(imageBlob, { headers: { - 'Content-Type': contentType || 'image/png', - 'Cache-Control': 'public, max-age=31536000', // Cache for a year - 'Access-Control-Allow-Origin': '*', // CORS support - 'X-Content-Type-Options': 'nosniff', + 'Content-Type': contentType, + 'Access-Control-Allow-Origin': '*', + 'Cache-Control': 'public, max-age=86400', // Cache for 24 hours }, }) } catch (error) { - // Log the full error for debugging - console.error('Error proxying image:', error) + const errorMessage = error instanceof Error ? error.message : String(error) + logger.error(`[${requestId}] Image proxy error:`, { error: errorMessage }) - // Return a helpful error response - return new NextResponse( - `Internal Server Error: ${error instanceof Error ? error.message : 'Unknown error'}`, - { status: 500 } - ) + return new NextResponse(`Failed to proxy image: ${errorMessage}`, { + status: 500, + }) } } + +export async function OPTIONS() { + return new NextResponse(null, { + status: 204, + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, OPTIONS', + 'Access-Control-Allow-Headers': 'Content-Type, Authorization', + 'Access-Control-Max-Age': '86400', + }, + }) +} diff --git a/apps/sim/app/api/proxy/route.ts b/apps/sim/app/api/proxy/route.ts index ec67bbc0e..194e9aac6 100644 --- a/apps/sim/app/api/proxy/route.ts +++ b/apps/sim/app/api/proxy/route.ts @@ -280,7 +280,6 @@ export async function POST(request: Request) { } } -// Add OPTIONS handler for CORS preflight requests export async function OPTIONS() { return new NextResponse(null, { status: 204, diff --git a/apps/sim/blocks/blocks/image_generator.ts b/apps/sim/blocks/blocks/image_generator.ts index 750706f9f..08935d40c 100644 --- a/apps/sim/blocks/blocks/image_generator.ts +++ b/apps/sim/blocks/blocks/image_generator.ts @@ -7,19 +7,22 @@ export const ImageGeneratorBlock: BlockConfig = { name: 'Image Generator', description: 'Generate images', longDescription: - 'Create high-quality images using DALL-E. Configure resolution, quality, style, and other parameters to get exactly the image you need.', + "Create high-quality images using OpenAI's image generation models. Configure resolution, quality, style, and other parameters to get exactly the image you need.", docsLink: 'https://docs.simstudio.ai/tools/image_generator', category: 'tools', bgColor: '#4D5FFF', icon: ImageIcon, subBlocks: [ { - id: 'provider', - title: 'Provider', + id: 'model', + title: 'Model', type: 'dropdown', - layout: 'full', - options: [{ label: 'DALL-E', id: 'dalle' }], - value: () => 'dalle', + layout: 'half', + options: [ + { label: 'DALL-E 3', id: 'dall-e-3' }, + { label: 'GPT Image', id: 'gpt-image-1' }, + ], + value: () => 'dall-e-3', }, { id: 'prompt', @@ -28,14 +31,6 @@ export const ImageGeneratorBlock: BlockConfig = { layout: 'full', placeholder: 'Describe the image you want to generate...', }, - { - id: 'model', - title: 'Model', - type: 'dropdown', - layout: 'half', - options: [{ label: 'DALL-E 3', id: 'dall-e-3' }], - value: () => 'dall-e-3', - }, { id: 'size', title: 'Size', @@ -47,6 +42,21 @@ export const ImageGeneratorBlock: BlockConfig = { { label: '1792x1024', id: '1792x1024' }, ], value: () => '1024x1024', + condition: { field: 'model', value: 'dall-e-3' }, + }, + { + id: 'size', + title: 'Size', + type: 'dropdown', + layout: 'half', + options: [ + { label: 'Auto', id: 'auto' }, + { label: '1024x1024', id: '1024x1024' }, + { label: '1536x1024', id: '1536x1024' }, + { label: '1024x1536', id: '1024x1536' }, + ], + value: () => 'auto', + condition: { field: 'model', value: 'gpt-image-1' }, }, { id: 'quality', @@ -58,6 +68,7 @@ export const ImageGeneratorBlock: BlockConfig = { { label: 'HD', id: 'hd' }, ], value: () => 'standard', + condition: { field: 'model', value: 'dall-e-3' }, }, { id: 'style', @@ -69,6 +80,20 @@ export const ImageGeneratorBlock: BlockConfig = { { label: 'Natural', id: 'natural' }, ], value: () => 'vivid', + condition: { field: 'model', value: 'dall-e-3' }, + }, + { + id: 'background', + title: 'Background', + type: 'dropdown', + layout: 'half', + options: [ + { label: 'Auto', id: 'auto' }, + { label: 'Transparent', id: 'transparent' }, + { label: 'Opaque', id: 'opaque' }, + ], + value: () => 'auto', + condition: { field: 'model', value: 'gpt-image-1' }, }, { id: 'apiKey', @@ -81,9 +106,9 @@ export const ImageGeneratorBlock: BlockConfig = { }, ], tools: { - access: ['openai_dalle'], + access: ['openai_image'], config: { - tool: () => 'openai_dalle', + tool: () => 'openai_image', params: (params) => { if (!params.apiKey) { throw new Error('API key is required') @@ -92,32 +117,46 @@ export const ImageGeneratorBlock: BlockConfig = { throw new Error('Prompt is required') } - return { + // Base parameters for all models + const baseParams = { prompt: params.prompt, model: params.model || 'dall-e-3', size: params.size || '1024x1024', - quality: params.quality || 'standard', - style: params.style || 'vivid', apiKey: params.apiKey, } + + if (params.model === 'dall-e-3') { + return { + ...baseParams, + quality: params.quality || 'standard', + style: params.style || 'vivid', + } + } else if (params.model === 'gpt-image-1') { + return { + ...baseParams, + ...(params.background && { background: params.background }), + } + } + + return baseParams }, }, }, inputs: { - provider: { type: 'string', required: true }, prompt: { type: 'string', required: true }, model: { type: 'string', required: true }, size: { type: 'string', required: false }, quality: { type: 'string', required: false }, style: { type: 'string', required: false }, + background: { type: 'string', required: false }, apiKey: { type: 'string', required: true }, }, outputs: { response: { type: { - content: 'string', // URL of the generated image - image: 'string', // Base64 image data - metadata: 'json', // Contains only model information + content: 'string', + image: 'string', + metadata: 'json', }, }, }, diff --git a/apps/sim/stores/panel/console/store.ts b/apps/sim/stores/panel/console/store.ts index edaa50038..55bd91ebb 100644 --- a/apps/sim/stores/panel/console/store.ts +++ b/apps/sim/stores/panel/console/store.ts @@ -4,8 +4,8 @@ import { redactApiKeys } from '@/lib/utils' import { useChatStore } from '../chat/store' import { ConsoleEntry, ConsoleStore } from './types' -// MAX across all workflows -const MAX_ENTRIES = 50 +const MAX_ENTRIES = 50 // MAX across all workflows +const MAX_IMAGE_DATA_SIZE = 1000 // Maximum size of image data to store (in characters) /** * Gets a nested property value from an object using a path string @@ -29,6 +29,67 @@ const getValueByPath = (obj: any, path: string): any => { return current } +/** + * Checks if a string is likely a base64 encoded image or large data blob + */ +const isLikelyBase64Data = (value: string): boolean => { + if (value.length < 100) return false + return value.startsWith('data:image') || /^[A-Za-z0-9+/=]{1000,}$/.test(value) +} + +/** + * Processes an object to handle large strings (like base64 image data) + * for localStorage to prevent quota issues + */ +const processSafeStorage = (obj: any): any => { + if (!obj) return obj + + if (typeof obj !== 'object') return obj + + if (Array.isArray(obj)) { + return obj.map((item) => processSafeStorage(item)) + } + + const result: any = {} + for (const [key, value] of Object.entries(obj)) { + if ( + (key === 'image' || key.includes('image')) && + typeof value === 'string' && + value.length > MAX_IMAGE_DATA_SIZE + ) { + if (value.startsWith('data:image')) { + const mimeEnd = value.indexOf(',') + result[key] = + mimeEnd > 0 + ? `${value.substring(0, mimeEnd + 1)}[Image data removed, original length: ${value.length}]` + : `[Image data removed, original length: ${value.length}]` + } else { + result[key] = `[Image data removed, original length: ${value.length}]` + } + } else if (typeof value === 'object' && value !== null) { + result[key] = processSafeStorage(value) + } else if ( + typeof value === 'string' && + value.length > MAX_IMAGE_DATA_SIZE && + isLikelyBase64Data(value) + ) { + if (value.startsWith('data:image')) { + const mimeEnd = value.indexOf(',') + result[key] = + mimeEnd > 0 + ? `${value.substring(0, mimeEnd + 1)}[Large data removed, original length: ${value.length}]` + : `[Large data removed, original length: ${value.length}]` + } else { + result[key] = `[Large data removed, original length: ${value.length}]` + } + } else { + result[key] = value + } + } + + return result +} + export const useConsoleStore = create()( devtools( persist( @@ -207,6 +268,17 @@ export const useConsoleStore = create()( }), { name: 'console-store', + partialize: (state) => { + const sanitizedEntries = state.entries.slice(0, MAX_ENTRIES).map((entry) => ({ + ...entry, + output: processSafeStorage(entry.output), + })) + + return { + isOpen: state.isOpen, + entries: sanitizedEntries, + } + }, } ) ) diff --git a/apps/sim/tools/openai/dalle.ts b/apps/sim/tools/openai/dalle.ts deleted file mode 100644 index 5d3d44f81..000000000 --- a/apps/sim/tools/openai/dalle.ts +++ /dev/null @@ -1,193 +0,0 @@ -import { createLogger } from '@/lib/logs/console-logger' -import { getBaseUrl } from '@/lib/urls/utils' -import { ToolConfig } from '../types' - -const logger = createLogger('DalleTool') - -export const dalleTool: ToolConfig = { - id: 'openai_dalle', - name: 'DALL-E Generate', - description: "Generate images using OpenAI's DALL-E model", - version: '1.0.0', - params: { - prompt: { - type: 'string', - required: true, - description: 'A text description of the desired image(s)', - }, - model: { - type: 'string', - required: true, - description: 'The DALL-E model to use (dall-e-2 or dall-e-3)', - }, - size: { - type: 'string', - required: false, - description: 'The size of the generated images (1024x1024, 1024x1792, or 1792x1024)', - }, - quality: { - type: 'string', - required: false, - description: 'The quality of the image (standard or hd)', - }, - style: { - type: 'string', - required: false, - description: 'The style of the image (vivid or natural)', - }, - n: { - type: 'number', - required: false, - description: 'The number of images to generate (1-10)', - }, - apiKey: { - type: 'string', - required: true, - description: 'Your OpenAI API key', - }, - }, - request: { - url: 'https://api.openai.com/v1/images/generations', - method: 'POST', - headers: (params) => ({ - 'Content-Type': 'application/json', - Authorization: `Bearer ${params.apiKey}`, - }), - body: (params) => ({ - model: params.model, - prompt: params.prompt, - size: params.size || '1024x1024', - quality: params.quality || 'standard', - style: params.style || 'vivid', - n: params.n || 1, - }), - }, - transformResponse: async (response, params) => { - try { - const data = await response.json() - - logger.info('DALL-E API response:', JSON.stringify(data, null, 2)) - - if (!data.data?.[0]?.url) { - logger.error('No image URL in DALL-E response:', data) - throw new Error('No image URL in response') - } - - const imageUrl = data.data[0].url - const modelName = data.model || params?.model || 'dall-e' - - logger.info('Got image URL:', imageUrl) - logger.info('Using model:', modelName) - - try { - logger.info('Fetching image from URL via proxy...') - const baseUrl = getBaseUrl() - const proxyUrl = new URL(`/api/proxy/image`, baseUrl) - proxyUrl.searchParams.append('url', imageUrl) - - const imageResponse = await fetch(proxyUrl.toString(), { - headers: { - Accept: 'image/*, */*', - }, - cache: 'no-store', - }) - - if (!imageResponse.ok) { - logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText) - throw new Error(`Failed to fetch image: ${imageResponse.statusText}`) - } - - const imageBlob = await imageResponse.blob() - - if (imageBlob.size === 0) { - logger.error('Empty image blob received') - throw new Error('Empty image received') - } - - const arrayBuffer = await imageBlob.arrayBuffer() - const buffer = Buffer.from(arrayBuffer) - const base64Image = buffer.toString('base64') - - return { - success: true, - output: { - content: imageUrl, // Now using image URL as content - image: base64Image, // Base64 image in separate field - metadata: { - model: modelName, // Only include model name in metadata - }, - }, - } - } catch (error) { - // Log the error but continue with returning the URL - logger.error('Error fetching or processing image:', error) - - // Try again with a direct browser fetch as fallback - try { - logger.info('Attempting fallback with direct browser fetch...') - const directImageResponse = await fetch(imageUrl, { - cache: 'no-store', - headers: { - Accept: 'image/*, */*', - 'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)', - }, - }) - - if (!directImageResponse.ok) { - throw new Error(`Direct fetch failed: ${directImageResponse.status}`) - } - - const imageBlob = await directImageResponse.blob() - if (imageBlob.size === 0) { - throw new Error('Empty blob received from direct fetch') - } - - // Server-side safe way to convert blob to base64 - const arrayBuffer = await imageBlob.arrayBuffer() - const buffer = Buffer.from(arrayBuffer) - const base64Image = buffer.toString('base64') - - logger.info( - 'Successfully converted image to base64 via direct fetch, length:', - base64Image.length - ) - - return { - success: true, - output: { - content: imageUrl, - image: base64Image, - metadata: { - model: modelName, - }, - }, - } - } catch (fallbackError) { - logger.error('Fallback fetch also failed:', fallbackError) - - // Even if both attempts fail, still return the URL and metadata - return { - success: true, - output: { - content: imageUrl, // URL as content - image: '', // Empty image since we couldn't get it - metadata: { - model: modelName, - }, - }, - } - } - } - } catch (error) { - logger.error('Error in DALL-E response handling:', error) - throw error - } - }, - transformError: (error) => { - logger.error('DALL-E error:', error) - if (error.response?.data?.error?.message) { - return error.response.data.error.message - } - return error.message || 'Failed to generate image with DALL-E' - }, -} diff --git a/apps/sim/tools/openai/image.ts b/apps/sim/tools/openai/image.ts index 43d20e8d8..849a8cdb9 100644 --- a/apps/sim/tools/openai/image.ts +++ b/apps/sim/tools/openai/image.ts @@ -1,28 +1,29 @@ import { createLogger } from '@/lib/logs/console-logger' import { getBaseUrl } from '@/lib/urls/utils' import { ToolConfig } from '../types' +import { BaseImageRequestBody } from './types' -const logger = createLogger('DalleTool') +const logger = createLogger('ImageTool') export const imageTool: ToolConfig = { id: 'openai_image', - name: 'Image Generate', + name: 'Image Generator', description: "Generate images using OpenAI's Image models", version: '1.0.0', params: { prompt: { type: 'string', required: true, - description: 'A text description of the desired image(s)', + description: 'A text description of the desired image', }, model: { type: 'string', required: true, - description: 'The DALL-E model to use (gpt-image-1 or dall-e-3)', + description: 'The model to use (gpt-image-1 or dall-e-3)', }, size: { type: 'string', - required: false, + required: true, description: 'The size of the generated images (1024x1024, 1024x1792, or 1792x1024)', }, quality: { @@ -35,6 +36,11 @@ export const imageTool: ToolConfig = { required: false, description: 'The style of the image (vivid or natural)', }, + background: { + type: 'string', + required: false, + description: 'The background color, only for gpt-image-1', + }, n: { type: 'number', required: false, @@ -53,138 +59,141 @@ export const imageTool: ToolConfig = { 'Content-Type': 'application/json', Authorization: `Bearer ${params.apiKey}`, }), - body: (params) => ({ - model: params.model, - prompt: params.prompt, - size: params.size || '1024x1024', - quality: params.quality || 'standard', - style: params.style || 'vivid', - n: params.n || 1, - }), + body: (params) => { + const body: BaseImageRequestBody = { + model: params.model, + prompt: params.prompt, + size: params.size || '1024x1024', + n: params.n || 1, + } + + // Add model-specific parameters + if (params.model === 'dall-e-3') { + if (params.quality) body.quality = params.quality + if (params.style) body.style = params.style + } else if (params.model === 'gpt-image-1') { + if (params.background) body.background = params.background + } + + return body + }, }, transformResponse: async (response, params) => { try { const data = await response.json() - logger.info('Image API response:', JSON.stringify(data, null, 2)) - - if (!data.data?.[0]?.url) { - logger.error('No image URL in Image response:', data) - throw new Error('No image URL in response') + const sanitizedData = JSON.parse(JSON.stringify(data)) + if (sanitizedData.data && Array.isArray(sanitizedData.data)) { + sanitizedData.data.forEach((item: { b64_json?: string }) => { + if (item.b64_json) { + item.b64_json = `[base64 data truncated, length: ${item.b64_json.length}]` + } + }) } - const imageUrl = data.data[0].url - const modelName = data.model || params?.model || 'dall-e' + const modelName = params?.model || 'dall-e-3' + let imageUrl = null + let base64Image = null - try { - logger.info('Fetching image from URL via proxy...') - const baseUrl = getBaseUrl() - const proxyUrl = new URL(`/api/proxy/image`, baseUrl) - proxyUrl.searchParams.append('url', imageUrl) + if (data.data?.[0]?.url) { + imageUrl = data.data[0].url + logger.info('Found image URL in response for DALL-E 3') + } else if (data.data?.[0]?.b64_json) { + base64Image = data.data[0].b64_json + logger.info( + 'Found base64 encoded image in response for GPT-Image-1', + `length: ${base64Image.length}` + ) + } else { + logger.error('No image data found in API response:', data) + throw new Error('No image data found in response') + } - const imageResponse = await fetch(proxyUrl.toString(), { - headers: { - Accept: 'image/*, */*', - }, - cache: 'no-store', - }) - - if (!imageResponse.ok) { - logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText) - throw new Error(`Failed to fetch image: ${imageResponse.statusText}`) - } - - const imageBlob = await imageResponse.blob() - - if (imageBlob.size === 0) { - logger.error('Empty image blob received') - throw new Error('Empty image received') - } - - const arrayBuffer = await imageBlob.arrayBuffer() - const buffer = Buffer.from(arrayBuffer) - const base64Image = buffer.toString('base64') - - return { - success: true, - output: { - content: imageUrl, // Now using image URL as content - image: base64Image, // Base64 image in separate field - metadata: { - model: modelName, // Only include model name in metadata - }, - }, - } - } catch (error) { - // Log the error but continue with returning the URL - logger.error('Error fetching or processing image:', error) - - // Try again with a direct browser fetch as fallback + if (imageUrl && !base64Image) { try { - logger.info('Attempting fallback with direct browser fetch...') - const directImageResponse = await fetch(imageUrl, { - cache: 'no-store', + logger.info('Fetching image from URL via proxy...') + const baseUrl = getBaseUrl() + const proxyUrl = new URL(`/api/proxy/image`, baseUrl) + proxyUrl.searchParams.append('url', imageUrl) + + const imageResponse = await fetch(proxyUrl.toString(), { headers: { Accept: 'image/*, */*', - 'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)', }, + cache: 'no-store', }) - if (!directImageResponse.ok) { - throw new Error(`Direct fetch failed: ${directImageResponse.status}`) + if (!imageResponse.ok) { + logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText) + throw new Error(`Failed to fetch image: ${imageResponse.statusText}`) } - const imageBlob = await directImageResponse.blob() + const imageBlob = await imageResponse.blob() + if (imageBlob.size === 0) { - throw new Error('Empty blob received from direct fetch') + logger.error('Empty image blob received') + throw new Error('Empty image received') } - // Server-side safe way to convert blob to base64 const arrayBuffer = await imageBlob.arrayBuffer() const buffer = Buffer.from(arrayBuffer) - const base64Image = buffer.toString('base64') + base64Image = buffer.toString('base64') + } catch (error) { + logger.error('Error fetching or processing image:', error) - logger.info( - 'Successfully converted image to base64 via direct fetch, length:', - base64Image.length - ) - - return { - success: true, - output: { - content: imageUrl, - image: base64Image, - metadata: { - model: modelName, + try { + logger.info('Attempting fallback with direct browser fetch...') + const directImageResponse = await fetch(imageUrl, { + cache: 'no-store', + headers: { + Accept: 'image/*, */*', + 'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)', }, - }, - } - } catch (fallbackError) { - logger.error('Fallback fetch also failed:', fallbackError) + }) - // Even if both attempts fail, still return the URL and metadata - return { - success: true, - output: { - content: imageUrl, // URL as content - image: '', // Empty image since we couldn't get it - metadata: { - model: modelName, - }, - }, + if (!directImageResponse.ok) { + throw new Error(`Direct fetch failed: ${directImageResponse.status}`) + } + + const imageBlob = await directImageResponse.blob() + if (imageBlob.size === 0) { + throw new Error('Empty blob received from direct fetch') + } + + const arrayBuffer = await imageBlob.arrayBuffer() + const buffer = Buffer.from(arrayBuffer) + base64Image = buffer.toString('base64') + + logger.info( + 'Successfully converted image to base64 via direct fetch, length:', + base64Image.length + ) + } catch (fallbackError) { + logger.error('Fallback fetch also failed:', fallbackError) } } } + + return { + success: true, + output: { + content: imageUrl || 'direct-image', + image: base64Image || '', + metadata: { + model: modelName, + }, + }, + } } catch (error) { - logger.error('Error in DALL-E response handling:', error) + logger.error('Error in image generation response handling:', error) throw error } }, transformError: (error) => { - logger.error('DALL-E error:', error) + logger.error('Image generation error:', error) if (error.response?.data?.error?.message) { return error.response.data.error.message } - return error.message || 'Failed to generate image with DALL-E' + return error.message || 'Failed to generate image' }, } diff --git a/apps/sim/tools/openai/index.ts b/apps/sim/tools/openai/index.ts index ccf1628d2..f5e7acc93 100644 --- a/apps/sim/tools/openai/index.ts +++ b/apps/sim/tools/openai/index.ts @@ -1,4 +1,4 @@ -import { dalleTool } from './dalle' import { embeddingsTool } from './embeddings' +import { imageTool } from './image' -export { embeddingsTool, dalleTool } +export { embeddingsTool, imageTool } diff --git a/apps/sim/tools/openai/types.ts b/apps/sim/tools/openai/types.ts index 1c73cf4f7..d935d0fb7 100644 --- a/apps/sim/tools/openai/types.ts +++ b/apps/sim/tools/openai/types.ts @@ -1,5 +1,13 @@ import { ToolResponse } from '../types' +export interface BaseImageRequestBody { + model: string + prompt: string + size: string + n: number + [key: string]: any // Allow for additional properties +} + export interface DalleResponse extends ToolResponse { output: { content: string // This will now be the image URL diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index ffd42865e..ce961bce4 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -50,7 +50,7 @@ import { mem0AddMemoriesTool, mem0GetMemoriesTool, mem0SearchMemoriesTool } from import { memoryAddTool, memoryDeleteTool, memoryGetAllTool, memoryGetTool } from './memory' import { mistralParserTool } from './mistral' import { notionReadTool, notionWriteTool } from './notion' -import { dalleTool, embeddingsTool as openAIEmbeddings } from './openai' +import { imageTool, embeddingsTool as openAIEmbeddings } from './openai' import { perplexityChatTool } from './perplexity' import { pineconeFetchTool, @@ -149,7 +149,6 @@ export const tools: Record = { confluence_retrieve: confluenceRetrieveTool, confluence_update: confluenceUpdateTool, twilio_send_sms: sendSMSTool, - openai_dalle: dalleTool, airtable_create_records: airtableCreateRecordsTool, airtable_get_record: airtableGetRecordTool, airtable_list_records: airtableListRecordsTool, @@ -173,4 +172,5 @@ export const tools: Record = { discord_get_messages: discordGetMessagesTool, discord_get_server: discordGetServerTool, discord_get_user: discordGetUserTool, + openai_image: imageTool, }