feat(image-gen): added gpt-image-1 and safe storage for base64 data (#396)

* added gpt-image-1 and safe storage for base64 data

* addressed PR comments

* updated README

* update CONTRIBUTING.md
Waleed Latif
2025-05-21 15:12:51 -07:00
committed by GitHub
parent 6d380c28e3
commit 0015dc93de
12 changed files with 404 additions and 437 deletions

View File

@@ -130,54 +130,69 @@ Using clear and consistent commit messages makes it easier for everyone to under
To set up your local development environment:
### Option 1: Using Docker (Recommended)
### Option 1: Using NPM Package (Simplest)
Docker provides a consistent development environment with all dependencies pre-configured.
The easiest way to run Sim Studio locally is using our NPM package:
1. **Clone the Repository:**
```bash
npx simstudio
```
```bash
git clone https://github.com/<your-username>/sim.git
cd sim
```
After running this command, open [http://localhost:3000/](http://localhost:3000/) in your browser.
2. **Start the Docker Environment:**
#### Options
```bash
docker compose up -d
```
- `-p, --port <port>`: Specify the port to run Sim Studio on (default: 3000)
- `--no-pull`: Skip pulling the latest Docker images
Or use the convenience script which handles environment setup and migrations:
#### Requirements
```bash
chmod +x scripts/start_simstudio_docker.sh
./scripts/start_simstudio_docker.sh
```
- Docker must be installed and running on your machine
This will:
### Option 2: Using Docker Compose
- Start a PostgreSQL database container
- Build and run the Next.js application with hot-reloading
- Set up all necessary environment variables
- Apply database migrations automatically
```bash
# Clone the repository
git clone https://github.com/<your-username>/sim.git
cd sim
3. **View Logs:**
# Start Sim Studio
docker compose -f docker-compose.prod.yml up -d
```
```bash
docker compose logs -f simstudio
```
Access the application at [http://localhost:3000/](http://localhost:3000/)
4. **Make Your Changes:**
- Edit files in your local directory
- Changes will be automatically reflected thanks to hot-reloading
#### Using Local Models
### Option 2: Using VS Code / Cursor Dev Containers
To use local models with Sim Studio:
1. Pull models using our helper script:
```bash
./apps/sim/scripts/ollama_docker.sh pull <model_name>
```
2. Start Sim Studio with local model support:
```bash
# With NVIDIA GPU support
docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
# Without GPU (CPU only)
docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
# If hosting on a server, update the environment variables in the docker-compose.prod.yml file
# to include the server's public IP, then start again (e.g. OLLAMA_URL=http://1.1.1.1:11434)
docker compose -f docker-compose.prod.yml up -d
```
### Option 3: Using VS Code / Cursor Dev Containers
Dev Containers provide a consistent and easy-to-use development environment:
1. **Prerequisites:**
- Visual Studio Code
- Visual Studio Code or Cursor
- Docker Desktop
- [Remote - Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension for VS Code
@@ -188,58 +203,53 @@ Dev Containers provide a consistent and easy-to-use development environment:
git clone https://github.com/<your-username>/sim.git
cd sim
```
- Open the project in VS Code
- Open the project in VS Code/Cursor
- When prompted, click "Reopen in Container" (or press F1 and select "Remote-Containers: Reopen in Container")
- Wait for the container to build and initialize
- The development environment will be set up in the `sim/` directory
3. **Start Developing:**
- Run `bun run dev` in the terminal or use the `sim-start` alias
- All dependencies and configurations are automatically set up
- Use the provided aliases (like `sim-start`) to run common commands
- Your changes will be automatically hot-reloaded
4. **GitHub Codespaces:**
- This setup also works with GitHub Codespaces if you prefer development in the browser
- Just click "Code" → "Codespaces" → "Create codespace on main"
### Option 3: Manual Setup
### Option 4: Manual Setup
If you prefer not to use Docker or Dev Containers:
1. **Clone the Repository:**
```bash
git clone https://github.com/<your-username>/sim.git
cd sim/sim
cd sim
bun install
```
2. **Install Dependencies:**
- Using Bun:
2. **Set Up Environment:**
- Navigate to the app directory:
```bash
bun install
cd apps/sim
```
3. **Set Up Environment:**
- Copy `.env.example` to `.env`
- Configure database connection and other required authentication variables
- Configure required variables (DATABASE_URL, BETTER_AUTH_SECRET, BETTER_AUTH_URL)
4. **Set Up Database:**
3. **Set Up Database:**
- You need a PostgreSQL instance running
- Run migrations:
```bash
bun run db:push
```
```bash
bunx drizzle-kit push
```
5. **Run the Development Server:**
4. **Run the Development Server:**
- With Bun:
```bash
bun run dev
```
```bash
bun run dev
```
6. **Make Your Changes and Test Locally.**
5. **Make Your Changes and Test Locally.**
### Email Template Development

View File

@@ -73,10 +73,10 @@ To use local models with Sim Studio:
```bash
# With NVIDIA GPU support
docker compose -f docker-compose.ollama.yml up --profile local-gpu -d --build
docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
# Without GPU (CPU only)
docker compose -f docker-compose.ollama.yml up --profile local-cpu -d --build
docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP, then start again (e.g. OLLAMA_URL=http://1.1.1.1:11434)
docker compose -f docker-compose.prod.yml up -d

View File

@@ -45,25 +45,26 @@ In Sim Studio, the DALL-E integration enables your agents to generate images pro
## Usage Instructions
Create high-quality images using DALL-E. Configure resolution, quality, style, and other parameters to get exactly the image you need.
Create high-quality images using OpenAI
## Tools
### `openai_dalle`
### `openai_image`
Generate images using OpenAI
#### Input
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | ----------------------------------------------------------------------- |
| `prompt` | string | Yes | A text description of the desired image\(s\) |
| `model` | string | Yes | The DALL-E model to use \(dall-e-2 or dall-e-3\) |
| `size` | string | No | The size of the generated images \(1024x1024, 1024x1792, or 1792x1024\) |
| `quality` | string | No | The quality of the image \(standard or hd\) |
| `style` | string | No | The style of the image \(vivid or natural\) |
| `n` | number | No | The number of images to generate \(1-10\) |
| `apiKey` | string | Yes | Your OpenAI API key |
| Parameter | Type | Required | Description |
| ------------ | ------ | -------- | ----------------------------------------------------------------------- |
| `prompt` | string | Yes | A text description of the desired image |
| `model` | string | Yes | The model to use \(gpt-image-1 or dall-e-3\) |
| `size`       | string | Yes      | The size of the generated image \(1024x1024, 1024x1792, or 1792x1024 for dall-e-3; auto, 1024x1024, 1536x1024, or 1024x1536 for gpt-image-1\) |
| `quality` | string | No | The quality of the image \(standard or hd\) |
| `style` | string | No | The style of the image \(vivid or natural\) |
| `background` | string | No       | The background setting \(auto, transparent, or opaque\); only for gpt-image-1 |
| `n` | number | No | The number of images to generate \(1-10\) |
| `apiKey` | string | Yes | Your OpenAI API key |
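
As a rough, illustrative sketch (values are examples, not defaults), parameter sets for the two models could look like the following; `quality` and `style` apply to dall-e-3, while `background` applies only to gpt-image-1:

```typescript
// Illustrative parameter sets for the openai_image tool (example values, not defaults).
// How the API key is supplied (here via an environment variable) is an assumption.
const dalle3Params = {
  prompt: 'A watercolor painting of a lighthouse at dusk',
  model: 'dall-e-3',
  size: '1024x1024',
  quality: 'hd', // dall-e-3 only
  style: 'natural', // dall-e-3 only
  apiKey: process.env.OPENAI_API_KEY ?? '',
}

const gptImage1Params = {
  prompt: 'A product photo of a ceramic mug on a plain background',
  model: 'gpt-image-1',
  size: '1536x1024',
  background: 'transparent', // gpt-image-1 only
  apiKey: process.env.OPENAI_API_KEY ?? '',
}
```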
#### Output
@@ -77,9 +78,9 @@ Generate images using OpenAI
### Input
| Parameter | Type | Required | Description |
| ---------- | ------ | -------- | ----------- |
| `provider` | string | Yes | Provider |
| Parameter | Type | Required | Description |
| --------- | ------ | -------- | ----------- |
| `prompt` | string | Yes | |
### Outputs

View File

@@ -1,65 +1,87 @@
import { NextResponse } from 'next/server'
import { NextRequest, NextResponse } from 'next/server'
import { createLogger } from '@/lib/logs/console-logger'
const logger = createLogger('ProxyImageAPI')
const logger = createLogger('ImageProxyAPI')
/**
* Proxy for fetching images
* This allows client-side requests to fetch images from various sources while avoiding CORS issues
*/
export async function GET(request: NextRequest) {
const url = new URL(request.url)
const imageUrl = url.searchParams.get('url')
const requestId = crypto.randomUUID().slice(0, 8)
if (!imageUrl) {
logger.error(`[${requestId}] Missing 'url' parameter`)
return new NextResponse('Missing URL parameter', { status: 400 })
}
logger.info(`[${requestId}] Proxying image request for: ${imageUrl}`)
export async function GET(request: Request) {
try {
const { searchParams } = new URL(request.url)
const imageUrl = searchParams.get('url')
if (!imageUrl) {
logger.error('Missing URL parameter in proxy image request')
return new NextResponse('Missing URL parameter', { status: 400 })
}
logger.info('Proxying image from:', imageUrl)
// Add appropriate headers for fetching images
const response = await fetch(imageUrl, {
// Use fetch with custom headers that appear more browser-like
const imageResponse = await fetch(imageUrl, {
headers: {
Accept: 'image/*, */*',
'User-Agent': 'Mozilla/5.0 (compatible; ImageProxyBot/1.0)',
'User-Agent':
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
Accept: 'image/webp,image/avif,image/apng,image/svg+xml,image/*,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
Referer: 'https://simstudio.ai/',
'Sec-Fetch-Dest': 'image',
'Sec-Fetch-Mode': 'no-cors',
'Sec-Fetch-Site': 'cross-site',
},
// Set a reasonable timeout
signal: AbortSignal.timeout(15000),
})
if (!response.ok) {
console.error(`Failed to fetch image from ${imageUrl}:`, response.status, response.statusText)
return new NextResponse(`Failed to fetch image: ${response.status} ${response.statusText}`, {
status: response.status,
if (!imageResponse.ok) {
logger.error(`[${requestId}] Image fetch failed:`, {
status: imageResponse.status,
statusText: imageResponse.statusText,
})
return new NextResponse(`Failed to fetch image: ${imageResponse.statusText}`, {
status: imageResponse.status,
})
}
const contentType = response.headers.get('content-type')
console.log('Image content-type:', contentType)
// Get image content type from response headers
const contentType = imageResponse.headers.get('content-type') || 'image/jpeg'
const blob = await response.blob()
console.log('Image size:', blob.size, 'bytes')
// Get the image as a blob
const imageBlob = await imageResponse.blob()
if (blob.size === 0) {
console.error('Empty image received from source URL')
return new NextResponse('Empty image received from source', { status: 422 })
if (imageBlob.size === 0) {
logger.error(`[${requestId}] Empty image blob received`)
return new NextResponse('Empty image received', { status: 404 })
}
// Return the image with appropriate headers
return new NextResponse(blob, {
return new NextResponse(imageBlob, {
headers: {
'Content-Type': contentType || 'image/png',
'Cache-Control': 'public, max-age=31536000', // Cache for a year
'Access-Control-Allow-Origin': '*', // CORS support
'X-Content-Type-Options': 'nosniff',
'Content-Type': contentType,
'Access-Control-Allow-Origin': '*',
'Cache-Control': 'public, max-age=86400', // Cache for 24 hours
},
})
} catch (error) {
// Log the full error for debugging
console.error('Error proxying image:', error)
const errorMessage = error instanceof Error ? error.message : String(error)
logger.error(`[${requestId}] Image proxy error:`, { error: errorMessage })
// Return a helpful error response
return new NextResponse(
`Internal Server Error: ${error instanceof Error ? error.message : 'Unknown error'}`,
{ status: 500 }
)
return new NextResponse(`Failed to proxy image: ${errorMessage}`, {
status: 500,
})
}
}
export async function OPTIONS() {
return new NextResponse(null, {
status: 204,
headers: {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type, Authorization',
'Access-Control-Max-Age': '86400',
},
})
}
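
For context, the route above is called from the client (and from the image tool) with the target image URL passed as a `url` query parameter. A minimal usage sketch, assuming a same-origin fetch from the browser:

```typescript
// Minimal client-side sketch (assumed usage, not part of this diff): fetch a remote
// image through the proxy route so the browser avoids cross-origin restrictions.
async function fetchImageViaProxy(imageUrl: string): Promise<Blob> {
  const proxyUrl = `/api/proxy/image?url=${encodeURIComponent(imageUrl)}`
  const response = await fetch(proxyUrl)
  if (!response.ok) {
    throw new Error(`Proxy fetch failed: ${response.status} ${response.statusText}`)
  }
  return response.blob()
}
```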

View File

@@ -280,7 +280,6 @@ export async function POST(request: Request) {
}
}
// Add OPTIONS handler for CORS preflight requests
export async function OPTIONS() {
return new NextResponse(null, {
status: 204,

View File

@@ -7,19 +7,22 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
name: 'Image Generator',
description: 'Generate images',
longDescription:
'Create high-quality images using DALL-E. Configure resolution, quality, style, and other parameters to get exactly the image you need.',
"Create high-quality images using OpenAI's image generation models. Configure resolution, quality, style, and other parameters to get exactly the image you need.",
docsLink: 'https://docs.simstudio.ai/tools/image_generator',
category: 'tools',
bgColor: '#4D5FFF',
icon: ImageIcon,
subBlocks: [
{
id: 'provider',
title: 'Provider',
id: 'model',
title: 'Model',
type: 'dropdown',
layout: 'full',
options: [{ label: 'DALL-E', id: 'dalle' }],
value: () => 'dalle',
layout: 'half',
options: [
{ label: 'DALL-E 3', id: 'dall-e-3' },
{ label: 'GPT Image', id: 'gpt-image-1' },
],
value: () => 'dall-e-3',
},
{
id: 'prompt',
@@ -28,14 +31,6 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
layout: 'full',
placeholder: 'Describe the image you want to generate...',
},
{
id: 'model',
title: 'Model',
type: 'dropdown',
layout: 'half',
options: [{ label: 'DALL-E 3', id: 'dall-e-3' }],
value: () => 'dall-e-3',
},
{
id: 'size',
title: 'Size',
@@ -47,6 +42,21 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
{ label: '1792x1024', id: '1792x1024' },
],
value: () => '1024x1024',
condition: { field: 'model', value: 'dall-e-3' },
},
{
id: 'size',
title: 'Size',
type: 'dropdown',
layout: 'half',
options: [
{ label: 'Auto', id: 'auto' },
{ label: '1024x1024', id: '1024x1024' },
{ label: '1536x1024', id: '1536x1024' },
{ label: '1024x1536', id: '1024x1536' },
],
value: () => 'auto',
condition: { field: 'model', value: 'gpt-image-1' },
},
{
id: 'quality',
@@ -58,6 +68,7 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
{ label: 'HD', id: 'hd' },
],
value: () => 'standard',
condition: { field: 'model', value: 'dall-e-3' },
},
{
id: 'style',
@@ -69,6 +80,20 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
{ label: 'Natural', id: 'natural' },
],
value: () => 'vivid',
condition: { field: 'model', value: 'dall-e-3' },
},
{
id: 'background',
title: 'Background',
type: 'dropdown',
layout: 'half',
options: [
{ label: 'Auto', id: 'auto' },
{ label: 'Transparent', id: 'transparent' },
{ label: 'Opaque', id: 'opaque' },
],
value: () => 'auto',
condition: { field: 'model', value: 'gpt-image-1' },
},
{
id: 'apiKey',
@@ -81,9 +106,9 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
},
],
tools: {
access: ['openai_dalle'],
access: ['openai_image'],
config: {
tool: () => 'openai_dalle',
tool: () => 'openai_image',
params: (params) => {
if (!params.apiKey) {
throw new Error('API key is required')
@@ -92,32 +117,46 @@ export const ImageGeneratorBlock: BlockConfig<DalleResponse> = {
throw new Error('Prompt is required')
}
return {
// Base parameters for all models
const baseParams = {
prompt: params.prompt,
model: params.model || 'dall-e-3',
size: params.size || '1024x1024',
quality: params.quality || 'standard',
style: params.style || 'vivid',
apiKey: params.apiKey,
}
if (params.model === 'dall-e-3') {
return {
...baseParams,
quality: params.quality || 'standard',
style: params.style || 'vivid',
}
} else if (params.model === 'gpt-image-1') {
return {
...baseParams,
...(params.background && { background: params.background }),
}
}
return baseParams
},
},
},
inputs: {
provider: { type: 'string', required: true },
prompt: { type: 'string', required: true },
model: { type: 'string', required: true },
size: { type: 'string', required: false },
quality: { type: 'string', required: false },
style: { type: 'string', required: false },
background: { type: 'string', required: false },
apiKey: { type: 'string', required: true },
},
outputs: {
response: {
type: {
content: 'string', // URL of the generated image
image: 'string', // Base64 image data
metadata: 'json', // Contains only model information
content: 'string',
image: 'string',
metadata: 'json',
},
},
},

View File

@@ -4,8 +4,8 @@ import { redactApiKeys } from '@/lib/utils'
import { useChatStore } from '../chat/store'
import { ConsoleEntry, ConsoleStore } from './types'
// MAX across all workflows
const MAX_ENTRIES = 50
const MAX_ENTRIES = 50 // MAX across all workflows
const MAX_IMAGE_DATA_SIZE = 1000 // Maximum size of image data to store (in characters)
/**
* Gets a nested property value from an object using a path string
@@ -29,6 +29,67 @@ const getValueByPath = (obj: any, path: string): any => {
return current
}
/**
* Checks if a string is likely a base64 encoded image or large data blob
*/
const isLikelyBase64Data = (value: string): boolean => {
if (value.length < 100) return false
return value.startsWith('data:image') || /^[A-Za-z0-9+/=]{1000,}$/.test(value)
}
/**
* Processes an object to handle large strings (like base64 image data)
* for localStorage to prevent quota issues
*/
const processSafeStorage = (obj: any): any => {
if (!obj) return obj
if (typeof obj !== 'object') return obj
if (Array.isArray(obj)) {
return obj.map((item) => processSafeStorage(item))
}
const result: any = {}
for (const [key, value] of Object.entries(obj)) {
if (
(key === 'image' || key.includes('image')) &&
typeof value === 'string' &&
value.length > MAX_IMAGE_DATA_SIZE
) {
if (value.startsWith('data:image')) {
const mimeEnd = value.indexOf(',')
result[key] =
mimeEnd > 0
? `${value.substring(0, mimeEnd + 1)}[Image data removed, original length: ${value.length}]`
: `[Image data removed, original length: ${value.length}]`
} else {
result[key] = `[Image data removed, original length: ${value.length}]`
}
} else if (typeof value === 'object' && value !== null) {
result[key] = processSafeStorage(value)
} else if (
typeof value === 'string' &&
value.length > MAX_IMAGE_DATA_SIZE &&
isLikelyBase64Data(value)
) {
if (value.startsWith('data:image')) {
const mimeEnd = value.indexOf(',')
result[key] =
mimeEnd > 0
? `${value.substring(0, mimeEnd + 1)}[Large data removed, original length: ${value.length}]`
: `[Large data removed, original length: ${value.length}]`
} else {
result[key] = `[Large data removed, original length: ${value.length}]`
}
} else {
result[key] = value
}
}
return result
}
export const useConsoleStore = create<ConsoleStore>()(
devtools(
persist(
@@ -207,6 +268,17 @@ export const useConsoleStore = create<ConsoleStore>()(
}),
{
name: 'console-store',
partialize: (state) => {
const sanitizedEntries = state.entries.slice(0, MAX_ENTRIES).map((entry) => ({
...entry,
output: processSafeStorage(entry.output),
}))
return {
isOpen: state.isOpen,
entries: sanitizedEntries,
}
},
}
)
)
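
To make the sanitization concrete, here is a hedged before/after sketch (illustrative data, not part of the diff): a console entry whose output contains a large data URI is persisted with the payload stripped and only a short placeholder kept.

```typescript
// Illustrative input to processSafeStorage (the output field of a console entry).
const rawOutput = {
  content: 'https://example.com/generated.png',
  image: 'data:image/png;base64,' + 'A'.repeat(50_000), // large base64 payload
  metadata: { model: 'gpt-image-1' },
}

// After partialize runs processSafeStorage, the persisted entry would hold roughly:
// image: 'data:image/png;base64,[Image data removed, original length: 50022]'
// while content and metadata are stored unchanged.
```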

View File

@@ -1,193 +0,0 @@
import { createLogger } from '@/lib/logs/console-logger'
import { getBaseUrl } from '@/lib/urls/utils'
import { ToolConfig } from '../types'
const logger = createLogger('DalleTool')
export const dalleTool: ToolConfig = {
id: 'openai_dalle',
name: 'DALL-E Generate',
description: "Generate images using OpenAI's DALL-E model",
version: '1.0.0',
params: {
prompt: {
type: 'string',
required: true,
description: 'A text description of the desired image(s)',
},
model: {
type: 'string',
required: true,
description: 'The DALL-E model to use (dall-e-2 or dall-e-3)',
},
size: {
type: 'string',
required: false,
description: 'The size of the generated images (1024x1024, 1024x1792, or 1792x1024)',
},
quality: {
type: 'string',
required: false,
description: 'The quality of the image (standard or hd)',
},
style: {
type: 'string',
required: false,
description: 'The style of the image (vivid or natural)',
},
n: {
type: 'number',
required: false,
description: 'The number of images to generate (1-10)',
},
apiKey: {
type: 'string',
required: true,
description: 'Your OpenAI API key',
},
},
request: {
url: 'https://api.openai.com/v1/images/generations',
method: 'POST',
headers: (params) => ({
'Content-Type': 'application/json',
Authorization: `Bearer ${params.apiKey}`,
}),
body: (params) => ({
model: params.model,
prompt: params.prompt,
size: params.size || '1024x1024',
quality: params.quality || 'standard',
style: params.style || 'vivid',
n: params.n || 1,
}),
},
transformResponse: async (response, params) => {
try {
const data = await response.json()
logger.info('DALL-E API response:', JSON.stringify(data, null, 2))
if (!data.data?.[0]?.url) {
logger.error('No image URL in DALL-E response:', data)
throw new Error('No image URL in response')
}
const imageUrl = data.data[0].url
const modelName = data.model || params?.model || 'dall-e'
logger.info('Got image URL:', imageUrl)
logger.info('Using model:', modelName)
try {
logger.info('Fetching image from URL via proxy...')
const baseUrl = getBaseUrl()
const proxyUrl = new URL(`/api/proxy/image`, baseUrl)
proxyUrl.searchParams.append('url', imageUrl)
const imageResponse = await fetch(proxyUrl.toString(), {
headers: {
Accept: 'image/*, */*',
},
cache: 'no-store',
})
if (!imageResponse.ok) {
logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText)
throw new Error(`Failed to fetch image: ${imageResponse.statusText}`)
}
const imageBlob = await imageResponse.blob()
if (imageBlob.size === 0) {
logger.error('Empty image blob received')
throw new Error('Empty image received')
}
const arrayBuffer = await imageBlob.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
const base64Image = buffer.toString('base64')
return {
success: true,
output: {
content: imageUrl, // Now using image URL as content
image: base64Image, // Base64 image in separate field
metadata: {
model: modelName, // Only include model name in metadata
},
},
}
} catch (error) {
// Log the error but continue with returning the URL
logger.error('Error fetching or processing image:', error)
// Try again with a direct browser fetch as fallback
try {
logger.info('Attempting fallback with direct browser fetch...')
const directImageResponse = await fetch(imageUrl, {
cache: 'no-store',
headers: {
Accept: 'image/*, */*',
'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)',
},
})
if (!directImageResponse.ok) {
throw new Error(`Direct fetch failed: ${directImageResponse.status}`)
}
const imageBlob = await directImageResponse.blob()
if (imageBlob.size === 0) {
throw new Error('Empty blob received from direct fetch')
}
// Server-side safe way to convert blob to base64
const arrayBuffer = await imageBlob.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
const base64Image = buffer.toString('base64')
logger.info(
'Successfully converted image to base64 via direct fetch, length:',
base64Image.length
)
return {
success: true,
output: {
content: imageUrl,
image: base64Image,
metadata: {
model: modelName,
},
},
}
} catch (fallbackError) {
logger.error('Fallback fetch also failed:', fallbackError)
// Even if both attempts fail, still return the URL and metadata
return {
success: true,
output: {
content: imageUrl, // URL as content
image: '', // Empty image since we couldn't get it
metadata: {
model: modelName,
},
},
}
}
}
} catch (error) {
logger.error('Error in DALL-E response handling:', error)
throw error
}
},
transformError: (error) => {
logger.error('DALL-E error:', error)
if (error.response?.data?.error?.message) {
return error.response.data.error.message
}
return error.message || 'Failed to generate image with DALL-E'
},
}

View File

@@ -1,28 +1,29 @@
import { createLogger } from '@/lib/logs/console-logger'
import { getBaseUrl } from '@/lib/urls/utils'
import { ToolConfig } from '../types'
import { BaseImageRequestBody } from './types'
const logger = createLogger('DalleTool')
const logger = createLogger('ImageTool')
export const imageTool: ToolConfig = {
id: 'openai_image',
name: 'Image Generate',
name: 'Image Generator',
description: "Generate images using OpenAI's Image models",
version: '1.0.0',
params: {
prompt: {
type: 'string',
required: true,
description: 'A text description of the desired image(s)',
description: 'A text description of the desired image',
},
model: {
type: 'string',
required: true,
description: 'The DALL-E model to use (gpt-image-1 or dall-e-3)',
description: 'The model to use (gpt-image-1 or dall-e-3)',
},
size: {
type: 'string',
required: false,
required: true,
description: 'The size of the generated image (1024x1024, 1024x1792, or 1792x1024 for dall-e-3; auto, 1024x1024, 1536x1024, or 1024x1536 for gpt-image-1)',
},
quality: {
@@ -35,6 +36,11 @@ export const imageTool: ToolConfig = {
required: false,
description: 'The style of the image (vivid or natural)',
},
background: {
type: 'string',
required: false,
description: 'The background setting (auto, transparent, or opaque); only for gpt-image-1',
},
n: {
type: 'number',
required: false,
@@ -53,138 +59,141 @@ export const imageTool: ToolConfig = {
'Content-Type': 'application/json',
Authorization: `Bearer ${params.apiKey}`,
}),
body: (params) => ({
model: params.model,
prompt: params.prompt,
size: params.size || '1024x1024',
quality: params.quality || 'standard',
style: params.style || 'vivid',
n: params.n || 1,
}),
body: (params) => {
const body: BaseImageRequestBody = {
model: params.model,
prompt: params.prompt,
size: params.size || '1024x1024',
n: params.n || 1,
}
// Add model-specific parameters
if (params.model === 'dall-e-3') {
if (params.quality) body.quality = params.quality
if (params.style) body.style = params.style
} else if (params.model === 'gpt-image-1') {
if (params.background) body.background = params.background
}
return body
},
},
transformResponse: async (response, params) => {
try {
const data = await response.json()
logger.info('Image API response:', JSON.stringify(data, null, 2))
if (!data.data?.[0]?.url) {
logger.error('No image URL in Image response:', data)
throw new Error('No image URL in response')
const sanitizedData = JSON.parse(JSON.stringify(data))
if (sanitizedData.data && Array.isArray(sanitizedData.data)) {
sanitizedData.data.forEach((item: { b64_json?: string }) => {
if (item.b64_json) {
item.b64_json = `[base64 data truncated, length: ${item.b64_json.length}]`
}
})
}
const imageUrl = data.data[0].url
const modelName = data.model || params?.model || 'dall-e'
const modelName = params?.model || 'dall-e-3'
let imageUrl = null
let base64Image = null
try {
logger.info('Fetching image from URL via proxy...')
const baseUrl = getBaseUrl()
const proxyUrl = new URL(`/api/proxy/image`, baseUrl)
proxyUrl.searchParams.append('url', imageUrl)
if (data.data?.[0]?.url) {
imageUrl = data.data[0].url
logger.info('Found image URL in response for DALL-E 3')
} else if (data.data?.[0]?.b64_json) {
base64Image = data.data[0].b64_json
logger.info(
'Found base64 encoded image in response for GPT-Image-1',
`length: ${base64Image.length}`
)
} else {
logger.error('No image data found in API response:', data)
throw new Error('No image data found in response')
}
const imageResponse = await fetch(proxyUrl.toString(), {
headers: {
Accept: 'image/*, */*',
},
cache: 'no-store',
})
if (!imageResponse.ok) {
logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText)
throw new Error(`Failed to fetch image: ${imageResponse.statusText}`)
}
const imageBlob = await imageResponse.blob()
if (imageBlob.size === 0) {
logger.error('Empty image blob received')
throw new Error('Empty image received')
}
const arrayBuffer = await imageBlob.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
const base64Image = buffer.toString('base64')
return {
success: true,
output: {
content: imageUrl, // Now using image URL as content
image: base64Image, // Base64 image in separate field
metadata: {
model: modelName, // Only include model name in metadata
},
},
}
} catch (error) {
// Log the error but continue with returning the URL
logger.error('Error fetching or processing image:', error)
// Try again with a direct browser fetch as fallback
if (imageUrl && !base64Image) {
try {
logger.info('Attempting fallback with direct browser fetch...')
const directImageResponse = await fetch(imageUrl, {
cache: 'no-store',
logger.info('Fetching image from URL via proxy...')
const baseUrl = getBaseUrl()
const proxyUrl = new URL(`/api/proxy/image`, baseUrl)
proxyUrl.searchParams.append('url', imageUrl)
const imageResponse = await fetch(proxyUrl.toString(), {
headers: {
Accept: 'image/*, */*',
'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)',
},
cache: 'no-store',
})
if (!directImageResponse.ok) {
throw new Error(`Direct fetch failed: ${directImageResponse.status}`)
if (!imageResponse.ok) {
logger.error('Failed to fetch image:', imageResponse.status, imageResponse.statusText)
throw new Error(`Failed to fetch image: ${imageResponse.statusText}`)
}
const imageBlob = await directImageResponse.blob()
const imageBlob = await imageResponse.blob()
if (imageBlob.size === 0) {
throw new Error('Empty blob received from direct fetch')
logger.error('Empty image blob received')
throw new Error('Empty image received')
}
// Server-side safe way to convert blob to base64
const arrayBuffer = await imageBlob.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
const base64Image = buffer.toString('base64')
base64Image = buffer.toString('base64')
} catch (error) {
logger.error('Error fetching or processing image:', error)
logger.info(
'Successfully converted image to base64 via direct fetch, length:',
base64Image.length
)
return {
success: true,
output: {
content: imageUrl,
image: base64Image,
metadata: {
model: modelName,
try {
logger.info('Attempting fallback with direct browser fetch...')
const directImageResponse = await fetch(imageUrl, {
cache: 'no-store',
headers: {
Accept: 'image/*, */*',
'User-Agent': 'Mozilla/5.0 (compatible DalleProxy/1.0)',
},
},
}
} catch (fallbackError) {
logger.error('Fallback fetch also failed:', fallbackError)
})
// Even if both attempts fail, still return the URL and metadata
return {
success: true,
output: {
content: imageUrl, // URL as content
image: '', // Empty image since we couldn't get it
metadata: {
model: modelName,
},
},
if (!directImageResponse.ok) {
throw new Error(`Direct fetch failed: ${directImageResponse.status}`)
}
const imageBlob = await directImageResponse.blob()
if (imageBlob.size === 0) {
throw new Error('Empty blob received from direct fetch')
}
const arrayBuffer = await imageBlob.arrayBuffer()
const buffer = Buffer.from(arrayBuffer)
base64Image = buffer.toString('base64')
logger.info(
'Successfully converted image to base64 via direct fetch, length:',
base64Image.length
)
} catch (fallbackError) {
logger.error('Fallback fetch also failed:', fallbackError)
}
}
}
return {
success: true,
output: {
content: imageUrl || 'direct-image',
image: base64Image || '',
metadata: {
model: modelName,
},
},
}
} catch (error) {
logger.error('Error in DALL-E response handling:', error)
logger.error('Error in image generation response handling:', error)
throw error
}
},
transformError: (error) => {
logger.error('DALL-E error:', error)
logger.error('Image generation error:', error)
if (error.response?.data?.error?.message) {
return error.response.data.error.message
}
return error.message || 'Failed to generate image with DALL-E'
return error.message || 'Failed to generate image'
},
}
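
To summarize the branching in `body()` above, a hedged sketch of the JSON sent to the OpenAI images endpoint for each model (illustrative prompts and values):

```typescript
// Illustrative request bodies mirroring the body() logic above (example values).
const dalle3Body = {
  model: 'dall-e-3',
  prompt: 'An isometric illustration of a small workshop',
  size: '1024x1024',
  n: 1,
  quality: 'standard', // dall-e-3 only
  style: 'vivid', // dall-e-3 only
}

const gptImage1Body = {
  model: 'gpt-image-1',
  prompt: 'An isometric illustration of a small workshop',
  size: '1024x1024',
  n: 1,
  background: 'transparent', // gpt-image-1 only
}

// dall-e-3 responses return a hosted URL (data[0].url), while gpt-image-1 returns
// base64 data (data[0].b64_json); transformResponse above handles both shapes.
```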

View File

@@ -1,4 +1,4 @@
import { dalleTool } from './dalle'
import { embeddingsTool } from './embeddings'
import { imageTool } from './image'
export { embeddingsTool, dalleTool }
export { embeddingsTool, imageTool }

View File

@@ -1,5 +1,13 @@
import { ToolResponse } from '../types'
export interface BaseImageRequestBody {
model: string
prompt: string
size: string
n: number
[key: string]: any // Allow for additional properties
}
export interface DalleResponse extends ToolResponse {
output: {
content: string // This will now be the image URL

View File

@@ -50,7 +50,7 @@ import { mem0AddMemoriesTool, mem0GetMemoriesTool, mem0SearchMemoriesTool } from
import { memoryAddTool, memoryDeleteTool, memoryGetAllTool, memoryGetTool } from './memory'
import { mistralParserTool } from './mistral'
import { notionReadTool, notionWriteTool } from './notion'
import { dalleTool, embeddingsTool as openAIEmbeddings } from './openai'
import { imageTool, embeddingsTool as openAIEmbeddings } from './openai'
import { perplexityChatTool } from './perplexity'
import {
pineconeFetchTool,
@@ -149,7 +149,6 @@ export const tools: Record<string, ToolConfig> = {
confluence_retrieve: confluenceRetrieveTool,
confluence_update: confluenceUpdateTool,
twilio_send_sms: sendSMSTool,
openai_dalle: dalleTool,
airtable_create_records: airtableCreateRecordsTool,
airtable_get_record: airtableGetRecordTool,
airtable_list_records: airtableListRecordsTool,
@@ -173,4 +172,5 @@ export const tools: Record<string, ToolConfig> = {
discord_get_messages: discordGetMessagesTool,
discord_get_server: discordGetServerTool,
discord_get_user: discordGetUserTool,
openai_image: imageTool,
}