feat: add OpenAI GPT-image models to image blocks

Add gpt-image-1, gpt-image-1.5, gpt-image-2, and gpt-image-1-mini
as model options in:
- AIImageGeneratorBlock
- AIImageEditorBlock
- AIImageCustomizerBlock

Changes:
- Expand model enums in all three blocks
- Update credentials to accept Union[Replicate, OpenAI]
- Add OpenAI API branches using images.generate and images.edit
- Add block pricing (8-20 credits) consistent with existing tiers
- Rename GeminiImageModel -> ImageCustomizerModel (backwards compatible)
- Rename FluxKontextModelName -> ImageEditorModel (backwards compatible alias)
This commit is contained in:
Toran Bruce Richards
2026-04-22 20:08:28 +00:00
parent c56c1e5dd6
commit 23b5e1272e
4 changed files with 343 additions and 30 deletions

View File

@@ -1,7 +1,8 @@
import asyncio
from enum import Enum
from typing import Literal
from typing import Literal, Union
import openai
from pydantic import SecretStr
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
@@ -24,10 +25,20 @@ from backend.integrations.providers import ProviderName
from backend.util.file import MediaFileType, store_media_file
class GeminiImageModel(str, Enum):
class ImageCustomizerModel(str, Enum):
    """Models selectable in the AI Image Customizer block.

    Each member's value is the identifier handed to the provider API:
    the ``google/...`` values are Replicate model slugs, the ``gpt-image...``
    values are OpenAI model ids.  The block routes on the ``gpt-image``
    prefix, so every OpenAI member must keep that prefix.
    """

    NANO_BANANA = "google/nano-banana"
    NANO_BANANA_PRO = "google/nano-banana-pro"
    NANO_BANANA_2 = "google/nano-banana-2"
    GPT_IMAGE_1 = "gpt-image-1"
    # The value is sent verbatim as the OpenAI ``model`` argument, so it must
    # be the real model id ("gpt-image-1.5"), not a hyphenated variant.
    GPT_IMAGE_1_5 = "gpt-image-1.5"
    GPT_IMAGE_2 = "gpt-image-2"
    GPT_IMAGE_1_MINI = "gpt-image-1-mini"


# Keep old name as alias for backwards compatibility
GeminiImageModel = ImageCustomizerModel
class AspectRatio(str, Enum):
@@ -49,6 +60,21 @@ class OutputFormat(str, Enum):
PNG = "png"
# Translates the block's AspectRatio choices into the ``size`` string passed to
# the OpenAI image API.  Only four target values are used: "auto" (for
# MATCH_INPUT_IMAGE), square 1024x1024, portrait 1024x1536 and landscape
# 1536x1024 -- so every non-square ratio is bucketed by orientation rather
# than reproduced exactly.
ASPECT_TO_OPENAI_SIZE = {
AspectRatio.MATCH_INPUT_IMAGE: "auto",
AspectRatio.ASPECT_1_1: "1024x1024",
# Taller-than-wide ratios map to 1024x1536; wider-than-tall to 1536x1024.
AspectRatio.ASPECT_2_3: "1024x1536",
AspectRatio.ASPECT_3_2: "1536x1024",
AspectRatio.ASPECT_3_4: "1024x1536",
AspectRatio.ASPECT_4_3: "1536x1024",
AspectRatio.ASPECT_4_5: "1024x1536",
AspectRatio.ASPECT_5_4: "1536x1024",
AspectRatio.ASPECT_9_16: "1024x1536",
AspectRatio.ASPECT_16_9: "1536x1024",
AspectRatio.ASPECT_21_9: "1536x1024",
}
TEST_CREDENTIALS = APIKeyCredentials(
id="01234567-89ab-cdef-0123-456789abcdef",
provider="replicate",
@@ -68,17 +94,18 @@ TEST_CREDENTIALS_INPUT = {
class AIImageCustomizerBlock(Block):
class Input(BlockSchemaInput):
credentials: CredentialsMetaInput[
Literal[ProviderName.REPLICATE], Literal["api_key"]
Union[Literal[ProviderName.REPLICATE], Literal[ProviderName.OPENAI]],
Literal["api_key"],
] = CredentialsField(
description="Replicate API key with permissions for Google Gemini image models",
description="Replicate or OpenAI API key with permissions for image generation and editing models",
)
prompt: str = SchemaField(
description="A text description of the image you want to generate",
title="Prompt",
)
model: GeminiImageModel = SchemaField(
model: ImageCustomizerModel = SchemaField(
description="The AI model to use for image generation and editing",
default=GeminiImageModel.NANO_BANANA_2,
default=ImageCustomizerModel.NANO_BANANA_2,
title="Model",
)
images: list[MediaFileType] = SchemaField(
@@ -104,15 +131,16 @@ class AIImageCustomizerBlock(Block):
super().__init__(
id="d76bbe4c-930e-4894-8469-b66775511f71",
description=(
"Generate and edit custom images using Google's Nano-Banana models from Gemini. "
"Provide a prompt and optional reference images to create or modify images."
"Generate and edit custom images using Google's Nano-Banana models from Gemini "
"or OpenAI GPT-image models. Provide a prompt and optional reference images to "
"create or modify images."
),
categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
input_schema=AIImageCustomizerBlock.Input,
output_schema=AIImageCustomizerBlock.Output,
test_input={
"prompt": "Make the scene more vibrant and colorful",
"model": GeminiImageModel.NANO_BANANA_2,
"model": ImageCustomizerModel.NANO_BANANA_2,
"images": [],
"aspect_ratio": AspectRatio.MATCH_INPUT_IMAGE,
"output_format": OutputFormat.JPG,
@@ -171,6 +199,43 @@ class AIImageCustomizerBlock(Block):
except Exception as e:
yield "error", str(e)
async def _customize_with_openai(
    self,
    api_key: SecretStr,
    model_name: str,
    prompt: str,
    images: list[MediaFileType],
    aspect_ratio: str,
    output_format: str,
) -> MediaFileType:
    """Generate or edit an image with an OpenAI GPT-image model.

    With no reference image the prompt is sent to ``images.generate``.
    With a reference image the request goes to ``images.edit`` instead,
    because ``images.generate`` has no ``image`` parameter.  Only the first
    entry of ``images`` is used.

    Args:
        api_key: OpenAI API key.
        model_name: OpenAI model id, passed through unchanged.
        prompt: Text description of the desired image or edit.
        images: Optional reference images; the first one is expected to be
            a ``data:<mime>;base64,<payload>`` URI (assumption inherited
            from the original parsing -- confirm against the caller).
        aspect_ratio: Key into ``ASPECT_TO_OPENAI_SIZE``; unknown values
            fall back to ``"auto"``.
        output_format: Accepted for signature parity with the Replicate
            path; it is not forwarded to OpenAI, and base64 results are
            always labelled ``image/png``.

    Returns:
        The result URL if the response has one, otherwise a PNG data URI
        built from the returned base64 payload.

    Raises:
        ValueError: If the reference image is not a data URI, its payload
            is not valid base64, or the response contains no image.
    """
    import base64

    client = openai.AsyncOpenAI(api_key=api_key.get_secret_value())
    size = ASPECT_TO_OPENAI_SIZE.get(aspect_ratio, "auto")

    if images:
        header, separator, encoded = str(images[0]).partition(",")
        if not separator:
            raise ValueError(
                "OpenAI image customization expects the reference image as a "
                "base64 data URI."
            )
        # Recover the MIME type from the data-URI header so the multipart
        # upload carries a filename and content type; an anonymous byte
        # stream is sent without an image MIME type.
        mime_type = "image/png"
        if header.startswith("data:"):
            mime_type = header[len("data:"):].split(";", 1)[0] or mime_type
        extension = mime_type.rsplit("/", 1)[-1]

        response = await client.images.edit(
            model=model_name,
            image=(f"reference.{extension}", base64.b64decode(encoded), mime_type),
            prompt=prompt,
            n=1,
            size=size,  # type: ignore[arg-type]
        )
    else:
        response = await client.images.generate(
            model=model_name,
            prompt=prompt,
            n=1,
            size=size,  # type: ignore[arg-type]
            quality="auto",
        )

    if response.data and response.data[0].url:
        return MediaFileType(response.data[0].url)
    if response.data and response.data[0].b64_json:
        return MediaFileType(f"data:image/png;base64,{response.data[0].b64_json}")
    raise ValueError("OpenAI image customization returned empty result")
async def run_model(
self,
api_key: SecretStr,
@@ -180,6 +245,17 @@ class AIImageCustomizerBlock(Block):
aspect_ratio: str,
output_format: str,
) -> MediaFileType:
# Route to OpenAI for GPT-image models
if model_name.startswith("gpt-image"):
if len(images) > 1:
raise ValueError(
"OpenAI image models support only a single reference image. "
"Please provide one image or use a Replicate model."
)
return await self._customize_with_openai(
api_key, model_name, prompt, images, aspect_ratio, output_format
)
client = ReplicateClient(api_token=api_key.get_secret_value())
input_params: dict = {

View File

@@ -1,6 +1,7 @@
from enum import Enum
from typing import Literal
from typing import Literal, Union
import openai
from pydantic import SecretStr
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
@@ -76,6 +77,14 @@ SIZE_TO_NANO_BANANA_RATIO = {
ImageSize.TALL: "9:16",
}
# Translates each ImageSize option into the ``size`` string used for OpenAI
# image generation.  LANDSCAPE and WIDE share 1536x1024, and PORTRAIT and TALL
# share 1024x1536, so those pairs produce the same output dimensions.
SIZE_TO_OPENAI = {
ImageSize.SQUARE: "1024x1024",
ImageSize.LANDSCAPE: "1536x1024",
ImageSize.PORTRAIT: "1024x1536",
ImageSize.WIDE: "1536x1024",
ImageSize.TALL: "1024x1536",
}
class ImageStyle(str, Enum):
"""
@@ -107,7 +116,7 @@ class ImageStyle(str, Enum):
class ImageGenModel(str, Enum):
"""
Available model providers
Available model providers including OpenAI GPT-image family
"""
FLUX = "Flux 1.1 Pro"
@@ -116,14 +125,19 @@ class ImageGenModel(str, Enum):
SD3_5 = "Stable Diffusion 3.5 Medium"
NANO_BANANA_PRO = "Nano Banana Pro"
NANO_BANANA_2 = "Nano Banana 2"
GPT_IMAGE_1 = "gpt-image-1"
GPT_IMAGE_1_5 = "gpt-image-1-5"
GPT_IMAGE_2 = "gpt-image-2"
GPT_IMAGE_1_MINI = "gpt-image-1-mini"
class AIImageGeneratorBlock(Block):
class Input(BlockSchemaInput):
credentials: CredentialsMetaInput[
Literal[ProviderName.REPLICATE], Literal["api_key"]
Union[Literal[ProviderName.REPLICATE], Literal[ProviderName.OPENAI]],
Literal["api_key"],
] = CredentialsField(
description="Enter your Replicate API key to access the image generation API. You can obtain an API key from https://replicate.com/account/api-tokens.",
description="Enter your Replicate or OpenAI API key to access the image generation API.",
)
prompt: str = SchemaField(
description="Text prompt for image generation",
@@ -182,7 +196,10 @@ class AIImageGeneratorBlock(Block):
# Return a data URI directly so store_media_file doesn't need to download
"_run_client": lambda *args, **kwargs: (
"data:image/webp;base64,UklGRiQAAABXRUJQVlA4IBgAAAAwAQCdASoBAAEAAQAcJYgCdAEO"
)
),
"_generate_with_openai": lambda *args, **kwargs: (
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
),
},
)
@@ -216,8 +233,34 @@ class AIImageGeneratorBlock(Block):
except Exception as e:
raise RuntimeError(f"Unexpected error during model execution: {e}")
async def _generate_with_openai(
    self, input_data: Input, credentials: APIKeyCredentials
) -> str:
    """Generate one image through OpenAI and return a reference to it.

    Sends ``input_data.prompt`` to ``images.generate`` using the selected
    model's value as the model id.  The requested size is looked up in
    ``SIZE_TO_OPENAI`` and defaults to ``"1024x1024"``.

    Returns:
        The image URL when the response provides one, otherwise a
        ``data:image/png;base64,...`` URI built from the base64 payload.

    Raises:
        RuntimeError: If the response holds neither a URL nor base64 data.
    """
    openai_client = openai.AsyncOpenAI(
        api_key=credentials.api_key.get_secret_value()
    )
    requested_size = SIZE_TO_OPENAI.get(input_data.size, "1024x1024")

    result = await openai_client.images.generate(
        model=input_data.model.value,
        prompt=input_data.prompt,
        n=1,
        size=requested_size,  # type: ignore[arg-type]
        quality="auto",
    )

    # Only the first returned image is considered (n=1 was requested).
    first_image = result.data[0] if result.data else None
    if first_image is not None:
        if first_image.url:
            return first_image.url
        if first_image.b64_json:
            return f"data:image/png;base64,{first_image.b64_json}"

    raise RuntimeError("OpenAI image generation returned empty result")
async def generate_image(self, input_data: Input, credentials: APIKeyCredentials):
try:
# Route to OpenAI for GPT-image models
if input_data.model.value.startswith("gpt-image"):
return await self._generate_with_openai(input_data, credentials)
# Handle style-based prompt modification for models without native style support
modified_prompt = input_data.prompt
if input_data.model not in [ImageGenModel.RECRAFT]:

View File

@@ -1,6 +1,7 @@
from enum import Enum
from typing import Literal, Optional
from typing import Literal, Optional, Union
import openai
from pydantic import SecretStr
from replicate.client import Client as ReplicateClient
from replicate.helpers import FileOutput
@@ -43,6 +44,10 @@ class ImageEditorModel(str, Enum):
FLUX_KONTEXT_MAX = "Flux Kontext Max"
NANO_BANANA_PRO = "Nano Banana Pro"
NANO_BANANA_2 = "Nano Banana 2"
GPT_IMAGE_1 = "gpt-image-1"
GPT_IMAGE_1_5 = "gpt-image-1-5"
GPT_IMAGE_2 = "gpt-image-2"
GPT_IMAGE_1_MINI = "gpt-image-1-mini"
@property
def api_name(self) -> str:
@@ -76,12 +81,30 @@ class AspectRatio(str, Enum):
ASPECT_1_2 = "1:2"
# Translates the editor's AspectRatio choices into the ``size`` string passed
# to the OpenAI image API.  Only three sizes are used -- square 1024x1024,
# landscape 1536x1024 and portrait 1024x1536 -- so each ratio is bucketed by
# orientation rather than reproduced exactly.  Ratios missing from this table
# are handled by the caller's ``.get`` default.
ASPECT_TO_OPENAI_SIZE = {
AspectRatio.ASPECT_1_1: "1024x1024",
AspectRatio.ASPECT_16_9: "1536x1024",
AspectRatio.ASPECT_9_16: "1024x1536",
AspectRatio.ASPECT_4_3: "1536x1024",
AspectRatio.ASPECT_3_4: "1024x1536",
AspectRatio.ASPECT_3_2: "1536x1024",
AspectRatio.ASPECT_2_3: "1024x1536",
AspectRatio.ASPECT_4_5: "1024x1536",
AspectRatio.ASPECT_5_4: "1536x1024",
AspectRatio.ASPECT_21_9: "1536x1024",
AspectRatio.ASPECT_9_21: "1024x1536",
AspectRatio.ASPECT_2_1: "1536x1024",
AspectRatio.ASPECT_1_2: "1024x1536",
}
class AIImageEditorBlock(Block):
class Input(BlockSchemaInput):
credentials: CredentialsMetaInput[
Literal[ProviderName.REPLICATE], Literal["api_key"]
Union[Literal[ProviderName.REPLICATE], Literal[ProviderName.OPENAI]],
Literal["api_key"],
] = CredentialsField(
description="Replicate API key with permissions for Flux Kontext and Nano Banana models",
description="Replicate or OpenAI API key with permissions for image editing models",
)
prompt: str = SchemaField(
description="Text instruction describing the desired edit",
@@ -99,7 +122,7 @@ class AIImageEditorBlock(Block):
advanced=False,
)
seed: Optional[int] = SchemaField(
description="Random seed. Set for reproducible generation (Flux Kontext only; ignored by Nano Banana models)",
description="Random seed. Set for reproducible generation (Flux Kontext only; ignored by other models)",
default=None,
title="Seed",
advanced=True,
@@ -119,8 +142,8 @@ class AIImageEditorBlock(Block):
super().__init__(
id="3fd9c73d-4370-4925-a1ff-1b86b99fabfa",
description=(
"Edit images using Flux Kontext or Google Nano Banana models. Provide a prompt "
"and optional reference image to generate a modified image."
"Edit images using Flux Kontext, Google Nano Banana, or OpenAI GPT-image models. "
"Provide a prompt and optional reference image to generate a modified image."
),
categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
input_schema=AIImageEditorBlock.Input,
@@ -140,7 +163,7 @@ class AIImageEditorBlock(Block):
test_mock={
# Use data URI to avoid HTTP requests during tests
"run_model": lambda *args, **kwargs: (
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAhKmMIQAAAABJRU5ErkJggg=="
),
},
test_credentials=TEST_CREDENTIALS,
@@ -180,6 +203,39 @@ class AIImageEditorBlock(Block):
)
yield "output_image", stored_url
async def _edit_with_openai(
    self,
    api_key: SecretStr,
    model: ImageEditorModel,
    prompt: str,
    input_image_b64: Optional[str],
    aspect_ratio: str,
) -> MediaFileType:
    """Edit an image with an OpenAI GPT-image model via ``images.edit``.

    Args:
        api_key: OpenAI API key.
        model: Selected editor model; its value is sent as the model id.
        prompt: Text instruction describing the desired edit.
        input_image_b64: Source image, expected as a
            ``data:<mime>;base64,<payload>`` URI (assumption inherited from
            the original parsing -- confirm against the caller).
        aspect_ratio: Key into ``ASPECT_TO_OPENAI_SIZE``.  Ratios without an
            entry fall back to ``"auto"`` so the API picks the size instead
            of the output being forced square.

    Returns:
        The result URL if the response has one, otherwise a PNG data URI
        built from the returned base64 payload.

    Raises:
        ValueError: If no input image is given, the image is not a data
            URI, its payload is not valid base64, or the response contains
            no image.
    """
    import base64

    if not input_image_b64:
        raise ValueError("OpenAI image editing requires an input image.")

    client = openai.AsyncOpenAI(api_key=api_key.get_secret_value())

    header, separator, encoded = str(input_image_b64).partition(",")
    if not separator:
        raise ValueError(
            "OpenAI image editing expects the input image as a base64 data URI."
        )
    # Recover the MIME type from the data-URI header so the multipart upload
    # carries a filename and content type; an anonymous byte stream is sent
    # without an image MIME type.
    mime_type = "image/png"
    if header.startswith("data:"):
        mime_type = header[len("data:"):].split(";", 1)[0] or mime_type
    extension = mime_type.rsplit("/", 1)[-1]

    # "auto" matches the fallback used by the customizer block's OpenAI helper.
    size = ASPECT_TO_OPENAI_SIZE.get(aspect_ratio, "auto")

    response = await client.images.edit(
        model=model.value,
        image=(f"input.{extension}", base64.b64decode(encoded), mime_type),
        prompt=prompt,
        n=1,
        size=size,  # type: ignore[arg-type]
    )

    if response.data and response.data[0].url:
        return MediaFileType(response.data[0].url)
    if response.data and response.data[0].b64_json:
        return MediaFileType(f"data:image/png;base64,{response.data[0].b64_json}")
    raise ValueError("OpenAI image edit returned empty result")
async def run_model(
self,
api_key: SecretStr,
@@ -191,6 +247,12 @@ class AIImageEditorBlock(Block):
user_id: str,
graph_exec_id: str,
) -> MediaFileType:
# Route to OpenAI for GPT-image models
if model.value.startswith("gpt-image"):
return await self._edit_with_openai(
api_key, model, prompt, input_image_b64, aspect_ratio
)
client = ReplicateClient(api_token=api_key.get_secret_value())
model_name = model.api_name

View File

@@ -1,7 +1,7 @@
from typing import Type
from backend.blocks._base import Block, BlockCost, BlockCostType
from backend.blocks.ai_image_customizer import AIImageCustomizerBlock, GeminiImageModel
from backend.blocks.ai_image_customizer import AIImageCustomizerBlock, ImageCustomizerModel
from backend.blocks.ai_image_generator_block import AIImageGeneratorBlock, ImageGenModel
from backend.blocks.ai_music_generator import AIMusicGeneratorBlock
from backend.blocks.ai_shortform_video_block import (
@@ -20,7 +20,7 @@ from backend.blocks.enrichlayer.linkedin import (
LinkedinPersonLookupBlock,
LinkedinRoleLookupBlock,
)
from backend.blocks.flux_kontext import AIImageEditorBlock, FluxKontextModelName
from backend.blocks.flux_kontext import AIImageEditorBlock, ImageEditorModel
from backend.blocks.ideogram import IdeogramModelBlock
from backend.blocks.jina.embeddings import JinaEmbeddingBlock
from backend.blocks.jina.fact_checker import FactCheckerBlock
@@ -477,7 +477,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=10,
cost_filter={
"model": FluxKontextModelName.FLUX_KONTEXT_PRO,
"model": ImageEditorModel.FLUX_KONTEXT_PRO,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -488,7 +488,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=20,
cost_filter={
"model": FluxKontextModelName.FLUX_KONTEXT_MAX,
"model": ImageEditorModel.FLUX_KONTEXT_MAX,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -499,7 +499,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=14, # Nano Banana Pro
cost_filter={
"model": FluxKontextModelName.NANO_BANANA_PRO,
"model": ImageEditorModel.NANO_BANANA_PRO,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -510,7 +510,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=14, # Nano Banana 2
cost_filter={
"model": FluxKontextModelName.NANO_BANANA_2,
"model": ImageEditorModel.NANO_BANANA_2,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -518,6 +518,50 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
},
),
BlockCost(
cost_amount=15, # gpt-image-1
cost_filter={
"model": ImageEditorModel.GPT_IMAGE_1,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=18, # gpt-image-1-5
cost_filter={
"model": ImageEditorModel.GPT_IMAGE_1_5,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=20, # gpt-image-2
cost_filter={
"model": ImageEditorModel.GPT_IMAGE_2,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=8, # gpt-image-1-mini
cost_filter={
"model": ImageEditorModel.GPT_IMAGE_1_MINI,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
],
AIMusicGeneratorBlock: [
BlockCost(
@@ -718,12 +762,56 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
},
),
BlockCost(
cost_amount=15, # gpt-image-1
cost_filter={
"model": ImageGenModel.GPT_IMAGE_1,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=18, # gpt-image-1-5
cost_filter={
"model": ImageGenModel.GPT_IMAGE_1_5,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=20, # gpt-image-2
cost_filter={
"model": ImageGenModel.GPT_IMAGE_2,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=8, # gpt-image-1-mini
cost_filter={
"model": ImageGenModel.GPT_IMAGE_1_MINI,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
],
AIImageCustomizerBlock: [
BlockCost(
cost_amount=10, # Nano Banana (original)
cost_filter={
"model": GeminiImageModel.NANO_BANANA,
"model": ImageCustomizerModel.NANO_BANANA,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -734,7 +822,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=14, # Nano Banana Pro: $0.14 per image at 2K
cost_filter={
"model": GeminiImageModel.NANO_BANANA_PRO,
"model": ImageCustomizerModel.NANO_BANANA_PRO,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -745,7 +833,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
BlockCost(
cost_amount=14, # Nano Banana 2: same pricing tier as Pro
cost_filter={
"model": GeminiImageModel.NANO_BANANA_2,
"model": ImageCustomizerModel.NANO_BANANA_2,
"credentials": {
"id": replicate_credentials.id,
"provider": replicate_credentials.provider,
@@ -753,6 +841,50 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
},
},
),
BlockCost(
cost_amount=15, # gpt-image-1
cost_filter={
"model": ImageCustomizerModel.GPT_IMAGE_1,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=18, # gpt-image-1-5
cost_filter={
"model": ImageCustomizerModel.GPT_IMAGE_1_5,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=20, # gpt-image-2
cost_filter={
"model": ImageCustomizerModel.GPT_IMAGE_2,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
BlockCost(
cost_amount=8, # gpt-image-1-mini
cost_filter={
"model": ImageCustomizerModel.GPT_IMAGE_1_MINI,
"credentials": {
"id": openai_credentials.id,
"provider": openai_credentials.provider,
"type": openai_credentials.type,
},
},
),
],
PerplexityBlock: [
# Sonar Deep Research: up to $5/1K searches + $8/1M reasoning tokens.