mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
Compare commits
3 Commits
symphony/S
...
feat/add-g
| Author | SHA1 | Date | |
|---|---|---|---|
| | 41017e7975 | | |
| | 722a8ad534 | | |
| | 23b5e1272e | | |
@@ -1,7 +1,10 @@
|
||||
import asyncio
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
from io import BytesIO
|
||||
import base64
|
||||
from typing import Literal, cast
|
||||
|
||||
import openai
|
||||
from pydantic import SecretStr
|
||||
from replicate.client import Client as ReplicateClient
|
||||
from replicate.helpers import FileOutput
|
||||
@@ -24,10 +27,19 @@ from backend.integrations.providers import ProviderName
|
||||
from backend.util.file import MediaFileType, store_media_file
|
||||
|
||||
|
||||
class GeminiImageModel(str, Enum):
|
||||
class ImageCustomizerModel(str, Enum):
|
||||
"""Models for the AI Image Customizer block, supporting both Replicate and OpenAI."""
|
||||
|
||||
NANO_BANANA = "google/nano-banana"
|
||||
NANO_BANANA_PRO = "google/nano-banana-pro"
|
||||
NANO_BANANA_2 = "google/nano-banana-2"
|
||||
GPT_IMAGE_1 = "gpt-image-1"
|
||||
GPT_IMAGE_1_5 = "gpt-image-1.5"
|
||||
GPT_IMAGE_2 = "gpt-image-2"
|
||||
GPT_IMAGE_1_MINI = "gpt-image-1-mini"
|
||||
|
||||
|
||||
GeminiImageModel = ImageCustomizerModel
|
||||
|
||||
|
||||
class AspectRatio(str, Enum):
|
||||
@@ -49,6 +61,21 @@ class OutputFormat(str, Enum):
|
||||
PNG = "png"
|
||||
|
||||
|
||||
# The three fixed pixel sizes used below, named by orientation.
_OPENAI_SQUARE = "1024x1024"
_OPENAI_PORTRAIT = "1024x1536"
_OPENAI_LANDSCAPE = "1536x1024"

# Lookup from the block's aspect-ratio choices to the `size` value sent to
# OpenAI. Only one square, one portrait and one landscape size are used, so
# several ratios collapse onto the same entry; MATCH_INPUT_IMAGE maps to
# "auto".
ASPECT_TO_OPENAI_SIZE = {
    AspectRatio.MATCH_INPUT_IMAGE: "auto",
    AspectRatio.ASPECT_1_1: _OPENAI_SQUARE,
    AspectRatio.ASPECT_2_3: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_3_2: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_3_4: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_4_3: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_4_5: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_5_4: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_9_16: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_16_9: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_21_9: _OPENAI_LANDSCAPE,
}
|
||||
|
||||
|
||||
TEST_CREDENTIALS = APIKeyCredentials(
|
||||
id="01234567-89ab-cdef-0123-456789abcdef",
|
||||
provider="replicate",
|
||||
@@ -68,17 +95,18 @@ TEST_CREDENTIALS_INPUT = {
|
||||
class AIImageCustomizerBlock(Block):
|
||||
class Input(BlockSchemaInput):
|
||||
credentials: CredentialsMetaInput[
|
||||
Literal[ProviderName.REPLICATE], Literal["api_key"]
|
||||
Literal[ProviderName.REPLICATE, ProviderName.OPENAI],
|
||||
Literal["api_key"],
|
||||
] = CredentialsField(
|
||||
description="Replicate API key with permissions for Google Gemini image models",
|
||||
description="Replicate or OpenAI API key with permissions for image generation and editing models",
|
||||
)
|
||||
prompt: str = SchemaField(
|
||||
description="A text description of the image you want to generate",
|
||||
title="Prompt",
|
||||
)
|
||||
model: GeminiImageModel = SchemaField(
|
||||
model: ImageCustomizerModel = SchemaField(
|
||||
description="The AI model to use for image generation and editing",
|
||||
default=GeminiImageModel.NANO_BANANA_2,
|
||||
default=ImageCustomizerModel.NANO_BANANA_2,
|
||||
title="Model",
|
||||
)
|
||||
images: list[MediaFileType] = SchemaField(
|
||||
@@ -104,26 +132,25 @@ class AIImageCustomizerBlock(Block):
|
||||
super().__init__(
|
||||
id="d76bbe4c-930e-4894-8469-b66775511f71",
|
||||
description=(
|
||||
"Generate and edit custom images using Google's Nano-Banana models from Gemini. "
|
||||
"Provide a prompt and optional reference images to create or modify images."
|
||||
"Generate and edit custom images using Google's Nano-Banana models from Gemini "
|
||||
"or OpenAI GPT-image models. Provide a prompt and optional reference images to "
|
||||
"create or modify images."
|
||||
),
|
||||
categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
|
||||
input_schema=AIImageCustomizerBlock.Input,
|
||||
output_schema=AIImageCustomizerBlock.Output,
|
||||
test_input={
|
||||
"prompt": "Make the scene more vibrant and colorful",
|
||||
"model": GeminiImageModel.NANO_BANANA_2,
|
||||
"model": ImageCustomizerModel.NANO_BANANA_2,
|
||||
"images": [],
|
||||
"aspect_ratio": AspectRatio.MATCH_INPUT_IMAGE,
|
||||
"output_format": OutputFormat.JPG,
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_output=[
|
||||
# Output will be a workspace ref or data URI depending on context
|
||||
("image_url", lambda x: x.startswith(("workspace://", "data:"))),
|
||||
],
|
||||
test_mock={
|
||||
# Use data URI to avoid HTTP requests during tests
|
||||
"run_model": lambda *args, **kwargs: MediaFileType(
|
||||
"data:image/jpeg;base64,/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q=="
|
||||
),
|
||||
@@ -140,13 +167,12 @@ class AIImageCustomizerBlock(Block):
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
try:
|
||||
# Convert local file paths to Data URIs (base64) so Replicate can access them
|
||||
processed_images = await asyncio.gather(
|
||||
*(
|
||||
store_media_file(
|
||||
file=img,
|
||||
execution_context=execution_context,
|
||||
return_format="for_external_api", # Get content for Replicate API
|
||||
return_format="for_external_api",
|
||||
)
|
||||
for img in input_data.images
|
||||
)
|
||||
@@ -161,7 +187,6 @@ class AIImageCustomizerBlock(Block):
|
||||
output_format=input_data.output_format.value,
|
||||
)
|
||||
|
||||
# Store the generated image to the user's workspace for persistence
|
||||
stored_url = await store_media_file(
|
||||
file=result,
|
||||
execution_context=execution_context,
|
||||
@@ -171,6 +196,53 @@ class AIImageCustomizerBlock(Block):
|
||||
except Exception as e:
|
||||
yield "error", str(e)
|
||||
|
||||
async def _customize_with_openai(
    self,
    api_key: SecretStr,
    model_name: str,
    prompt: str,
    images: list[MediaFileType],
    aspect_ratio: str,
    output_format: str,
) -> MediaFileType:
    """Create or edit an image through the OpenAI Images API.

    When a reference image is supplied the request goes to ``images.edit``;
    otherwise it goes to ``images.generate``.

    Args:
        api_key: OpenAI API key.
        model_name: OpenAI model identifier, passed through unchanged.
        prompt: Text instruction for the generation or edit.
        images: Zero or one reference image, expected as a base64 data URI.
        aspect_ratio: Key looked up in ``ASPECT_TO_OPENAI_SIZE``; unknown
            values fall back to ``"auto"``.
        output_format: Currently unused by this method: it is not forwarded
            to OpenAI and the result is always labelled ``image/png``.

    Returns:
        A ``data:image/png;base64,...`` URI wrapping the returned payload.

    Raises:
        ValueError: If more than one reference image is given, the reference
            image is not a data URI, or OpenAI returns no image data.
    """
    client = openai.AsyncOpenAI(api_key=api_key.get_secret_value())

    size_literal = cast(
        Literal["1024x1024", "1536x1024", "1024x1536", "auto"],
        ASPECT_TO_OPENAI_SIZE.get(aspect_ratio, "auto"),
    )

    if images:
        if len(images) > 1:
            raise ValueError(
                "OpenAI image models support only a single reference image. "
                "Please provide one image or use a Replicate model."
            )

        header, separator, encoded = str(images[0]).partition(",")
        if not separator or not header.startswith("data:"):
            raise ValueError("Expected a data-URI for the reference image.")

        # A bare BytesIO carries no filename or content type, so the
        # multipart part would be sent as application/octet-stream, which the
        # image edit endpoint rejects. Send an explicit
        # (filename, content, mime) tuple built from the data-URI header.
        # Falls back to image/png when the header names no media type.
        mime_type = (
            header[len("data:"):].split(";", 1)[0].strip().lower() or "image/png"
        )
        upload = (
            f"reference.{mime_type.rsplit('/', 1)[-1]}",
            base64.b64decode(encoded),
            mime_type,
        )

        response = await client.images.edit(
            model=model_name,
            image=upload,
            prompt=prompt,
            n=1,
            size=size_literal,
        )
    else:
        response = await client.images.generate(
            model=model_name,
            prompt=prompt,
            n=1,
            size=size_literal,
            quality="auto",
        )

    if not response.data or not response.data[0].b64_json:
        raise ValueError("OpenAI image customization returned empty result")
    return MediaFileType(f"data:image/png;base64,{response.data[0].b64_json}")
|
||||
|
||||
async def run_model(
|
||||
self,
|
||||
api_key: SecretStr,
|
||||
@@ -180,6 +252,11 @@ class AIImageCustomizerBlock(Block):
|
||||
aspect_ratio: str,
|
||||
output_format: str,
|
||||
) -> MediaFileType:
|
||||
if model_name.startswith("gpt-image"):
|
||||
return await self._customize_with_openai(
|
||||
api_key, model_name, prompt, images, aspect_ratio, output_format
|
||||
)
|
||||
|
||||
client = ReplicateClient(api_token=api_key.get_secret_value())
|
||||
|
||||
input_params: dict = {
|
||||
@@ -188,7 +265,6 @@ class AIImageCustomizerBlock(Block):
|
||||
"output_format": output_format,
|
||||
}
|
||||
|
||||
# Add images to input if provided (API expects "image_input" parameter)
|
||||
if images:
|
||||
input_params["image_input"] = [str(img) for img in images]
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
from typing import Literal, cast
|
||||
|
||||
import openai
|
||||
from pydantic import SecretStr
|
||||
from replicate.client import Client as ReplicateClient
|
||||
from replicate.helpers import FileOutput
|
||||
@@ -76,6 +77,14 @@ SIZE_TO_NANO_BANANA_RATIO = {
|
||||
ImageSize.TALL: "9:16",
|
||||
}
|
||||
|
||||
# The three fixed pixel sizes used below, named by orientation.
_OPENAI_SQUARE = "1024x1024"
_OPENAI_PORTRAIT = "1024x1536"
_OPENAI_LANDSCAPE = "1536x1024"

# Lookup from the block's ImageSize choices to the `size` value sent to
# OpenAI. WIDE shares the landscape size and TALL shares the portrait size.
SIZE_TO_OPENAI = {
    ImageSize.SQUARE: _OPENAI_SQUARE,
    ImageSize.LANDSCAPE: _OPENAI_LANDSCAPE,
    ImageSize.PORTRAIT: _OPENAI_PORTRAIT,
    ImageSize.WIDE: _OPENAI_LANDSCAPE,
    ImageSize.TALL: _OPENAI_PORTRAIT,
}
|
||||
|
||||
|
||||
class ImageStyle(str, Enum):
|
||||
"""
|
||||
@@ -107,7 +116,7 @@ class ImageStyle(str, Enum):
|
||||
|
||||
class ImageGenModel(str, Enum):
|
||||
"""
|
||||
Available model providers
|
||||
Available model providers including OpenAI GPT-image family
|
||||
"""
|
||||
|
||||
FLUX = "Flux 1.1 Pro"
|
||||
@@ -116,14 +125,19 @@ class ImageGenModel(str, Enum):
|
||||
SD3_5 = "Stable Diffusion 3.5 Medium"
|
||||
NANO_BANANA_PRO = "Nano Banana Pro"
|
||||
NANO_BANANA_2 = "Nano Banana 2"
|
||||
GPT_IMAGE_1 = "gpt-image-1"
|
||||
GPT_IMAGE_1_5 = "gpt-image-1.5"
|
||||
GPT_IMAGE_2 = "gpt-image-2"
|
||||
GPT_IMAGE_1_MINI = "gpt-image-1-mini"
|
||||
|
||||
|
||||
class AIImageGeneratorBlock(Block):
|
||||
class Input(BlockSchemaInput):
|
||||
credentials: CredentialsMetaInput[
|
||||
Literal[ProviderName.REPLICATE], Literal["api_key"]
|
||||
Literal[ProviderName.REPLICATE, ProviderName.OPENAI],
|
||||
Literal["api_key"],
|
||||
] = CredentialsField(
|
||||
description="Enter your Replicate API key to access the image generation API. You can obtain an API key from https://replicate.com/account/api-tokens.",
|
||||
description="Enter your Replicate or OpenAI API key to access the image generation API.",
|
||||
)
|
||||
prompt: str = SchemaField(
|
||||
description="Text prompt for image generation",
|
||||
@@ -174,15 +188,16 @@ class AIImageGeneratorBlock(Block):
|
||||
test_output=[
|
||||
(
|
||||
"image_url",
|
||||
# Test output is a data URI since we now store images
|
||||
lambda x: x.startswith("data:image/"),
|
||||
),
|
||||
],
|
||||
test_mock={
|
||||
# Return a data URI directly so store_media_file doesn't need to download
|
||||
"_run_client": lambda *args, **kwargs: (
|
||||
"data:image/webp;base64,UklGRiQAAABXRUJQVlA4IBgAAAAwAQCdASoBAAEAAQAcJYgCdAEO"
|
||||
)
|
||||
),
|
||||
"_generate_with_openai": lambda *args, **kwargs: (
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -190,13 +205,9 @@ class AIImageGeneratorBlock(Block):
|
||||
self, credentials: APIKeyCredentials, model_name: str, input_params: dict
|
||||
):
|
||||
try:
|
||||
# Initialize Replicate client
|
||||
client = ReplicateClient(api_token=credentials.api_key.get_secret_value())
|
||||
|
||||
# Run the model with input parameters
|
||||
output = await client.async_run(model_name, input=input_params, wait=False)
|
||||
|
||||
# Process output
|
||||
if isinstance(output, list) and len(output) > 0:
|
||||
if isinstance(output[0], FileOutput):
|
||||
result_url = output[0].url
|
||||
@@ -216,16 +227,38 @@ class AIImageGeneratorBlock(Block):
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Unexpected error during model execution: {e}")
|
||||
|
||||
async def _generate_with_openai(
    self, input_data: Input, credentials: APIKeyCredentials
) -> str:
    """Generate one image with an OpenAI GPT-image model.

    Args:
        input_data: Block input; ``model``, ``prompt``, ``style`` and ``size``
            are read.
        credentials: API-key credentials used to build the OpenAI client.

    Returns:
        A ``data:image/png;base64,...`` URI wrapping the returned payload.

    Raises:
        RuntimeError: If OpenAI returns no image data.
    """
    client = openai.AsyncOpenAI(api_key=credentials.api_key.get_secret_value())

    # The caller returns from this method before its own style handling runs,
    # and no style argument is passed to OpenAI, so without this step the
    # selected style would be silently dropped. Fold it into the prompt the
    # same way the other prompt-styled models do.
    style_prefix = self._style_to_prompt_prefix(input_data.style)
    styled_prompt = f"{style_prefix} {input_data.prompt}".strip()

    size_literal = cast(
        Literal["1024x1024", "1536x1024", "1024x1536"],
        SIZE_TO_OPENAI.get(input_data.size, "1024x1024"),
    )

    response = await client.images.generate(
        model=input_data.model.value,
        prompt=styled_prompt,
        n=1,
        size=size_literal,
        quality="auto",
    )
    if not response.data or not response.data[0].b64_json:
        raise RuntimeError("OpenAI image generation returned empty result")
    return f"data:image/png;base64,{response.data[0].b64_json}"
|
||||
|
||||
async def generate_image(self, input_data: Input, credentials: APIKeyCredentials):
|
||||
try:
|
||||
# Handle style-based prompt modification for models without native style support
|
||||
if input_data.model.value.startswith("gpt-image"):
|
||||
return await self._generate_with_openai(input_data, credentials)
|
||||
|
||||
modified_prompt = input_data.prompt
|
||||
if input_data.model not in [ImageGenModel.RECRAFT]:
|
||||
style_prefix = self._style_to_prompt_prefix(input_data.style)
|
||||
modified_prompt = f"{style_prefix} {modified_prompt}".strip()
|
||||
|
||||
if input_data.model == ImageGenModel.SD3_5:
|
||||
# Use Stable Diffusion 3.5 with aspect ratio
|
||||
input_params = {
|
||||
"prompt": modified_prompt,
|
||||
"aspect_ratio": SIZE_TO_SD_RATIO[input_data.size],
|
||||
@@ -242,14 +275,13 @@ class AIImageGeneratorBlock(Block):
|
||||
return output
|
||||
|
||||
elif input_data.model == ImageGenModel.FLUX:
|
||||
# Use Flux-specific dimensions with 'jpg' format to avoid ReplicateError
|
||||
width, height = SIZE_TO_FLUX_DIMENSIONS[input_data.size]
|
||||
input_params = {
|
||||
"prompt": modified_prompt,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"aspect_ratio": SIZE_TO_FLUX_RATIO[input_data.size],
|
||||
"output_format": "jpg", # Set to jpg for Flux models
|
||||
"output_format": "jpg",
|
||||
"output_quality": 90,
|
||||
}
|
||||
output = await self._run_client(
|
||||
@@ -287,7 +319,6 @@ class AIImageGeneratorBlock(Block):
|
||||
ImageGenModel.NANO_BANANA_PRO,
|
||||
ImageGenModel.NANO_BANANA_2,
|
||||
):
|
||||
# Use Nano Banana models (Google Gemini image variants)
|
||||
model_map = {
|
||||
ImageGenModel.NANO_BANANA_PRO: "google/nano-banana-pro",
|
||||
ImageGenModel.NANO_BANANA_2: "google/nano-banana-2",
|
||||
@@ -308,9 +339,6 @@ class AIImageGeneratorBlock(Block):
|
||||
raise RuntimeError(f"Failed to generate image: {str(e)}")
|
||||
|
||||
def _style_to_prompt_prefix(self, style: ImageStyle) -> str:
|
||||
"""
|
||||
Convert a style enum to a prompt prefix for models without native style support.
|
||||
"""
|
||||
if style == ImageStyle.ANY:
|
||||
return ""
|
||||
|
||||
@@ -349,7 +377,6 @@ class AIImageGeneratorBlock(Block):
|
||||
try:
|
||||
url = await self.generate_image(input_data, credentials)
|
||||
if url:
|
||||
# Store the generated image to the user's workspace/execution folder
|
||||
stored_url = await store_media_file(
|
||||
file=MediaFileType(url),
|
||||
execution_context=execution_context,
|
||||
@@ -359,11 +386,9 @@ class AIImageGeneratorBlock(Block):
|
||||
else:
|
||||
yield "error", "Image generation returned an empty result."
|
||||
except Exception as e:
|
||||
# Capture and return only the message of the exception, avoiding serialization of non-serializable objects
|
||||
yield "error", str(e)
|
||||
|
||||
|
||||
# Test credentials stay the same
|
||||
TEST_CREDENTIALS = APIKeyCredentials(
|
||||
id="01234567-89ab-cdef-0123-456789abcdef",
|
||||
provider="replicate",
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
from enum import Enum
|
||||
from typing import Literal, Optional
|
||||
from io import BytesIO
|
||||
import base64
|
||||
from typing import Literal, Optional, cast
|
||||
|
||||
import openai
|
||||
from pydantic import SecretStr
|
||||
from replicate.client import Client as ReplicateClient
|
||||
from replicate.helpers import FileOutput
|
||||
@@ -43,6 +46,10 @@ class ImageEditorModel(str, Enum):
|
||||
FLUX_KONTEXT_MAX = "Flux Kontext Max"
|
||||
NANO_BANANA_PRO = "Nano Banana Pro"
|
||||
NANO_BANANA_2 = "Nano Banana 2"
|
||||
GPT_IMAGE_1 = "gpt-image-1"
|
||||
GPT_IMAGE_1_5 = "gpt-image-1.5"
|
||||
GPT_IMAGE_2 = "gpt-image-2"
|
||||
GPT_IMAGE_1_MINI = "gpt-image-1-mini"
|
||||
|
||||
@property
|
||||
def api_name(self) -> str:
|
||||
@@ -55,7 +62,6 @@ class ImageEditorModel(str, Enum):
|
||||
return _map[self.name]
|
||||
|
||||
|
||||
# Keep old name as alias for backwards compatibility
|
||||
FluxKontextModelName = ImageEditorModel
|
||||
|
||||
|
||||
@@ -76,12 +82,31 @@ class AspectRatio(str, Enum):
|
||||
ASPECT_1_2 = "1:2"
|
||||
|
||||
|
||||
# The three fixed pixel sizes used below, named by orientation.
_OPENAI_SQUARE = "1024x1024"
_OPENAI_PORTRAIT = "1024x1536"
_OPENAI_LANDSCAPE = "1536x1024"

# Lookup from the editor's aspect-ratio choices to the `size` value sent to
# OpenAI. Only one square, one portrait and one landscape size are used, so
# several ratios collapse onto the same entry; MATCH_INPUT_IMAGE maps to
# "auto".
ASPECT_TO_OPENAI_SIZE = {
    AspectRatio.MATCH_INPUT_IMAGE: "auto",
    AspectRatio.ASPECT_1_1: _OPENAI_SQUARE,
    AspectRatio.ASPECT_16_9: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_9_16: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_4_3: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_3_4: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_3_2: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_2_3: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_4_5: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_5_4: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_21_9: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_9_21: _OPENAI_PORTRAIT,
    AspectRatio.ASPECT_2_1: _OPENAI_LANDSCAPE,
    AspectRatio.ASPECT_1_2: _OPENAI_PORTRAIT,
}
|
||||
|
||||
|
||||
class AIImageEditorBlock(Block):
|
||||
class Input(BlockSchemaInput):
|
||||
credentials: CredentialsMetaInput[
|
||||
Literal[ProviderName.REPLICATE], Literal["api_key"]
|
||||
Literal[ProviderName.REPLICATE, ProviderName.OPENAI],
|
||||
Literal["api_key"],
|
||||
] = CredentialsField(
|
||||
description="Replicate API key with permissions for Flux Kontext and Nano Banana models",
|
||||
description="Replicate or OpenAI API key with permissions for image editing models",
|
||||
)
|
||||
prompt: str = SchemaField(
|
||||
description="Text instruction describing the desired edit",
|
||||
@@ -99,7 +124,7 @@ class AIImageEditorBlock(Block):
|
||||
advanced=False,
|
||||
)
|
||||
seed: Optional[int] = SchemaField(
|
||||
description="Random seed. Set for reproducible generation (Flux Kontext only; ignored by Nano Banana models)",
|
||||
description="Random seed. Set for reproducible generation (Flux Kontext only; ignored by other models)",
|
||||
default=None,
|
||||
title="Seed",
|
||||
advanced=True,
|
||||
@@ -119,8 +144,8 @@ class AIImageEditorBlock(Block):
|
||||
super().__init__(
|
||||
id="3fd9c73d-4370-4925-a1ff-1b86b99fabfa",
|
||||
description=(
|
||||
"Edit images using Flux Kontext or Google Nano Banana models. Provide a prompt "
|
||||
"and optional reference image to generate a modified image."
|
||||
"Edit images using Flux Kontext, Google Nano Banana, or OpenAI GPT-image models. "
|
||||
"Provide a prompt and optional reference image to generate a modified image."
|
||||
),
|
||||
categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
|
||||
input_schema=AIImageEditorBlock.Input,
|
||||
@@ -134,13 +159,11 @@ class AIImageEditorBlock(Block):
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_output=[
|
||||
# Output will be a workspace ref or data URI depending on context
|
||||
("output_image", lambda x: x.startswith(("workspace://", "data:"))),
|
||||
],
|
||||
test_mock={
|
||||
# Use data URI to avoid HTTP requests during tests
|
||||
"run_model": lambda *args, **kwargs: (
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
|
||||
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAhKmMIQAAAABJRU5ErkJggg=="
|
||||
),
|
||||
},
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
@@ -162,7 +185,7 @@ class AIImageEditorBlock(Block):
|
||||
await store_media_file(
|
||||
file=input_data.input_image,
|
||||
execution_context=execution_context,
|
||||
return_format="for_external_api", # Get content for Replicate API
|
||||
return_format="for_external_api",
|
||||
)
|
||||
if input_data.input_image
|
||||
else None
|
||||
@@ -172,7 +195,6 @@ class AIImageEditorBlock(Block):
|
||||
user_id=execution_context.user_id or "",
|
||||
graph_exec_id=execution_context.graph_exec_id or "",
|
||||
)
|
||||
# Store the generated image to the user's workspace for persistence
|
||||
stored_url = await store_media_file(
|
||||
file=result,
|
||||
execution_context=execution_context,
|
||||
@@ -180,6 +202,41 @@ class AIImageEditorBlock(Block):
|
||||
)
|
||||
yield "output_image", stored_url
|
||||
|
||||
async def _edit_with_openai(
    self,
    api_key: SecretStr,
    model: ImageEditorModel,
    prompt: str,
    input_image_b64: Optional[str],
    aspect_ratio: str,
) -> MediaFileType:
    """Edit an image with an OpenAI GPT-image model via ``images.edit``.

    Args:
        api_key: OpenAI API key.
        model: Editor model; its ``value`` is sent as the OpenAI model name.
        prompt: Text instruction describing the edit.
        input_image_b64: The image to edit, expected as a base64 data URI.
        aspect_ratio: Key looked up in ``ASPECT_TO_OPENAI_SIZE``; unknown
            values fall back to ``"1024x1024"``.

    Returns:
        A ``data:image/png;base64,...`` URI wrapping the returned payload.

    Raises:
        ValueError: If no input image is given, the input is not a data URI,
            or OpenAI returns no image data.
    """
    if not input_image_b64:
        raise ValueError("OpenAI image editing requires an input image.")

    client = openai.AsyncOpenAI(api_key=api_key.get_secret_value())

    header, separator, encoded = str(input_image_b64).partition(",")
    if not separator or not header.startswith("data:"):
        raise ValueError("Expected a data-URI for the input image.")

    # A bare BytesIO carries no filename or content type, so the multipart
    # part would be sent as application/octet-stream, which the image edit
    # endpoint rejects. Send an explicit (filename, content, mime) tuple built
    # from the data-URI header. Falls back to image/png when the header names
    # no media type.
    mime_type = (
        header[len("data:"):].split(";", 1)[0].strip().lower() or "image/png"
    )
    upload = (
        f"input.{mime_type.rsplit('/', 1)[-1]}",
        base64.b64decode(encoded),
        mime_type,
    )

    size_literal = cast(
        Literal["1024x1024", "1536x1024", "1024x1536", "auto"],
        ASPECT_TO_OPENAI_SIZE.get(aspect_ratio, "1024x1024"),
    )

    response = await client.images.edit(
        model=model.value,
        image=upload,
        prompt=prompt,
        n=1,
        size=size_literal,
    )
    if not response.data or not response.data[0].b64_json:
        raise ValueError("OpenAI image edit returned empty result")
    return MediaFileType(f"data:image/png;base64,{response.data[0].b64_json}")
|
||||
|
||||
async def run_model(
|
||||
self,
|
||||
api_key: SecretStr,
|
||||
@@ -191,6 +248,11 @@ class AIImageEditorBlock(Block):
|
||||
user_id: str,
|
||||
graph_exec_id: str,
|
||||
) -> MediaFileType:
|
||||
if model.value.startswith("gpt-image"):
|
||||
return await self._edit_with_openai(
|
||||
api_key, model, prompt, input_image_b64, aspect_ratio
|
||||
)
|
||||
|
||||
client = ReplicateClient(api_token=api_key.get_secret_value())
|
||||
model_name = model.api_name
|
||||
|
||||
@@ -205,7 +267,6 @@ class AIImageEditorBlock(Block):
|
||||
"output_format": "jpg",
|
||||
"safety_filter_level": "block_only_high",
|
||||
}
|
||||
# NB API expects "image_input" as a list, unlike Flux's single "input_image"
|
||||
if input_image_b64:
|
||||
input_params["image_input"] = [input_image_b64]
|
||||
else:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import Type
|
||||
|
||||
from backend.blocks._base import Block, BlockCost, BlockCostType
|
||||
from backend.blocks.ai_image_customizer import AIImageCustomizerBlock, GeminiImageModel
|
||||
from backend.blocks.ai_image_customizer import AIImageCustomizerBlock, ImageCustomizerModel
|
||||
from backend.blocks.ai_image_generator_block import AIImageGeneratorBlock, ImageGenModel
|
||||
from backend.blocks.ai_music_generator import AIMusicGeneratorBlock
|
||||
from backend.blocks.ai_shortform_video_block import (
|
||||
@@ -20,7 +20,7 @@ from backend.blocks.enrichlayer.linkedin import (
|
||||
LinkedinPersonLookupBlock,
|
||||
LinkedinRoleLookupBlock,
|
||||
)
|
||||
from backend.blocks.flux_kontext import AIImageEditorBlock, FluxKontextModelName
|
||||
from backend.blocks.flux_kontext import AIImageEditorBlock, ImageEditorModel
|
||||
from backend.blocks.ideogram import IdeogramModelBlock
|
||||
from backend.blocks.jina.embeddings import JinaEmbeddingBlock
|
||||
from backend.blocks.jina.fact_checker import FactCheckerBlock
|
||||
@@ -477,7 +477,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=10,
|
||||
cost_filter={
|
||||
"model": FluxKontextModelName.FLUX_KONTEXT_PRO,
|
||||
"model": ImageEditorModel.FLUX_KONTEXT_PRO,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -488,7 +488,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=20,
|
||||
cost_filter={
|
||||
"model": FluxKontextModelName.FLUX_KONTEXT_MAX,
|
||||
"model": ImageEditorModel.FLUX_KONTEXT_MAX,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -499,7 +499,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=14, # Nano Banana Pro
|
||||
cost_filter={
|
||||
"model": FluxKontextModelName.NANO_BANANA_PRO,
|
||||
"model": ImageEditorModel.NANO_BANANA_PRO,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -510,7 +510,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=14, # Nano Banana 2
|
||||
cost_filter={
|
||||
"model": FluxKontextModelName.NANO_BANANA_2,
|
||||
"model": ImageEditorModel.NANO_BANANA_2,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -518,6 +518,50 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=15, # gpt-image-1
|
||||
cost_filter={
|
||||
"model": ImageEditorModel.GPT_IMAGE_1,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=18, # gpt-image-1-5
|
||||
cost_filter={
|
||||
"model": ImageEditorModel.GPT_IMAGE_1_5,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=20, # gpt-image-2
|
||||
cost_filter={
|
||||
"model": ImageEditorModel.GPT_IMAGE_2,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=8, # gpt-image-1-mini
|
||||
cost_filter={
|
||||
"model": ImageEditorModel.GPT_IMAGE_1_MINI,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
AIMusicGeneratorBlock: [
|
||||
BlockCost(
|
||||
@@ -718,12 +762,56 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=15, # gpt-image-1
|
||||
cost_filter={
|
||||
"model": ImageGenModel.GPT_IMAGE_1,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=18, # gpt-image-1-5
|
||||
cost_filter={
|
||||
"model": ImageGenModel.GPT_IMAGE_1_5,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=20, # gpt-image-2
|
||||
cost_filter={
|
||||
"model": ImageGenModel.GPT_IMAGE_2,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=8, # gpt-image-1-mini
|
||||
cost_filter={
|
||||
"model": ImageGenModel.GPT_IMAGE_1_MINI,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
AIImageCustomizerBlock: [
|
||||
BlockCost(
|
||||
cost_amount=10, # Nano Banana (original)
|
||||
cost_filter={
|
||||
"model": GeminiImageModel.NANO_BANANA,
|
||||
"model": ImageCustomizerModel.NANO_BANANA,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -734,7 +822,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=14, # Nano Banana Pro: $0.14 per image at 2K
|
||||
cost_filter={
|
||||
"model": GeminiImageModel.NANO_BANANA_PRO,
|
||||
"model": ImageCustomizerModel.NANO_BANANA_PRO,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -745,7 +833,7 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
BlockCost(
|
||||
cost_amount=14, # Nano Banana 2: same pricing tier as Pro
|
||||
cost_filter={
|
||||
"model": GeminiImageModel.NANO_BANANA_2,
|
||||
"model": ImageCustomizerModel.NANO_BANANA_2,
|
||||
"credentials": {
|
||||
"id": replicate_credentials.id,
|
||||
"provider": replicate_credentials.provider,
|
||||
@@ -753,6 +841,50 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=15, # gpt-image-1
|
||||
cost_filter={
|
||||
"model": ImageCustomizerModel.GPT_IMAGE_1,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=18, # gpt-image-1-5
|
||||
cost_filter={
|
||||
"model": ImageCustomizerModel.GPT_IMAGE_1_5,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=20, # gpt-image-2
|
||||
cost_filter={
|
||||
"model": ImageCustomizerModel.GPT_IMAGE_2,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
BlockCost(
|
||||
cost_amount=8, # gpt-image-1-mini
|
||||
cost_filter={
|
||||
"model": ImageCustomizerModel.GPT_IMAGE_1_MINI,
|
||||
"credentials": {
|
||||
"id": openai_credentials.id,
|
||||
"provider": openai_credentials.provider,
|
||||
"type": openai_credentials.type,
|
||||
},
|
||||
},
|
||||
),
|
||||
],
|
||||
PerplexityBlock: [
|
||||
# Sonar Deep Research: up to $5/1K searches + $8/1M reasoning tokens.
|
||||
|
||||
Reference in New Issue
Block a user