feat(blocks): add AI Image Customizer block using Google's Nano Banana (#10845)

Add new AutoGPT Platform Block that uses google/gemini-2.5-flash-image model via Replicate API. Features: - Text prompt input for image generation - Optional list of image URLs as input - Configurable output format (jpg/png, defaults to png) - Single model option: google/gemini-2.5-flash-image - Returns image_url output for generated images Fixes #10815 🤖 Generated with [Claude Code](https://claude.ai/code) ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan:  - [x] use the AI image customizer block and upload 2 images to see if it uses them in the image generation/edits <img width="1536" height="672" alt="tmprhzqasxz" src="https://github.com/user-attachments/assets/39d7adbd-2847-4988-aeab-1c5453290174" /> --------- Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Swifty <craigswift13@gmail.com> Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: Bently <Bentlybro@users.noreply.github.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2026-04-08 03:00:28 -04:00 · 2025-09-04 19:51:32 +01:00
parent 2e38f132e7
commit 75c90e49ce
1 changed files with 154 additions and 0 deletions
--- a/autogpt_platform/backend/backend/blocks/ai_image_customizer.py
+++ b/autogpt_platform/backend/backend/blocks/ai_image_customizer.py
@@ -0,0 +1,154 @@
+from enum import Enum
+from typing import Literal
+
+from pydantic import SecretStr
+from replicate.client import Client as ReplicateClient
+from replicate.helpers import FileOutput
+
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+from backend.util.file import MediaFileType
+
+
+class GeminiImageModel(str, Enum):
+    # Model identifiers selectable through the block's "model" input; the
+    # member value is what gets passed to the Replicate client as the model
+    # name. Only one option is currently defined.
+    NANO_BANANA = "google/nano-banana"
+
+
+class OutputFormat(str, Enum):
+    # Image formats the block can request; the member value is sent to the
+    # model as its "output_format" input parameter.
+    JPG = "jpg"
+    PNG = "png"
+
+
+# Placeholder Replicate credentials handed to the block as its
+# ``test_credentials``; the API key is a mock value, not a real secret.
+TEST_CREDENTIALS = APIKeyCredentials(
+    title="Mock Replicate API key",
+    provider="replicate",
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    api_key=SecretStr("mock-replicate-api-key"),
+    expires_at=None,
+)
+
+# Metadata-only description of TEST_CREDENTIALS (provider, id, type, title --
+# the API key itself is not included), used as the "credentials" entry of the
+# block's test input.
+TEST_CREDENTIALS_INPUT = {
+    field: getattr(TEST_CREDENTIALS, field)
+    for field in ("provider", "id", "type", "title")
+}
+
+
+class AIImageCustomizerBlock(Block):
+    # Block that sends a text prompt (plus optional input images) to the
+    # selected model through the Replicate client and emits the URL of the
+    # resulting image, or an error message if the call fails.
+
+    class Input(BlockSchema):
+        # Inputs accepted by the block. `model`, `images` and `output_format`
+        # have defaults; `credentials` and `prompt` do not.
+        credentials: CredentialsMetaInput[
+            Literal[ProviderName.REPLICATE], Literal["api_key"]
+        ] = CredentialsField(
+            description="Replicate API key with permissions for Google Gemini image models",
+        )
+        prompt: str = SchemaField(
+            title="Prompt",
+            description="A text description of the image you want to generate",
+        )
+        model: GeminiImageModel = SchemaField(
+            title="Model",
+            description="The AI model to use for image generation and editing",
+            default=GeminiImageModel.NANO_BANANA,
+        )
+        images: list[MediaFileType] = SchemaField(
+            title="Input Images",
+            description="Optional list of input images to reference or modify",
+            default=[],
+        )
+        output_format: OutputFormat = SchemaField(
+            title="Output Format",
+            description="Format of the output image",
+            default=OutputFormat.PNG,
+        )
+
+    class Output(BlockSchema):
+        # `run` yields exactly one of these two outputs per execution.
+        image_url: MediaFileType = SchemaField(description="URL of the generated image")
+        error: str = SchemaField(description="Error message if generation failed")
+
+    def __init__(self):
+        """Register the block's id, description, schemas and test fixtures."""
+        # URL shared by the mocked `run_model` result and the expected output.
+        sample_url = "https://replicate.delivery/generated-image.jpg"
+        fixture_input = {
+            "prompt": "Make the scene more vibrant and colorful",
+            "model": GeminiImageModel.NANO_BANANA,
+            "images": [],
+            "output_format": OutputFormat.JPG,
+            "credentials": TEST_CREDENTIALS_INPUT,
+        }
+
+        super().__init__(
+            id="d76bbe4c-930e-4894-8469-b66775511f71",
+            description=(
+                "Generate and edit custom images using Google's Nano-Banana model from Gemini 2.5. "
+                "Provide a prompt and optional reference images to create or modify images."
+            ),
+            categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
+            input_schema=AIImageCustomizerBlock.Input,
+            output_schema=AIImageCustomizerBlock.Output,
+            test_input=fixture_input,
+            test_output=[("image_url", sample_url)],
+            test_mock={
+                "run_model": lambda *args, **kwargs: MediaFileType(sample_url),
+            },
+            test_credentials=TEST_CREDENTIALS,
+        )
+
+    async def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        graph_exec_id: str,
+        user_id: str,
+        **kwargs,
+    ) -> BlockOutput:
+        """Run the model for ``input_data`` and yield the outcome.
+
+        Yields:
+            ``("image_url", result)`` with the value returned by
+            ``run_model`` on success, or ``("error", message)`` carrying
+            ``str(exception)`` when any ``Exception`` is raised.
+
+        ``graph_exec_id``, ``user_id`` and ``**kwargs`` are accepted but not
+        read by this method.
+        """
+        try:
+            request = {
+                "api_key": credentials.api_key,
+                "model_name": input_data.model.value,
+                "prompt": input_data.prompt,
+                "images": input_data.images,
+                "output_format": input_data.output_format.value,
+            }
+            generated = await self.run_model(**request)
+            yield "image_url", generated
+        except Exception as exc:
+            # Failures are reported through the "error" output rather than
+            # propagated to the caller.
+            yield "error", str(exc)
+
+    async def run_model(
+        self,
+        api_key: SecretStr,
+        model_name: str,
+        prompt: str,
+        images: list[MediaFileType],
+        output_format: str,
+    ) -> MediaFileType:
+        """Invoke ``model_name`` through the Replicate client.
+
+        Args:
+            api_key: Secret whose value is used as the Replicate API token.
+            model_name: Model identifier passed to ``async_run``.
+            prompt: Text sent as the ``prompt`` input.
+            images: Input images; when non-empty they are stringified and
+                sent as ``image_input``, otherwise the key is omitted.
+            output_format: Value sent as the ``output_format`` input.
+
+        Returns:
+            The output wrapped as ``MediaFileType``: the ``url`` attribute
+            of a ``FileOutput`` result, or the result itself if it is a
+            plain string.
+
+        Raises:
+            ValueError: If the result is neither a ``FileOutput`` nor a
+                ``str``.
+        """
+        replicate = ReplicateClient(api_token=api_key.get_secret_value())
+
+        payload: dict = {"prompt": prompt, "output_format": output_format}
+        if images:
+            # API expects the "image_input" parameter for input images.
+            payload["image_input"] = list(map(str, images))
+
+        result: FileOutput | str = await replicate.async_run(  # type: ignore
+            model_name,
+            input=payload,
+            wait=False,
+        )
+
+        if isinstance(result, FileOutput):
+            return MediaFileType(result.url)
+        if not isinstance(result, str):
+            raise ValueError("No output received from the model")
+        return MediaFileType(result)