From 3f6585f763825aee0881978a4523930fee78e585 Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Mon, 2 Jun 2025 21:46:48 +0100 Subject: [PATCH] feat(platform/blocks): add AI Image Editor Block powered by flux kontext (#10063) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new internal block, **AI Image Editor**, which enables **text-based image editing** via BlackForest Labs’ Flux Kontext models on Replicate. This block allows users to input a prompt and optionally a reference image, and returns a transformed image URL. It supports two model variants (Pro and Max), with different cost tiers. This functionality will enhance multimedia capabilities across internal agent workflows and support richer AI-powered image manipulation. --- ### Changes 🏗️ * Added `AIImageEditorBlock` in `backend/blocks/flux_kontext.py` * Uses `ReplicateClient` to call Flux Kontext Pro or Max models * Supports inputs for `prompt`, `input_image`, `aspect_ratio`, `seed`, and `model` * Outputs transformed image URL or error * Added credit pricing logic for Flux Kontext models to `block_cost_config.py`: * Pro: 10 credits * Max: 20 credits * Added documentation for the new block at `docs/content/platform/blocks/flux_kontext.md` * Updated block index at `docs/content/platform/blocks/blocks.md` to include Flux Kontext --- ![image](https://github.com/user-attachments/assets/0edb2b30-4c37-4184-bcc8-9d733658f620) ### Checklist 📋 #### For code changes: * [x] I have clearly listed my changes in the PR description * [x] I have made a test plan * [x] I have tested my changes according to the test plan: * [x] Prompt-only input generates an image * [x] Prompt with image applies edit correctly * [x] Image respects specified aspect ratio * [x] Invalid image URL returns helpful error * [x] Using the same seed gives consistent output * [x] Output chaining works: result URI can be used in downstream blocks * [x] Output from Max model shows higher
fidelity than Pro
Example test plan * [x] Create from scratch and execute an agent using Flux Kontext with at least 3 blocks * [x] Import agent with Flux Kontext from file upload, and confirm execution * [x] Upload agent (with Flux Kontext block) to marketplace (internal test) * [x] Import agent from marketplace and confirm correct execution * [x] Edit agent with Flux Kontext block from monitor and confirm output
#### For configuration changes: * [x] `.env.example` is updated or already compatible with my changes * [x] `docker-compose.yml` is updated or already compatible with my changes * [x] I have included a list of my configuration changes in the PR description (under **Changes**) * No new environment variables or services introduced
Examples of configuration changes * N/A
--------- Co-authored-by: Zamil Majdy --- .../backend/backend/blocks/flux_kontext.py | 174 ++++++++++++++++++ .../backend/backend/data/block_cost_config.py | 25 +++ docs/content/platform/blocks/blocks.md | 1 + docs/content/platform/blocks/flux_kontext.md | 31 ++++ 4 files changed, 231 insertions(+) create mode 100644 autogpt_platform/backend/backend/blocks/flux_kontext.py create mode 100644 docs/content/platform/blocks/flux_kontext.md diff --git a/autogpt_platform/backend/backend/blocks/flux_kontext.py b/autogpt_platform/backend/backend/blocks/flux_kontext.py new file mode 100644 index 0000000000..1cff9dbba2 --- /dev/null +++ b/autogpt_platform/backend/backend/blocks/flux_kontext.py @@ -0,0 +1,174 @@ +from enum import Enum +from typing import Literal, Optional + +from pydantic import SecretStr +from replicate.client import Client as ReplicateClient +from replicate.helpers import FileOutput + +from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema +from backend.data.model import ( + APIKeyCredentials, + CredentialsField, + CredentialsMetaInput, + SchemaField, +) +from backend.integrations.providers import ProviderName +from backend.util.file import MediaFileType + +TEST_CREDENTIALS = APIKeyCredentials( + id="01234567-89ab-cdef-0123-456789abcdef", + provider="replicate", + api_key=SecretStr("mock-replicate-api-key"), + title="Mock Replicate API key", + expires_at=None, +) +TEST_CREDENTIALS_INPUT = { + "provider": TEST_CREDENTIALS.provider, + "id": TEST_CREDENTIALS.id, + "type": TEST_CREDENTIALS.type, + "title": TEST_CREDENTIALS.type, +} + + +class FluxKontextModelName(str, Enum): + PRO = "Flux Kontext Pro" + MAX = "Flux Kontext Max" + + @property + def api_name(self) -> str: + return f"black-forest-labs/flux-kontext-{self.name.lower()}" + + +class AspectRatio(str, Enum): + MATCH_INPUT_IMAGE = "match_input_image" + ASPECT_1_1 = "1:1" + ASPECT_16_9 = "16:9" + ASPECT_9_16 = "9:16" + ASPECT_4_3 = "4:3" + ASPECT_3_4 = "3:4" + ASPECT_3_2 = "3:2" + 
ASPECT_2_3 = "2:3" + ASPECT_4_5 = "4:5" + ASPECT_5_4 = "5:4" + ASPECT_21_9 = "21:9" + ASPECT_9_21 = "9:21" + ASPECT_2_1 = "2:1" + ASPECT_1_2 = "1:2" + + +class AIImageEditorBlock(Block): + class Input(BlockSchema): + credentials: CredentialsMetaInput[ + Literal[ProviderName.REPLICATE], Literal["api_key"] + ] = CredentialsField( + description="Replicate API key with permissions for Flux Kontext models", + ) + prompt: str = SchemaField( + description="Text instruction describing the desired edit", + title="Prompt", + ) + input_image: Optional[MediaFileType] = SchemaField( + description="Reference image URI (jpeg, png, gif, webp)", + default=None, + title="Input Image", + ) + aspect_ratio: AspectRatio = SchemaField( + description="Aspect ratio of the generated image", + default=AspectRatio.MATCH_INPUT_IMAGE, + title="Aspect Ratio", + advanced=False, + ) + seed: Optional[int] = SchemaField( + description="Random seed. Set for reproducible generation", + default=None, + title="Seed", + advanced=True, + ) + model: FluxKontextModelName = SchemaField( + description="Model variant to use", + default=FluxKontextModelName.PRO, + title="Model", + ) + + class Output(BlockSchema): + output_image: MediaFileType = SchemaField( + description="URL of the transformed image" + ) + error: str = SchemaField(description="Error message if generation failed") + + def __init__(self): + super().__init__( + id="3fd9c73d-4370-4925-a1ff-1b86b99fabfa", + description=( + "Edit images using BlackForest Labs' Flux Kontext models. Provide a prompt " + "and optional reference image to generate a modified image." 
+ ), + categories={BlockCategory.AI, BlockCategory.MULTIMEDIA}, + input_schema=AIImageEditorBlock.Input, + output_schema=AIImageEditorBlock.Output, + test_input={ + "prompt": "Add a hat to the cat", + "input_image": "https://example.com/cat.png", + "aspect_ratio": AspectRatio.MATCH_INPUT_IMAGE, + "seed": None, + "model": FluxKontextModelName.PRO, + "credentials": TEST_CREDENTIALS_INPUT, + }, + test_output=[ + ("output_image", "https://replicate.com/output/edited-image.png"), + ], + test_mock={ + "run_model": lambda *args, **kwargs: "https://replicate.com/output/edited-image.png", + }, + test_credentials=TEST_CREDENTIALS, + ) + + def run( + self, + input_data: Input, + *, + credentials: APIKeyCredentials, + **kwargs, + ) -> BlockOutput: + result = self.run_model( + api_key=credentials.api_key, + model_name=input_data.model.api_name, + prompt=input_data.prompt, + input_image=input_data.input_image, + aspect_ratio=input_data.aspect_ratio.value, + seed=input_data.seed, + ) + yield "output_image", result + + def run_model( + self, + api_key: SecretStr, + model_name: str, + prompt: str, + input_image: Optional[MediaFileType], + aspect_ratio: str, + seed: Optional[int], + ) -> MediaFileType: + client = ReplicateClient(api_token=api_key.get_secret_value()) + input_params = { + "prompt": prompt, + "input_image": input_image, + "aspect_ratio": aspect_ratio, + **({"seed": seed} if seed is not None else {}), + } + + output: FileOutput | list[FileOutput] = client.run( # type: ignore + model_name, + input=input_params, + wait=False, + ) + + if isinstance(output, list) and output: + output = output[0] + + if isinstance(output, FileOutput): + return MediaFileType(output.url) + if isinstance(output, str): + return MediaFileType(output) + + raise ValueError("No output received") diff --git a/autogpt_platform/backend/backend/data/block_cost_config.py b/autogpt_platform/backend/backend/data/block_cost_config.py index 8e6ca55f7e..384d1c870f 100644 --- 
a/autogpt_platform/backend/backend/data/block_cost_config.py +++ b/autogpt_platform/backend/backend/data/block_cost_config.py @@ -2,6 +2,7 @@ from typing import Type from backend.blocks.ai_music_generator import AIMusicGeneratorBlock from backend.blocks.ai_shortform_video_block import AIShortformVideoCreatorBlock +from backend.blocks.flux_kontext import AIImageEditorBlock, FluxKontextModelName from backend.blocks.ideogram import IdeogramModelBlock from backend.blocks.jina.embeddings import JinaEmbeddingBlock from backend.blocks.jina.search import ExtractWebsiteContentBlock, SearchTheWebBlock @@ -260,6 +261,30 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = { }, ) ], + AIImageEditorBlock: [ + BlockCost( + cost_amount=10, + cost_filter={ + "model": FluxKontextModelName.PRO.api_name, + "credentials": { + "id": replicate_credentials.id, + "provider": replicate_credentials.provider, + "type": replicate_credentials.type, + }, + }, + ), + BlockCost( + cost_amount=20, + cost_filter={ + "model": FluxKontextModelName.MAX.api_name, + "credentials": { + "id": replicate_credentials.id, + "provider": replicate_credentials.provider, + "type": replicate_credentials.type, + }, + }, + ), + ], AIMusicGeneratorBlock: [ BlockCost( cost_amount=11, diff --git a/docs/content/platform/blocks/blocks.md b/docs/content/platform/blocks/blocks.md index 3ca02d88d3..9d994b7212 100644 --- a/docs/content/platform/blocks/blocks.md +++ b/docs/content/platform/blocks/blocks.md @@ -86,6 +86,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim | [Unreal Text to Speech](text_to_speech_block.md#unreal-text-to-speech) | Converts text to speech using Unreal Speech API | | [AI Shortform Video Creator](ai_shortform_video_block.md#ai-shortform-video-creator) | Generates short-form videos using AI | | [Replicate Flux Advanced Model](replicate_flux_advanced.md#replicate-flux-advanced-model) | Creates images using Replicate's Flux models | +| [Flux 
Kontext](flux_kontext.md#flux-kontext) | Text-based image editing using Flux Kontext | ## Miscellaneous | Block Name | Description | diff --git a/docs/content/platform/blocks/flux_kontext.md b/docs/content/platform/blocks/flux_kontext.md new file mode 100644 index 0000000000..3aced2ba1f --- /dev/null +++ b/docs/content/platform/blocks/flux_kontext.md @@ -0,0 +1,31 @@ +# Flux Kontext + +## What it is +An internal block that performs text-based image editing using BlackForest Labs' Flux Kontext models. + +## What it does +Takes a prompt describing the desired transformation and optionally a reference image, then returns a new image URL. + +## How it works +The block sends your prompt, image, and settings to the selected Flux Kontext model on Replicate. The service processes the request and returns a link to the edited image. + +## Inputs +| Input | Description | +|--------------|-----------------------------------------------------------------------------| +| Credentials | Replicate API key with permissions for Flux Kontext models | +| Prompt | Text instruction describing the desired edit | +| Input Image | (Optional) Reference image URI (jpeg, png, gif, webp) | +| Aspect Ratio | Aspect ratio of the generated image (e.g. match_input_image, 1:1, 16:9, etc.) | +| Seed | (Optional, advanced) Random seed for reproducible generation | +| Model | Model variant to use: Flux Kontext Pro or Flux Kontext Max | + +## Outputs +| Output | Description | +|------------|------------------------------------------| +| output_image | URL of the transformed image | +| error | Error message if generation failed | + +## Use Cases +- Enhance a marketing image by requesting "add soft lighting and a subtle vignette" while providing the original asset as the reference image. +- Generate social media assets with specific aspect ratios and style prompts. +- Apply creative edits to product photos using text instructions.