Merge branch 'dev' into swiftyos/sdk

2026-04-08 03:00:28 -04:00 · 2025-06-04 11:39:03 +02:00
parent f99c974ea8 0f558876e2
commit 617533fa1d
9 changed files with 407 additions and 70 deletions
--- a/README.md
+++ b/README.md
@@ -15,8 +15,35 @@
 > Setting up and hosting the AutoGPT Platform yourself is a technical process. 
 > If you'd rather something that just works, we recommend [joining the waitlist](https://bit.ly/3ZDijAI) for the cloud-hosted beta.

+### System Requirements
+
+Before proceeding with the installation, ensure your system meets the following requirements:
+
+#### Hardware Requirements
+- CPU: 4+ cores recommended
+- RAM: Minimum 8GB, 16GB recommended
+- Storage: At least 10GB of free space
+
+#### Software Requirements
+- Operating Systems:
+  - Linux (Ubuntu 20.04 or newer recommended)
+  - macOS (10.15 or newer)
+  - Windows 10/11 with WSL2
+- Required Software (with minimum versions):
+  - Docker Engine (20.10.0 or newer)
+  - Docker Compose (2.0.0 or newer)
+  - Git (2.30 or newer)
+  - Node.js (16.x or newer)
+  - npm (8.x or newer)
+  - VSCode (1.60 or newer) or any modern code editor
+
+#### Network Requirements
+- Stable internet connection
+- Access to required ports (will be configured in Docker)
+- Ability to make outbound HTTPS connections
+
 ### Updated Setup Instructions:
-We’ve moved to a fully maintained and regularly updated documentation site.
+We've moved to a fully maintained and regularly updated documentation site.

 👉 [Follow the official self-hosting guide here](https://docs.agpt.co/platform/getting-started/)

@@ -152,7 +179,7 @@ Just clone the repo, install dependencies with `./run setup`, and you should be

 [![Join us on Discord](https://invidget.switchblade.xyz/autogpt)](https://discord.gg/autogpt)

-To report a bug or request a feature, create a [GitHub Issue](https://github.com/Significant-Gravitas/AutoGPT/issues/new/choose). Please ensure someone else hasn’t created an issue for the same topic.
+To report a bug or request a feature, create a [GitHub Issue](https://github.com/Significant-Gravitas/AutoGPT/issues/new/choose). Please ensure someone else hasn't created an issue for the same topic.

 ## 🤝 Sister projects

--- a/autogpt_platform/backend/backend/blocks/flux_kontext.py
+++ b/autogpt_platform/backend/backend/blocks/flux_kontext.py
@@ -0,0 +1,174 @@
+from enum import Enum
+from typing import Literal, Optional
+
+from pydantic import SecretStr
+from replicate.client import Client as ReplicateClient
+from replicate.helpers import FileOutput
+
+from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
+from backend.data.model import (
+    APIKeyCredentials,
+    CredentialsField,
+    CredentialsMetaInput,
+    SchemaField,
+)
+from backend.integrations.providers import ProviderName
+from backend.util.file import MediaFileType
+
+TEST_CREDENTIALS = APIKeyCredentials(
+    id="01234567-89ab-cdef-0123-456789abcdef",
+    provider="replicate",
+    api_key=SecretStr("mock-replicate-api-key"),
+    title="Mock Replicate API key",
+    expires_at=None,
+)
+TEST_CREDENTIALS_INPUT = {
+    "provider": TEST_CREDENTIALS.provider,
+    "id": TEST_CREDENTIALS.id,
+    "type": TEST_CREDENTIALS.type,
+    "title": TEST_CREDENTIALS.type,
+}
+
+
+class FluxKontextModelName(str, Enum):
+    """Selectable Flux Kontext model variants; each value is a display label."""
+    PRO = "Flux Kontext Pro"
+    MAX = "Flux Kontext Max"
+
+    @property
+    def api_name(self) -> str:
+        """Return the model reference built from the lower-cased member name.
+
+        For example ``PRO`` maps to ``"black-forest-labs/flux-kontext-pro"``.
+        """
+        return f"black-forest-labs/flux-kontext-{self.name.lower()}"
+
+
+class AspectRatio(str, Enum):
+    """Allowed values for the block's `aspect_ratio` input (sent to the model as-is)."""
+    # NOTE(review): presumably keeps the aspect ratio of the supplied input image — confirm against the model docs
+    MATCH_INPUT_IMAGE = "match_input_image"
+    ASPECT_1_1 = "1:1"
+    ASPECT_16_9 = "16:9"
+    ASPECT_9_16 = "9:16"
+    ASPECT_4_3 = "4:3"
+    ASPECT_3_4 = "3:4"
+    ASPECT_3_2 = "3:2"
+    ASPECT_2_3 = "2:3"
+    ASPECT_4_5 = "4:5"
+    ASPECT_5_4 = "5:4"
+    ASPECT_21_9 = "21:9"
+    ASPECT_9_21 = "9:21"
+    ASPECT_2_1 = "2:1"
+    ASPECT_1_2 = "1:2"
+
+
+class AIImageEditorBlock(Block):
+    """Block that sends a prompt (and optional reference image) to a Flux Kontext model via Replicate."""
+    class Input(BlockSchema):
+        # Replicate API-key credentials used to authenticate the request.
+        credentials: CredentialsMetaInput[
+            Literal[ProviderName.REPLICATE], Literal["api_key"]
+        ] = CredentialsField(
+            description="Replicate API key with permissions for Flux Kontext models",
+        )
+        prompt: str = SchemaField(
+            description="Text instruction describing the desired edit",
+            title="Prompt",
+        )
+        # Optional; forwarded to the model unchanged (including when it is None).
+        input_image: Optional[MediaFileType] = SchemaField(
+            description="Reference image URI (jpeg, png, gif, webp)",
+            default=None,
+            title="Input Image",
+        )
+        aspect_ratio: AspectRatio = SchemaField(
+            description="Aspect ratio of the generated image",
+            default=AspectRatio.MATCH_INPUT_IMAGE,
+            title="Aspect Ratio",
+            advanced=False,
+        )
+        # Only sent to the model when explicitly set (see `run_model`).
+        seed: Optional[int] = SchemaField(
+            description="Random seed. Set for reproducible generation",
+            default=None,
+            title="Seed",
+            advanced=True,
+        )
+        model: FluxKontextModelName = SchemaField(
+            description="Model variant to use",
+            default=FluxKontextModelName.PRO,
+            title="Model",
+        )
+
+    class Output(BlockSchema):
+        output_image: MediaFileType = SchemaField(
+            description="URL of the transformed image"
+        )
+        error: str = SchemaField(description="Error message if generation failed")
+
+    def __init__(self):
+        """Register the block's id, schemas, categories and self-test fixtures."""
+        super().__init__(
+            id="3fd9c73d-4370-4925-a1ff-1b86b99fabfa",
+            description=(
+                "Edit images using BlackForest Labs' Flux Kontext models. Provide a prompt "
+                "and optional reference image to generate a modified image."
+            ),
+            categories={BlockCategory.AI, BlockCategory.MULTIMEDIA},
+            input_schema=AIImageEditorBlock.Input,
+            output_schema=AIImageEditorBlock.Output,
+            test_input={
+                "prompt": "Add a hat to the cat",
+                "input_image": "https://example.com/cat.png",
+                "aspect_ratio": AspectRatio.MATCH_INPUT_IMAGE,
+                "seed": None,
+                "model": FluxKontextModelName.PRO,
+                "credentials": TEST_CREDENTIALS_INPUT,
+            },
+            test_output=[
+                ("output_image", "https://replicate.com/output/edited-image.png"),
+            ],
+            # `run_model` is replaced by a stub returning a fixed URL, so the self-test never calls Replicate.
+            test_mock={
+                "run_model": lambda *args, **kwargs: "https://replicate.com/output/edited-image.png",
+            },
+            test_credentials=TEST_CREDENTIALS,
+        )
+
+    def run(
+        self,
+        input_data: Input,
+        *,
+        credentials: APIKeyCredentials,
+        **kwargs,
+    ) -> BlockOutput:
+        """Call `run_model` with the block inputs and yield its result as `output_image`.
+
+        Exceptions raised by `run_model` are not caught here.
+        """
+        result = self.run_model(
+            api_key=credentials.api_key,
+            model_name=input_data.model.api_name,
+            prompt=input_data.prompt,
+            input_image=input_data.input_image,
+            aspect_ratio=input_data.aspect_ratio.value,
+            seed=input_data.seed,
+        )
+        yield "output_image", result
+
+    def run_model(
+        self,
+        api_key: SecretStr,
+        model_name: str,
+        prompt: str,
+        input_image: Optional[MediaFileType],
+        aspect_ratio: str,
+        seed: Optional[int],
+    ) -> MediaFileType:
+        """Run the given model through the Replicate client and return the image reference.
+
+        If the client returns a non-empty list, its first element is used. A
+        `FileOutput` is converted via its `url`; a plain string is returned as-is.
+
+        Raises:
+            ValueError: if the output is none of the above (including an empty list).
+        """
+        client = ReplicateClient(api_token=api_key.get_secret_value())
+        input_params = {
+            "prompt": prompt,
+            "input_image": input_image,
+            "aspect_ratio": aspect_ratio,
+            # Omit the key entirely when no seed was provided.
+            **({"seed": seed} if seed is not None else {}),
+        }
+
+        # NOTE(review): `wait=False` presumably selects polling over a blocking request — confirm against the replicate client docs
+        output: FileOutput | list[FileOutput] = client.run(  # type: ignore
+            model_name,
+            input=input_params,
+            wait=False,
+        )
+
+        if isinstance(output, list) and output:
+            output = output[0]
+
+        if isinstance(output, FileOutput):
+            return MediaFileType(output.url)
+        if isinstance(output, str):
+            return MediaFileType(output)
+
+        raise ValueError("No output received")
--- a/autogpt_platform/backend/backend/blocks/http.py
+++ b/autogpt_platform/backend/backend/blocks/http.py
@@ -1,12 +1,19 @@
 import json
 import logging
 from enum import Enum
-from typing import Any
+from io import BufferedReader
+from pathlib import Path

 from requests.exceptions import HTTPError, RequestException

 from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
 from backend.data.model import SchemaField
+from backend.util.file import (
+    MediaFileType,
+    get_exec_file_path,
+    get_mime_type,
+    store_media_file,
+)
 from backend.util.request import requests

 logger = logging.getLogger(name=__name__)
@@ -38,13 +45,21 @@ class SendWebRequestBlock(Block):
        )
        json_format: bool = SchemaField(
            title="JSON format",
-            description="Whether to send and receive body as JSON",
+            description="If true, send the body as JSON (unless files are also present).",
            default=True,
        )
-        body: Any = SchemaField(
-            description="The body of the request",
+        body: dict | None = SchemaField(
+            description="Form/JSON body payload. If files are supplied, this must be a mapping of form‑fields.",
            default=None,
        )
+        files_name: str = SchemaField(
+            description="The name of the file field in the form data.",
+            default="file",
+        )
+        files: list[MediaFileType] = SchemaField(
+            description="Mapping of *form field name* → Image url / path / base64 url.",
+            default_factory=list,
+        )

    class Output(BlockSchema):
        response: object = SchemaField(description="The response from the server")
@@ -55,67 +70,112 @@ class SendWebRequestBlock(Block):
    def __init__(self):
        super().__init__(
            id="6595ae1f-b924-42cb-9a41-551a0611c4b4",
-            description="This block makes an HTTP request to the given URL.",
+            description="Make an HTTP request (JSON / form / multipart).",
            categories={BlockCategory.OUTPUT},
            input_schema=SendWebRequestBlock.Input,
            output_schema=SendWebRequestBlock.Output,
        )

-    def run(self, input_data: Input, **kwargs) -> BlockOutput:
+    @staticmethod
+    def _prepare_files(
+        graph_exec_id: str,
+        files_name: str,
+        files: list[MediaFileType],
+    ) -> tuple[list[tuple[str, tuple[str, BufferedReader, str]]], list[BufferedReader]]:
+        """Convert the `files` mapping into the structure expected by `requests`.
+
+        Returns a tuple of (**files_payload**, **open_handles**) so we can close handles later.
+        """
+        files_payload: list[tuple[str, tuple[str, BufferedReader, str]]] = []
+        open_handles: list[BufferedReader] = []
+
+        for media in files:
+            # Normalise to a list so we can repeat the same key
+            rel_path = store_media_file(graph_exec_id, media, return_content=False)
+            abs_path = get_exec_file_path(graph_exec_id, rel_path)
+            try:
+                handle = open(abs_path, "rb")
+            except Exception as e:
+                for h in open_handles:
+                    try:
+                        h.close()
+                    except Exception:
+                        pass
+                raise RuntimeError(f"Failed to open file '{abs_path}': {e}") from e
+
+            open_handles.append(handle)
+            mime = get_mime_type(abs_path)
+            files_payload.append((files_name, (Path(abs_path).name, handle, mime)))
+
+        return files_payload, open_handles
+
+    def run(self, input_data: Input, *, graph_exec_id: str, **kwargs) -> BlockOutput:
+        # ─── Parse/normalise body ────────────────────────────────────
        body = input_data.body
+        if isinstance(body, str):
+            try:
+                body = json.loads(body)
+            except json.JSONDecodeError:
+                # plain text – treat as form‑field value instead
+                input_data.json_format = False

-        if input_data.json_format:
-            if isinstance(body, str):
-                try:
-                    # Try to parse as JSON first
-                    body = json.loads(body)
-                except json.JSONDecodeError:
-                    # If it's not valid JSON and just plain text,
-                    # we should send it as plain text instead
-                    input_data.json_format = False
+        # ─── Prepare files (if any) ──────────────────────────────────
+        use_files = bool(input_data.files)
+        files_payload: list[tuple[str, tuple[str, BufferedReader, str]]] = []
+        open_handles: list[BufferedReader] = []
+        if use_files:
+            files_payload, open_handles = self._prepare_files(
+                graph_exec_id, input_data.files_name, input_data.files
+            )

+        # Enforce body format rules
+        if use_files and input_data.json_format:
+            raise ValueError(
+                "json_format=True cannot be combined with file uploads; set json_format=False and put form fields in `body`."
+            )
+
+        # ─── Execute request ─────────────────────────────────────────
        try:
            response = requests.request(
                input_data.method.value,
                input_data.url,
                headers=input_data.headers,
-                json=body if input_data.json_format else None,
+                files=files_payload if use_files else None,
+                # * If files → multipart ⇒ pass form‑fields via data=
                data=body if not input_data.json_format else None,
+                # * Else, choose JSON vs url‑encoded based on flag
+                json=body if (input_data.json_format and not use_files) else None,
            )

-            if input_data.json_format:
-                if response.status_code == 204 or not response.content.strip():
-                    result = None
-                else:
-                    result = response.json()
+            # Decide how to parse the response
+            if input_data.json_format or response.headers.get(
+                "content-type", ""
+            ).startswith("application/json"):
+                result = (
+                    None
+                    if (response.status_code == 204 or not response.content.strip())
+                    else response.json()
+                )
            else:
                result = response.text

-            yield "response", result
+            # Yield according to status code bucket
+            if 200 <= response.status_code < 300:
+                yield "response", result
+            elif 400 <= response.status_code < 500:
+                yield "client_error", result
+            else:
+                yield "server_error", result

        except HTTPError as e:
-            # Handle error responses
-            try:
-                result = e.response.json() if input_data.json_format else str(e)
-            except json.JSONDecodeError:
-                result = str(e)
-
-            if 400 <= e.response.status_code < 500:
-                yield "client_error", result
-            elif 500 <= e.response.status_code < 600:
-                yield "server_error", result
-            else:
-                error_msg = (
-                    "Unexpected status code "
-                    f"{e.response.status_code} '{e.response.reason}'"
-                )
-                logger.warning(error_msg)
-                yield "error", error_msg
-
+            yield "error", f"HTTP error: {str(e)}"
        except RequestException as e:
-            # Handle other request-related exceptions
-            yield "error", str(e)
-
+            yield "error", f"Request error: {str(e)}"
        except Exception as e:
-            # Catch any other unexpected exceptions
            yield "error", str(e)
+        finally:
+            for h in open_handles:
+                try:
+                    h.close()
+                except Exception:
+                    pass
--- a/autogpt_platform/backend/backend/data/block_cost_config.py
+++ b/autogpt_platform/backend/backend/data/block_cost_config.py
@@ -2,6 +2,7 @@ from typing import Type

 from backend.blocks.ai_music_generator import AIMusicGeneratorBlock
 from backend.blocks.ai_shortform_video_block import AIShortformVideoCreatorBlock
+from backend.blocks.flux_kontext import AIImageEditorBlock, FluxKontextModelName
 from backend.blocks.ideogram import IdeogramModelBlock
 from backend.blocks.jina.embeddings import JinaEmbeddingBlock
 from backend.blocks.jina.search import ExtractWebsiteContentBlock, SearchTheWebBlock
@@ -260,6 +261,30 @@ BLOCK_COSTS: dict[Type[Block], list[BlockCost]] = {
            },
        )
    ],
+    AIImageEditorBlock: [
+        BlockCost(
+            cost_amount=10,
+            cost_filter={
+                "model": FluxKontextModelName.PRO.api_name,
+                "credentials": {
+                    "id": replicate_credentials.id,
+                    "provider": replicate_credentials.provider,
+                    "type": replicate_credentials.type,
+                },
+            },
+        ),
+        BlockCost(
+            cost_amount=20,
+            cost_filter={
+                "model": FluxKontextModelName.MAX.api_name,
+                "credentials": {
+                    "id": replicate_credentials.id,
+                    "provider": replicate_credentials.provider,
+                    "type": replicate_credentials.type,
+                },
+            },
+        ),
+    ],
    AIMusicGeneratorBlock: [
        BlockCost(
            cost_amount=11,
--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -67,8 +67,7 @@ def store_media_file(
        return ext if ext else ".bin"

    def _file_to_data_uri(path: Path) -> str:
-        mime_type, _ = mimetypes.guess_type(path)
-        mime_type = mime_type or "application/octet-stream"
+        mime_type = get_mime_type(str(path))
        b64 = base64.b64encode(path.read_bytes()).decode("utf-8")
        return f"data:{mime_type};base64,{b64}"

@@ -130,3 +129,21 @@ def store_media_file(
        return MediaFileType(_file_to_data_uri(target_path))
    else:
        return MediaFileType(_strip_base_prefix(target_path, base_path))
+
+
+def get_mime_type(file: str) -> str:
+    """
+    Get the MIME type of a file, whether it's a data URI, URL, or local path.
+    """
+    if file.startswith("data:"):
+        match = re.match(r"^data:([^;]+);base64,", file)
+        return match.group(1) if match else "application/octet-stream"
+
+    elif file.startswith(("http://", "https://")):
+        parsed_url = urlparse(file)
+        mime_type, _ = mimetypes.guess_type(parsed_url.path)
+        return mime_type or "application/octet-stream"
+
+    else:
+        mime_type, _ = mimetypes.guess_type(file)
+        return mime_type or "application/octet-stream"
--- a/autogpt_platform/frontend/src/components/node-input-components.tsx
+++ b/autogpt_platform/frontend/src/components/node-input-components.tsx
@@ -493,10 +493,11 @@ export const NodeGenericInputField: FC<{
          schema={propSchema as BlockIOKVSubSchema}
          entries={currentValue}
          errors={errors}
-          className={className}
-          displayName={displayName}
          connections={connections}
          handleInputChange={handleInputChange}
+          handleInputClick={handleInputClick}
+          className={className}
+          displayName={displayName}
        />
      );

@@ -732,6 +733,7 @@ const NodeKeyValueInput: FC<{
  errors: { [key: string]: string | undefined };
  connections: NodeObjectInputTreeProps["connections"];
  handleInputChange: NodeObjectInputTreeProps["handleInputChange"];
+  handleInputClick: NodeObjectInputTreeProps["handleInputClick"];
  className?: string;
  displayName?: string;
 }> = ({
@@ -741,6 +743,7 @@ const NodeKeyValueInput: FC<{
  schema,
  connections,
  handleInputChange,
+  handleInputClick,
  errors,
  className,
  displayName,
@@ -761,7 +764,7 @@ const NodeKeyValueInput: FC<{
  }, [entries, schema.default, connections, nodeId, selfKey]);

  const [keyValuePairs, setKeyValuePairs] = useState<
-    { key: string; value: string | number | null }[]
+    { key: string; value: any }[]
  >([]);

  useEffect(
@@ -778,18 +781,6 @@ const NodeKeyValueInput: FC<{
    );
  }

-  const isNumberType =
-    schema.additionalProperties &&
-    ["number", "integer"].includes(schema.additionalProperties.type);
-
-  function convertValueType(value: string): string | number | null {
-    if (isNumberType) {
-      const numValue = Number(value);
-      return !isNaN(numValue) ? numValue : null;
-    }
-    return value;
-  }
-
  function getEntryKey(key: string): string {
    return `${selfKey}_#_${key}`;
  }
@@ -799,6 +790,11 @@ const NodeKeyValueInput: FC<{
    );
  }

+  const propSchema =
+    schema.additionalProperties && schema.additionalProperties.type
+      ? schema.additionalProperties
+      : ({ type: "string" } as BlockIOSimpleTypeSubSchema);
+
  return (
    <div
      className={cn(className, keyValuePairs.length > 0 ? "flex flex-col" : "")}
@@ -832,18 +828,24 @@ const NodeKeyValueInput: FC<{
                    )
                  }
                />
-                <LocalValuedInput
-                  type={isNumberType ? "number" : "text"}
-                  placeholder="Value"
-                  value={value ?? ""}
-                  onChange={(e) =>
+                <NodeGenericInputField
+                  className="w-full"
+                  nodeId={nodeId}
+                  propKey={`${selfKey}_#_${key}`}
+                  propSchema={propSchema}
+                  currentValue={value}
+                  errors={errors}
+                  connections={connections}
+                  displayName={displayName || beautifyString(key)}
+                  handleInputChange={(_, newValue) =>
                    updateKeyValuePairs(
                      keyValuePairs.toSpliced(index, 1, {
                        key: key,
-                        value: convertValueType(e.target.value),
+                        value: newValue,
                      }),
                    )
                  }
+                  handleInputClick={handleInputClick}
                />
                <Button
                  variant="ghost"
--- a/docs/content/platform/blocks/blocks.md
+++ b/docs/content/platform/blocks/blocks.md
@@ -86,6 +86,7 @@ Below is a comprehensive list of all available blocks, categorized by their prim
 | [Unreal Text to Speech](text_to_speech_block.md#unreal-text-to-speech) | Converts text to speech using Unreal Speech API |
 | [AI Shortform Video Creator](ai_shortform_video_block.md#ai-shortform-video-creator) | Generates short-form videos using AI |
 | [Replicate Flux Advanced Model](replicate_flux_advanced.md#replicate-flux-advanced-model) | Creates images using Replicate's Flux models |
+| [Flux Kontext](flux_kontext.md#flux-kontext) | Text-based image editing using Flux Kontext |

 ## Miscellaneous
 | Block Name | Description |
--- a/docs/content/platform/blocks/flux_kontext.md
+++ b/docs/content/platform/blocks/flux_kontext.md
@@ -0,0 +1,31 @@
+# Flux Kontext
+
+## What it is
+An internal block that performs text-based image editing using Black Forest Labs' Flux Kontext models.
+
+## What it does
+Takes a prompt describing the desired transformation and optionally a reference image, then returns a new image URL.
+
+## How it works
+The block sends your prompt, image, and settings to the selected Flux Kontext model on Replicate. The service processes the request and returns a link to the edited image.
+
+## Inputs
+| Input        | Description                                                                 |
+|--------------|-----------------------------------------------------------------------------|
+| Credentials  | Replicate API key with permissions for Flux Kontext models                  |
+| Prompt       | Text instruction describing the desired edit                                |
+| Input Image  | (Optional) Reference image URI (jpeg, png, gif, webp)                      |
+| Aspect Ratio | Aspect ratio of the generated image (e.g. match_input_image, 1:1, 16:9, etc.) |
+| Seed         | (Optional, advanced) Random seed for reproducible generation                |
+| Model        | Model variant to use: Flux Kontext Pro or Flux Kontext Max                  |
+
+## Outputs
+| Output     | Description                              |
+|------------|------------------------------------------|
+| output_image | URL of the transformed image           |
+| error      | Error message if generation failed       |
+
+## Use Cases
+- Enhance a marketing image by requesting "add soft lighting and a subtle vignette" while providing the original asset as the reference image.
+- Generate social media assets with specific aspect ratios and style prompts.
+- Apply creative edits to product photos using text instructions.
--- a/docs/content/platform/getting-started.md
+++ b/docs/content/platform/getting-started.md
@@ -6,7 +6,7 @@ This guide will help you setup the server and builder for the project.

 <!-- The video is listed in the root Readme.md of the repo -->

-We also offer this in video format. You can check it out [here](https://github.com/Significant-Gravitas/AutoGPT?tab=readme-ov-file#how-to-setup-for-self-hosting).
+<!--We also offer this in video format. You can check it out [here](https://github.com/Significant-Gravitas/AutoGPT?tab=readme-ov-file#how-to-setup-for-self-hosting). -->

 !!! warning
    **DO NOT FOLLOW ANY OUTSIDE TUTORIALS AS THEY WILL LIKELY BE OUT OF DATE**