chore: release v5.0.0.a4

fix(ui): show send to toggle on canvas only
revert(ui): miniviewer
2026-01-15 08:28:14 -05:00 · 2024-09-13 00:04:07 +10:00 · 2024-09-12 23:42:21 +10:00 · 2024-09-12 23:42:21 +10:00 · 2024-09-12 23:42:21 +10:00 · 2024-09-12 23:42:21 +10:00
1192 changed files with 72751 additions and 54365 deletions
--- a/.github/workflows/build-container.yml
+++ b/.github/workflows/build-container.yml
@@ -13,6 +13,12 @@ on:
    tags:
      - 'v*.*.*'
  workflow_dispatch:
+    inputs:
+      push-to-registry:
+        description: Push the built image to the container registry
+        required: false
+        type: boolean
+        default: false

 permissions:
  contents: write
@@ -50,16 +56,15 @@ jobs:
          df -h

      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          images: |
            ghcr.io/${{ github.repository }}
-            ${{ env.DOCKERHUB_REPOSITORY }}
          tags: |
            type=ref,event=branch
            type=ref,event=tag
@@ -72,49 +77,33 @@ jobs:
            suffix=-${{ matrix.gpu-driver }},onlatest=false

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
        with:
          platforms: ${{ env.PLATFORMS }}

      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

-      # - name: Login to Docker Hub
-      #   if: github.event_name != 'pull_request' && vars.DOCKERHUB_REPOSITORY != ''
-      #   uses: docker/login-action@v2
-      #   with:
-      #     username: ${{ secrets.DOCKERHUB_USERNAME }}
-      #     password: ${{ secrets.DOCKERHUB_TOKEN }}
-
      - name: Build container
        timeout-minutes: 40
        id: docker_build
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile
          platforms: ${{ env.PLATFORMS }}
-          push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' }}
+          push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' || github.event.inputs.push-to-registry }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: |
            type=gha,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
            type=gha,scope=main-${{ matrix.gpu-driver }}
          cache-to: type=gha,mode=max,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
-
-      # - name: Docker Hub Description
-      #   if: github.ref == 'refs/heads/main' || github.ref == 'refs/tags/*' && vars.DOCKERHUB_REPOSITORY != ''
-      #   uses: peter-evans/dockerhub-description@v3
-      #   with:
-      #     username: ${{ secrets.DOCKERHUB_USERNAME }}
-      #     password: ${{ secrets.DOCKERHUB_TOKEN }}
-      #     repository: ${{ vars.DOCKERHUB_REPOSITORY }}
-      #     short-description: ${{ github.event.repository.description }}
--- a/.github/workflows/python-checks.yml
+++ b/.github/workflows/python-checks.yml
@@ -62,7 +62,7 @@ jobs:

      - name: install ruff
        if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
-        run: pip install ruff
+        run: pip install ruff==0.6.0
        shell: bash

      - name: ruff check
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -60,7 +60,7 @@ jobs:
            extra-index-url: 'https://download.pytorch.org/whl/cpu'
            github-env: $GITHUB_ENV
          - platform: macos-default
-            os: macOS-12
+            os: macOS-14
            github-env: $GITHUB_ENV
          - platform: windows-cpu
            os: windows-2022
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -55,6 +55,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
+RUN corepack use pnpm@8.x
 RUN corepack enable

 WORKDIR /build
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,20 +1,22 @@
 # Invoke in Docker

- Ensure that Docker can use the GPU on your system
- This documentation assumes Linux, but should work similarly under Windows with WSL2
+First things first:
+
+- Ensure that Docker can use your [NVIDIA][nvidia docker docs] or [AMD][amd docker docs] GPU.
+- This document assumes a Linux system, but should work similarly under Windows with WSL2.
 - We don't recommend running Invoke in Docker on macOS at this time. It works, but very slowly.

-## Quickstart :lightning:
+## Quickstart

-No `docker compose`, no persistence, just a simple one-liner using the official images:
+No `docker compose`, no persistence, single command, using the official images:

-**CUDA:**
+**CUDA (NVIDIA GPU):**

 ```bash
 docker run --runtime=nvidia --gpus=all --publish 9090:9090 ghcr.io/invoke-ai/invokeai
 ```

-**ROCm:**
+**ROCm (AMD GPU):**

 ```bash
 docker run --device /dev/kfd --device /dev/dri --publish 9090:9090 ghcr.io/invoke-ai/invokeai:main-rocm
@@ -22,12 +24,20 @@ docker run --device /dev/kfd --device /dev/dri --publish 9090:9090 ghcr.io/invok

 Open `http://localhost:9090` in your browser once the container finishes booting, install some models, and generate away!

-> [!TIP]
-> To persist your data (including downloaded models) outside of the container, add a `--volume/-v` flag to the above command, e.g.: `docker run --volume /some/local/path:/invokeai <...the rest of the command>`
+### Data persistence
+
+To persist your generated images and downloaded models outside of the container, add a `--volume/-v` flag to the above command, e.g.:
+
+```bash
+docker run --volume /some/local/path:/invokeai {...etc...}
+```
+
+`/some/local/path` will contain all your data.
+It can *usually* be reused between different installs of Invoke. Tread with caution and read the release notes!

 ## Customize the container

-We ship the `run.sh` script, which is a convenient wrapper around `docker compose` for cases where custom image build args are needed. Alternatively, the familiar `docker compose` commands work just as well.
+The included `run.sh` script is a convenience wrapper around `docker compose`. It can be helpful for passing additional build arguments to `docker compose`. Alternatively, the familiar `docker compose` commands work just as well.

 ```bash
 cd docker
@@ -38,11 +48,14 @@ cp .env.sample .env

 It will take a few minutes to build the image the first time. Once the application starts up, open `http://localhost:9090` in your browser to invoke!

+>[!TIP]
+>When using the `run.sh` script, the container will continue running after Ctrl+C. To shut it down, use the `docker compose down` command.
+
 ## Docker setup in detail

 #### Linux

-1. Ensure builkit is enabled in the Docker daemon settings (`/etc/docker/daemon.json`)
+1. Ensure buildkit is enabled in the Docker daemon settings (`/etc/docker/daemon.json`)
 2. Install the `docker compose` plugin using your package manager, or follow a [tutorial](https://docs.docker.com/compose/install/linux/#install-using-the-repository).
    - The deprecated `docker-compose` (hyphenated) CLI probably won't work. Update to a recent version.
 3. Ensure docker daemon is able to access the GPU.
@@ -98,25 +111,7 @@ GPU_DRIVER=cuda

 Any environment variables supported by InvokeAI can be set here. See the [Configuration docs](https://invoke-ai.github.io/InvokeAI/features/CONFIGURATION/) for further detail.

-## Even More Customizing!
+---

-See the `docker-compose.yml` file. The `command` instruction can be uncommented and used to run arbitrary startup commands. Some examples below.
-
-### Reconfigure the runtime directory
-
-Can be used to download additional models from the supported model list
-
-In conjunction with `INVOKEAI_ROOT` can be also used to initialize a runtime directory
-
-```yaml
-command:
-  - invokeai-configure
-  - --yes
-```
-
-Or install models:
-
-```yaml
-command:
-  - invokeai-model-install
-```
+[nvidia docker docs]: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+[amd docker docs]: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html
--- a/docs/help/FAQ.md
+++ b/docs/help/FAQ.md
@@ -196,6 +196,22 @@ tips to reduce the problem:
    === "12GB VRAM GPU"

        This should be sufficient to generate larger images up to about 1280x1280.
+		
+## Checkpoint Models Load Slowly or Use Too Much RAM
+
+The difference between diffusers models (a folder containing multiple
+subfolders) and checkpoint models (a file ending with .safetensors or
+.ckpt) is that InvokeAI is able to load diffusers models into memory
+incrementally, while checkpoint models must be loaded all at
+once. With very large models, or systems with limited RAM, you may
+experience slowdowns and other memory-related issues when loading
+checkpoint models.
+
+To solve this, go to the Model Manager tab (the cube), select the
+checkpoint model that's giving you trouble, and press the "Convert"
+button in the upper right of your browser window. This will convert the
+checkpoint into a diffusers model, after which loading should be
+faster and less memory-intensive.

 ## Memory Leak (Linux)

--- a/installer/templates/invoke.sh.in
+++ b/installer/templates/invoke.sh.in
@@ -17,7 +17,7 @@
 set -eu

 # Ensure we're in the correct folder in case user's CWD is somewhere else
-scriptdir=$(dirname "$0")
+scriptdir=$(dirname "$(readlink -f "$0")")
 cd "$scriptdir"

 . .venv/bin/activate
--- a/invokeai/app/api/dependencies.py
+++ b/invokeai/app/api/dependencies.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)

+import asyncio
 from logging import Logger

 import torch
@@ -31,6 +32,8 @@ from invokeai.app.services.session_processor.session_processor_default import (
 )
 from invokeai.app.services.session_queue.session_queue_sqlite import SqliteSessionQueue
 from invokeai.app.services.shared.sqlite.sqlite_util import init_db
+from invokeai.app.services.style_preset_images.style_preset_images_disk import StylePresetImageFileStorageDisk
+from invokeai.app.services.style_preset_records.style_preset_records_sqlite import SqliteStylePresetRecordsStorage
 from invokeai.app.services.urls.urls_default import LocalUrlService
 from invokeai.app.services.workflow_records.workflow_records_sqlite import SqliteWorkflowRecordsStorage
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData
@@ -63,7 +66,12 @@ class ApiDependencies:
    invoker: Invoker

    @staticmethod
-    def initialize(config: InvokeAIAppConfig, event_handler_id: int, logger: Logger = logger) -> None:
+    def initialize(
+        config: InvokeAIAppConfig,
+        event_handler_id: int,
+        loop: asyncio.AbstractEventLoop,
+        logger: Logger = logger,
+    ) -> None:
        logger.info(f"InvokeAI version {__version__}")
        logger.info(f"Root directory = {str(config.root_path)}")

@@ -74,6 +82,7 @@ class ApiDependencies:
        image_files = DiskImageFileStorage(f"{output_folder}/images")

        model_images_folder = config.models_path
+        style_presets_folder = config.style_presets_path

        db = init_db(config=config, logger=logger, image_files=image_files)

@@ -84,7 +93,7 @@ class ApiDependencies:
        board_images = BoardImagesService()
        board_records = SqliteBoardRecordStorage(db=db)
        boards = BoardService()
-        events = FastAPIEventService(event_handler_id)
+        events = FastAPIEventService(event_handler_id, loop=loop)
        bulk_download = BulkDownloadService()
        image_records = SqliteImageRecordStorage(db=db)
        images = ImageService()
@@ -109,6 +118,8 @@ class ApiDependencies:
        session_queue = SqliteSessionQueue(db=db)
        urls = LocalUrlService()
        workflow_records = SqliteWorkflowRecordsStorage(db=db)
+        style_preset_records = SqliteStylePresetRecordsStorage(db=db)
+        style_preset_image_files = StylePresetImageFileStorageDisk(style_presets_folder / "images")

        services = InvocationServices(
            board_image_records=board_image_records,
@@ -134,6 +145,8 @@ class ApiDependencies:
            workflow_records=workflow_records,
            tensors=tensors,
            conditioning=conditioning,
+            style_preset_records=style_preset_records,
+            style_preset_image_files=style_preset_image_files,
        )

        ApiDependencies.invoker = Invoker(services)
--- a/invokeai/app/api/routers/images.py
+++ b/invokeai/app/api/routers/images.py
@@ -218,9 +218,8 @@ async def get_image_workflow(
        raise HTTPException(status_code=404)


-@images_router.api_route(
+@images_router.get(
    "/i/{image_name}/full",
-    methods=["GET", "HEAD"],
    operation_id="get_image_full",
    response_class=Response,
    responses={
@@ -231,6 +230,18 @@ async def get_image_workflow(
        404: {"description": "Image not found"},
    },
 )
+@images_router.head(
+    "/i/{image_name}/full",
+    operation_id="get_image_full_head",
+    response_class=Response,
+    responses={
+        200: {
+            "description": "Return the full-resolution image",
+            "content": {"image/png": {}},
+        },
+        404: {"description": "Image not found"},
+    },
+)
 async def get_image_full(
    image_name: str = Path(description="The name of full-resolution image file to get"),
 ) -> Response:
@@ -242,6 +253,7 @@ async def get_image_full(
            content = f.read()
        response = Response(content, media_type="image/png")
        response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
+        response.headers["Content-Disposition"] = f'inline; filename="{image_name}"'
        return response
    except Exception:
        raise HTTPException(status_code=404)
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -3,8 +3,10 @@

 import io
 import pathlib
+import shutil
 import traceback
 from copy import deepcopy
+from enum import Enum
 from tempfile import TemporaryDirectory
 from typing import List, Optional, Type

@@ -17,6 +19,7 @@ from starlette.exceptions import HTTPException
 from typing_extensions import Annotated

 from invokeai.app.api.dependencies import ApiDependencies
+from invokeai.app.services.config import get_config
 from invokeai.app.services.model_images.model_images_common import ModelImageFileNotFoundException
 from invokeai.app.services.model_install.model_install_common import ModelInstallJob
 from invokeai.app.services.model_records import (
@@ -31,6 +34,7 @@ from invokeai.backend.model_manager.config import (
    ModelFormat,
    ModelType,
 )
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats
 from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch
 from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException
 from invokeai.backend.model_manager.search import ModelSearch
@@ -50,6 +54,13 @@ class ModelsList(BaseModel):
    model_config = ConfigDict(use_enum_values=True)


+class CacheType(str, Enum):
+    """Cache type - one of vram or ram."""
+
+    # Selector used by the `/model_cache` GET and PUT routes to choose which size limit is read or written.
+    RAM = "RAM"
+    VRAM = "VRAM"
+
+
 def add_cover_image_to_model_config(config: AnyModelConfig, dependencies: Type[ApiDependencies]) -> AnyModelConfig:
    """Add a cover image URL to a model configuration."""
    cover_image = dependencies.invoker.services.model_images.get_url(config.key)
@@ -797,3 +808,83 @@ async def get_starter_models() -> list[StarterModel]:
        model.dependencies = missing_deps

    return starter_models
+
+
+@model_manager_router.get(
+    "/model_cache",
+    operation_id="get_cache_size",
+    response_model=float,
+    summary="Get maximum size of model manager RAM or VRAM cache.",
+)
+async def get_cache_size(cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM)) -> float:
+    """Return the current RAM or VRAM cache size setting (in GB)."""
+    # Both limits are attributes of the loader's `ram_cache` object.
+    model_cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
+    if cache_type == CacheType.RAM:
+        return model_cache.max_cache_size
+    if cache_type == CacheType.VRAM:
+        return model_cache.max_vram_cache_size
+    # Any other cache type reports a size of zero.
+    return 0.0
+
+
+@model_manager_router.put(
+    "/model_cache",
+    operation_id="set_cache_size",
+    response_model=float,
+    summary="Set maximum size of model manager RAM or VRAM cache, optionally writing new value out to invokeai.yaml config file.",
+)
+async def set_cache_size(
+    value: float = Query(description="The new value for the maximum cache size"),
+    cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM),
+    persist: bool = Query(description="Write new value out to invokeai.yaml", default=False),
+) -> float:
+    """Set the current RAM or VRAM cache size setting (in GB)."""
+    # Flow: snapshot the configured limits, compute the new pair, apply it to the live cache and
+    # the app config, optionally write the config to disk, and roll everything back on failure.
+    # Returns `value` unchanged on success.
+    cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
+    app_config = get_config()
+    # Record initial state.
+    # The snapshot is read from the app config, not from the cache object, so a rollback
+    # resets the cache to the configured values.
+    vram_old = app_config.vram
+    ram_old = app_config.ram
+
+    # Prepare target state.
+    # Only the limit selected by `cache_type` changes; the other keeps its configured value.
+    vram_new = vram_old
+    ram_new = ram_old
+    if cache_type == CacheType.RAM:
+        ram_new = value
+    elif cache_type == CacheType.VRAM:
+        vram_new = value
+    else:
+        # Raised outside the try block below, so it is not wrapped in the RuntimeError.
+        raise ValueError(f"Unexpected {cache_type=}.")
+
+    config_path = app_config.config_file_path
+    # Sibling file that receives the new config before it is moved over the real one.
+    new_config_path = config_path.with_suffix(".yaml.new")
+
+    try:
+        # Try to apply the target state.
+        cache.max_vram_cache_size = vram_new
+        cache.max_cache_size = ram_new
+        app_config.ram = ram_new
+        app_config.vram = vram_new
+        if persist:
+            # Write to the sibling file first, then move it onto the config path.
+            app_config.write_file(new_config_path)
+            shutil.move(new_config_path, config_path)
+    except Exception as e:
+        # If there was a failure, restore the initial state.
+        cache.max_cache_size = ram_old
+        cache.max_vram_cache_size = vram_old
+        app_config.ram = ram_old
+        app_config.vram = vram_old
+
+        # The original exception is chained as the cause.
+        raise RuntimeError("Failed to update cache size") from e
+    return value
+
+
+@model_manager_router.get(
+    "/stats",
+    operation_id="get_stats",
+    response_model=Optional[CacheStats],
+    summary="Get model manager RAM cache performance statistics.",
+)
+async def get_stats() -> Optional[CacheStats]:
+    """Return performance statistics on the model manager's RAM cache. Will return null if no models have been loaded."""
+    # Reach the loader's RAM cache first, then hand back its `stats` attribute unchanged.
+    ram_cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
+    return ram_cache.stats
--- a/invokeai/app/api/routers/session_queue.py
+++ b/invokeai/app/api/routers/session_queue.py
@@ -11,6 +11,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
+    CancelByDestinationResult,
    ClearResult,
    EnqueueBatchResult,
    PruneResult,
@@ -105,6 +106,21 @@ async def cancel_by_batch_ids(
    return ApiDependencies.invoker.services.session_queue.cancel_by_batch_ids(queue_id=queue_id, batch_ids=batch_ids)


+@session_queue_router.put(
+    "/{queue_id}/cancel_by_destination",
+    operation_id="cancel_by_destination",
+    responses={200: {"model": CancelByDestinationResult}},
+)
+async def cancel_by_destination(
+    queue_id: str = Path(description="The queue id to perform this operation on"),
+    destination: str = Query(description="The destination to cancel all queue items for"),
+) -> CancelByDestinationResult:
+    """Immediately cancels all queue items with the given destination"""
+    return ApiDependencies.invoker.services.session_queue.cancel_by_destination(
+        queue_id=queue_id, destination=destination
+    )
+
+
@session_queue_router.put(
    "/{queue_id}/clear",
    operation_id="clear",
--- a/invokeai/app/api/routers/style_presets.py
+++ b/invokeai/app/api/routers/style_presets.py
@@ -0,0 +1,274 @@
+import csv
+import io
+import json
+import traceback
+from typing import Optional
+
+import pydantic
+from fastapi import APIRouter, File, Form, HTTPException, Path, Response, UploadFile
+from fastapi.responses import FileResponse
+from PIL import Image
+from pydantic import BaseModel, Field
+
+from invokeai.app.api.dependencies import ApiDependencies
+from invokeai.app.api.routers.model_manager import IMAGE_MAX_AGE
+from invokeai.app.services.style_preset_images.style_preset_images_common import StylePresetImageFileNotFoundException
+from invokeai.app.services.style_preset_records.style_preset_records_common import (
+    InvalidPresetImportDataError,
+    PresetData,
+    PresetType,
+    StylePresetChanges,
+    StylePresetNotFoundError,
+    StylePresetRecordWithImage,
+    StylePresetWithoutId,
+    UnsupportedFileTypeError,
+    parse_presets_from_file,
+)
+
+
+# Shape of the JSON document sent in the `data` form field of the create and update routes below.
+# Both routes decode that field with `json.loads` and pass the result to this model for validation.
+class StylePresetFormData(BaseModel):
+    name: str = Field(description="Preset name")
+    positive_prompt: str = Field(description="Positive prompt")
+    negative_prompt: str = Field(description="Negative prompt")
+    type: PresetType = Field(description="Preset type")
+
+
+# Router for every style preset endpoint in this module.
+style_presets_router = APIRouter(prefix="/v1/style_presets", tags=["style_presets"])
+
+
+@style_presets_router.get(
+    "/i/{style_preset_id}",
+    operation_id="get_style_preset",
+    responses={
+        200: {"model": StylePresetRecordWithImage},
+    },
+)
+async def get_style_preset(
+    style_preset_id: str = Path(description="The style preset to get"),
+) -> StylePresetRecordWithImage:
+    """Gets a style preset"""
+    services = ApiDependencies.invoker.services
+    try:
+        # The image URL is resolved first, then the record; its dumped fields fill the rest of the response.
+        image_url = services.style_preset_image_files.get_url(style_preset_id)
+        record = services.style_preset_records.get(style_preset_id)
+        return StylePresetRecordWithImage(image=image_url, **record.model_dump())
+    except StylePresetNotFoundError:
+        # An unknown id is reported to the client as a 404.
+        raise HTTPException(status_code=404, detail="Style preset not found")
+
+
+@style_presets_router.patch(
+    "/i/{style_preset_id}",
+    operation_id="update_style_preset",
+    responses={
+        200: {"model": StylePresetRecordWithImage},
+    },
+)
+async def update_style_preset(
+    image: Optional[UploadFile] = File(description="The image file to upload", default=None),
+    style_preset_id: str = Path(description="The id of the style preset to update"),
+    data: str = Form(description="The data of the style preset to update"),
+) -> StylePresetRecordWithImage:
+    """Updates a style preset"""
+    services = ApiDependencies.invoker.services
+
+    # Validate everything up front so that a rejected request leaves the stored record and image untouched.
+    try:
+        parsed_data = json.loads(data)
+        validated_data = StylePresetFormData(**parsed_data)
+    except (json.JSONDecodeError, TypeError, pydantic.ValidationError):
+        # JSONDecodeError: `data` is not JSON. TypeError: the JSON is not an object, so it cannot be
+        # unpacked as keyword arguments. ValidationError: fields are missing or invalid.
+        raise HTTPException(status_code=400, detail="Invalid preset data")
+
+    pil_image = None
+    if image is not None:
+        if not image.content_type or not image.content_type.startswith("image"):
+            raise HTTPException(status_code=415, detail="Not an image")
+
+        contents = await image.read()
+        try:
+            pil_image = Image.open(io.BytesIO(contents))
+        except Exception:
+            services.logger.error(traceback.format_exc())
+            raise HTTPException(status_code=415, detail="Failed to read image")
+
+    preset_data = PresetData(
+        positive_prompt=validated_data.positive_prompt, negative_prompt=validated_data.negative_prompt
+    )
+    changes = StylePresetChanges(name=validated_data.name, preset_data=preset_data, type=validated_data.type)
+
+    # Update the record before touching image storage, so a failed update does not change the image.
+    try:
+        style_preset = services.style_preset_records.update(style_preset_id=style_preset_id, changes=changes)
+    except StylePresetNotFoundError:
+        # NOTE(review): assumes `update` raises StylePresetNotFoundError for an unknown id - confirm.
+        raise HTTPException(status_code=404, detail="Style preset not found")
+
+    if pil_image is not None:
+        try:
+            services.style_preset_image_files.save(style_preset_id, pil_image)
+        except ValueError as e:
+            raise HTTPException(status_code=409, detail=str(e))
+    else:
+        # A request without an image clears any stored image; a missing file is not an error.
+        try:
+            services.style_preset_image_files.delete(style_preset_id)
+        except StylePresetImageFileNotFoundException:
+            pass
+
+    style_preset_image = services.style_preset_image_files.get_url(style_preset_id)
+    return StylePresetRecordWithImage(image=style_preset_image, **style_preset.model_dump())
+
+
+@style_presets_router.delete(
+    "/i/{style_preset_id}",
+    operation_id="delete_style_preset",
+)
+async def delete_style_preset(
+    style_preset_id: str = Path(description="The style preset to delete"),
+) -> None:
+    """Deletes a style preset"""
+    services = ApiDependencies.invoker.services
+    try:
+        services.style_preset_image_files.delete(style_preset_id)
+    except StylePresetImageFileNotFoundException:
+        # A preset without a stored image is fine; carry on and remove the record.
+        pass
+
+    services.style_preset_records.delete(style_preset_id)
+
+
+@style_presets_router.post(
+    "/",
+    operation_id="create_style_preset",
+    responses={
+        200: {"model": StylePresetRecordWithImage},
+    },
+)
+async def create_style_preset(
+    image: Optional[UploadFile] = File(description="The image file to upload", default=None),
+    data: str = Form(description="The data of the style preset to create"),
+) -> StylePresetRecordWithImage:
+    """Creates a style preset"""
+    services = ApiDependencies.invoker.services
+
+    try:
+        parsed_data = json.loads(data)
+        validated_data = StylePresetFormData(**parsed_data)
+    except (json.JSONDecodeError, TypeError, pydantic.ValidationError):
+        # JSONDecodeError: `data` is not JSON. TypeError: the JSON is not an object, so it cannot be
+        # unpacked as keyword arguments. ValidationError: fields are missing or invalid.
+        raise HTTPException(status_code=400, detail="Invalid preset data")
+
+    # Check and decode the upload before creating the record. Otherwise a rejected image would
+    # return 415 while leaving behind a new preset whose id the client never receives.
+    pil_image = None
+    if image is not None:
+        if not image.content_type or not image.content_type.startswith("image"):
+            raise HTTPException(status_code=415, detail="Not an image")
+
+        contents = await image.read()
+        try:
+            pil_image = Image.open(io.BytesIO(contents))
+        except Exception:
+            services.logger.error(traceback.format_exc())
+            raise HTTPException(status_code=415, detail="Failed to read image")
+
+    preset_data = PresetData(
+        positive_prompt=validated_data.positive_prompt, negative_prompt=validated_data.negative_prompt
+    )
+    style_preset = StylePresetWithoutId(name=validated_data.name, preset_data=preset_data, type=validated_data.type)
+    new_style_preset = services.style_preset_records.create(style_preset=style_preset)
+
+    # The image is stored under the new record's id, so it can only be saved after creation.
+    if pil_image is not None:
+        try:
+            services.style_preset_image_files.save(new_style_preset.id, pil_image)
+        except ValueError as e:
+            raise HTTPException(status_code=409, detail=str(e))
+
+    preset_image = services.style_preset_image_files.get_url(new_style_preset.id)
+    return StylePresetRecordWithImage(image=preset_image, **new_style_preset.model_dump())
+
+
+@style_presets_router.get(
+    "/",
+    operation_id="list_style_presets",
+    responses={
+        200: {"model": list[StylePresetRecordWithImage]},
+    },
+)
+async def list_style_presets() -> list[StylePresetRecordWithImage]:
+    """Gets a list of style presets, each paired with its image URL"""
+    services = ApiDependencies.invoker.services
+    # The route takes no paging parameters and calls `get_many` with no arguments;
+    # each record returned is combined with the URL looked up for its id.
+    return [
+        StylePresetRecordWithImage(image=services.style_preset_image_files.get_url(preset.id), **preset.model_dump())
+        for preset in services.style_preset_records.get_many()
+    ]
+
+
+@style_presets_router.get(
+    "/i/{style_preset_id}/image",
+    operation_id="get_style_preset_image",
+    responses={
+        200: {
+            "description": "The style preset image was fetched successfully",
+        },
+        400: {"description": "Bad request"},
+        404: {"description": "The style preset image could not be found"},
+    },
+    status_code=200,
+)
+async def get_style_preset_image(
+    style_preset_id: str = Path(description="The id of the style preset image to get"),
+) -> FileResponse:
+    """Gets the image file for a style preset"""
+
+    try:
+        path = ApiDependencies.invoker.services.style_preset_image_files.get_path(style_preset_id)
+
+        # Served inline as PNG, under a filename derived from the preset id.
+        response = FileResponse(
+            path,
+            media_type="image/png",
+            filename=style_preset_id + ".png",
+            content_disposition_type="inline",
+        )
+        response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
+        return response
+    except Exception:
+        # Any failure while locating or preparing the file is reported as "not found".
+        raise HTTPException(status_code=404)
+
+
+@style_presets_router.get(
+    "/export",
+    operation_id="export_style_presets",
+    responses={200: {"content": {"text/csv": {}}, "description": "A CSV file with the requested data."}},
+    status_code=200,
+)
+async def export_style_presets():
+    # Build the CSV in memory: a header row followed by one row per preset of type `User`.
+    with io.StringIO() as buffer:
+        csv_writer = csv.writer(buffer)
+        csv_writer.writerow(["name", "prompt", "negative_prompt"])
+
+        user_presets = ApiDependencies.invoker.services.style_preset_records.get_many(type=PresetType.User)
+        csv_writer.writerows(
+            [item.name, item.preset_data.positive_prompt, item.preset_data.negative_prompt] for item in user_presets
+        )
+
+        # Take the text out before the buffer is closed on leaving the `with` block.
+        csv_text = buffer.getvalue()
+
+    # Sent as an attachment so that clients download it as a file.
+    return Response(
+        content=csv_text,
+        media_type="text/csv",
+        headers={"Content-Disposition": "attachment; filename=prompt_templates.csv"},
+    )
+
+
+@style_presets_router.post(
+    "/import",
+    operation_id="import_style_presets",
+)
+async def import_style_presets(file: UploadFile = File(description="The file to import")):
+    # Parse the upload into presets and store them all; both known failure types are logged
+    # with their traceback and turned into an HTTP error carrying the exception message.
+    services = ApiDependencies.invoker.services
+    try:
+        parsed_presets = await parse_presets_from_file(file)
+        services.style_preset_records.create_many(parsed_presets)
+    except (InvalidPresetImportDataError, UnsupportedFileTypeError) as err:
+        services.logger.error(traceback.format_exc())
+        # Invalid import data maps to 400; an unsupported file type maps to 415.
+        http_status = 400 if isinstance(err, InvalidPresetImportDataError) else 415
+        raise HTTPException(status_code=http_status, detail=str(err))
--- a/invokeai/app/api_app.py
+++ b/invokeai/app/api_app.py
@@ -30,6 +30,7 @@ from invokeai.app.api.routers import (
    images,
    model_manager,
    session_queue,
+    style_presets,
    utilities,
    workflows,
 )
@@ -55,11 +56,13 @@ mimetypes.add_type("text/css", ".css")
 torch_device_name = TorchDevice.get_torch_device_name()
 logger.info(f"Using torch device: {torch_device_name}")

+loop = asyncio.new_event_loop()
+

@asynccontextmanager
 async def lifespan(app: FastAPI):
    # Add startup event to load dependencies
-    ApiDependencies.initialize(config=app_config, event_handler_id=event_handler_id, logger=logger)
+    ApiDependencies.initialize(config=app_config, event_handler_id=event_handler_id, loop=loop, logger=logger)
    yield
    # Shut down threads
    ApiDependencies.shutdown()
@@ -106,6 +109,7 @@ app.include_router(board_images.board_images_router, prefix="/api")
 app.include_router(app_info.app_router, prefix="/api")
 app.include_router(session_queue.session_queue_router, prefix="/api")
 app.include_router(workflows.workflows_router, prefix="/api")
+app.include_router(style_presets.style_presets_router, prefix="/api")

 app.openapi = get_openapi_func(app)

@@ -184,8 +188,6 @@ def invoke_api() -> None:

    check_cudnn(logger)

-    # Start our own event loop for eventing usage
-    loop = asyncio.new_event_loop()
    config = uvicorn.Config(
        app=app,
        host=app_config.host,
--- a/invokeai/app/invocations/baseinvocation.py
+++ b/invokeai/app/invocations/baseinvocation.py
@@ -20,7 +20,6 @@ from typing import (
    Type,
    TypeVar,
    Union,
-    cast,
 )

 import semver
@@ -61,11 +60,13 @@ class Classification(str, Enum, metaclass=MetaEnum):
    - `Stable`: The invocation, including its inputs/outputs and internal logic, is stable. You may build workflows with it, having confidence that they will not break because of a change in this invocation.
    - `Beta`: The invocation is not yet stable, but is planned to be stable in the future. Workflows built around this invocation may break, but we are committed to supporting this invocation long-term.
    - `Prototype`: The invocation is not yet stable and may be removed from the application at any time. Workflows built around this invocation may break, and we are *not* committed to supporting this invocation.
+    - `Deprecated`: The invocation is deprecated and may be removed in a future version.
    """

    Stable = "stable"
    Beta = "beta"
    Prototype = "prototype"
+    Deprecated = "deprecated"


 class UIConfigBase(BaseModel):
@@ -80,7 +81,7 @@ class UIConfigBase(BaseModel):
    version: str = Field(
        description='The node\'s version. Should be a valid semver string e.g. "1.0.0" or "3.8.13".',
    )
-    node_pack: Optional[str] = Field(default=None, description="Whether or not this is a custom node")
+    node_pack: str = Field(description="The node pack that this node belongs to, will be 'invokeai' for built-in nodes")
    classification: Classification = Field(default=Classification.Stable, description="The node's classification")

    model_config = ConfigDict(
@@ -230,18 +231,16 @@ class BaseInvocation(ABC, BaseModel):
    @staticmethod
    def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseInvocation]) -> None:
        """Adds various UI-facing attributes to the invocation's OpenAPI schema."""
-        uiconfig = cast(UIConfigBase | None, getattr(model_class, "UIConfig", None))
-        if uiconfig is not None:
-            if uiconfig.title is not None:
-                schema["title"] = uiconfig.title
-            if uiconfig.tags is not None:
-                schema["tags"] = uiconfig.tags
-            if uiconfig.category is not None:
-                schema["category"] = uiconfig.category
-            if uiconfig.node_pack is not None:
-                schema["node_pack"] = uiconfig.node_pack
-            schema["classification"] = uiconfig.classification
-            schema["version"] = uiconfig.version
+        if title := model_class.UIConfig.title:
+            schema["title"] = title
+        if tags := model_class.UIConfig.tags:
+            schema["tags"] = tags
+        if category := model_class.UIConfig.category:
+            schema["category"] = category
+        if node_pack := model_class.UIConfig.node_pack:
+            schema["node_pack"] = node_pack
+        schema["classification"] = model_class.UIConfig.classification
+        schema["version"] = model_class.UIConfig.version
        if "required" not in schema or not isinstance(schema["required"], list):
            schema["required"] = []
        schema["class"] = "invocation"
@@ -312,7 +311,7 @@ class BaseInvocation(ABC, BaseModel):
        json_schema_extra={"field_kind": FieldKind.NodeAttribute},
    )

-    UIConfig: ClassVar[Type[UIConfigBase]]
+    UIConfig: ClassVar[UIConfigBase]

    model_config = ConfigDict(
        protected_namespaces=(),
@@ -441,30 +440,25 @@ def invocation(
        validate_fields(cls.model_fields, invocation_type)

        # Add OpenAPI schema extras
-        uiconfig_name = cls.__qualname__ + ".UIConfig"
-        if not hasattr(cls, "UIConfig") or cls.UIConfig.__qualname__ != uiconfig_name:
-            cls.UIConfig = type(uiconfig_name, (UIConfigBase,), {})
-        cls.UIConfig.title = title
-        cls.UIConfig.tags = tags
-        cls.UIConfig.category = category
-        cls.UIConfig.classification = classification
-
-        # Grab the node pack's name from the module name, if it's a custom node
-        is_custom_node = cls.__module__.rsplit(".", 1)[0] == "invokeai.app.invocations"
-        if is_custom_node:
-            cls.UIConfig.node_pack = cls.__module__.split(".")[0]
-        else:
-            cls.UIConfig.node_pack = None
+        uiconfig: dict[str, Any] = {}
+        uiconfig["title"] = title
+        uiconfig["tags"] = tags
+        uiconfig["category"] = category
+        uiconfig["classification"] = classification
+        # The node pack is the top-level package of the module - will be "invokeai" for built-in nodes
+        uiconfig["node_pack"] = cls.__module__.split(".")[0]

        if version is not None:
            try:
                semver.Version.parse(version)
            except ValueError as e:
                raise InvalidVersionError(f'Invalid version string for node "{invocation_type}": "{version}"') from e
-            cls.UIConfig.version = version
+            uiconfig["version"] = version
        else:
            logger.warn(f'No version specified for node "{invocation_type}", using "1.0.0"')
-            cls.UIConfig.version = "1.0.0"
+            uiconfig["version"] = "1.0.0"
+
+        cls.UIConfig = UIConfigBase(**uiconfig)

        if use_cache is not None:
            cls.model_fields["use_cache"].default = use_cache
--- a/invokeai/app/invocations/canny.py
+++ b/invokeai/app/invocations/canny.py
@@ -0,0 +1,37 @@
+import cv2
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.util import cv2_to_pil, pil_to_cv2
+
+
+@invocation(
+    "canny_edge_detection",
+    title="Canny Edge Detection",
+    tags=["controlnet", "canny"],
+    category="controlnet",
+    version="1.0.0",
+)
+class CannyEdgeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an edge map using cv2's Canny algorithm."""
+
+    image: ImageField = InputField(description="The image to process")
+    low_threshold: int = InputField(
+        default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)"
+    )
+    high_threshold: int = InputField(
+        default=200, ge=0, le=255, description="The high threshold of the Canny pixel gradient (0-255)"
+    )
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Load the input image as RGB and convert it with pil_to_cv2 for edge detection.
+        image = context.images.get_pil(self.image.image_name, "RGB")
+        np_img = pil_to_cv2(image)
+        # Both thresholds are passed straight through to cv2.Canny.
+        edge_map = cv2.Canny(np_img, self.low_threshold, self.high_threshold)
+        # Convert the edge map back with cv2_to_pil, save it and wrap the result in the node output.
+        edge_map_pil = cv2_to_pil(edge_map)
+        image_dto = context.images.save(image=edge_map_pil)
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/color_map.py
+++ b/invokeai/app/invocations/color_map.py
@@ -0,0 +1,38 @@
+import cv2
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.util import np_to_pil, pil_to_np
+
+
+@invocation(
+    "color_map",
+    title="Color Map",
+    tags=["controlnet"],
+    category="controlnet",
+    version="1.0.0",
+)
+class ColorMapInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates a color map from the provided image."""
+
+    image: ImageField = InputField(description="The image to process")
+    tile_size: int = InputField(default=64, ge=1, description=FieldDescriptions.tile_size)
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Work on the RGB image as an array; the first two shape entries are taken as (height, width).
+        pixels = pil_to_np(context.images.get_pil(self.image.image_name, "RGB"))
+        height, width = pixels.shape[:2]
+
+        # Clamp the tile to the image size so the reduced size below never floor-divides to zero.
+        tile_w = min(self.tile_size, width)
+        tile_h = min(self.tile_size, height)
+        reduced_size = (width // tile_w, height // tile_h)
+
+        # Shrink with cubic interpolation, then scale back up with nearest-neighbour sampling.
+        reduced = cv2.resize(pixels, reduced_size, interpolation=cv2.INTER_CUBIC)
+        tiled = cv2.resize(reduced, (width, height), interpolation=cv2.INTER_NEAREST)
+
+        image_dto = context.images.save(image=np_to_pil(tiled))
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -80,12 +80,12 @@ class CompelInvocation(BaseInvocation):

        with (
            # apply all patches while the model is on the target device
-            text_encoder_info.model_on_device() as (model_state_dict, text_encoder),
+            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
            ModelPatcher.apply_lora_text_encoder(
                text_encoder,
                loras=_lora_loader(),
-                model_state_dict=model_state_dict,
+                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder, self.clip.skipped_layers),
@@ -175,13 +175,13 @@ class SDXLPromptInvocationBase:

        with (
            # apply all patches while the model is on the target device
-            text_encoder_info.model_on_device() as (state_dict, text_encoder),
+            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
            ModelPatcher.apply_lora(
                text_encoder,
                loras=_lora_loader(),
                prefix=lora_prefix,
-                model_state_dict=state_dict,
+                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder, clip_field.skipped_layers),
--- a/invokeai/app/invocations/content_shuffle.py
+++ b/invokeai/app/invocations/content_shuffle.py
@@ -0,0 +1,26 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.content_shuffle import content_shuffle
+
+
+@invocation(
+    "content_shuffle",
+    title="Content Shuffle",
+    tags=["controlnet", "normal"],
+    category="controlnet",
+    version="1.0.0",
+)
+class ContentShuffleInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Shuffles the image, similar to a 'liquify' filter."""
+
+    image: ImageField = InputField(description="The image to process")
+    scale_factor: int = InputField(default=256, ge=0, description="The scale factor used for the shuffle")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Fetch the source image as RGB and run it through the content_shuffle helper.
+        source = context.images.get_pil(self.image.image_name, "RGB")
+        shuffled = content_shuffle(input_image=source, scale_factor=self.scale_factor)
+        # Save the shuffled image and wrap the returned DTO in the node output.
+        return ImageOutput.build(context.images.save(image=shuffled))
--- a/invokeai/app/invocations/controlnet_image_processors.py
+++ b/invokeai/app/invocations/controlnet_image_processors.py
@@ -21,6 +21,8 @@ from controlnet_aux import (
 from controlnet_aux.util import HWC3, ade_palette
 from PIL import Image
 from pydantic import BaseModel, Field, field_validator, model_validator
+from transformers import pipeline
+from transformers.pipelines import DepthEstimationPipeline

 from invokeai.app.invocations.baseinvocation import (
    BaseInvocation,
@@ -44,13 +46,12 @@ from invokeai.app.invocations.util import validate_begin_end_step, validate_weig
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
 from invokeai.backend.image_util.canny import get_canny_edges
-from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
+from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
 from invokeai.backend.image_util.hed import HEDProcessor
 from invokeai.backend.image_util.lineart import LineartProcessor
 from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
 from invokeai.backend.image_util.util import np_to_pil, pil_to_np
-from invokeai.backend.util.devices import TorchDevice


 class ControlField(BaseModel):
@@ -173,6 +174,7 @@ class ImageProcessorInvocation(BaseInvocation, WithMetadata, WithBoard):
    tags=["controlnet", "canny"],
    category="controlnet",
    version="1.3.3",
+    classification=Classification.Deprecated,
 )
 class CannyImageProcessorInvocation(ImageProcessorInvocation):
    """Canny edge detection for ControlNet"""
@@ -207,6 +209,7 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "hed", "softedge"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class HedImageProcessorInvocation(ImageProcessorInvocation):
    """Applies HED edge detection to image"""
@@ -236,6 +239,7 @@ class HedImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "lineart"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class LineartImageProcessorInvocation(ImageProcessorInvocation):
    """Applies line art processing to image"""
@@ -258,6 +262,7 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "lineart", "anime"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
    """Applies line art anime processing to image"""
@@ -281,6 +286,7 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "midas"],
    category="controlnet",
    version="1.2.4",
+    classification=Classification.Deprecated,
 )
 class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
    """Applies Midas depth processing to image"""
@@ -313,6 +319,7 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):
    """Applies NormalBae processing to image"""
@@ -329,7 +336,12 @@ class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):


@invocation(
-    "mlsd_image_processor", title="MLSD Processor", tags=["controlnet", "mlsd"], category="controlnet", version="1.2.3"
+    "mlsd_image_processor",
+    title="MLSD Processor",
+    tags=["controlnet", "mlsd"],
+    category="controlnet",
+    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class MlsdImageProcessorInvocation(ImageProcessorInvocation):
    """Applies MLSD processing to image"""
@@ -352,7 +364,12 @@ class MlsdImageProcessorInvocation(ImageProcessorInvocation):


@invocation(
-    "pidi_image_processor", title="PIDI Processor", tags=["controlnet", "pidi"], category="controlnet", version="1.2.3"
+    "pidi_image_processor",
+    title="PIDI Processor",
+    tags=["controlnet", "pidi"],
+    category="controlnet",
+    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class PidiImageProcessorInvocation(ImageProcessorInvocation):
    """Applies PIDI processing to image"""
@@ -380,6 +397,7 @@ class PidiImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "contentshuffle"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
    """Applies content shuffle processing to image"""
@@ -410,6 +428,7 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "zoe", "depth"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
    """Applies Zoe depth processing to image"""
@@ -426,6 +445,7 @@ class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "mediapipe", "face"],
    category="controlnet",
    version="1.2.4",
+    classification=Classification.Deprecated,
 )
 class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
    """Applies mediapipe face processing to image"""
@@ -453,6 +473,7 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "leres", "depth"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class LeresImageProcessorInvocation(ImageProcessorInvocation):
    """Applies leres processing to image"""
@@ -482,6 +503,7 @@ class LeresImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "tile"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class TileResamplerProcessorInvocation(ImageProcessorInvocation):
    """Tile resampler processor"""
@@ -522,6 +544,7 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "segmentanything"],
    category="controlnet",
    version="1.2.4",
+    classification=Classification.Deprecated,
 )
 class SegmentAnythingProcessorInvocation(ImageProcessorInvocation):
    """Applies segment anything processing to image"""
@@ -569,6 +592,7 @@ class SamDetectorReproducibleColors(SamDetector):
    tags=["controlnet"],
    category="controlnet",
    version="1.2.3",
+    classification=Classification.Deprecated,
 )
 class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
    """Generates a color map from the provided image"""
@@ -592,7 +616,14 @@ class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
        return color_map


-DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
+# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
+DEPTH_ANYTHING_MODELS = {
+    "large": "LiheYoung/depth-anything-large-hf",
+    "base": "LiheYoung/depth-anything-base-hf",
+    "small": "LiheYoung/depth-anything-small-hf",
+    "small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
+}


@invocation(
@@ -600,28 +631,34 @@ DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
    title="Depth Anything Processor",
    tags=["controlnet", "depth", "depth anything"],
    category="controlnet",
-    version="1.1.2",
+    version="1.1.3",
+    classification=Classification.Deprecated,
 )
 class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
    """Generates a depth map based on the Depth Anything algorithm"""

    model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
-        default="small", description="The size of the depth model to use"
+        default="small_v2", description="The size of the depth model to use"
    )
    resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)

    def run_processor(self, image: Image.Image) -> Image.Image:
-        def loader(model_path: Path):
-            return DepthAnythingDetector.load_model(
-                model_path, model_size=self.model_size, device=TorchDevice.choose_torch_device()
-            )
+        def load_depth_anything(model_path: Path):
+            depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
+            assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
+            return DepthAnythingPipeline(depth_anything_pipeline)

        with self._context.models.load_remote_model(
-            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=loader
-        ) as model:
-            depth_anything_detector = DepthAnythingDetector(model, TorchDevice.choose_torch_device())
-            processed_image = depth_anything_detector(image=image, resolution=self.resolution)
-            return processed_image
+            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
+        ) as depth_anything_detector:
+            assert isinstance(depth_anything_detector, DepthAnythingPipeline)
+            depth_map = depth_anything_detector.generate_depth(image)
+
+            # Resize to the user-specified target width, preserving the aspect ratio
+            new_height = int(image.size[1] * (self.resolution / image.size[0]))
+            depth_map = depth_map.resize((self.resolution, new_height))
+
+            return depth_map


@invocation(
@@ -630,6 +667,7 @@ class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
    tags=["controlnet", "dwpose", "openpose"],
    category="controlnet",
    version="1.1.1",
+    classification=Classification.Deprecated,
 )
 class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
    """Generates an openpose pose from an image using DWPose"""
--- a/invokeai/app/invocations/create_gradient_mask.py
+++ b/invokeai/app/invocations/create_gradient_mask.py
@@ -39,7 +39,7 @@ class GradientMaskOutput(BaseInvocationOutput):
    title="Create Gradient Mask",
    tags=["mask", "denoise"],
    category="latents",
-    version="1.1.0",
+    version="1.2.0",
 )
 class CreateGradientMaskInvocation(BaseInvocation):
    """Creates mask for denoising model run."""
@@ -93,6 +93,7 @@ class CreateGradientMaskInvocation(BaseInvocation):

            # redistribute blur so that the original edges are 0 and blur outwards to 1
            blur_tensor = (blur_tensor - 0.5) * 2
+            blur_tensor[blur_tensor < 0] = 0.0

            threshold = 1 - self.minimum_denoise

--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -37,9 +37,9 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion import PipelineIntermediateState
 from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
 from invokeai.backend.stable_diffusion.diffusers_pipeline import (
    ControlNetData,
@@ -60,8 +60,13 @@ from invokeai.backend.stable_diffusion.diffusion_backend import StableDiffusionB
 from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
 from invokeai.backend.stable_diffusion.extensions.controlnet import ControlNetExt
 from invokeai.backend.stable_diffusion.extensions.freeu import FreeUExt
+from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
+from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
+from invokeai.backend.stable_diffusion.extensions.lora import LoRAExt
 from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
 from invokeai.backend.stable_diffusion.extensions.rescale_cfg import RescaleCFGExt
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.stable_diffusion.extensions.t2i_adapter import T2IAdapterExt
 from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -180,7 +185,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
    )
    denoise_mask: Optional[DenoiseMaskField] = InputField(
        default=None,
-        description=FieldDescriptions.mask,
+        description=FieldDescriptions.denoise_mask,
        input=Input.Connection,
        ui_order=8,
    )
@@ -498,6 +503,33 @@ class DenoiseLatentsInvocation(BaseInvocation):
                )
            )

+    @staticmethod
+    def parse_t2i_adapter_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        t2i_adapters: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        """Add one T2IAdapterExt to ext_manager per adapter field (exit_stack is not used here)."""
+        if t2i_adapters is None:
+            return
+
+        # The input may be a single T2IAdapterField or a list of them; treat both uniformly.
+        adapters = [t2i_adapters] if isinstance(t2i_adapters, T2IAdapterField) else t2i_adapters
+
+        for adapter in adapters:
+            extension = T2IAdapterExt(
+                node_context=context,
+                model_id=adapter.t2i_adapter_model,
+                image=context.images.get_pil(adapter.image.image_name),
+                weight=adapter.weight,
+                begin_step_percent=adapter.begin_step_percent,
+                end_step_percent=adapter.end_step_percent,
+                resize_mode=adapter.resize_mode,
+            )
+            # Extensions are registered in the order the adapters were supplied.
+            ext_manager.add_extension(extension)
+
    def prep_ip_adapter_image_prompts(
        self,
        context: InvocationContext,
@@ -707,7 +739,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
        else:
            masked_latents = torch.where(mask < 0.5, 0.0, latents)

-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient

    @staticmethod
    def prepare_noise_and_latents(
@@ -765,10 +797,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
        dtype = TorchDevice.choose_torch_dtype()

        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
-        latents = latents.to(device=device, dtype=dtype)
-        if noise is not None:
-            noise = noise.to(device=device, dtype=dtype)
-
        _, _, latent_height, latent_width = latents.shape

        conditioning_data = self.get_conditioning_data(
@@ -801,21 +829,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
            denoising_end=self.denoising_end,
        )

-        denoise_ctx = DenoiseContext(
-            inputs=DenoiseInputs(
-                orig_latents=latents,
-                timesteps=timesteps,
-                init_timestep=init_timestep,
-                noise=noise,
-                seed=seed,
-                scheduler_step_kwargs=scheduler_step_kwargs,
-                conditioning_data=conditioning_data,
-                attention_processor_cls=CustomAttnProcessor2_0,
-            ),
-            unet=None,
-            scheduler=scheduler,
-        )
-
        # get the unet's config so that we can pass the base to sd_step_callback()
        unet_config = context.models.get_config(self.unet.unet.key)

@@ -833,6 +846,50 @@ class DenoiseLatentsInvocation(BaseInvocation):
        if self.unet.freeu_config:
            ext_manager.add_extension(FreeUExt(self.unet.freeu_config))

+        ### lora
+        if self.unet.loras:
+            for lora_field in self.unet.loras:
+                ext_manager.add_extension(
+                    LoRAExt(
+                        node_context=context,
+                        model_id=lora_field.lora,
+                        weight=lora_field.weight,
+                    )
+                )
+        ### seamless
+        if self.unet.seamless_axes:
+            ext_manager.add_extension(SeamlessExt(self.unet.seamless_axes))
+
+        ### inpaint
+        mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
+        # NOTE: We used to identify inpainting models by inspecting the shape of the loaded UNet model weights. Now we
+        # use the ModelVariantType config. During testing, there was a report of a user with models that had an
+        # incorrect ModelVariantType value. Re-installing the model fixed the issue. If this issue turns out to be
+        # prevalent, we will have to revisit how we initialize the inpainting extensions.
+        if unet_config.variant == ModelVariantType.Inpaint:
+            ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
+        elif mask is not None:
+            ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))
+
+        # Initialize context for modular denoise
+        latents = latents.to(device=device, dtype=dtype)
+        if noise is not None:
+            noise = noise.to(device=device, dtype=dtype)
+        denoise_ctx = DenoiseContext(
+            inputs=DenoiseInputs(
+                orig_latents=latents,
+                timesteps=timesteps,
+                init_timestep=init_timestep,
+                noise=noise,
+                seed=seed,
+                scheduler_step_kwargs=scheduler_step_kwargs,
+                conditioning_data=conditioning_data,
+                attention_processor_cls=CustomAttnProcessor2_0,
+            ),
+            unet=None,
+            scheduler=scheduler,
+        )
+
        # context for loading additional models
        with ExitStack() as exit_stack:
            # later should be smth like:
@@ -840,6 +897,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
            #    ext = extension_field.to_extension(exit_stack, context, ext_manager)
            #    ext_manager.add_extension(ext)
            self.parse_controlnet_field(exit_stack, context, self.control, ext_manager)
+            self.parse_t2i_adapter_field(exit_stack, context, self.t2i_adapter, ext_manager)

            # ext: t2i/ip adapter
            ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)
@@ -871,6 +929,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
        seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

        mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        # At this point, the mask ranges from 0 (leave unchanged) to 1 (inpaint).
+        # We invert the mask here for compatibility with the old backend implementation.
+        if mask is not None:
+            mask = 1 - mask

        # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
        # below. Investigate whether this is appropriate.
@@ -913,14 +975,14 @@ class DenoiseLatentsInvocation(BaseInvocation):
        assert isinstance(unet_info.model, UNet2DConditionModel)
        with (
            ExitStack() as exit_stack,
-            unet_info.model_on_device() as (model_state_dict, unet),
+            unet_info.model_on_device() as (cached_weights, unet),
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
-            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
            ModelPatcher.apply_lora_unet(
                unet,
                loras=_lora_loader(),
-                model_state_dict=model_state_dict,
+                cached_weights=cached_weights,
            ),
        ):
            assert isinstance(unet, UNet2DConditionModel)
--- a/invokeai/app/invocations/depth_anything.py
+++ b/invokeai/app/invocations/depth_anything.py
@@ -0,0 +1,45 @@
+from typing import Literal
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
+
+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
+# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
+DEPTH_ANYTHING_MODELS = {
+    "large": "LiheYoung/depth-anything-large-hf",
+    "base": "LiheYoung/depth-anything-base-hf",
+    "small": "LiheYoung/depth-anything-small-hf",
+    "small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
+}
+
+
+@invocation(
+    "depth_anything_depth_estimation",
+    title="Depth Anything Depth Estimation",
+    tags=["controlnet", "depth", "depth anything"],
+    category="controlnet",
+    version="1.0.0",
+)
+class DepthAnythingDepthEstimationInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates a depth map using a Depth Anything model."""
+
+    image: ImageField = InputField(description="The image to process")
+    model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
+        default="small_v2", description="The size of the depth model to use"
+    )
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        model_url = DEPTH_ANYTHING_MODELS[self.model_size]  # an "owner/name" style id, not a full URL
+        image = context.images.get_pil(self.image.image_name, "RGB")  # input image, fetched in RGB mode
+
+        loaded_model = context.models.load_remote_model(model_url, DepthAnythingPipeline.load_model)
+
+        with loaded_model as depth_anything_detector:  # the pipeline is only used inside this block
+            assert isinstance(depth_anything_detector, DepthAnythingPipeline)  # check what the loader returned
+            depth_map = depth_anything_detector.generate_depth(image)
+
+        image_dto = context.images.save(image=depth_map)  # persist the depth map
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/dw_openpose.py
+++ b/invokeai/app/invocations/dw_openpose.py
@@ -0,0 +1,50 @@
+import onnxruntime as ort
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector2
+
+
+@invocation(
+    "dw_openpose_detection",
+    title="DW Openpose Detection",
+    tags=["controlnet", "dwpose", "openpose"],
+    category="controlnet",
+    version="1.1.1",
+)
+class DWOpenposeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an openpose pose from an image using DWPose"""
+
+    image: ImageField = InputField(description="The image to process")
+    draw_body: bool = InputField(default=True)  # the draw_* flags are forwarded unchanged to detector.run()
+    draw_face: bool = InputField(default=False)
+    draw_hands: bool = InputField(default=False)
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        image = context.images.get_pil(self.image.image_name, "RGB")
+        # Two ONNX models are used: one located via get_model_url_det(), one via get_model_url_pose().
+        onnx_det_path = context.models.download_and_cache_model(DWOpenposeDetector2.get_model_url_det())
+        onnx_pose_path = context.models.download_and_cache_model(DWOpenposeDetector2.get_model_url_pose())
+
+        loaded_session_det = context.models.load_local_model(
+            onnx_det_path, DWOpenposeDetector2.create_onnx_inference_session
+        )
+        loaded_session_pose = context.models.load_local_model(
+            onnx_pose_path, DWOpenposeDetector2.create_onnx_inference_session
+        )
+
+        with loaded_session_det as session_det, loaded_session_pose as session_pose:
+            assert isinstance(session_det, ort.InferenceSession)  # check what the loader returned
+            assert isinstance(session_pose, ort.InferenceSession)
+            detector = DWOpenposeDetector2(session_det=session_det, session_pose=session_pose)
+            detected_image = detector.run(
+                image,
+                draw_face=self.draw_face,
+                draw_hands=self.draw_hands,
+                draw_body=self.draw_body,
+            )
+        image_dto = context.images.save(image=detected_image)  # persist the detector's output image
+
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -1,7 +1,7 @@
 from enum import Enum
 from typing import Any, Callable, Optional, Tuple

-from pydantic import BaseModel, ConfigDict, Field, RootModel, TypeAdapter
+from pydantic import BaseModel, ConfigDict, Field, RootModel, TypeAdapter, model_validator
 from pydantic.fields import _Unset
 from pydantic_core import PydanticUndefined

@@ -40,14 +40,18 @@ class UIType(str, Enum, metaclass=MetaEnum):

    # region Model Field Types
    MainModel = "MainModelField"
+    FluxMainModel = "FluxMainModelField"
    SDXLMainModel = "SDXLMainModelField"
    SDXLRefinerModel = "SDXLRefinerModelField"
    ONNXModel = "ONNXModelField"
    VAEModel = "VAEModelField"
+    FluxVAEModel = "FluxVAEModelField"
    LoRAModel = "LoRAModelField"
    ControlNetModel = "ControlNetModelField"
    IPAdapterModel = "IPAdapterModelField"
    T2IAdapterModel = "T2IAdapterModelField"
+    T5EncoderModel = "T5EncoderModelField"
+    CLIPEmbedModel = "CLIPEmbedModelField"
    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
    # endregion

@@ -125,13 +129,17 @@ class FieldDescriptions:
    negative_cond = "Negative conditioning tensor"
    noise = "Noise tensor"
    clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
+    t5_encoder = "T5 tokenizer and text encoder"
+    clip_embed_model = "CLIP Embed loader"
    unet = "UNet (scheduler, LoRAs)"
+    transformer = "Transformer"
    vae = "VAE"
    cond = "Conditioning tensor"
    controlnet_model = "ControlNet model to load"
    vae_model = "VAE model to load"
    lora_model = "LoRA model to load"
    main_model = "Main model (UNet, VAE, CLIP) to load"
+    flux_model = "Flux model (Transformer) to load"
    sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
    sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
    onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -173,7 +181,7 @@ class FieldDescriptions:
    )
    num_1 = "The first number"
    num_2 = "The second number"
-    mask = "The mask to use for the operation"
+    denoise_mask = "A mask of the region to apply the denoising process to."
    board = "The board to save the image to"
    image = "The image to process"
    tile_size = "Tile size"
@@ -231,6 +239,12 @@ class ColorField(BaseModel):
        return (self.r, self.g, self.b, self.a)


+class FluxConditioningField(BaseModel):
+    """A conditioning tensor primitive value"""
+    # Only the name is carried here; consumers look the conditioning up by this name.
+    conditioning_name: str = Field(description="The name of conditioning tensor")
+
+
 class ConditioningField(BaseModel):
    """A conditioning tensor primitive value"""

@@ -242,6 +256,31 @@ class ConditioningField(BaseModel):
    )


+class BoundingBoxField(BaseModel):
+    """A bounding box primitive value."""
+
+    x_min: int = Field(ge=0, description="The minimum x-coordinate of the bounding box (inclusive).")
+    x_max: int = Field(ge=0, description="The maximum x-coordinate of the bounding box (exclusive).")
+    y_min: int = Field(ge=0, description="The minimum y-coordinate of the bounding box (inclusive).")
+    y_max: int = Field(ge=0, description="The maximum y-coordinate of the bounding box (exclusive).")
+
+    score: Optional[float] = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="The score associated with the bounding box. In the range [0, 1]. This value is typically set "
+        "when the bounding box was produced by a detector and has an associated confidence score.",
+    )
+    # Cross-field check, run after per-field validation. min == max is accepted (max is exclusive: an empty box).
+    @model_validator(mode="after")
+    def check_coords(self):
+        if self.x_min > self.x_max:
+            raise ValueError(f"x_min ({self.x_min}) is greater than x_max ({self.x_max}).")
+        if self.y_min > self.y_max:
+            raise ValueError(f"y_min ({self.y_min}) is greater than y_max ({self.y_max}).")
+        return self  # "after" validators hand back the model instance
+
+
 class MetadataField(RootModel[dict[str, Any]]):
    """
    Pydantic model for metadata with custom root of type dict[str, Any].
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -0,0 +1,249 @@
+from typing import Callable, Optional
+
+import torch
+import torchvision.transforms as tv_transforms
+from torchvision.transforms.functional import resize as tv_resize
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.fields import (
+    DenoiseMaskField,
+    FieldDescriptions,
+    FluxConditioningField,
+    Input,
+    InputField,
+    LatentsField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import TransformerField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.denoise import denoise
+from invokeai.backend.flux.inpaint_extension import InpaintExtension
+from invokeai.backend.flux.model import Flux
+from invokeai.backend.flux.sampling_utils import (
+    clip_timestep_schedule,
+    generate_img_ids,
+    get_noise,
+    get_schedule,
+    pack,
+    unpack,
+)
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "flux_denoise",
+    title="FLUX Denoise",
+    tags=["image", "flux"],
+    category="image",
+    version="1.0.0",
+    classification=Classification.Prototype,
+)
+class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Run denoising process with a FLUX transformer model."""
+
+    # If latents is provided, this means we are doing image-to-image.
+    latents: Optional[LatentsField] = InputField(
+        default=None,
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    # denoise_mask is used for image-to-image inpainting. Only the masked region is modified.
+    denoise_mask: Optional[DenoiseMaskField] = InputField(
+        default=None,
+        description=FieldDescriptions.denoise_mask,
+        input=Input.Connection,
+    )
+    denoising_start: float = InputField(
+        default=0.0,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_start,
+    )
+    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+    transformer: TransformerField = InputField(
+        description=FieldDescriptions.flux_model,
+        input=Input.Connection,
+        title="Transformer",
+    )
+    positive_text_conditioning: FluxConditioningField = InputField(
+        description=FieldDescriptions.positive_cond, input=Input.Connection
+    )
+    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
+    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
+    num_steps: int = InputField(
+        default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
+    )
+    guidance: float = InputField(
+        default=4.0,
+        description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
+    )
+    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = self._run_diffusion(context)
+        latents = latents.detach().to("cpu")  # move the result to the CPU before persisting it
+
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)  # no seed is attached to the output
+
+    def _run_diffusion(
+        self,
+        context: InvocationContext,
+    ):  # returns the resulting latents in 'unpacked' format
+        inference_dtype = torch.bfloat16
+
+        # Load the conditioning data.
+        cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name)
+        assert len(cond_data.conditionings) == 1
+        flux_conditioning = cond_data.conditionings[0]
+        assert isinstance(flux_conditioning, FLUXConditioningInfo)
+        flux_conditioning = flux_conditioning.to(dtype=inference_dtype)
+        t5_embeddings = flux_conditioning.t5_embeds
+        clip_embeddings = flux_conditioning.clip_embeds
+
+        # Load the input latents, if provided.
+        init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
+        if init_latents is not None:
+            init_latents = init_latents.to(device=TorchDevice.choose_torch_device(), dtype=inference_dtype)
+
+        # Prepare input noise.
+        noise = get_noise(
+            num_samples=1,
+            height=self.height,
+            width=self.width,
+            device=TorchDevice.choose_torch_device(),
+            dtype=inference_dtype,
+            seed=self.seed,
+        )
+
+        transformer_info = context.models.load(self.transformer.transformer)
+        is_schnell = "schnell" in transformer_info.config.config_path  # variant is inferred from the config path
+
+        # Calculate the timestep schedule.
+        image_seq_len = noise.shape[-1] * noise.shape[-2] // 4  # checked against the packed shape further down
+        timesteps = get_schedule(
+            num_steps=self.num_steps,
+            image_seq_len=image_seq_len,
+            shift=not is_schnell,
+        )
+
+        # Clip the timesteps schedule based on denoising_start and denoising_end.
+        timesteps = clip_timestep_schedule(timesteps, self.denoising_start, self.denoising_end)
+
+        # Prepare input latent image.
+        if init_latents is not None:
+            # If init_latents is provided, we are doing image-to-image.
+
+            if is_schnell:
+                context.logger.warning(
+                    "Running image-to-image with a FLUX schnell model. This is not recommended. The results are likely "
+                    "to be poor. Consider using a FLUX dev model instead."
+                )
+
+            # Noise init_latents by the appropriate amount for the first timestep (t_0 == 1 gives pure noise).
+            t_0 = timesteps[0]
+            x = t_0 * noise + (1.0 - t_0) * init_latents
+        else:
+            # init_latents are not provided, so we are not doing image-to-image (i.e. we are starting from pure noise).
+            if self.denoising_start > 1e-5:
+                raise ValueError("denoising_start should be 0 when initial latents are not provided.")
+
+            x = noise
+
+        # If at most one timestep remains after clipping, short-circuit. We are just noising the input latents (or
+        # returning pure noise), but not taking any denoising steps.
+        if len(timesteps) <= 1:
+            return x  # NOTE(review): not cast with .float() here, unlike the final return below - confirm intended
+
+        inpaint_mask = self._prep_inpaint_mask(context, x)
+
+        b, _c, h, w = x.shape
+        img_ids = generate_img_ids(h=h, w=w, batch_size=b, device=x.device, dtype=x.dtype)
+
+        bs, t5_seq_len, _ = t5_embeddings.shape
+        txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device())
+
+        # Pack all latent tensors.
+        init_latents = pack(init_latents) if init_latents is not None else None
+        inpaint_mask = pack(inpaint_mask) if inpaint_mask is not None else None
+        noise = pack(noise)
+        x = pack(x)
+
+        # Now that we have 'packed' the latent tensors, verify that we calculated the image_seq_len correctly.
+        assert image_seq_len == x.shape[1]
+
+        # Prepare inpaint extension.
+        inpaint_extension: InpaintExtension | None = None
+        if inpaint_mask is not None:
+            assert init_latents is not None  # a denoise mask is only usable together with input latents
+            inpaint_extension = InpaintExtension(
+                init_latents=init_latents,
+                inpaint_mask=inpaint_mask,
+                noise=noise,
+            )
+
+        with transformer_info as transformer:
+            assert isinstance(transformer, Flux)
+
+            x = denoise(
+                model=transformer,
+                img=x,
+                img_ids=img_ids,
+                txt=t5_embeddings,
+                txt_ids=txt_ids,
+                vec=clip_embeddings,
+                timesteps=timesteps,
+                step_callback=self._build_step_callback(context),
+                guidance=self.guidance,
+                inpaint_extension=inpaint_extension,
+            )
+
+        x = unpack(x.float(), self.height, self.width)  # cast to float32, then undo the packing
+        return x
+
+    def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None:
+        """Prepare the inpaint mask.
+
+        - Loads the mask
+        - Resizes it to the latent height/width (bilinear, no antialiasing)
+        - Casts to same device/dtype as latents
+        - Expands mask to the same shape as latents so that they line up after 'packing'
+
+        Args:
+            context (InvocationContext): The invocation context, for loading the inpaint mask.
+            latents (torch.Tensor): A latent image tensor. In 'unpacked' format. Used to determine the target shape,
+                device, and dtype for the inpaint mask.
+
+        Returns:
+            torch.Tensor | None: Inpaint mask with the same shape as `latents`, or None if no denoise_mask is set.
+        """
+        if self.denoise_mask is None:
+            return None
+
+        mask = context.tensors.load(self.denoise_mask.mask_name)
+
+        _, _, latent_height, latent_width = latents.shape  # latents must be 4-D; the last two dims set the size
+        mask = tv_resize(
+            img=mask,
+            size=[latent_height, latent_width],
+            interpolation=tv_transforms.InterpolationMode.BILINEAR,
+            antialias=False,
+        )
+
+        mask = mask.to(device=latents.device, dtype=latents.dtype)
+
+        # Expand the inpaint mask to the same shape as `latents` so that when we 'pack' `mask` it lines up with
+        # `latents`.
+        return mask.expand_as(latents)
+
+    def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
+        def step_callback(state: PipelineIntermediateState) -> None:  # closes over `context` and `self`
+            state.latents = unpack(state.latents.float(), self.height, self.width).squeeze()  # overwrites in place
+            context.util.flux_step_callback(state)
+
+        return step_callback
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -0,0 +1,92 @@
+from typing import Literal
+
+import torch
+from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
+from invokeai.app.invocations.model import CLIPField, T5EncoderField
+from invokeai.app.invocations.primitives import FluxConditioningOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.modules.conditioner import HFEncoder
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo
+
+
+@invocation(
+    "flux_text_encoder",
+    title="FLUX Text Encoding",
+    tags=["prompt", "conditioning", "flux"],
+    category="conditioning",
+    version="1.0.0",
+    classification=Classification.Prototype,
+)
+class FluxTextEncoderInvocation(BaseInvocation):
+    """Encodes and preps a prompt for a flux image."""
+
+    clip: CLIPField = InputField(
+        title="CLIP",
+        description=FieldDescriptions.clip,
+        input=Input.Connection,
+    )
+    t5_encoder: T5EncoderField = InputField(
+        title="T5Encoder",
+        description=FieldDescriptions.t5_encoder,
+        input=Input.Connection,
+    )
+    t5_max_seq_len: Literal[256, 512] = InputField(
+        description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models."
+    )
+    prompt: str = InputField(description="Text prompt to encode.")
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> FluxConditioningOutput:
+        # Note: The T5 and CLIP encoding are done in separate functions to ensure that all model references are locally
+        # scoped. This ensures that the T5 model can be freed and gc'd before loading the CLIP model (if necessary).
+        t5_embeddings = self._t5_encode(context)
+        clip_embeddings = self._clip_encode(context)
+        conditioning_data = ConditioningFieldData(  # a single-entry list holding both embeddings
+            conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)]
+        )
+
+        conditioning_name = context.conditioning.save(conditioning_data)  # saved; only the name is returned
+        return FluxConditioningOutput.build(conditioning_name)
+
+    def _t5_encode(self, context: InvocationContext) -> torch.Tensor:
+        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
+        t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder)
+
+        prompt = [self.prompt]  # the encoder is called with a one-element list
+
+        with (
+            t5_text_encoder_info as t5_text_encoder,
+            t5_tokenizer_info as t5_tokenizer,
+        ):
+            assert isinstance(t5_text_encoder, T5EncoderModel)
+            assert isinstance(t5_tokenizer, T5Tokenizer)
+            # NOTE(review): the positional `False` presumably selects the non-CLIP path - confirm in HFEncoder.
+            t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len)
+
+            prompt_embeds = t5_encoder(prompt)
+
+        assert isinstance(prompt_embeds, torch.Tensor)  # checked after the models have been released
+        return prompt_embeds
+
+    def _clip_encode(self, context: InvocationContext) -> torch.Tensor:
+        clip_tokenizer_info = context.models.load(self.clip.tokenizer)
+        clip_text_encoder_info = context.models.load(self.clip.text_encoder)
+
+        prompt = [self.prompt]  # the encoder is called with a one-element list
+
+        with (
+            clip_text_encoder_info as clip_text_encoder,
+            clip_tokenizer_info as clip_tokenizer,
+        ):
+            assert isinstance(clip_text_encoder, CLIPTextModel)
+            assert isinstance(clip_tokenizer, CLIPTokenizer)
+            # NOTE(review): `True, 77` presumably mean "CLIP mode" and max sequence length - confirm in HFEncoder.
+            clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77)
+
+            pooled_prompt_embeds = clip_encoder(prompt)
+
+        assert isinstance(pooled_prompt_embeds, torch.Tensor)  # checked after the models have been released
+        return pooled_prompt_embeds
--- a/invokeai/app/invocations/flux_vae_decode.py
+++ b/invokeai/app/invocations/flux_vae_decode.py
@@ -0,0 +1,60 @@
+import torch
+from einops import rearrange
+from PIL import Image
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+from invokeai.backend.model_manager.load.load_base import LoadedModel
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "flux_vae_decode",
+    title="FLUX Latents to Image",
+    tags=["latents", "image", "vae", "l2i", "flux"],
+    category="latents",
+    version="1.0.0",
+)
+class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an image from latents."""
+
+    latents: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+
+    def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
+        with vae_info as vae:
+            assert isinstance(vae, AutoEncoder)
+            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype())
+            img = vae.decode(latents)
+
+        img = img.clamp(-1, 1)
+        img = rearrange(img[0], "c h w -> h w c")  # first batch item only, reordered to channels-last
+        img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy())  # [-1, 1] -> [0, 255] bytes
+        return img_pil
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+        vae_info = context.models.load(self.vae.vae)
+        image = self._vae_decode(vae_info=vae_info, latents=latents)
+
+        TorchDevice.empty_cache()  # presumably frees cached device memory before saving - confirm in TorchDevice
+        image_dto = context.images.save(image=image)
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/flux_vae_encode.py
+++ b/invokeai/app/invocations/flux_vae_encode.py
@@ -0,0 +1,67 @@
+import einops
+import torch
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+from invokeai.backend.model_manager import LoadedModel
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "flux_vae_encode",
+    title="FLUX Image to Latents",
+    tags=["latents", "image", "vae", "i2l", "flux"],
+    category="latents",
+    version="1.0.0",
+)
+class FluxVaeEncodeInvocation(BaseInvocation):
+    """Encodes an image into latents."""
+
+    image: ImageField = InputField(
+        description="The image to encode.",
+    )
+    vae: VAEField = InputField(
+        description=FieldDescriptions.vae,
+        input=Input.Connection,
+    )
+
+    @staticmethod
+    def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
+        # TODO(ryand): Expose seed parameter at the invocation level (the seed is currently fixed at 0 below).
+        # TODO(ryand): Write a util function for generating random tensors that is consistent across devices / dtypes.
+        # There's a starting point in get_noise(...), but it needs to be extracted and generalized. This function
+        # should be used for VAE encode sampling.
+        generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0)
+        with vae_info as vae:
+            assert isinstance(vae, AutoEncoder)
+            image_tensor = image_tensor.to(
+                device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
+            )
+            latents = vae.encode(image_tensor, sample=True, generator=generator)
+            return latents
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        image = context.images.get_pil(self.image.image_name)
+
+        vae_info = context.models.load(self.vae.vae)
+
+        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+        if image_tensor.dim() == 3:  # no batch dimension yet
+            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")  # add a batch dimension of 1
+
+        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
+
+        latents = latents.to("cpu")  # move to the CPU before persisting
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
--- a/invokeai/app/invocations/grounding_dino.py
+++ b/invokeai/app/invocations/grounding_dino.py
@@ -0,0 +1,100 @@
+from pathlib import Path
+from typing import Literal
+
+import torch
+from PIL import Image
+from transformers import pipeline
+from transformers.pipelines import ZeroShotObjectDetectionPipeline
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField
+from invokeai.app.invocations.primitives import BoundingBoxCollectionOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.grounding_dino.detection_result import DetectionResult
+from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
+
+GroundingDinoModelKey = Literal["grounding-dino-tiny", "grounding-dino-base"]
+GROUNDING_DINO_MODEL_IDS: dict[GroundingDinoModelKey, str] = {
+    "grounding-dino-tiny": "IDEA-Research/grounding-dino-tiny",
+    "grounding-dino-base": "IDEA-Research/grounding-dino-base",
+}
+
+
+@invocation(
+    "grounding_dino",
+    title="Grounding DINO (Text Prompt Object Detection)",
+    tags=["prompt", "object detection"],
+    category="image",
+    version="1.0.0",
+)
+class GroundingDinoInvocation(BaseInvocation):
+    """Runs a Grounding DINO model. Performs zero-shot bounding-box object detection from a text prompt."""
+
+    # Reference:
+    # - https://arxiv.org/pdf/2303.05499
+    # - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
+    # - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb
+
+    model: GroundingDinoModelKey = InputField(description="The Grounding DINO model to use.")
+    prompt: str = InputField(description="The prompt describing the object to segment.")
+    image: ImageField = InputField(description="The image to segment.")
+    detection_threshold: float = InputField(
+        description="The detection threshold for the Grounding DINO model. All detected bounding boxes with scores above this threshold will be returned.",
+        ge=0.0,
+        le=1.0,
+        default=0.3,
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> BoundingBoxCollectionOutput:
+        # The model expects a 3-channel RGB image.
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+        # The whole prompt is passed as a single candidate label.
+        detections = self._detect(
+            context=context, image=image_pil, labels=[self.prompt], threshold=self.detection_threshold
+        )
+
+        # Convert detections to BoundingBoxCollectionOutput.
+        bounding_boxes: list[BoundingBoxField] = []
+        for detection in detections:
+            bounding_boxes.append(
+                BoundingBoxField(  # field names differ: xmin/xmax/ymin/ymax -> x_min/x_max/y_min/y_max
+                    x_min=detection.box.xmin,
+                    x_max=detection.box.xmax,
+                    y_min=detection.box.ymin,
+                    y_max=detection.box.ymax,
+                    score=detection.score,
+                )
+            )
+        return BoundingBoxCollectionOutput(collection=bounding_boxes)
+
+    @staticmethod
+    def _load_grounding_dino(model_path: Path):  # loader callback handed to load_remote_model() in _detect()
+        grounding_dino_pipeline = pipeline(
+            model=str(model_path),
+            task="zero-shot-object-detection",
+            local_files_only=True,  # load strictly from model_path; no download is attempted here
+            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
+            # model, and figure out how to make it work in the pipeline.
+            # torch_dtype=TorchDevice.choose_torch_dtype(),
+        )
+        assert isinstance(grounding_dino_pipeline, ZeroShotObjectDetectionPipeline)
+        return GroundingDinoPipeline(grounding_dino_pipeline)  # wrapped in the project's own pipeline class
+
+    def _detect(
+        self,
+        context: InvocationContext,
+        image: Image.Image,
+        labels: list[str],
+        threshold: float = 0.3,
+    ) -> list[DetectionResult]:
+        """Use Grounding DINO to detect bounding boxes for a set of labels in an image."""
+        # TODO(ryand): I copied this "."-handling logic from the transformers example code. Test it and see if it
+        # actually makes a difference.
+        labels = [label if label.endswith(".") else label + "." for label in labels]  # every label ends with "."
+
+        with context.models.load_remote_model(  # the model id is looked up from self.model
+            source=GROUNDING_DINO_MODEL_IDS[self.model], loader=GroundingDinoInvocation._load_grounding_dino
+        ) as detector:
+            assert isinstance(detector, GroundingDinoPipeline)
+            return detector.detect(image=image, candidate_labels=labels, threshold=threshold)
--- a/invokeai/app/invocations/hed.py
+++ b/invokeai/app/invocations/hed.py
@@ -0,0 +1,33 @@
+from builtins import bool
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.hed import ControlNetHED_Apache2, HEDEdgeDetector
+
+
+@invocation(
+    "hed_edge_detection",
+    title="HED Edge Detection",
+    tags=["controlnet", "hed", "softedge"],
+    category="controlnet",
+    version="1.0.0",
+)
+class HEDEdgeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an edge map using the HED (softedge) model."""
+
+    image: ImageField = InputField(description="The image to process")
+    scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        # Load the input image (passing "RGB" to get_pil) and run the HED detector on it.
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        model_url = HEDEdgeDetector.get_model_url()
+        with context.models.load_remote_model(model_url, HEDEdgeDetector.load_model) as hed_model:
+            assert isinstance(hed_model, ControlNetHED_Apache2)
+            edge_map = HEDEdgeDetector(hed_model).run(image=rgb_image, scribble=self.scribble)
+
+        # Save the edge map and return it as the node's image output.
+        saved_image = context.images.save(image=edge_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/image.py
+++ b/invokeai/app/invocations/image.py
@@ -6,13 +6,19 @@ import cv2
 import numpy
 from PIL import Image, ImageChops, ImageFilter, ImageOps

-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    Classification,
+    invocation,
+    invocation_output,
+)
 from invokeai.app.invocations.constants import IMAGE_MODES
 from invokeai.app.invocations.fields import (
    ColorField,
    FieldDescriptions,
    ImageField,
    InputField,
+    OutputField,
    WithBoard,
    WithMetadata,
 )
@@ -1007,3 +1013,62 @@ class MaskFromIDInvocation(BaseInvocation, WithMetadata, WithBoard):
        image_dto = context.images.save(image=mask, image_category=ImageCategory.MASK)

        return ImageOutput.build(image_dto)
+
+
+@invocation_output("canvas_v2_mask_and_crop_output")
+class CanvasV2MaskAndCropOutput(ImageOutput):
+    offset_x: int = OutputField(description="The x offset of the image, after cropping")
+    offset_y: int = OutputField(description="The y offset of the image, after cropping")
+
+
+@invocation(
+    "canvas_v2_mask_and_crop",
+    title="Canvas V2 Mask and Crop",
+    tags=["image", "mask", "id"],
+    category="image",
+    version="1.0.0",
+    classification=Classification.Prototype,
+)
+class CanvasV2MaskAndCropInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Handles Canvas V2 image output masking and cropping"""
+
+    source_image: ImageField | None = InputField(
+        default=None,
+        description="The source image onto which the masked generated image is pasted. If omitted, the masked generated image is returned with transparency.",
+    )
+    generated_image: ImageField = InputField(description="The image to apply the mask to")
+    mask: ImageField = InputField(description="The mask to apply")
+    mask_blur: int = InputField(default=0, ge=0, description="The amount to blur the mask by")
+
+    def _prepare_mask(self, mask: Image.Image) -> Image.Image:
+        """Erode and Gaussian-blur `mask` when mask_blur > 0, then return it inverted as an "L"-mode image."""
+        if self.mask_blur > 0:
+            # The eroded mask only feeds the blur, so skip both for mask_blur == 0 (avoids an empty 0x0 kernel).
+            kernel = numpy.ones((self.mask_blur, self.mask_blur), numpy.uint8)
+            eroded_mask = Image.fromarray(cv2.erode(numpy.array(mask), kernel, iterations=3))
+            mask = eroded_mask.filter(ImageFilter.GaussianBlur(self.mask_blur))
+        return ImageOps.invert(mask.convert("L"))
+
+    def invoke(self, context: InvocationContext) -> CanvasV2MaskAndCropOutput:
+        mask = self._prepare_mask(context.images.get_pil(self.mask.image_name))
+        generated_image = context.images.get_pil(self.generated_image.image_name)
+
+        if self.source_image:
+            source_image = context.images.get_pil(self.source_image.image_name)
+            source_image.paste(generated_image, (0, 0), mask)
+            image_dto = context.images.save(image=source_image)
+        else:
+            # No source image: the prepared mask becomes the generated image's alpha channel.
+            generated_image.putalpha(mask)
+            image_dto = context.images.save(image=generated_image)
+
+        # bbox = image.getbbox()
+        # image = image.crop(bbox)
+
+        return CanvasV2MaskAndCropOutput(
+            image=ImageField(image_name=image_dto.image_name),
+            offset_x=0,
+            offset_y=0,
+            width=image_dto.width,
+            height=image_dto.height,
+        )
--- a/invokeai/app/invocations/latents_to_image.py
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -24,7 +24,7 @@ from invokeai.app.invocations.fields import (
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice

@@ -59,7 +59,7 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

        vae_info = context.models.load(self.vae.vae)
        assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
            assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
            latents = latents.to(vae.device)
            if self.fp32:
--- a/invokeai/app/invocations/lineart.py
+++ b/invokeai/app/invocations/lineart.py
@@ -0,0 +1,34 @@
+from builtins import bool
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.lineart import Generator, LineartEdgeDetector
+
+
+@invocation(
+    "lineart_edge_detection",
+    title="Lineart Edge Detection",
+    tags=["controlnet", "lineart"],
+    category="controlnet",
+    version="1.0.0",
+)
+class LineartEdgeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an edge map using the Lineart model."""
+
+    image: ImageField = InputField(description="The image to process")
+    coarse: bool = InputField(default=False, description="Whether to use coarse mode")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        # The coarse flag is passed to get_model_url, which picks the model URL to load.
+        model_url = LineartEdgeDetector.get_model_url(self.coarse)
+
+        with context.models.load_remote_model(model_url, LineartEdgeDetector.load_model) as lineart_model:
+            assert isinstance(lineart_model, Generator)
+            edge_map = LineartEdgeDetector(lineart_model).run(image=rgb_image)
+
+        # Save the edge map and return it as the node's image output.
+        saved_image = context.images.save(image=edge_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/lineart_anime.py
+++ b/invokeai/app/invocations/lineart_anime.py
@@ -0,0 +1,31 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.lineart_anime import LineartAnimeEdgeDetector, UnetGenerator
+
+
+@invocation(
+    "lineart_anime_edge_detection",
+    title="Lineart Anime Edge Detection",
+    tags=["controlnet", "lineart"],
+    category="controlnet",
+    version="1.0.0",
+)
+class LineartAnimeEdgeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an edge map using the Lineart Anime model."""
+
+    image: ImageField = InputField(description="The image to process")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        model_url = LineartAnimeEdgeDetector.get_model_url()
+
+        # Run the detector while the loaded model is held open by the context manager.
+        with context.models.load_remote_model(model_url, LineartAnimeEdgeDetector.load_model) as anime_model:
+            assert isinstance(anime_model, UnetGenerator)
+            edge_map = LineartAnimeEdgeDetector(anime_model).run(image=rgb_image)
+
+        # Save the edge map and return it as the node's image output.
+        saved_image = context.images.save(image=edge_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/mask.py
+++ b/invokeai/app/invocations/mask.py
@@ -1,9 +1,10 @@
 import numpy as np
 import torch
+from PIL import Image

 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
-from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithMetadata
-from invokeai.app.invocations.primitives import MaskOutput
+from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput, MaskOutput


@invocation(
@@ -118,3 +119,32 @@ class ImageMaskToTensorInvocation(BaseInvocation, WithMetadata):
            height=mask.shape[1],
            width=mask.shape[2],
        )
+
+
+@invocation(
+    "tensor_mask_to_image",
+    title="Tensor Mask to Image",
+    tags=["mask"],
+    category="mask",
+    version="1.1.0",
+)
+class MaskTensorToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Convert a mask tensor to an image."""
+
+    mask: TensorField = InputField(description="The mask tensor to convert.")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        tensor = context.tensors.load(self.mask.tensor_name)
+
+        # For a 3-dim tensor, squeeze dim 0 (this only removes it when its size is 1).
+        tensor = tensor.squeeze(0) if tensor.dim() == 3 else tensor
+
+        # Threshold non-boolean masks at 0.5 so every element is either off or on.
+        binary = tensor if tensor.dtype == torch.bool else tensor > 0.5
+        # Scale False/True to 0/255 and convert to a uint8 numpy array on the CPU.
+        pixels = (binary.float() * 255).byte().cpu().numpy()
+
+        # Build an "L"-mode PIL image from the array and save it.
+        mask_pil = Image.fromarray(pixels, mode="L")
+        image_dto = context.images.save(image=mask_pil)
+        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/mediapipe_face.py
+++ b/invokeai/app/invocations/mediapipe_face.py
@@ -0,0 +1,26 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.mediapipe_face import detect_faces
+
+
+@invocation(
+    "mediapipe_face_detection",
+    title="MediaPipe Face Detection",
+    tags=["controlnet", "face"],
+    category="controlnet",
+    version="1.0.0",
+)
+class MediaPipeFaceDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Detects faces using MediaPipe."""
+
+    image: ImageField = InputField(description="The image to process")
+    max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect")
+    min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        # detect_faces returns the image that is saved as this node's output.
+        faces_image = detect_faces(image=rgb_image, max_faces=self.max_faces, min_confidence=self.min_confidence)
+        return ImageOutput.build(context.images.save(image=faces_image))
--- a/invokeai/app/invocations/mlsd.py
+++ b/invokeai/app/invocations/mlsd.py
@@ -0,0 +1,39 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.mlsd import MLSDDetector
+from invokeai.backend.image_util.mlsd.models.mbv2_mlsd_large import MobileV2_MLSD_Large
+
+
+@invocation(
+    "mlsd_detection",
+    title="MLSD Detection",
+    tags=["controlnet", "mlsd", "edge"],
+    category="controlnet",
+    version="1.0.0",
+)
+class MLSDDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates a line segment map using MLSD."""
+
+    image: ImageField = InputField(description="The image to process")
+    score_threshold: float = InputField(
+        default=0.1, ge=0, description="The threshold used to score points when determining line segments"
+    )
+    distance_threshold: float = InputField(
+        default=20.0,
+        ge=0,
+        description="Threshold for including a line segment - lines shorter than this distance will be discarded",
+    )
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        model_url = MLSDDetector.get_model_url()
+
+        with context.models.load_remote_model(model_url, MLSDDetector.load_model) as mlsd_model:
+            assert isinstance(mlsd_model, MobileV2_MLSD_Large)
+            # Both thresholds are forwarded positionally: score threshold first, then distance threshold.
+            line_map = MLSDDetector(mlsd_model).run(rgb_image, self.score_threshold, self.distance_threshold)
+
+        saved_image = context.images.save(image=line_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@@ -1,5 +1,5 @@
 import copy
-from typing import List, Optional
+from typing import List, Literal, Optional

 from pydantic import BaseModel, Field

@@ -13,7 +13,14 @@ from invokeai.app.invocations.baseinvocation import (
 from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
-from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelType, SubModelType
+from invokeai.backend.flux.util import max_seq_lengths
+from invokeai.backend.model_manager.config import (
+    AnyModelConfig,
+    BaseModelType,
+    CheckpointConfigBase,
+    ModelType,
+    SubModelType,
+)


 class ModelIdentifierField(BaseModel):
@@ -60,6 +67,15 @@ class CLIPField(BaseModel):
    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


+class TransformerField(BaseModel):
+    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
+
+
+class T5EncoderField(BaseModel):
+    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
+    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
+
+
 class VAEField(BaseModel):
    vae: ModelIdentifierField = Field(description="Info to load vae submodel")
    seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')
@@ -122,6 +138,78 @@ class ModelIdentifierInvocation(BaseInvocation):
        return ModelIdentifierOutput(model=self.model)


+@invocation_output("flux_model_loader_output")
+class FluxModelLoaderOutput(BaseInvocationOutput):
+    """Flux base model loader output: the submodel fields plus the T5 max sequence length."""
+
+    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
+    clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP")
+    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
+    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
+    max_seq_len: Literal[256, 512] = OutputField(
+        description="The max sequence length to use for the T5 encoder. (256 for schnell transformer, 512 for dev transformer)",
+        title="Max Seq Length",
+    )
+
+
+@invocation(
+    "flux_model_loader",
+    title="Flux Main Model",
+    tags=["model", "flux"],
+    category="model",
+    version="1.0.4",
+    classification=Classification.Prototype,
+)
+class FluxModelLoaderInvocation(BaseInvocation):
+    """Loads a flux base model, outputting its submodels."""
+
+    model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.flux_model,
+        ui_type=UIType.FluxMainModel,
+        input=Input.Direct,
+    )
+
+    t5_encoder_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.t5_encoder, ui_type=UIType.T5EncoderModel, input=Input.Direct, title="T5 Encoder"
+    )
+
+    clip_embed_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.clip_embed_model,
+        ui_type=UIType.CLIPEmbedModel,
+        input=Input.Direct,
+        title="CLIP Embed",
+    )
+
+    vae_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.vae_model, ui_type=UIType.FluxVAEModel, title="VAE"
+    )
+
+    def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput:
+        for key in [self.model.key, self.t5_encoder_model.key, self.clip_embed_model.key, self.vae_model.key]:
+            if not context.models.exists(key):
+                raise ValueError(f"Unknown model: {key}")
+        # Copy each selected model identifier, tagging the copy with the submodel type it should load.
+        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
+        vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
+
+        tokenizer = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
+        clip_encoder = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
+
+        tokenizer2 = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer2})
+        t5_encoder = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
+        # The transformer's config_path is the key used below to look up max_seq_len in max_seq_lengths.
+        transformer_config = context.models.get_config(transformer)
+        assert isinstance(transformer_config, CheckpointConfigBase)
+
+        return FluxModelLoaderOutput(
+            transformer=TransformerField(transformer=transformer),
+            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
+            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
+            vae=VAEField(vae=vae),
+            max_seq_len=max_seq_lengths[transformer_config.config_path],
+        )
+
+
@invocation(
    "main_model_loader",
    title="Main Model",
--- a/invokeai/app/invocations/normal_bae.py
+++ b/invokeai/app/invocations/normal_bae.py
@@ -0,0 +1,31 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.normal_bae import NormalMapDetector
+from invokeai.backend.image_util.normal_bae.nets.NNET import NNET
+
+
+@invocation(
+    "normal_map",
+    title="Normal Map",
+    tags=["controlnet", "normal"],
+    category="controlnet",
+    version="1.0.0",
+)
+class NormalMapInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates a normal map."""
+
+    image: ImageField = InputField(description="The image to process")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        model_url = NormalMapDetector.get_model_url()
+
+        with context.models.load_remote_model(model_url, NormalMapDetector.load_model) as nnet_model:
+            assert isinstance(nnet_model, NNET)
+            # The detector wraps the loaded NNET model and is run on the input image.
+            normal_map = NormalMapDetector(nnet_model).run(image=rgb_image)
+
+        saved_image = context.images.save(image=normal_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/pidi.py
+++ b/invokeai/app/invocations/pidi.py
@@ -0,0 +1,33 @@
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import FieldDescriptions, ImageField, InputField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.pidi import PIDINetDetector
+from invokeai.backend.image_util.pidi.model import PiDiNet
+
+
+@invocation(
+    "pidi_edge_detection",
+    title="PiDiNet Edge Detection",
+    tags=["controlnet", "edge"],
+    category="controlnet",
+    version="1.0.0",
+)
+class PiDiNetEdgeDetectionInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an edge map using PiDiNet."""
+
+    image: ImageField = InputField(description="The image to process")
+    quantize_edges: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
+    scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        rgb_image = context.images.get_pil(self.image.image_name, "RGB")
+        model_url = PIDINetDetector.get_model_url()
+        with context.models.load_remote_model(model_url, PIDINetDetector.load_model) as pidi_model:
+            assert isinstance(pidi_model, PiDiNet)
+            detector = PIDINetDetector(pidi_model)
+            edge_map = detector.run(image=rgb_image, quantize_edges=self.quantize_edges, scribble=self.scribble)
+
+        # Save the edge map and return it as the node's image output.
+        saved_image = context.images.save(image=edge_map)
+        return ImageOutput.build(saved_image)
--- a/invokeai/app/invocations/primitives.py
+++ b/invokeai/app/invocations/primitives.py
@@ -7,10 +7,12 @@ import torch
 from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
 from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
 from invokeai.app.invocations.fields import (
+    BoundingBoxField,
    ColorField,
    ConditioningField,
    DenoiseMaskField,
    FieldDescriptions,
+    FluxConditioningField,
    ImageField,
    Input,
    InputField,
@@ -413,6 +415,17 @@ class MaskOutput(BaseInvocationOutput):
    height: int = OutputField(description="The height of the mask in pixels.")


+@invocation_output("flux_conditioning_output")
+class FluxConditioningOutput(BaseInvocationOutput):
+    """Output for nodes that produce a single FluxConditioningField (referenced by its conditioning name)"""
+
+    conditioning: FluxConditioningField = OutputField(description=FieldDescriptions.cond)
+
+    @classmethod
+    def build(cls, conditioning_name: str) -> "FluxConditioningOutput":
+        return cls(conditioning=FluxConditioningField(conditioning_name=conditioning_name))
+
+
@invocation_output("conditioning_output")
 class ConditioningOutput(BaseInvocationOutput):
    """Base class for nodes that output a single conditioning tensor"""
@@ -469,3 +482,42 @@ class ConditioningCollectionInvocation(BaseInvocation):


 # endregion
+
+# region BoundingBox
+
+
+@invocation_output("bounding_box_output")
+class BoundingBoxOutput(BaseInvocationOutput):
+    """Base class for nodes that output a single bounding box"""
+
+    bounding_box: BoundingBoxField = OutputField(description="The output bounding box.")
+
+
+@invocation_output("bounding_box_collection_output")
+class BoundingBoxCollectionOutput(BaseInvocationOutput):
+    """Base class for nodes that output a collection of bounding boxes"""
+
+    collection: list[BoundingBoxField] = OutputField(description="The output bounding boxes.", title="Bounding Boxes")
+
+
+@invocation(
+    "bounding_box",
+    title="Bounding Box",
+    tags=["primitives", "segmentation", "collection", "bounding box"],
+    category="primitives",
+    version="1.0.0",
+)
+class BoundingBoxInvocation(BaseInvocation):
+    """Create a bounding box manually by supplying box coordinates"""
+
+    x_min: int = InputField(default=0, description="x-coordinate of the bounding box's top left vertex")
+    y_min: int = InputField(default=0, description="y-coordinate of the bounding box's top left vertex")
+    x_max: int = InputField(default=0, description="x-coordinate of the bounding box's bottom right vertex")
+    y_max: int = InputField(default=0, description="y-coordinate of the bounding box's bottom right vertex")
+
+    def invoke(self, context: InvocationContext) -> BoundingBoxOutput:
+        box = BoundingBoxField(x_min=self.x_min, x_max=self.x_max, y_min=self.y_min, y_max=self.y_max)
+        return BoundingBoxOutput(bounding_box=box)
+
+
+# endregion
--- a/invokeai/app/invocations/segment_anything.py
+++ b/invokeai/app/invocations/segment_anything.py
@@ -0,0 +1,161 @@
+from pathlib import Path
+from typing import Literal
+
+import numpy as np
+import torch
+from PIL import Image
+from transformers import AutoModelForMaskGeneration, AutoProcessor
+from transformers.models.sam import SamModel
+from transformers.models.sam.processing_sam import SamProcessor
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
+from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField, TensorField
+from invokeai.app.invocations.primitives import MaskOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.segment_anything.mask_refinement import mask_to_polygon, polygon_to_mask
+from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
+
+SegmentAnythingModelKey = Literal["segment-anything-base", "segment-anything-large", "segment-anything-huge"]
+SEGMENT_ANYTHING_MODEL_IDS: dict[SegmentAnythingModelKey, str] = {
+    "segment-anything-base": "facebook/sam-vit-base",
+    "segment-anything-large": "facebook/sam-vit-large",
+    "segment-anything-huge": "facebook/sam-vit-huge",
+}
+
+
+@invocation(
+    "segment_anything",
+    title="Segment Anything",
+    tags=["prompt", "segmentation"],
+    category="segmentation",
+    version="1.0.0",
+)
+class SegmentAnythingInvocation(BaseInvocation):
+    """Runs a Segment Anything Model."""
+
+    # Reference:
+    # - https://arxiv.org/pdf/2304.02643
+    # - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
+    # - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb
+
+    model: SegmentAnythingModelKey = InputField(description="The Segment Anything model to use.")
+    image: ImageField = InputField(description="The image to segment.")
+    bounding_boxes: list[BoundingBoxField] = InputField(description="The bounding boxes to prompt the SAM model with.")
+    apply_polygon_refinement: bool = InputField(
+        description="Whether to apply polygon refinement to the masks. This will smooth the edges of the masks slightly and ensure that each mask consists of a single closed polygon (before merging).",
+        default=True,
+    )
+    mask_filter: Literal["all", "largest", "highest_box_score"] = InputField(
+        description="The filtering to apply to the detected masks before merging them into a final output.",
+        default="all",
+    )
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> MaskOutput:
+        # The models expect a 3-channel RGB image.
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        if len(self.bounding_boxes) == 0:
+            combined_mask = torch.zeros(image_pil.size[::-1], dtype=torch.bool)
+        else:
+            masks = self._segment(context=context, image=image_pil)
+            masks = self._filter_masks(masks=masks, bounding_boxes=self.bounding_boxes)
+
+            # masks contains bool values, so we merge them via max-reduce.
+            combined_mask, _ = torch.stack(masks).max(dim=0)
+
+        mask_tensor_name = context.tensors.save(combined_mask)
+        height, width = combined_mask.shape
+        return MaskOutput(mask=TensorField(tensor_name=mask_tensor_name), width=width, height=height)
+
+    @staticmethod
+    def _load_sam_model(model_path: Path):
+        sam_model = AutoModelForMaskGeneration.from_pretrained(
+            model_path,
+            local_files_only=True,
+            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
+            # model, and figure out how to make it work in the pipeline.
+            # torch_dtype=TorchDevice.choose_torch_dtype(),
+        )
+        assert isinstance(sam_model, SamModel)
+
+        sam_processor = AutoProcessor.from_pretrained(model_path, local_files_only=True)
+        assert isinstance(sam_processor, SamProcessor)
+        return SegmentAnythingPipeline(sam_model=sam_model, sam_processor=sam_processor)
+
+    def _segment(
+        self,
+        context: InvocationContext,
+        image: Image.Image,
+    ) -> list[torch.Tensor]:
+        """Use Segment Anything (SAM) to generate masks given an image + a set of bounding boxes."""
+        # Convert the bounding boxes to the SAM input format.
+        sam_bounding_boxes = [[bb.x_min, bb.y_min, bb.x_max, bb.y_max] for bb in self.bounding_boxes]
+
+        with (
+            context.models.load_remote_model(
+                source=SEGMENT_ANYTHING_MODEL_IDS[self.model], loader=SegmentAnythingInvocation._load_sam_model
+            ) as sam_pipeline,
+        ):
+            assert isinstance(sam_pipeline, SegmentAnythingPipeline)
+            masks = sam_pipeline.segment(image=image, bounding_boxes=sam_bounding_boxes)
+
+        masks = self._process_masks(masks)
+        if self.apply_polygon_refinement:
+            masks = self._apply_polygon_refinement(masks)
+
+        return masks
+
+    def _process_masks(self, masks: torch.Tensor) -> list[torch.Tensor]:
+        """Convert the tensor output from the Segment Anything model from a tensor of shape
+        [num_masks, channels, height, width] to a list of tensors of shape [height, width].
+        """
+        assert masks.dtype == torch.bool
+        # [num_masks, channels, height, width] -> [num_masks, height, width]
+        masks, _ = masks.max(dim=1)
+        # Split the first dimension into a list of masks.
+        return list(masks.cpu().unbind(dim=0))
+
+    def _apply_polygon_refinement(self, masks: list[torch.Tensor]) -> list[torch.Tensor]:
+        """Apply polygon refinement to the masks.
+
+        Convert each mask to a polygon, then back to a mask. This has the following effect:
+        - Smooth the edges of the mask slightly.
+        - Ensure that each mask consists of a single closed polygon
+            - Removes small mask pieces.
+            - Removes holes from the mask.
+        """
+        # Convert tensor masks to np masks.
+        np_masks = [mask.cpu().numpy().astype(np.uint8) for mask in masks]
+
+        # Apply polygon refinement.
+        for idx, mask in enumerate(np_masks):
+            shape = mask.shape
+            assert len(shape) == 2  # Assert length to satisfy type checker.
+            polygon = mask_to_polygon(mask)
+            mask = polygon_to_mask(polygon, shape)
+            np_masks[idx] = mask
+
+        # Convert np masks back to tensor masks.
+        masks = [torch.tensor(mask, dtype=torch.bool) for mask in np_masks]
+
+        return masks
+
+    def _filter_masks(self, masks: list[torch.Tensor], bounding_boxes: list[BoundingBoxField]) -> list[torch.Tensor]:
+        """Filter the detected masks based on the specified mask filter."""
+        assert len(masks) == len(bounding_boxes)
+
+        if self.mask_filter == "all":
+            return masks
+        elif self.mask_filter == "largest":
+            # Find the largest mask.
+            return [max(masks, key=lambda x: float(x.sum()))]
+        elif self.mask_filter == "highest_box_score":
+            # Pick the mask whose bounding box has the highest score. A missing (None) score ranks as -1.0, i.e.
+            # below the expected [0.0, 1.0] range. The None check is explicit because `score or -1.0` would also
+            # demote a legitimate score of 0.0.
+            scores = [-1.0 if bb.score is None else bb.score for bb in bounding_boxes]
+            max_score_idx = max(range(len(scores)), key=scores.__getitem__)
+            return [masks[max_score_idx]]
+        else:
+            raise ValueError(f"Invalid mask filter: {self.mask_filter}")
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -91,6 +91,7 @@ class InvokeAIAppConfig(BaseSettings):
        db_dir: Path to InvokeAI databases directory.
        outputs_dir: Path to directory for outputs.
        custom_nodes_dir: Path to directory for custom nodes.
+        style_presets_dir: Path to directory for style presets.
        log_handlers: Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>".
        log_format: Log format. Use "plain" for text-only, "color" for colorized output, "legacy" for 2.3-style logging and "syslog" for syslog-style.<br>Valid values: `plain`, `color`, `syslog`, `legacy`
        log_level: Emit logging messages at this level or higher.<br>Valid values: `debug`, `info`, `warning`, `error`, `critical`
@@ -153,6 +154,7 @@ class InvokeAIAppConfig(BaseSettings):
    db_dir:                        Path = Field(default=Path("databases"),  description="Path to InvokeAI databases directory.")
    outputs_dir:                   Path = Field(default=Path("outputs"),    description="Path to directory for outputs.")
    custom_nodes_dir:              Path = Field(default=Path("nodes"),      description="Path to directory for custom nodes.")
+    style_presets_dir:      Path = Field(default=Path("style_presets"),      description="Path to directory for style presets.")

    # LOGGING
    log_handlers:             list[str] = Field(default=["console"],        description='Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>".')
@@ -300,6 +302,11 @@ class InvokeAIAppConfig(BaseSettings):
        """Path to the models directory, resolved to an absolute path.."""
        return self._resolve(self.models_dir)

+    @property
+    def style_presets_path(self) -> Path:
+        """Path to the style presets directory, resolved to an absolute path."""
+        return self._resolve(self.style_presets_dir)
+
    @property
    def convert_cache_path(self) -> Path:
        """Path to the converted cache models directory, resolved to an absolute path.."""
--- a/invokeai/app/services/events/events_common.py
+++ b/invokeai/app/services/events/events_common.py
@@ -88,6 +88,8 @@ class QueueItemEventBase(QueueEventBase):

    item_id: int = Field(description="The ID of the queue item")
    batch_id: str = Field(description="The ID of the queue batch")
+    origin: str | None = Field(default=None, description="The origin of the queue item")
+    destination: str | None = Field(default=None, description="The destination of the queue item")


 class InvocationEventBase(QueueItemEventBase):
@@ -95,8 +97,6 @@ class InvocationEventBase(QueueItemEventBase):

    session_id: str = Field(description="The ID of the session (aka graph execution state)")
    queue_id: str = Field(description="The ID of the queue")
-    item_id: int = Field(description="The ID of the queue item")
-    batch_id: str = Field(description="The ID of the queue batch")
    session_id: str = Field(description="The ID of the session (aka graph execution state)")
    invocation: AnyInvocation = Field(description="The ID of the invocation")
    invocation_source_id: str = Field(description="The ID of the prepared invocation's source node")
@@ -114,6 +114,8 @@ class InvocationStartedEvent(InvocationEventBase):
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
+            origin=queue_item.origin,
+            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -147,6 +149,8 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
+            origin=queue_item.origin,
+            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -184,6 +188,8 @@ class InvocationCompleteEvent(InvocationEventBase):
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
+            origin=queue_item.origin,
+            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -216,6 +222,8 @@ class InvocationErrorEvent(InvocationEventBase):
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
+            origin=queue_item.origin,
+            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -253,6 +261,8 @@ class QueueItemStatusChangedEvent(QueueItemEventBase):
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
+            origin=queue_item.origin,
+            destination=queue_item.destination,
            session_id=queue_item.session_id,
            status=queue_item.status,
            error_type=queue_item.error_type,
@@ -279,12 +289,14 @@ class BatchEnqueuedEvent(QueueEventBase):
        description="The number of invocations initially requested to be enqueued (may be less than enqueued if queue was full)"
    )
    priority: int = Field(description="The priority of the batch")
+    origin: str | None = Field(default=None, description="The origin of the batch")

    @classmethod
    def build(cls, enqueue_result: EnqueueBatchResult) -> "BatchEnqueuedEvent":
        return cls(
            queue_id=enqueue_result.queue_id,
            batch_id=enqueue_result.batch.batch_id,
+            origin=enqueue_result.batch.origin,
            enqueued=enqueue_result.enqueued,
            requested=enqueue_result.requested,
            priority=enqueue_result.priority,
--- a/invokeai/app/services/events/events_fastapievents.py
+++ b/invokeai/app/services/events/events_fastapievents.py
@@ -1,46 +1,44 @@
-# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)
-
 import asyncio
 import threading
-from queue import Empty, Queue

 from fastapi_events.dispatcher import dispatch

 from invokeai.app.services.events.events_base import EventServiceBase
-from invokeai.app.services.events.events_common import (
-    EventBase,
-)
+from invokeai.app.services.events.events_common import EventBase


 class FastAPIEventService(EventServiceBase):
-    def __init__(self, event_handler_id: int) -> None:
+    def __init__(self, event_handler_id: int, loop: asyncio.AbstractEventLoop) -> None:
        self.event_handler_id = event_handler_id
-        self._queue = Queue[EventBase | None]()
+        self._queue = asyncio.Queue[EventBase | None]()
        self._stop_event = threading.Event()
-        asyncio.create_task(self._dispatch_from_queue(stop_event=self._stop_event))
+        self._loop = loop
+
+        # We need to store a reference to the task so it doesn't get GC'd
+        # See: https://docs.python.org/3/library/asyncio-task.html#creating-tasks
+        self._background_tasks: set[asyncio.Task[None]] = set()
+        task = self._loop.create_task(self._dispatch_from_queue(stop_event=self._stop_event))
+        self._background_tasks.add(task)
+        task.add_done_callback(self._background_tasks.remove)

        super().__init__()

    def stop(self, *args, **kwargs):
        self._stop_event.set()
-        self._queue.put(None)
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, None)

    def dispatch(self, event: EventBase) -> None:
-        self._queue.put(event)
+        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)

    async def _dispatch_from_queue(self, stop_event: threading.Event):
        """Get events on from the queue and dispatch them, from the correct thread"""
        while not stop_event.is_set():
            try:
-                event = self._queue.get(block=False)
+                event = await self._queue.get()
                if not event:  # Probably stopping
                    continue
                # Leave the payloads as live pydantic models
                dispatch(event, middleware_id=self.event_handler_id, payload_schema_dump=False)

-            except Empty:
-                await asyncio.sleep(0.1)
-                pass
-
            except asyncio.CancelledError as e:
                raise e  # Raise a proper error
--- a/invokeai/app/services/image_files/image_files_disk.py
+++ b/invokeai/app/services/image_files/image_files_disk.py
@@ -1,11 +1,10 @@
 # Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654) and the InvokeAI Team
 from pathlib import Path
 from queue import Queue
-from typing import Dict, Optional, Union
+from typing import Optional, Union

 from PIL import Image, PngImagePlugin
 from PIL.Image import Image as PILImageType
-from send2trash import send2trash

 from invokeai.app.services.image_files.image_files_base import ImageFileStorageBase
 from invokeai.app.services.image_files.image_files_common import (
@@ -20,18 +19,12 @@ from invokeai.app.util.thumbnails import get_thumbnail_name, make_thumbnail
 class DiskImageFileStorage(ImageFileStorageBase):
    """Stores images on disk"""

-    __output_folder: Path
-    __cache_ids: Queue  # TODO: this is an incredibly naive cache
-    __cache: Dict[Path, PILImageType]
-    __max_cache_size: int
-    __invoker: Invoker
-
    def __init__(self, output_folder: Union[str, Path]):
-        self.__cache = {}
-        self.__cache_ids = Queue()
+        self.__cache: dict[Path, PILImageType] = {}
+        self.__cache_ids = Queue[Path]()
        self.__max_cache_size = 10  # TODO: get this from config

-        self.__output_folder: Path = output_folder if isinstance(output_folder, Path) else Path(output_folder)
+        self.__output_folder = output_folder if isinstance(output_folder, Path) else Path(output_folder)
        self.__thumbnails_folder = self.__output_folder / "thumbnails"
        # Validate required output folders at launch
        self.__validate_storage_folders()
@@ -103,7 +96,7 @@ class DiskImageFileStorage(ImageFileStorageBase):
            image_path = self.get_path(image_name)

            if image_path.exists():
-                send2trash(image_path)
+                image_path.unlink()
            if image_path in self.__cache:
                del self.__cache[image_path]

@@ -111,7 +104,7 @@ class DiskImageFileStorage(ImageFileStorageBase):
            thumbnail_path = self.get_path(thumbnail_name, True)

            if thumbnail_path.exists():
-                send2trash(thumbnail_path)
+                thumbnail_path.unlink()
            if thumbnail_path in self.__cache:
                del self.__cache[thumbnail_path]
        except Exception as e:
--- a/invokeai/app/services/invocation_services.py
+++ b/invokeai/app/services/invocation_services.py
@@ -4,6 +4,8 @@ from __future__ import annotations
 from typing import TYPE_CHECKING

 from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase
+from invokeai.app.services.style_preset_images.style_preset_images_base import StylePresetImageFileStorageBase
+from invokeai.app.services.style_preset_records.style_preset_records_base import StylePresetRecordsStorageBase

 if TYPE_CHECKING:
    from logging import Logger
@@ -61,6 +63,8 @@ class InvocationServices:
        workflow_records: "WorkflowRecordsStorageBase",
        tensors: "ObjectSerializerBase[torch.Tensor]",
        conditioning: "ObjectSerializerBase[ConditioningFieldData]",
+        style_preset_records: "StylePresetRecordsStorageBase",
+        style_preset_image_files: "StylePresetImageFileStorageBase",
    ):
        self.board_images = board_images
        self.board_image_records = board_image_records
@@ -85,3 +89,5 @@ class InvocationServices:
        self.workflow_records = workflow_records
        self.tensors = tensors
        self.conditioning = conditioning
+        self.style_preset_records = style_preset_records
+        self.style_preset_image_files = style_preset_image_files
--- a/invokeai/app/services/model_images/model_images_default.py
+++ b/invokeai/app/services/model_images/model_images_default.py
@@ -2,7 +2,6 @@ from pathlib import Path

 from PIL import Image
 from PIL.Image import Image as PILImageType
-from send2trash import send2trash

 from invokeai.app.services.invoker import Invoker
 from invokeai.app.services.model_images.model_images_base import ModelImageFileStorageBase
@@ -70,7 +69,7 @@ class ModelImageFileStorageDisk(ModelImageFileStorageBase):
            if not self._validate_path(path):
                raise ModelImageFileNotFoundException

-            send2trash(path)
+            path.unlink()

        except Exception as e:
            raise ModelImageFileDeleteException from e
--- a/invokeai/app/services/model_install/model_install_common.py
+++ b/invokeai/app/services/model_install/model_install_common.py
@@ -103,7 +103,7 @@ class HFModelSource(StringLikeSource):
        if self.variant:
            base += f":{self.variant or ''}"
        if self.subfolder:
-            base += f":{self.subfolder}"
+            base += f"::{self.subfolder.as_posix()}"
        return base


--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@@ -783,8 +783,9 @@ class ModelInstallService(ModelInstallServiceBase):
        # So what we do is to synthesize a folder named "sdxl-turbo_vae" here.
        if subfolder:
            top = Path(remote_files[0].path.parts[0])  # e.g. "sdxl-turbo/"
-            path_to_remove = top / subfolder.parts[-1]  # sdxl-turbo/vae/
-            path_to_add = Path(f"{top}_{subfolder}")
+            path_to_remove = top / subfolder  # sdxl-turbo/vae/
+            subfolder_rename = subfolder.name.replace("/", "_").replace("\\", "_")
+            path_to_add = Path(f"{top}_{subfolder_rename}")
        else:
            path_to_remove = Path(".")
            path_to_add = Path(".")
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -77,6 +77,7 @@ class ModelRecordChanges(BaseModelExcludeNull):
    type: Optional[ModelType] = Field(description="Type of model", default=None)
    key: Optional[str] = Field(description="Database ID for this model", default=None)
    hash: Optional[str] = Field(description="hash of model file", default=None)
+    format: Optional[str] = Field(description="format of model file", default=None)
    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
    default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field(
        description="Default settings for this model", default=None
--- a/invokeai/app/services/session_queue/session_queue_base.py
+++ b/invokeai/app/services/session_queue/session_queue_base.py
@@ -6,6 +6,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
+    CancelByDestinationResult,
    CancelByQueueIDResult,
    ClearResult,
    EnqueueBatchResult,
@@ -95,6 +96,11 @@ class SessionQueueBase(ABC):
        """Cancels all queue items with matching batch IDs"""
        pass

+    @abstractmethod
+    def cancel_by_destination(self, queue_id: str, destination: str) -> CancelByDestinationResult:
+        """Cancels all queue items with the given batch destination"""
+        pass
+
    @abstractmethod
    def cancel_by_queue_id(self, queue_id: str) -> CancelByQueueIDResult:
        """Cancels all queue items with matching queue ID"""
--- a/invokeai/app/services/session_queue/session_queue_common.py
+++ b/invokeai/app/services/session_queue/session_queue_common.py
@@ -77,6 +77,14 @@ BatchDataCollection: TypeAlias = list[list[BatchDatum]]

 class Batch(BaseModel):
    batch_id: str = Field(default_factory=uuid_string, description="The ID of the batch")
+    origin: str | None = Field(
+        default=None,
+        description="The origin of this queue item. This data is used by the frontend to determine how to handle results.",
+    )
+    destination: str | None = Field(
+        default=None,
+        description="The destination of this queue item. This data is used by the frontend to determine how to handle results",
+    )
    data: Optional[BatchDataCollection] = Field(default=None, description="The batch data collection.")
    graph: Graph = Field(description="The graph to initialize the session with")
    workflow: Optional[WorkflowWithoutID] = Field(
@@ -195,6 +203,14 @@ class SessionQueueItemWithoutGraph(BaseModel):
    status: QUEUE_ITEM_STATUS = Field(default="pending", description="The status of this queue item")
    priority: int = Field(default=0, description="The priority of this queue item")
    batch_id: str = Field(description="The ID of the batch associated with this queue item")
+    origin: str | None = Field(
+        default=None,
+        description="The origin of this queue item. This data is used by the frontend to determine how to handle results.",
+    )
+    destination: str | None = Field(
+        default=None,
+        description="The destination of this queue item. This data is used by the frontend to determine how to handle results",
+    )
    session_id: str = Field(
        description="The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed."
    )
@@ -294,6 +310,8 @@ class SessionQueueStatus(BaseModel):
 class BatchStatus(BaseModel):
    queue_id: str = Field(..., description="The ID of the queue")
    batch_id: str = Field(..., description="The ID of the batch")
+    origin: str | None = Field(..., description="The origin of the batch")
+    destination: str | None = Field(..., description="The destination of the batch")
    pending: int = Field(..., description="Number of queue items with status 'pending'")
    in_progress: int = Field(..., description="Number of queue items with status 'in_progress'")
    completed: int = Field(..., description="Number of queue items with status 'complete'")
@@ -328,6 +346,12 @@ class CancelByBatchIDsResult(BaseModel):
    canceled: int = Field(..., description="Number of queue items canceled")


+class CancelByDestinationResult(CancelByBatchIDsResult):
+    """Result of canceling by a destination"""
+
+    pass
+
+
 class CancelByQueueIDResult(CancelByBatchIDsResult):
    """Result of canceling by queue id"""

@@ -433,6 +457,8 @@ class SessionQueueValueToInsert(NamedTuple):
    field_values: Optional[str]  # field_values json
    priority: int  # priority
    workflow: Optional[str]  # workflow json
+    origin: str | None
+    destination: str | None


 ValuesToInsert: TypeAlias = list[SessionQueueValueToInsert]
@@ -453,6 +479,8 @@ def prepare_values_to_insert(queue_id: str, batch: Batch, priority: int, max_new
                json.dumps(field_values, default=to_jsonable_python) if field_values else None,  # field_values (json)
                priority,  # priority
                json.dumps(workflow, default=to_jsonable_python) if workflow else None,  # workflow (json)
+                batch.origin,  # origin
+                batch.destination,  # destination
            )
        )
    return values_to_insert
--- a/invokeai/app/services/session_queue/session_queue_sqlite.py
+++ b/invokeai/app/services/session_queue/session_queue_sqlite.py
@@ -10,6 +10,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
+    CancelByDestinationResult,
    CancelByQueueIDResult,
    ClearResult,
    EnqueueBatchResult,
@@ -127,8 +128,8 @@ class SqliteSessionQueue(SessionQueueBase):

            self.__cursor.executemany(
                """--sql
-                INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow)
-                VALUES (?, ?, ?, ?, ?, ?, ?)
+                INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow, origin, destination)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                values_to_insert,
            )
@@ -417,11 +418,7 @@ class SqliteSessionQueue(SessionQueueBase):
            )
            self.__conn.commit()
            if current_queue_item is not None and current_queue_item.batch_id in batch_ids:
-                batch_status = self.get_batch_status(queue_id=queue_id, batch_id=current_queue_item.batch_id)
-                queue_status = self.get_queue_status(queue_id=queue_id)
-                self.__invoker.services.events.emit_queue_item_status_changed(
-                    current_queue_item, batch_status, queue_status
-                )
+                self._set_queue_item_status(current_queue_item.item_id, "canceled")
        except Exception:
            self.__conn.rollback()
            raise
@@ -429,6 +426,46 @@ class SqliteSessionQueue(SessionQueueBase):
            self.__lock.release()
        return CancelByBatchIDsResult(canceled=count)

+    def cancel_by_destination(self, queue_id: str, destination: str) -> CancelByDestinationResult:
+        current_queue_item = self.get_current(queue_id)  # Fetched before locking; nothing to undo if it fails.
+        self.__lock.acquire()  # Outside `try`, so `finally` only ever releases a lock this call acquired.
+        try:
+            where = """--sql
+                WHERE
+                  queue_id == ?
+                  AND destination == ?
+                  AND status != 'canceled'
+                  AND status != 'completed'
+                  AND status != 'failed'
+                """
+            params = (queue_id, destination)
+            self.__cursor.execute(
+                f"""--sql
+                SELECT COUNT(*)
+                FROM session_queue
+                {where};
+                """,
+                params,
+            )
+            count = self.__cursor.fetchone()[0]  # Rows matched by the same WHERE clause the UPDATE below uses.
+            self.__cursor.execute(
+                f"""--sql
+                UPDATE session_queue
+                SET status = 'canceled'
+                {where};
+                """,
+                params,
+            )
+            self.__conn.commit()
+            if current_queue_item is not None and current_queue_item.destination == destination:
+                self._set_queue_item_status(current_queue_item.item_id, "canceled")
+        except Exception:
+            self.__conn.rollback()
+            raise
+        finally:
+            self.__lock.release()
+        return CancelByDestinationResult(canceled=count)
+
    def cancel_by_queue_id(self, queue_id: str) -> CancelByQueueIDResult:
        try:
            current_queue_item = self.get_current(queue_id)
@@ -541,7 +578,9 @@ class SqliteSessionQueue(SessionQueueBase):
                    started_at,
                    session_id,
                    batch_id,
-                    queue_id
+                    queue_id,
+                    origin,
+                    destination
                FROM session_queue
                WHERE queue_id = ?
            """
@@ -621,7 +660,7 @@ class SqliteSessionQueue(SessionQueueBase):
            self.__lock.acquire()
            self.__cursor.execute(
                """--sql
-                SELECT status, count(*)
+                SELECT status, count(*), origin, destination
                FROM session_queue
                WHERE
                  queue_id = ?
@@ -633,6 +672,8 @@ class SqliteSessionQueue(SessionQueueBase):
            result = cast(list[sqlite3.Row], self.__cursor.fetchall())
            total = sum(row[1] for row in result)
            counts: dict[str, int] = {row[0]: row[1] for row in result}
+            origin = result[0]["origin"] if result else None
+            destination = result[0]["destination"] if result else None
        except Exception:
            self.__conn.rollback()
            raise
@@ -641,6 +682,8 @@ class SqliteSessionQueue(SessionQueueBase):

        return BatchStatus(
            batch_id=batch_id,
+            origin=origin,
+            destination=destination,
            queue_id=queue_id,
            pending=counts.get("pending", 0),
            in_progress=counts.get("in_progress", 0),
--- a/invokeai/app/services/shared/invocation_context.py
+++ b/invokeai/app/services/shared/invocation_context.py
@@ -14,7 +14,7 @@ from invokeai.app.services.image_records.image_records_common import ImageCatego
 from invokeai.app.services.images.images_common import ImageDTO
 from invokeai.app.services.invocation_services import InvocationServices
 from invokeai.app.services.model_records.model_records_base import UnknownModelException
-from invokeai.app.util.step_callback import stable_diffusion_step_callback
+from invokeai.app.util.step_callback import flux_step_callback, stable_diffusion_step_callback
 from invokeai.backend.model_manager.config import (
    AnyModel,
    AnyModelConfig,
@@ -557,6 +557,24 @@ class UtilInterface(InvocationContextInterface):
            is_canceled=self.is_canceled,
        )

+    def flux_step_callback(self, intermediate_state: PipelineIntermediateState) -> None:
+        """
+        The step callback emits a progress event with the current step, the total number of
+        steps, a preview image, and some other internal metadata.
+
+        This should be called after each denoising step.
+
+        Args:
+            intermediate_state: The intermediate state of the diffusion pipeline.
+        """
+
+        flux_step_callback(
+            context_data=self._data,
+            intermediate_state=intermediate_state,
+            events=self._services.events,
+            is_canceled=self.is_canceled,
+        )
+

 class InvocationContext:
    """Provides access to various services and data for the current invocation.
--- a/invokeai/app/services/shared/sqlite/sqlite_util.py
+++ b/invokeai/app/services/shared/sqlite/sqlite_util.py
@@ -16,6 +16,8 @@ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_10 import
 from invokeai.app.services.shared.sqlite_migrator.migrations.migration_11 import build_migration_11
 from invokeai.app.services.shared.sqlite_migrator.migrations.migration_12 import build_migration_12
 from invokeai.app.services.shared.sqlite_migrator.migrations.migration_13 import build_migration_13
+from invokeai.app.services.shared.sqlite_migrator.migrations.migration_14 import build_migration_14
+from invokeai.app.services.shared.sqlite_migrator.migrations.migration_15 import build_migration_15
 from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_impl import SqliteMigrator


@@ -49,6 +51,8 @@ def init_db(config: InvokeAIAppConfig, logger: Logger, image_files: ImageFileSto
    migrator.register_migration(build_migration_11(app_config=config, logger=logger))
    migrator.register_migration(build_migration_12(app_config=config))
    migrator.register_migration(build_migration_13())
+    migrator.register_migration(build_migration_14())
+    migrator.register_migration(build_migration_15())
    migrator.run_migrations()

    return db
--- a/invokeai/app/services/shared/sqlite_migrator/migrations/migration_14.py
+++ b/invokeai/app/services/shared/sqlite_migrator/migrations/migration_14.py
@@ -0,0 +1,61 @@
+import sqlite3
+
+from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration
+
+
+class Migration14Callback:
+    def __call__(self, cursor: sqlite3.Cursor) -> None:
+        self._create_style_presets(cursor)
+
+    def _create_style_presets(self, cursor: sqlite3.Cursor) -> None:
+        """Create the `style_presets` table, its name index, and its `updated_at` trigger."""
+        tables = [
+            """--sql
+            CREATE TABLE IF NOT EXISTS style_presets (
+                id TEXT NOT NULL PRIMARY KEY,
+                name TEXT NOT NULL,
+                preset_data TEXT NOT NULL,
+                type TEXT NOT NULL DEFAULT 'user',
+                created_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')),
+                -- Updated via trigger
+                updated_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))
+            );
+            """
+        ]
+
+        # Trigger that refreshes `updated_at` whenever a row is updated.
+        triggers = [
+            """--sql
+            CREATE TRIGGER IF NOT EXISTS style_presets
+            AFTER UPDATE
+            ON style_presets FOR EACH ROW
+            BEGIN
+                UPDATE style_presets SET updated_at = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
+                    WHERE id = old.id;
+            END;
+            """
+        ]
+
+        # Index the `name` column.
+        indices = [
+            "CREATE INDEX IF NOT EXISTS idx_style_presets_name ON style_presets(name);",
+        ]
+
+        for stmt in tables + indices + triggers:  # Table first; the index and trigger both reference it.
+            cursor.execute(stmt)
+
+
+def build_migration_14() -> Migration:
+    """
+    Build the migration from database version 13 to 14.
+
+    This migration does the following:
+    - Create the table used to store style presets.
+    """
+    migration_14 = Migration(
+        from_version=13,
+        to_version=14,
+        callback=Migration14Callback(),
+    )
+
+    return migration_14
--- a/invokeai/app/services/shared/sqlite_migrator/migrations/migration_15.py
+++ b/invokeai/app/services/shared/sqlite_migrator/migrations/migration_15.py
@@ -0,0 +1,34 @@
+import sqlite3
+
+from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration
+
+
+class Migration15Callback:
+    def __call__(self, cursor: sqlite3.Cursor) -> None:
+        self._add_origin_col(cursor)
+
+    def _add_origin_col(self, cursor: sqlite3.Cursor) -> None:
+        """
+        - Adds `origin` column to the session queue table.
+        - Adds `destination` column to the session queue table.
+        """
+
+        cursor.execute("ALTER TABLE session_queue ADD COLUMN origin TEXT;")
+        cursor.execute("ALTER TABLE session_queue ADD COLUMN destination TEXT;")
+
+
+def build_migration_15() -> Migration:
+    """
+    Build the migration from database version 14 to 15.
+
+    This migration does the following:
+        - Adds `origin` column to the session queue table.
+        - Adds `destination` column to the session queue table.
+    """
+    migration_15 = Migration(
+        from_version=14,
+        to_version=15,
+        callback=Migration15Callback(),
+    )
+
+    return migration_15
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Anime.png
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Anime.png
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Architectural
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Architectural
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Concept
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Environment
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Environment
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Illustration.png
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Illustration.png
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Interior
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Interior
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Line
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Line
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Photography
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Product
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Product
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Sketch.png
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Sketch.png
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/Vehicles.png
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/Vehicles.png
--- a/invokeai/app/services/style_preset_images/default_style_preset_images/init.py
+++ b/invokeai/app/services/style_preset_images/default_style_preset_images/init.py
--- a/invokeai/app/services/style_preset_images/style_preset_images_base.py
+++ b/invokeai/app/services/style_preset_images/style_preset_images_base.py
@@ -0,0 +1,33 @@
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from PIL.Image import Image as PILImageType
+
+
+class StylePresetImageFileStorageBase(ABC):
+    """Abstract interface of the low-level service that stores and retrieves style preset image files.
+
+    Every operation is keyed by the ID of the style preset the image belongs to.
+    """
+
+    @abstractmethod
+    def get(self, style_preset_id: str) -> PILImageType:
+        """Return the image of the given style preset as a PIL image."""
+
+    @abstractmethod
+    def get_path(self, style_preset_id: str) -> Path:
+        """Return the internal path of the given style preset's image."""
+
+    @abstractmethod
+    def get_url(self, style_preset_id: str) -> str | None:
+        """Return the URL from which the given style preset's image can be fetched, or None."""
+
+    @abstractmethod
+    def save(self, style_preset_id: str, image: PILImageType) -> None:
+        """Store `image` as the image of the given style preset."""
+
+    @abstractmethod
+    def delete(self, style_preset_id: str) -> None:
+        """Remove the image of the given style preset."""
--- a/invokeai/app/services/style_preset_images/style_preset_images_common.py
+++ b/invokeai/app/services/style_preset_images/style_preset_images_common.py
@@ -0,0 +1,19 @@
+class StylePresetImageFileNotFoundException(Exception):
+    """Signals that a style preset image file could not be found in storage.
+
+    Args:
+        message: Text of the error; a generic default is used when omitted.
+    """
+
+    def __init__(self, message: str = "Style preset image file not found") -> None:
+        super().__init__(message)
+
+
+class StylePresetImageFileSaveException(Exception):
+    """Signals that a style preset image could not be saved.
+
+    Args:
+        message: Text of the error; a generic default is used when omitted.
+    """
+
+    def __init__(self, message: str = "Style preset image file not saved") -> None:
+        super().__init__(message)
+
+
+class StylePresetImageFileDeleteException(Exception):
+    """Signals that a style preset image could not be deleted.
+
+    Args:
+        message: Text of the error; a generic default is used when omitted.
+    """
+
+    def __init__(self, message: str = "Style preset image file not deleted") -> None:
+        super().__init__(message)
--- a/invokeai/app/services/style_preset_images/style_preset_images_disk.py
+++ b/invokeai/app/services/style_preset_images/style_preset_images_disk.py
@@ -0,0 +1,88 @@
+from pathlib import Path
+
+from PIL import Image
+from PIL.Image import Image as PILImageType
+
+from invokeai.app.services.invoker import Invoker
+from invokeai.app.services.style_preset_images.style_preset_images_base import StylePresetImageFileStorageBase
+from invokeai.app.services.style_preset_images.style_preset_images_common import (
+    StylePresetImageFileDeleteException,
+    StylePresetImageFileNotFoundException,
+    StylePresetImageFileSaveException,
+)
+from invokeai.app.services.style_preset_records.style_preset_records_common import PresetType
+from invokeai.app.util.misc import uuid_string
+from invokeai.app.util.thumbnails import make_thumbnail
+
+
+class StylePresetImageFileStorageDisk(StylePresetImageFileStorageBase):
+    """Stores style preset images on disk.
+
+    Images saved through this service are written as `<style_preset_id>.webp` into the folder given
+    at construction. Images of presets whose type is `PresetType.Default` are instead resolved by
+    preset name (`<name>.png`) inside the `default_style_preset_images` directory next to this module.
+    """
+
+    def __init__(self, style_preset_images_folder: Path):
+        """Remembers the storage folder and makes sure it exists."""
+        self._style_preset_images_folder = style_preset_images_folder
+        self._validate_storage_folders()
+
+    def start(self, invoker: Invoker) -> None:
+        """Stores the invoker; `get_path` and `get_url` need its services."""
+        self._invoker = invoker
+
+    def get(self, style_preset_id: str) -> PILImageType:
+        """Opens the preset's image file.
+
+        Raises:
+            StylePresetImageFileNotFoundException: If the image file does not exist.
+        """
+        try:
+            return Image.open(self.get_path(style_preset_id))
+        except FileNotFoundError as e:
+            raise StylePresetImageFileNotFoundException from e
+
+    def save(self, style_preset_id: str, image: PILImageType) -> None:
+        """Writes the result of `make_thumbnail(image, 256)` as `<style_preset_id>.webp`.
+
+        Raises:
+            StylePresetImageFileSaveException: If anything goes wrong while saving.
+        """
+        try:
+            # The folder may have been removed since construction, so re-create it if needed.
+            self._validate_storage_folders()
+            image_path = self._style_preset_images_folder / (style_preset_id + ".webp")
+            thumbnail = make_thumbnail(image, 256)
+            thumbnail.save(image_path, format="webp")
+        except Exception as e:
+            raise StylePresetImageFileSaveException from e
+
+    def get_path(self, style_preset_id: str) -> Path:
+        """Resolves the image path for a preset; the file is not checked for existence.
+
+        The preset record is looked up first, so errors raised by that lookup propagate.
+        """
+        style_preset = self._invoker.services.style_preset_records.get(style_preset_id)
+        if style_preset.type is PresetType.Default:
+            # Default preset images live next to this module and are named after the preset.
+            default_images_dir = Path(__file__).parent / Path("default_style_preset_images")
+            return default_images_dir / (style_preset.name + ".png")
+
+        return self._style_preset_images_folder / (style_preset_id + ".webp")
+
+    def get_url(self, style_preset_id: str) -> str | None:
+        """Returns a cache-busting URL for the preset's image, or None when no image file exists."""
+        path = self.get_path(style_preset_id)
+        if not self._validate_path(path):
+            return None
+
+        url = self._invoker.services.urls.get_style_preset_image_url(style_preset_id)
+
+        # The image URL never changes, so we must add random query string to it to prevent caching
+        url += f"?{uuid_string()}"
+
+        return url
+
+    def delete(self, style_preset_id: str) -> None:
+        """Deletes the preset's image file.
+
+        Raises:
+            StylePresetImageFileNotFoundException: If the image file does not exist.
+            StylePresetImageFileDeleteException: If any other error occurs.
+        """
+        try:
+            path = self.get_path(style_preset_id)
+
+            if not self._validate_path(path):
+                raise StylePresetImageFileNotFoundException
+
+            path.unlink()
+        except StylePresetImageFileNotFoundException:
+            # Let "not found" through untouched instead of wrapping it in a second, empty instance
+            # of the same type; it must not be converted by the generic handler below.
+            raise
+        except Exception as e:
+            raise StylePresetImageFileDeleteException from e
+
+    def _validate_path(self, path: Path) -> bool:
+        """Validates the path given for an image."""
+        return path.exists()
+
+    def _validate_storage_folders(self) -> None:
+        """Checks if the required folders exist and create them if they don't"""
+        self._style_preset_images_folder.mkdir(parents=True, exist_ok=True)
--- a/invokeai/app/services/style_preset_records/init.py
+++ b/invokeai/app/services/style_preset_records/init.py
--- a/invokeai/app/services/style_preset_records/default_style_presets.json
+++ b/invokeai/app/services/style_preset_records/default_style_presets.json
@@ -0,0 +1,146 @@
+[
+  {
+    "name": "Photography (General)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt}. photography. f/2.8 macro photo, bokeh, photorealism",
+      "negative_prompt": "painting, digital art. sketch, blurry"
+    }
+  },
+  {
+    "name": "Photography (Studio Lighting)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt}, photography. f/8 photo. centered subject, studio lighting.",
+      "negative_prompt": "painting, digital art. sketch, blurry"
+    }
+  },
+  {
+    "name": "Photography (Landscape)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt}, landscape photograph, f/12, lifelike,  highly detailed.",
+      "negative_prompt": "painting, digital art. sketch, blurry"
+    }
+  },
+  {
+    "name": "Photography (Portrait)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt}. photography. portraiture. catch light in eyes. one flash. rembrandt lighting. Soft box. dark shadows. High contrast. 80mm lens. F2.8.",
+      "negative_prompt": "painting, digital art. sketch, blurry"
+    }
+  },
+  {
+    "name": "Photography (Black and White)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} photography. natural light. 80mm lens. F1.4. strong contrast, hard light. dark contrast. blurred background. black and white",
+      "negative_prompt": "painting, digital art. sketch, colour+"
+    }
+  },
+  {
+    "name": "Architectural Visualization",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt}. architectural photography, f/12, luxury, aesthetically pleasing form and function.",
+      "negative_prompt": "painting, digital art. sketch, blurry"
+    }
+  },
+  {
+    "name": "Concept Art (Fantasy)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "concept artwork of a {prompt}. (digital painterly art style)++, mythological, (textured 2d dry media brushpack)++, glazed brushstrokes, otherworldly. painting+, illustration+",
+      "negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
+    }
+  },
+  {
+    "name": "Concept Art (Sci-Fi)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "(concept art)++, {prompt}, (sleek futurism)++, (textured 2d dry media)++, metallic highlights, digital painting style",
+      "negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
+    }
+  },
+  {
+    "name": "Concept Art (Character)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "(character concept art)++, stylized painterly digital painting of {prompt}, (painterly, impasto. Dry brush.)++",
+      "negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
+    }
+  },
+  {
+    "name": "Concept Art (Painterly)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} oil painting. high contrast. impasto. sfumato. chiaroscuro. Palette knife.",
+      "negative_prompt": "photo. smooth. border. frame"
+    }
+  },
+  {
+    "name": "Environment Art",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} environment artwork, hyper-realistic digital painting style with cinematic composition, atmospheric, depth and detail, voluminous. textured dry brush 2d media",
+      "negative_prompt": "photo, distorted, blurry, out of focus. sketch."
+    }
+  },
+  {
+    "name": "Interior Design (Visualization)",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} interior design photo, gentle shadows, light mid-tones, dimension, mix of smooth and textured surfaces, focus on negative space and clean lines, focus",
+      "negative_prompt": "photo, distorted. sketch."
+    }
+  },
+  {
+    "name": "Product Rendering",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} high quality product photography, 3d rendering with key lighting, shallow depth of field, simple plain background, studio lighting.",
+      "negative_prompt": "blurry, sketch, messy, dirty. unfinished."
+    }
+  },
+  {
+    "name": "Sketch",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} black and white pencil drawing, off-center composition, cross-hatching for shadows, bold strokes, textured paper. sketch+++",
+      "negative_prompt": "blurry, photo, painting, color. messy, dirty. unfinished. frame, borders."
+    }
+  },
+  {
+    "name": "Line Art",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} Line art. bold outline. simplistic. white background. 2d",
+      "negative_prompt": "photo. digital art. greyscale. solid black. painting"
+    }
+  },
+  {
+    "name": "Anime",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} anime++, bold outline, cel-shaded coloring, shounen, seinen",
+      "negative_prompt": "(photo)+++. greyscale. solid black. painting"
+    }
+  },
+  {
+    "name": "Illustration",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "{prompt} illustration, bold linework, illustrative details, vector art style, flat coloring",
+      "negative_prompt": "(photo)+++. greyscale. painting, black and white."
+    }
+  },
+  {
+    "name": "Vehicles",
+    "type": "default",
+    "preset_data": {
+      "positive_prompt": "A weird futuristic normal auto, {prompt} elegant design, nice color, nice wheels",
+      "negative_prompt": "sketch. digital art. greyscale. painting"
+    }
+  }
+]
--- a/invokeai/app/services/style_preset_records/style_preset_records_base.py
+++ b/invokeai/app/services/style_preset_records/style_preset_records_base.py
@@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+
+from invokeai.app.services.style_preset_records.style_preset_records_common import (
+    PresetType,
+    StylePresetChanges,
+    StylePresetRecordDTO,
+    StylePresetWithoutId,
+)
+
+
+class StylePresetRecordsStorageBase(ABC):
+    """Abstract interface that every style preset record storage service implements."""
+
+    @abstractmethod
+    def get(self, style_preset_id: str) -> StylePresetRecordDTO:
+        """Return the style preset with the given ID."""
+
+    @abstractmethod
+    def create(self, style_preset: StylePresetWithoutId) -> StylePresetRecordDTO:
+        """Store a new style preset and return the resulting record."""
+
+    @abstractmethod
+    def create_many(self, style_presets: list[StylePresetWithoutId]) -> None:
+        """Store several new style presets."""
+
+    @abstractmethod
+    def update(self, style_preset_id: str, changes: StylePresetChanges) -> StylePresetRecordDTO:
+        """Apply `changes` to the style preset with the given ID and return the resulting record."""
+
+    @abstractmethod
+    def delete(self, style_preset_id: str) -> None:
+        """Remove the style preset with the given ID."""
+
+    @abstractmethod
+    def get_many(self, type: PresetType | None = None) -> list[StylePresetRecordDTO]:
+        """Return many style presets, optionally restricted to those of the given type."""
--- a/invokeai/app/services/style_preset_records/style_preset_records_common.py
+++ b/invokeai/app/services/style_preset_records/style_preset_records_common.py
@@ -0,0 +1,139 @@
+import codecs
+import csv
+import json
+from enum import Enum
+from typing import Any, Optional
+
+import pydantic
+from fastapi import UploadFile
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field, TypeAdapter
+
+from invokeai.app.util.metaenum import MetaEnum
+
+
+class StylePresetNotFoundError(Exception):
+    """Signals that the requested style preset does not exist."""
+
+
+# The prompt pair carried by a style preset. `extra="forbid"` makes validation reject
+# any field other than the two declared here.
+class PresetData(BaseModel, extra="forbid"):
+    positive_prompt: str = Field(description="Positive prompt")
+    negative_prompt: str = Field(description="Negative prompt")
+
+
+# Module-level adapter for validating `PresetData`; `StylePresetRecordDTO.from_dict` uses it
+# to parse the JSON-encoded `preset_data` value.
+PresetDataValidator = TypeAdapter(PresetData)
+
+
+# The kinds of style preset. Members are `str` values, so they compare equal to their
+# raw strings ("user", "default", "project").
+class PresetType(str, Enum, metaclass=MetaEnum):
+    User = "user"
+    Default = "default"
+    Project = "project"
+
+
+# A partial update for a style preset. Every field is optional and defaults to None, which
+# means "leave unchanged"; unknown fields are rejected (`extra="forbid"`).
+class StylePresetChanges(BaseModel, extra="forbid"):
+    name: Optional[str] = Field(default=None, description="The style preset's new name.")
+    preset_data: Optional[PresetData] = Field(default=None, description="The updated data for style preset.")
+    # `default=None` added: without a default, pydantic treats an Optional field as required,
+    # which made `type` the only change that callers were forced to supply.
+    type: Optional[PresetType] = Field(default=None, description="The updated type of the style preset")
+
+
+# A style preset without an ID: all three fields are required. Used as the input to
+# record creation and as the base of `StylePresetRecordDTO`.
+class StylePresetWithoutId(BaseModel):
+    name: str = Field(description="The name of the style preset.")
+    preset_data: PresetData = Field(description="The preset data")
+    type: PresetType = Field(description="The type of style preset")
+
+
+# A stored style preset: the fields of `StylePresetWithoutId` plus the record ID.
+class StylePresetRecordDTO(StylePresetWithoutId):
+    id: str = Field(description="The style preset ID.")
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "StylePresetRecordDTO":
+        """Builds a record from a mapping whose `preset_data` value is a JSON string.
+
+        Args:
+            data: Mapping of field names to values. It is not modified.
+
+        Returns:
+            The validated record.
+
+        Raises:
+            pydantic.ValidationError: If `preset_data` is missing or not valid JSON for
+                `PresetData`, or if the remaining fields fail validation.
+        """
+        preset_data = PresetDataValidator.validate_json(data.get("preset_data", ""))
+        # Validate a copy so the caller's dict keeps its original JSON string.
+        return StylePresetRecordDTOValidator.validate_python({**data, "preset_data": preset_data})
+
+
+# Defined after the class because it needs the finished type; `from_dict` only looks it up
+# when called.
+StylePresetRecordDTOValidator = TypeAdapter(StylePresetRecordDTO)
+
+
+# A style preset record plus its `image` value. The field accepts None but declares no
+# default, so it must always be provided explicitly.
+class StylePresetRecordWithImage(StylePresetRecordDTO):
+    image: Optional[str] = Field(description="The path for image")
+
+
+# One row/object of an imported presets file. `name` must be non-empty; both prompts
+# default to an empty string.
+class StylePresetImportRow(BaseModel):
+    name: str = Field(min_length=1, description="The name of the preset.")
+    positive_prompt: str = Field(
+        default="",
+        description="The positive prompt for the preset.",
+        # Accept either "positive_prompt" or "prompt" as the input key.
+        validation_alias=AliasChoices("positive_prompt", "prompt"),
+    )
+    negative_prompt: str = Field(default="", description="The negative prompt for the preset.")
+
+    # Strip surrounding whitespace from string values and reject unknown keys/columns.
+    model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
+
+
+# The whole import payload is a list of rows; the adapter validates such a list in one call.
+StylePresetImportList = list[StylePresetImportRow]
+StylePresetImportListTypeAdapter = TypeAdapter(StylePresetImportList)
+
+
+class UnsupportedFileTypeError(ValueError):
+    """Signals that a file of an unsupported type was supplied."""
+
+
+class InvalidPresetImportDataError(ValueError):
+    """Signals that the data of a preset import is invalid."""
+
+
+async def parse_presets_from_file(file: UploadFile) -> list[StylePresetWithoutId]:
+    """Parses style presets from a file. The file must be a CSV or JSON file.
+
+    If CSV, the file must have the following columns:
+    - name
+    - prompt (or positive_prompt)
+    - negative_prompt
+
+    If JSON, the file must be a list of objects with the following keys:
+    - name
+    - prompt (or positive_prompt)
+    - negative_prompt
+
+    Once the content type has been accepted, the underlying file is always closed before
+    this function returns or raises.
+
+    Args:
+        file (UploadFile): The file to parse.
+
+    Returns:
+        list[StylePresetWithoutId]: The parsed style presets, all of type `PresetType.User`.
+
+    Raises:
+        UnsupportedFileTypeError: If the file type is not supported.
+        InvalidPresetImportDataError: If the data in the file is invalid. This includes content
+            that cannot be decoded or is not well-formed JSON.
+    """
+    if file.content_type not in ["text/csv", "application/json"]:
+        raise UnsupportedFileTypeError()
+
+    is_csv = file.content_type == "text/csv"
+
+    try:
+        # Reading happens inside the try block so that a decoding/parsing failure still closes
+        # the file and is reported as invalid import data rather than as an unrelated error.
+        if is_csv:
+            csv_reader = csv.DictReader(codecs.iterdecode(file.file, "utf-8"))
+            data = list(csv_reader)
+        else:  # file.content_type == "application/json":
+            json_data = await file.read()
+            data = json.loads(json_data)
+
+        imported_presets = StylePresetImportListTypeAdapter.validate_python(data)
+
+        style_presets: list[StylePresetWithoutId] = []
+
+        for imported in imported_presets:
+            preset_data = PresetData(positive_prompt=imported.positive_prompt, negative_prompt=imported.negative_prompt)
+            style_preset = StylePresetWithoutId(name=imported.name, preset_data=preset_data, type=PresetType.User)
+            style_presets.append(style_preset)
+    except (pydantic.ValidationError, json.JSONDecodeError, UnicodeDecodeError) as e:
+        if is_csv:
+            msg = "Invalid CSV format: must include columns 'name', 'prompt', and 'negative_prompt' and name cannot be blank"
+        else:  # file.content_type == "application/json":
+            msg = "Invalid JSON format: must be a list of objects with keys 'name', 'prompt', and 'negative_prompt' and name cannot be blank"
+        raise InvalidPresetImportDataError(msg) from e
+    finally:
+        file.file.close()
+
+    return style_presets
--- a/invokeai/app/services/style_preset_records/style_preset_records_sqlite.py
+++ b/invokeai/app/services/style_preset_records/style_preset_records_sqlite.py
@@ -0,0 +1,215 @@
+import json
+from pathlib import Path
+
+from invokeai.app.services.invoker import Invoker
+from invokeai.app.services.shared.sqlite.sqlite_database import SqliteDatabase
+from invokeai.app.services.style_preset_records.style_preset_records_base import StylePresetRecordsStorageBase
+from invokeai.app.services.style_preset_records.style_preset_records_common import (
+    PresetType,
+    StylePresetChanges,
+    StylePresetNotFoundError,
+    StylePresetRecordDTO,
+    StylePresetWithoutId,
+)
+from invokeai.app.util.misc import uuid_string
+
+
+class SqliteStylePresetRecordsStorage(StylePresetRecordsStorageBase):
+    """Style preset record storage backed by the `style_presets` table of a SQLite database.
+
+    All database access goes through the lock, connection and cursor obtained from the
+    `SqliteDatabase` passed at construction. Each operation rolls the connection back before
+    re-raising when it fails.
+    """
+
+    def __init__(self, db: SqliteDatabase) -> None:
+        super().__init__()
+        self._lock = db.lock
+        self._conn = db.conn
+        self._cursor = self._conn.cursor()
+
+    def start(self, invoker: Invoker) -> None:
+        """Stores the invoker and re-syncs the default style presets into the database."""
+        self._invoker = invoker
+        self._sync_default_style_presets()
+
+    def get(self, style_preset_id: str) -> StylePresetRecordDTO:
+        """Gets a style preset by ID.
+
+        Raises:
+            StylePresetNotFoundError: If no style preset has the given ID.
+        """
+        with self._lock:
+            try:
+                self._cursor.execute(
+                    """--sql
+                    SELECT *
+                    FROM style_presets
+                    WHERE id = ?;
+                    """,
+                    (style_preset_id,),
+                )
+                row = self._cursor.fetchone()
+                if row is None:
+                    raise StylePresetNotFoundError(f"Style preset with id {style_preset_id} not found")
+                return StylePresetRecordDTO.from_dict(dict(row))
+            except Exception:
+                self._conn.rollback()
+                raise
+
+    def create(self, style_preset: StylePresetWithoutId) -> StylePresetRecordDTO:
+        """Inserts a style preset under a freshly generated ID and returns the stored record.
+
+        The insert uses `INSERT OR IGNORE`; the record is then read back with `get`, which
+        raises `StylePresetNotFoundError` if no row with the new ID exists.
+        """
+        style_preset_id = uuid_string()
+        with self._lock:
+            try:
+                self._cursor.execute(
+                    """--sql
+                    INSERT OR IGNORE INTO style_presets (
+                        id,
+                        name,
+                        preset_data,
+                        type
+                    )
+                    VALUES (?, ?, ?, ?);
+                    """,
+                    (
+                        style_preset_id,
+                        style_preset.name,
+                        style_preset.preset_data.model_dump_json(),
+                        style_preset.type,
+                    ),
+                )
+                self._conn.commit()
+            except Exception:
+                self._conn.rollback()
+                raise
+        return self.get(style_preset_id)
+
+    def create_many(self, style_presets: list[StylePresetWithoutId]) -> None:
+        """Inserts several style presets, each under a fresh ID, and commits once at the end."""
+        with self._lock:
+            try:
+                for style_preset in style_presets:
+                    self._cursor.execute(
+                        """--sql
+                        INSERT OR IGNORE INTO style_presets (
+                            id,
+                            name,
+                            preset_data,
+                            type
+                        )
+                        VALUES (?, ?, ?, ?);
+                        """,
+                        (
+                            uuid_string(),
+                            style_preset.name,
+                            style_preset.preset_data.model_dump_json(),
+                            style_preset.type,
+                        ),
+                    )
+                self._conn.commit()
+            except Exception:
+                self._conn.rollback()
+                raise
+
+        return None
+
+    def update(self, style_preset_id: str, changes: StylePresetChanges) -> StylePresetRecordDTO:
+        """Applies the non-None `name` and `preset_data` of `changes` and returns the stored record.
+
+        NOTE(review): `changes.type` is not written by this method - confirm that is intentional.
+        """
+        with self._lock:
+            try:
+                # Change the name of a style preset
+                if changes.name is not None:
+                    self._cursor.execute(
+                        """--sql
+                        UPDATE style_presets
+                        SET name = ?
+                        WHERE id = ?;
+                        """,
+                        (changes.name, style_preset_id),
+                    )
+
+                # Change the preset data for a style preset
+                if changes.preset_data is not None:
+                    self._cursor.execute(
+                        """--sql
+                        UPDATE style_presets
+                        SET preset_data = ?
+                        WHERE id = ?;
+                        """,
+                        (changes.preset_data.model_dump_json(), style_preset_id),
+                    )
+
+                self._conn.commit()
+            except Exception:
+                self._conn.rollback()
+                raise
+        return self.get(style_preset_id)
+
+    def delete(self, style_preset_id: str) -> None:
+        """Deletes the style preset with the given ID."""
+        with self._lock:
+            try:
+                self._cursor.execute(
+                    """--sql
+                    DELETE from style_presets
+                    WHERE id = ?;
+                    """,
+                    (style_preset_id,),
+                )
+                self._conn.commit()
+            except Exception:
+                self._conn.rollback()
+                raise
+        return None
+
+    def get_many(self, type: PresetType | None = None) -> list[StylePresetRecordDTO]:
+        """Gets all style presets, or only those of `type` when given, ordered by lowercased name."""
+        with self._lock:
+            try:
+                main_query = """
+                    SELECT
+                        *
+                    FROM style_presets
+                    """
+
+                if type is not None:
+                    main_query += "WHERE type = ? "
+
+                main_query += "ORDER BY LOWER(name) ASC"
+
+                if type is not None:
+                    self._cursor.execute(main_query, (type,))
+                else:
+                    self._cursor.execute(main_query)
+
+                rows = self._cursor.fetchall()
+                return [StylePresetRecordDTO.from_dict(dict(row)) for row in rows]
+            except Exception:
+                self._conn.rollback()
+                raise
+
+    def _sync_default_style_presets(self) -> None:
+        """Syncs default style presets to the database. Internal use only."""
+
+        # First delete all existing default style presets
+        with self._lock:
+            try:
+                # The type is bound as a parameter. The previous `type = "default"` used double
+                # quotes, which SQL reserves for identifiers; SQLite only treats such a token as
+                # a string through a legacy fallback that can be disabled.
+                self._cursor.execute(
+                    """--sql
+                    DELETE FROM style_presets
+                    WHERE type = ?;
+                    """,
+                    (PresetType.Default.value,),
+                )
+                self._conn.commit()
+            except Exception:
+                self._conn.rollback()
+                raise
+
+        # Next, parse and create the default style presets
+        defaults_path = Path(__file__).parent / Path("default_style_presets.json")
+        # NOTE(review): `self.create` acquires `self._lock` again while it is held here, so this
+        # relies on `db.lock` being reentrant - confirm.
+        with self._lock, open(defaults_path, "r", encoding="utf-8") as file:
+            presets = json.load(file)
+            for preset in presets:
+                style_preset = StylePresetWithoutId.model_validate(preset)
+                self.create(style_preset)
--- a/invokeai/app/services/urls/urls_base.py
+++ b/invokeai/app/services/urls/urls_base.py
@@ -13,3 +13,8 @@ class UrlServiceBase(ABC):
    def get_model_image_url(self, model_key: str) -> str:
        """Gets the URL for a model image"""
        pass
+
+    @abstractmethod
+    def get_style_preset_image_url(self, style_preset_id: str) -> str:
+        """Gets the URL for a style preset image.
+
+        Args:
+            style_preset_id: ID of the style preset whose image URL is requested.
+
+        Returns:
+            The URL as a string.
+        """
+        pass
--- a/invokeai/app/services/urls/urls_default.py
+++ b/invokeai/app/services/urls/urls_default.py
@@ -19,3 +19,6 @@ class LocalUrlService(UrlServiceBase):

    def get_model_image_url(self, model_key: str) -> str:
        return f"{self._base_url_v2}/models/i/{model_key}/image"
+
+    def get_style_preset_image_url(self, style_preset_id: str) -> str:
+        """Builds the URL of a style preset's image beneath this service's base URL."""
+        image_route = f"style_presets/i/{style_preset_id}/image"
+        return f"{self._base_url}/{image_route}"
--- a/invokeai/app/services/workflow_records/default_workflows/FLUX
+++ b/invokeai/app/services/workflow_records/default_workflows/FLUX
@@ -0,0 +1,407 @@
+{
+  "name": "FLUX Image to Image",
+  "author": "InvokeAI",
+  "description": "A simple image-to-image workflow using a FLUX dev model. ",
+  "version": "1.0.4",
+  "contact": "",
+  "tags": "image2image, flux, image-to-image",
+  "notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend using FLUX dev models for image-to-image workflows. The image-to-image performance with FLUX schnell models is poor.",
+  "exposedFields": [
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "t5_encoder_model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "clip_embed_model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "vae_model"
+    },
+    {
+      "nodeId": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "fieldName": "denoising_start"
+    },
+    {
+      "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "fieldName": "prompt"
+    },
+    {
+      "nodeId": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "fieldName": "num_steps"
+    }
+  ],
+  "meta": {
+    "version": "3.0.0",
+    "category": "default"
+  },
+  "nodes": [
+    {
+      "id": "2981a67c-480f-4237-9384-26b68dbf912b",
+      "type": "invocation",
+      "data": {
+        "id": "2981a67c-480f-4237-9384-26b68dbf912b",
+        "type": "flux_vae_encode",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": true,
+        "inputs": {
+          "image": {
+            "name": "image",
+            "label": "",
+            "value": {
+              "image_name": "8a5c62aa-9335-45d2-9c71-89af9fc1f8d4.png"
+            }
+          },
+          "vae": {
+            "name": "vae",
+            "label": ""
+          }
+        }
+      },
+      "position": {
+        "x": 732.7680166609682,
+        "y": -24.37398171806909
+      }
+    },
+    {
+      "id": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "type": "invocation",
+      "data": {
+        "id": "ace0258f-67d7-4eee-a218-6fff27065214",
+        "type": "flux_denoise",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": true,
+        "inputs": {
+          "board": {
+            "name": "board",
+            "label": ""
+          },
+          "metadata": {
+            "name": "metadata",
+            "label": ""
+          },
+          "latents": {
+            "name": "latents",
+            "label": ""
+          },
+          "denoise_mask": {
+            "name": "denoise_mask",
+            "label": ""
+          },
+          "denoising_start": {
+            "name": "denoising_start",
+            "label": "",
+            "value": 0.04
+          },
+          "denoising_end": {
+            "name": "denoising_end",
+            "label": "",
+            "value": 1
+          },
+          "transformer": {
+            "name": "transformer",
+            "label": ""
+          },
+          "positive_text_conditioning": {
+            "name": "positive_text_conditioning",
+            "label": ""
+          },
+          "width": {
+            "name": "width",
+            "label": "",
+            "value": 1024
+          },
+          "height": {
+            "name": "height",
+            "label": "",
+            "value": 1024
+          },
+          "num_steps": {
+            "name": "num_steps",
+            "label": "Steps (Recommend 30 for Dev, 4 for Schnell)",
+            "value": 30
+          },
+          "guidance": {
+            "name": "guidance",
+            "label": "",
+            "value": 4
+          },
+          "seed": {
+            "name": "seed",
+            "label": "",
+            "value": 0
+          }
+        }
+      },
+      "position": {
+        "x": 1182.8836633018684,
+        "y": -251.38882958913183
+      }
+    },
+    {
+      "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "type": "invocation",
+      "data": {
+        "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+        "type": "flux_vae_decode",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": false,
+        "useCache": true,
+        "inputs": {
+          "board": {
+            "name": "board",
+            "label": ""
+          },
+          "metadata": {
+            "name": "metadata",
+            "label": ""
+          },
+          "latents": {
+            "name": "latents",
+            "label": ""
+          },
+          "vae": {
+            "name": "vae",
+            "label": ""
+          }
+        }
+      },
+      "position": {
+        "x": 1575.5797431839133,
+        "y": -209.00150975507415
+      }
+    },
+    {
+      "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "type": "invocation",
+      "data": {
+        "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+        "type": "flux_model_loader",
+        "version": "1.0.4",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": false,
+        "inputs": {
+          "model": {
+            "name": "model",
+            "label": "Model (dev variant recommended for Image-to-Image)"
+          },
+          "t5_encoder_model": {
+            "name": "t5_encoder_model",
+            "label": ""
+          },
+          "clip_embed_model": {
+            "name": "clip_embed_model",
+            "label": "",
+            "value": {
+              "key": "fa23a584-b623-415d-832a-21b5098ff1a1",
+              "hash": "blake3:17c19f0ef941c3b7609a9c94a659ca5364de0be364a91d4179f0e39ba17c3b70",
+              "name": "clip-vit-large-patch14",
+              "base": "any",
+              "type": "clip_embed"
+            }
+          },
+          "vae_model": {
+            "name": "vae_model",
+            "label": "",
+            "value": {
+              "key": "74fc82ba-c0a8-479d-a890-2126f82da758",
+              "hash": "blake3:ce21cb76364aa6e2421311cf4a4b5eb052a76c4f1cd207b50703d8978198a068",
+              "name": "FLUX.1-schnell_ae",
+              "base": "flux",
+              "type": "vae"
+            }
+          }
+        }
+      },
+      "position": {
+        "x": 328.1809894659957,
+        "y": -90.2241133566946
+      }
+    },
+    {
+      "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "type": "invocation",
+      "data": {
+        "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+        "type": "flux_text_encoder",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": true,
+        "inputs": {
+          "clip": {
+            "name": "clip",
+            "label": ""
+          },
+          "t5_encoder": {
+            "name": "t5_encoder",
+            "label": ""
+          },
+          "t5_max_seq_len": {
+            "name": "t5_max_seq_len",
+            "label": "T5 Max Seq Len",
+            "value": 256
+          },
+          "prompt": {
+            "name": "prompt",
+            "label": "",
+            "value": "a cat wearing a birthday hat"
+          }
+        }
+      },
+      "position": {
+        "x": 745.8823365057267,
+        "y": -299.60249175851914
+      }
+    },
+    {
+      "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
+      "type": "invocation",
+      "data": {
+        "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
+        "type": "rand_int",
+        "version": "1.0.1",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": false,
+        "inputs": {
+          "low": {
+            "name": "low",
+            "label": "",
+            "value": 0
+          },
+          "high": {
+            "name": "high",
+            "label": "",
+            "value": 2147483647
+          }
+        }
+      },
+      "position": {
+        "x": 725.834098928012,
+        "y": 496.2710031089931
+      }
+    }
+  ],
+  "edges": [
+    {
+      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912bheight-ace0258f-67d7-4eee-a218-6fff27065214height",
+      "type": "default",
+      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "height",
+      "targetHandle": "height"
+    },
+    {
+      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912bwidth-ace0258f-67d7-4eee-a218-6fff27065214width",
+      "type": "default",
+      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "width",
+      "targetHandle": "width"
+    },
+    {
+      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912blatents-ace0258f-67d7-4eee-a218-6fff27065214latents",
+      "type": "default",
+      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "latents",
+      "targetHandle": "latents"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-2981a67c-480f-4237-9384-26b68dbf912bvae",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "2981a67c-480f-4237-9384-26b68dbf912b",
+      "sourceHandle": "vae",
+      "targetHandle": "vae"
+    },
+    {
+      "id": "reactflow__edge-ace0258f-67d7-4eee-a218-6fff27065214latents-7e5172eb-48c1-44db-a770-8fd83e1435d1latents",
+      "type": "default",
+      "source": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "sourceHandle": "latents",
+      "targetHandle": "latents"
+    },
+    {
+      "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-ace0258f-67d7-4eee-a218-6fff27065214seed",
+      "type": "default",
+      "source": "4754c534-a5f3-4ad0-9382-7887985e668c",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "value",
+      "targetHandle": "seed"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90transformer-ace0258f-67d7-4eee-a218-6fff27065214transformer",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "transformer",
+      "targetHandle": "transformer"
+    },
+    {
+      "id": "reactflow__edge-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cconditioning-ace0258f-67d7-4eee-a218-6fff27065214positive_text_conditioning",
+      "type": "default",
+      "source": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
+      "sourceHandle": "conditioning",
+      "targetHandle": "positive_text_conditioning"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-7e5172eb-48c1-44db-a770-8fd83e1435d1vae",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "sourceHandle": "vae",
+      "targetHandle": "vae"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90max_seq_len-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_max_seq_len",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "max_seq_len",
+      "targetHandle": "t5_max_seq_len"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90t5_encoder-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_encoder",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "t5_encoder",
+      "targetHandle": "t5_encoder"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90clip-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cclip",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "clip",
+      "targetHandle": "clip"
+    }
+  ]
+}
--- a/invokeai/app/services/workflow_records/default_workflows/Flux
+++ b/invokeai/app/services/workflow_records/default_workflows/Flux
@@ -0,0 +1,326 @@
+{
+  "name": "FLUX Text to Image",
+  "author": "InvokeAI",
+  "description": "A simple text-to-image workflow using FLUX dev or schnell models.",
+  "version": "1.0.4",
+  "contact": "",
+  "tags": "text2image, flux",
+  "notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend 4 steps for FLUX schnell models and 30 steps for FLUX dev models.",
+  "exposedFields": [
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "t5_encoder_model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "clip_embed_model"
+    },
+    {
+      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "fieldName": "vae_model"
+    },
+    {
+      "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "fieldName": "prompt"
+    },
+    {
+      "nodeId": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "fieldName": "num_steps"
+    }
+  ],
+  "meta": {
+    "version": "3.0.0",
+    "category": "default"
+  },
+  "nodes": [
+    {
+      "id": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "type": "invocation",
+      "data": {
+        "id": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+        "type": "flux_denoise",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": true,
+        "inputs": {
+          "board": {
+            "name": "board",
+            "label": ""
+          },
+          "metadata": {
+            "name": "metadata",
+            "label": ""
+          },
+          "latents": {
+            "name": "latents",
+            "label": ""
+          },
+          "denoise_mask": {
+            "name": "denoise_mask",
+            "label": ""
+          },
+          "denoising_start": {
+            "name": "denoising_start",
+            "label": "",
+            "value": 0
+          },
+          "denoising_end": {
+            "name": "denoising_end",
+            "label": "",
+            "value": 1
+          },
+          "transformer": {
+            "name": "transformer",
+            "label": ""
+          },
+          "positive_text_conditioning": {
+            "name": "positive_text_conditioning",
+            "label": ""
+          },
+          "width": {
+            "name": "width",
+            "label": "",
+            "value": 1024
+          },
+          "height": {
+            "name": "height",
+            "label": "",
+            "value": 1024
+          },
+          "num_steps": {
+            "name": "num_steps",
+            "label": "Steps (Recommend 30 for Dev, 4 for Schnell)",
+            "value": 30
+          },
+          "guidance": {
+            "name": "guidance",
+            "label": "",
+            "value": 4
+          },
+          "seed": {
+            "name": "seed",
+            "label": "",
+            "value": 0
+          }
+        }
+      },
+      "position": {
+        "x": 1186.1868226120378,
+        "y": -214.9459927686657
+      }
+    },
+    {
+      "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "type": "invocation",
+      "data": {
+        "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+        "type": "flux_vae_decode",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": false,
+        "useCache": true,
+        "inputs": {
+          "board": {
+            "name": "board",
+            "label": ""
+          },
+          "metadata": {
+            "name": "metadata",
+            "label": ""
+          },
+          "latents": {
+            "name": "latents",
+            "label": ""
+          },
+          "vae": {
+            "name": "vae",
+            "label": ""
+          }
+        }
+      },
+      "position": {
+        "x": 1575.5797431839133,
+        "y": -209.00150975507415
+      }
+    },
+    {
+      "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "type": "invocation",
+      "data": {
+        "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+        "type": "flux_model_loader",
+        "version": "1.0.4",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": false,
+        "inputs": {
+          "model": {
+            "name": "model",
+            "label": ""
+          },
+          "t5_encoder_model": {
+            "name": "t5_encoder_model",
+            "label": ""
+          },
+          "clip_embed_model": {
+            "name": "clip_embed_model",
+            "label": ""
+          },
+          "vae_model": {
+            "name": "vae_model",
+            "label": ""
+          }
+        }
+      },
+      "position": {
+        "x": 381.1882713063478,
+        "y": -95.89663532854017
+      }
+    },
+    {
+      "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "type": "invocation",
+      "data": {
+        "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+        "type": "flux_text_encoder",
+        "version": "1.0.0",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": true,
+        "inputs": {
+          "clip": {
+            "name": "clip",
+            "label": ""
+          },
+          "t5_encoder": {
+            "name": "t5_encoder",
+            "label": ""
+          },
+          "t5_max_seq_len": {
+            "name": "t5_max_seq_len",
+            "label": "T5 Max Seq Len",
+            "value": 256
+          },
+          "prompt": {
+            "name": "prompt",
+            "label": "",
+            "value": "a cat"
+          }
+        }
+      },
+      "position": {
+        "x": 778.4899149328337,
+        "y": -100.36469216659502
+      }
+    },
+    {
+      "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
+      "type": "invocation",
+      "data": {
+        "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
+        "type": "rand_int",
+        "version": "1.0.1",
+        "label": "",
+        "notes": "",
+        "isOpen": true,
+        "isIntermediate": true,
+        "useCache": false,
+        "inputs": {
+          "low": {
+            "name": "low",
+            "label": "",
+            "value": 0
+          },
+          "high": {
+            "name": "high",
+            "label": "",
+            "value": 2147483647
+          }
+        }
+      },
+      "position": {
+        "x": 800.9667463219505,
+        "y": 285.8297267547506
+      }
+    }
+  ],
+  "edges": [
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90transformer-4fe24f07-f906-4f55-ab2c-9beee56ef5bdtransformer",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "sourceHandle": "transformer",
+      "targetHandle": "transformer"
+    },
+    {
+      "id": "reactflow__edge-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cconditioning-4fe24f07-f906-4f55-ab2c-9beee56ef5bdpositive_text_conditioning",
+      "type": "default",
+      "source": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "sourceHandle": "conditioning",
+      "targetHandle": "positive_text_conditioning"
+    },
+    {
+      "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-4fe24f07-f906-4f55-ab2c-9beee56ef5bdseed",
+      "type": "default",
+      "source": "4754c534-a5f3-4ad0-9382-7887985e668c",
+      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "sourceHandle": "value",
+      "targetHandle": "seed"
+    },
+    {
+      "id": "reactflow__edge-4fe24f07-f906-4f55-ab2c-9beee56ef5bdlatents-7e5172eb-48c1-44db-a770-8fd83e1435d1latents",
+      "type": "default",
+      "source": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
+      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "sourceHandle": "latents",
+      "targetHandle": "latents"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-7e5172eb-48c1-44db-a770-8fd83e1435d1vae",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
+      "sourceHandle": "vae",
+      "targetHandle": "vae"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90max_seq_len-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_max_seq_len",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "max_seq_len",
+      "targetHandle": "t5_max_seq_len"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90t5_encoder-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_encoder",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "t5_encoder",
+      "targetHandle": "t5_encoder"
+    },
+    {
+      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90clip-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cclip",
+      "type": "default",
+      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
+      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
+      "sourceHandle": "clip",
+      "targetHandle": "clip"
+    }
+  ]
+}
--- a/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
+++ b/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
--- a/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
+++ b/invokeai/app/services/workflow_records/default_workflows/MultiDiffusion
--- a/invokeai/app/util/custom_openapi.py
+++ b/invokeai/app/util/custom_openapi.py
@@ -81,7 +81,7 @@ def get_openapi_func(
        # Add the output map to the schema
        openapi_schema["components"]["schemas"]["InvocationOutputMap"] = {
            "type": "object",
-            "properties": invocation_output_map_properties,
+            "properties": dict(sorted(invocation_output_map_properties.items())),
            "required": invocation_output_map_required,
        }

--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@@ -38,6 +38,25 @@ SD1_5_LATENT_RGB_FACTORS = [
    [-0.1307, -0.1874, -0.7445],  # L4
 ]

+# Projection weights used by flux_step_callback to turn FLUX latents into an RGB preview.
+# There are 16 rows of 3 weights each; the callback uses this table as the right-hand operand of a
+# matmul, so every row contributes one latent feature to the three output colour channels.
+FLUX_LATENT_RGB_FACTORS = [
+    [-0.0412, 0.0149, 0.0521],
+    [0.0056, 0.0291, 0.0768],
+    [0.0342, -0.0681, -0.0427],
+    [-0.0258, 0.0092, 0.0463],
+    [0.0863, 0.0784, 0.0547],
+    [-0.0017, 0.0402, 0.0158],
+    [0.0501, 0.1058, 0.1152],
+    [-0.0209, -0.0218, -0.0329],
+    [-0.0314, 0.0083, 0.0896],
+    [0.0851, 0.0665, -0.0472],
+    [-0.0534, 0.0238, -0.0024],
+    [0.0452, -0.0026, 0.0048],
+    [0.0892, 0.0831, 0.0881],
+    [-0.1117, -0.0304, -0.0789],
+    [0.0027, -0.0479, -0.0043],
+    [-0.1146, -0.0827, -0.0598],
+]
+

 def sample_to_lowres_estimated_image(
    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
@@ -94,3 +113,32 @@ def stable_diffusion_step_callback(
        intermediate_state,
        ProgressImage(dataURL=dataURL, width=width, height=height),
    )
+
+
+def flux_step_callback(
+    context_data: "InvocationContextData",
+    intermediate_state: PipelineIntermediateState,
+    events: "EventServiceBase",
+    is_canceled: Callable[[], bool],
+) -> None:
+    """Emit a denoise-progress event that carries a small RGB preview of the current FLUX latents.
+
+    Raises:
+        CanceledException: If is_canceled() returns True. The check happens before any preview work.
+    """
+    if is_canceled():
+        raise CanceledException
+
+    latents = intermediate_state.latents
+    rgb_factors = torch.tensor(FLUX_LATENT_RGB_FACTORS, dtype=latents.dtype, device=latents.device)
+
+    # Move dim 0 to the end so the matmul contracts it against the rows of the factor table.
+    # NOTE(review): this requires 3-D latents whose first axis matches the table's row count - confirm
+    # against the callers.
+    rgb_estimate = latents.permute(1, 2, 0) @ rgb_factors
+
+    # Rescale from -1..1 to 0..1, clamp, then stretch to 0..255 before converting to bytes on the CPU.
+    unit_range = ((rgb_estimate + 1) / 2).clamp(0, 1)
+    preview_bytes = unit_range.mul(0xFF).to(device="cpu", dtype=torch.uint8)
+    image = Image.fromarray(preview_bytes.numpy())
+
+    # The size reported with the preview is 8x the size of the preview image itself.
+    preview_width, preview_height = image.size
+    progress_image = ProgressImage(
+        dataURL=image_to_dataURL(image, image_format="JPEG"),
+        width=preview_width * 8,
+        height=preview_height * 8,
+    )
+
+    events.emit_invocation_denoise_progress(
+        context_data.queue_item,
+        context_data.invocation,
+        intermediate_state,
+        progress_image,
+    )
--- a/invokeai/backend/flux/denoise.py
+++ b/invokeai/backend/flux/denoise.py
@@ -0,0 +1,56 @@
+from typing import Callable
+
+import torch
+from tqdm import tqdm
+
+from invokeai.backend.flux.inpaint_extension import InpaintExtension
+from invokeai.backend.flux.model import Flux
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
+
+
+def denoise(
+    model: Flux,
+    # model input
+    img: torch.Tensor,
+    img_ids: torch.Tensor,
+    txt: torch.Tensor,
+    txt_ids: torch.Tensor,
+    vec: torch.Tensor,
+    # sampling parameters
+    timesteps: list[float],
+    step_callback: Callable[[PipelineIntermediateState], None],
+    guidance: float,
+    inpaint_extension: InpaintExtension | None,
+):
+    """Run the FLUX sampling loop over `timesteps` and return the final latents.
+
+    Every consecutive pair (t_curr, t_prev) of `timesteps` is one step: the model prediction is scaled
+    by (t_prev - t_curr) and added to `img`. After each step `step_callback` receives a
+    PipelineIntermediateState whose latents are `img - t_curr * pred`, computed from the latents as
+    they were before that step's update.
+
+    Args:
+        model: The FLUX transformer, called once per step.
+        img: Latents to denoise. Its first dimension sizes the timestep and guidance vectors.
+        img_ids: Forwarded to the model unchanged.
+        txt: Forwarded to the model unchanged.
+        txt_ids: Forwarded to the model unchanged.
+        vec: Forwarded to the model as `y`.
+        timesteps: Schedule of N + 1 values describing N steps.
+        step_callback: Called once after every step.
+        guidance: Guidance strength, repeated once per entry of img's first dimension.
+        inpaint_extension: When not None, each step's result is merged with the initial latents at t_prev.
+
+    Returns:
+        The latents after the last step.
+    """
+    # N + 1 timesteps produce N updates, so N (not N + 1) is the total reported to the callback.
+    total_steps = len(timesteps) - 1
+    step_pairs = list(zip(timesteps[:-1], timesteps[1:], strict=True))
+
+    # guidance_vec is ignored for schnell.
+    guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
+    for step, (t_curr, t_prev) in enumerate(tqdm(step_pairs)):
+        t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+        pred = model(
+            img=img,
+            img_ids=img_ids,
+            txt=txt,
+            txt_ids=txt_ids,
+            y=vec,
+            timesteps=t_vec,
+            guidance=guidance_vec,
+        )
+        # The preview must be derived from the pre-update latents, so compute it before img changes.
+        preview_img = img - t_curr * pred
+        img = img + (t_prev - t_curr) * pred
+
+        if inpaint_extension is not None:
+            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+
+        step_callback(
+            PipelineIntermediateState(
+                step=step,
+                order=1,
+                total_steps=total_steps,
+                # int() truncates toward zero, so a fractional timestep loses its fractional part here.
+                timestep=int(t_curr),
+                latents=preview_img,
+            ),
+        )
+
+    return img
--- a/invokeai/backend/flux/inpaint_extension.py
+++ b/invokeai/backend/flux/inpaint_extension.py
@@ -0,0 +1,35 @@
+import torch
+
+
+class InpaintExtension:
+    """Helper that keeps the un-inpainted part of FLUX latents tied to the initial latents."""
+
+    def __init__(self, init_latents: torch.Tensor, inpaint_mask: torch.Tensor, noise: torch.Tensor):
+        """Store the tensors needed to re-noise the initial latents at any timestep.
+
+        All three tensors must share the same shape.
+
+        Args:
+            init_latents (torch.Tensor): Initial latents, un-noised (timestep 0), in 'packed' format.
+            inpaint_mask (torch.Tensor): Blend weights in the range [0, 1], in 'packed' format. A value of 1
+                re-generates the element, 0 leaves it unchanged, and values in between blend the inpainted
+                region with the background.
+            noise (torch.Tensor): The noise that is used to noise init_latents, in 'packed' format.
+        """
+        assert init_latents.shape == inpaint_mask.shape and inpaint_mask.shape == noise.shape
+        self._noise = noise
+        self._inpaint_mask = inpaint_mask
+        self._init_latents = init_latents
+
+    def merge_intermediate_latents_with_init_latents(
+        self, intermediate_latents: torch.Tensor, timestep: float
+    ) -> torch.Tensor:
+        """Blend the latest denoised latents with the initial latents noised to `timestep`.
+
+        Elements where the mask is 1 come from `intermediate_latents`; elements where it is 0 come from the
+        re-noised initial latents, which keeps the regions that are not inpainted on the correct noise
+        trajectory. Call this after every denoising step.
+        """
+        regenerate_weight = self._inpaint_mask
+        keep_weight = 1.0 - regenerate_weight
+
+        # Initial latents as they would look at this timestep.
+        noised_init = self._noise * timestep + (1.0 - timestep) * self._init_latents
+
+        return intermediate_latents * regenerate_weight + noised_init * keep_weight
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -0,0 +1,32 @@
+# Initially pulled from https://github.com/black-forest-labs/flux
+
+import torch
+from einops import rearrange
+from torch import Tensor
+
+
+def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
+    """Apply rotary embedding to q and k, run scaled dot-product attention, and merge the heads.
+
+    The attention output is treated as (B, H, L, D) and returned as (B, L, H * D).
+    """
+    rotated_q, rotated_k = apply_rope(q, k, pe)
+    attended = torch.nn.functional.scaled_dot_product_attention(rotated_q, rotated_k, v)
+    return rearrange(attended, "B H L D -> B L (H D)")
+
+
+def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
+    """Build the 2x2 rotation matrices used for rotary position embedding.
+
+    For every entry of `pos` and each of the dim // 2 frequencies 1 / theta ** (2k / dim), the angle is
+    position * frequency and the matrix is [[cos, -sin], [sin, cos]]. The frequencies are created in
+    float64 and the result is returned as float32 with two trailing axes of size 2.
+
+    `dim` must be even.
+    """
+    assert dim % 2 == 0
+    exponents = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim
+    frequencies = 1.0 / (theta**exponents)
+    angles = torch.einsum("...n,d->...nd", pos, frequencies)
+    cos, sin = torch.cos(angles), torch.sin(angles)
+    # Flattened row-major 2x2 matrix per angle; the rearrange below unflattens it.
+    flat_rotations = torch.stack([cos, -sin, sin, cos], dim=-1)
+    rotations = rearrange(flat_rotations, "b n d (i j) -> b n d i j", i=2, j=2)
+    return rotations.float()
+
+
+def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
+    """Rotate query and key tensors with the matrices in `freqs_cis`.
+
+    The last dimension of each input is viewed as consecutive pairs, and every pair is multiplied as a
+    2-vector by the matrix held in the last two axes of `freqs_cis`. The arithmetic runs in float32 and
+    each result is reshaped and cast back to match its input.
+    """
+
+    def _rotate(x: Tensor) -> Tensor:
+        pairs = x.float().reshape(*x.shape[:-1], -1, 1, 2)
+        rotated = freqs_cis[..., 0] * pairs[..., 0] + freqs_cis[..., 1] * pairs[..., 1]
+        return rotated.reshape(*x.shape).type_as(x)
+
+    return _rotate(xq), _rotate(xk)
--- a/invokeai/backend/flux/model.py
+++ b/invokeai/backend/flux/model.py
@@ -0,0 +1,117 @@
+# Initially pulled from https://github.com/black-forest-labs/flux
+
+from dataclasses import dataclass
+
+import torch
+from torch import Tensor, nn
+
+from invokeai.backend.flux.modules.layers import (
+    DoubleStreamBlock,
+    EmbedND,
+    LastLayer,
+    MLPEmbedder,
+    SingleStreamBlock,
+    timestep_embedding,
+)
+
+
+@dataclass
+class FluxParams:
+    """Hyper-parameters that the Flux constructor reads to size and configure its layers."""
+
+    # Input feature size of Flux.img_in; Flux.out_channels is set to the same value.
+    in_channels: int
+    # Input feature size of Flux.vector_in (the embedder applied to `y` in Flux.forward).
+    vec_in_dim: int
+    # Input feature size of Flux.txt_in.
+    context_in_dim: int
+    # Width shared by the embedders and transformer blocks; must be divisible by num_heads.
+    hidden_size: int
+    # Forwarded to every DoubleStreamBlock and SingleStreamBlock.
+    mlp_ratio: float
+    # Forwarded to every block; hidden_size // num_heads is the positional-embedding dim.
+    num_heads: int
+    # Number of DoubleStreamBlock instances.
+    depth: int
+    # Number of SingleStreamBlock instances.
+    depth_single_blocks: int
+    # Forwarded to EmbedND; the entries must sum to hidden_size // num_heads.
+    axes_dim: list[int]
+    # Forwarded to EmbedND.
+    theta: int
+    # Forwarded to every DoubleStreamBlock.
+    qkv_bias: bool
+    # When True, Flux builds a guidance embedder and Flux.forward requires a guidance tensor.
+    guidance_embed: bool
+
+class Flux(nn.Module):
+    """
+    Transformer model for flow matching on sequences.
+
+    Image and text sequences are first processed by a stack of DoubleStreamBlocks that keep the two
+    streams separate, then concatenated and processed by a stack of SingleStreamBlocks. The text part
+    is dropped again before the final layer.
+    """
+
+    def __init__(self, params: FluxParams):
+        """Build all sub-modules from `params`.
+
+        Raises:
+            ValueError: If hidden_size is not divisible by num_heads, or if the entries of axes_dim
+                do not sum to hidden_size // num_heads.
+        """
+        super().__init__()
+
+        self.params = params
+        self.in_channels = params.in_channels
+        self.out_channels = self.in_channels
+        if params.hidden_size % params.num_heads != 0:
+            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
+        # Per-head width; the positional embedding must fill it exactly.
+        pe_dim = params.hidden_size // params.num_heads
+        if sum(params.axes_dim) != pe_dim:
+            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
+        self.hidden_size = params.hidden_size
+        self.num_heads = params.num_heads
+        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
+        self.img_in = nn.Linear(self.in_channels, self.hidden_size, bias=True)
+        # 256 matches the embedding size requested from timestep_embedding in forward().
+        self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size)
+        self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size)
+        # Without guidance embedding this is a placeholder; forward() only calls it when
+        # params.guidance_embed is True.
+        self.guidance_in = (
+            MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if params.guidance_embed else nn.Identity()
+        )
+        self.txt_in = nn.Linear(params.context_in_dim, self.hidden_size)
+
+        self.double_blocks = nn.ModuleList(
+            [
+                DoubleStreamBlock(
+                    self.hidden_size,
+                    self.num_heads,
+                    mlp_ratio=params.mlp_ratio,
+                    qkv_bias=params.qkv_bias,
+                )
+                for _ in range(params.depth)
+            ]
+        )
+
+        self.single_blocks = nn.ModuleList(
+            [
+                SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio)
+                for _ in range(params.depth_single_blocks)
+            ]
+        )
+
+        self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels)
+
+    def forward(
+        self,
+        img: Tensor,
+        img_ids: Tensor,
+        txt: Tensor,
+        txt_ids: Tensor,
+        timesteps: Tensor,
+        y: Tensor,
+        guidance: Tensor | None = None,
+    ) -> Tensor:
+        """Run the transformer and return the output of the final layer for the image sequence.
+
+        Args:
+            img: Image sequence; must be 3-D.
+            img_ids: Position ids for `img`; concatenated after `txt_ids` along dim 1.
+            txt: Text sequence; must be 3-D.
+            txt_ids: Position ids for `txt`.
+            timesteps: Values embedded with timestep_embedding and fed to time_in.
+            y: Vector conditioning fed to vector_in.
+            guidance: Guidance values; required when params.guidance_embed is True, unused otherwise.
+
+        Raises:
+            ValueError: If `img` or `txt` is not 3-D, or if guidance is None while
+                params.guidance_embed is True.
+        """
+        if img.ndim != 3 or txt.ndim != 3:
+            raise ValueError("Input img and txt tensors must have 3 dimensions.")
+
+        # running on sequences img
+        img = self.img_in(img)
+        # vec accumulates the timestep, optional guidance and vector conditioning.
+        vec = self.time_in(timestep_embedding(timesteps, 256))
+        if self.params.guidance_embed:
+            if guidance is None:
+                raise ValueError("Didn't get guidance strength for guidance distilled model.")
+            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
+        vec = vec + self.vector_in(y)
+        txt = self.txt_in(txt)
+
+        # Text ids come first, matching the (txt, img) concatenation order used below.
+        ids = torch.cat((txt_ids, img_ids), dim=1)
+        pe = self.pe_embedder(ids)
+
+        for block in self.double_blocks:
+            img, txt = block(img=img, txt=txt, vec=vec, pe=pe)
+
+        img = torch.cat((txt, img), 1)
+        for block in self.single_blocks:
+            img = block(img, vec=vec, pe=pe)
+        # Drop the leading text positions so only the image part reaches the final layer.
+        img = img[:, txt.shape[1] :, ...]
+
+        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)
+        return img
--- a/invokeai/backend/flux/modules/autoencoder.py
+++ b/invokeai/backend/flux/modules/autoencoder.py
@@ -0,0 +1,324 @@
+# Initially pulled from https://github.com/black-forest-labs/flux
+
+from dataclasses import dataclass
+
+import torch
+from einops import rearrange
+from torch import Tensor, nn
+
+
+@dataclass
+class AutoEncoderParams:
+    """Hyper-parameters from which `AutoEncoder` builds its `Encoder` and `Decoder`."""
+
+    # Forwarded to both Encoder and Decoder as `resolution`.
+    resolution: int
+    # Channel count of the input tensor given to the encoder's first convolution.
+    in_channels: int
+    # Base channel width; each resolution level uses `ch * ch_mult[level]` channels.
+    ch: int
+    # Channel count produced by the decoder's final convolution.
+    out_ch: int
+    # Per-level channel multipliers; the list length is the number of resolution levels.
+    ch_mult: list[int]
+    # ResnetBlocks per level in the encoder (the decoder builds one more per level).
+    num_res_blocks: int
+    # Latent channel count; the encoder emits `2 * z_channels` channels.
+    z_channels: int
+    # `AutoEncoder.encode` returns `scale_factor * (z - shift_factor)`; `decode` applies the inverse.
+    scale_factor: float
+    shift_factor: float
+
+
+class AttnBlock(nn.Module):
+    """Residual self-attention block that attends across the spatial positions of a feature map."""
+
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.in_channels = in_channels
+
+        self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+
+        def pointwise_conv() -> nn.Conv2d:
+            # 1x1 convolution that keeps the channel count.
+            return nn.Conv2d(in_channels, in_channels, kernel_size=1)
+
+        self.q = pointwise_conv()
+        self.k = pointwise_conv()
+        self.v = pointwise_conv()
+        self.proj_out = pointwise_conv()
+
+    def attention(self, h_: Tensor) -> Tensor:
+        """Normalize `h_`, project it to q/k/v and return the attention output in (b, c, h, w) layout."""
+        normed = self.norm(h_)
+        q, k, v = self.q(normed), self.k(normed), self.v(normed)
+
+        b, c, h, w = q.shape
+        # Flatten the spatial grid into a sequence and add a singleton axis in front of it.
+        to_sequence = "b c h w -> b 1 (h w) c"
+        q, k, v = (rearrange(t, to_sequence).contiguous() for t in (q, k, v))
+        attended = nn.functional.scaled_dot_product_attention(q, k, v)
+
+        return rearrange(attended, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Return `x` plus the output-projected attention result."""
+        attended = self.attention(x)
+        return x + self.proj_out(attended)
+
+
+class ResnetBlock(nn.Module):
+    """Two GroupNorm -> SiLU -> 3x3 conv stages with a residual connection.
+
+    When the input and output channel counts differ, the skip path goes through a 1x1 convolution
+    (`nin_shortcut`) so both branches can be added.
+    """
+
+    def __init__(self, in_channels: int, out_channels: int | None = None):
+        """
+        Args:
+            in_channels: Channel count of the input feature map.
+            out_channels: Channel count of the output feature map. `None` (the default) means
+                "same as `in_channels`".
+        """
+        super().__init__()
+        self.in_channels = in_channels
+        out_channels = in_channels if out_channels is None else out_channels
+        self.out_channels = out_channels
+
+        self.norm1 = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
+        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        self.norm2 = nn.GroupNorm(num_groups=32, num_channels=out_channels, eps=1e-6, affine=True)
+        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        # The shortcut projection only exists when the channel count changes.
+        if self.in_channels != self.out_channels:
+            self.nin_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply the residual block to `x` and return a tensor with `out_channels` channels."""
+        h = x
+        h = self.norm1(h)
+        h = torch.nn.functional.silu(h)
+        h = self.conv1(h)
+
+        h = self.norm2(h)
+        h = torch.nn.functional.silu(h)
+        h = self.conv2(h)
+
+        # Project the skip path so its channel count matches `h`.
+        if self.in_channels != self.out_channels:
+            x = self.nin_shortcut(x)
+
+        return x + h
+
+
+class Downsample(nn.Module):
+    """Stride-2 3x3 convolution preceded by a one-pixel zero pad on the right and bottom edges."""
+
+    def __init__(self, in_channels: int):
+        super().__init__()
+        # torch convolutions cannot pad asymmetrically, so the padding is applied by hand in forward().
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+
+    def forward(self, x: Tensor):
+        # Pad order for the last two dims is (left, right, top, bottom).
+        padded = nn.functional.pad(x, (0, 1, 0, 1), mode="constant", value=0)
+        return self.conv(padded)
+
+
+class Upsample(nn.Module):
+    """Nearest-neighbour 2x upsampling followed by a 3x3 convolution that keeps the channel count."""
+
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x: Tensor):
+        enlarged = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
+        return self.conv(enlarged)
+
+
+class Encoder(nn.Module):
+    """Convolutional encoder that maps an input tensor to a `2 * z_channels`-channel feature map.
+
+    The network is an input convolution, `len(ch_mult)` levels of `num_res_blocks` ResnetBlocks (each
+    level except the last followed by a `Downsample`), a ResnetBlock/AttnBlock/ResnetBlock middle
+    section, and a GroupNorm -> SiLU -> conv output head.
+    """
+
+    def __init__(
+        self,
+        resolution: int,
+        in_channels: int,
+        ch: int,
+        ch_mult: list[int],
+        num_res_blocks: int,
+        z_channels: int,
+    ):
+        """
+        Args:
+            resolution: Stored on the module as `self.resolution`; not otherwise used by the encoder.
+            in_channels: Channel count of the input tensor.
+            ch: Base channel width.
+            ch_mult: Per-level multipliers applied to `ch`; its length sets the number of levels.
+            num_res_blocks: Number of ResnetBlocks per level.
+            z_channels: Latent channel count; the output has `2 * z_channels` channels.
+        """
+        super().__init__()
+        self.ch = ch
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        # downsampling
+        self.conv_in = nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1)
+
+        in_ch_mult = (1,) + tuple(ch_mult)
+        self.in_ch_mult = in_ch_mult
+        self.down = nn.ModuleList()
+        # Fallback width for the middle section if `ch_mult` is empty; overwritten inside the loop.
+        block_in = self.ch
+        for i_level in range(self.num_resolutions):
+            block = nn.ModuleList()
+            # No per-level attention layers are created; the empty list keeps the `attn` attribute
+            # that forward() checks.
+            attn = nn.ModuleList()
+            block_in = ch * in_ch_mult[i_level]
+            block_out = ch * ch_mult[i_level]
+            for _ in range(self.num_res_blocks):
+                block.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
+                block_in = block_out
+            down = nn.Module()
+            down.block = block
+            down.attn = attn
+            if i_level != self.num_resolutions - 1:
+                down.downsample = Downsample(block_in)
+            self.down.append(down)
+
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+
+        # end
+        self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
+        self.conv_out = nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Encode `x` and return the `2 * z_channels`-channel output of the final convolution."""
+        # downsampling
+        # Only the most recent activation is ever consumed, so a single running tensor is kept
+        # instead of a list of every intermediate feature map (which would pin them all in memory).
+        h = self.conv_in(x)
+        for i_level in range(self.num_resolutions):
+            level = self.down[i_level]
+            for i_block in range(self.num_res_blocks):
+                h = level.block[i_block](h)
+                if len(level.attn) > 0:
+                    h = level.attn[i_block](h)
+            if i_level != self.num_resolutions - 1:
+                h = level.downsample(h)
+
+        # middle
+        h = self.mid.block_1(h)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h)
+        # end
+        h = self.norm_out(h)
+        h = torch.nn.functional.silu(h)
+        h = self.conv_out(h)
+        return h
+
+
+class Decoder(nn.Module):
+    """Convolutional decoder that maps a `z_channels`-channel latent to an `out_ch`-channel tensor.
+
+    The network is an input convolution, a ResnetBlock/AttnBlock/ResnetBlock middle section,
+    `len(ch_mult)` levels of `num_res_blocks + 1` ResnetBlocks (every level except level 0 followed by
+    an `Upsample`), and a GroupNorm -> SiLU -> conv output head.
+    """
+
+    def __init__(
+        self,
+        ch: int,
+        out_ch: int,
+        ch_mult: list[int],
+        num_res_blocks: int,
+        in_channels: int,
+        resolution: int,
+        z_channels: int,
+    ):
+        super().__init__()
+        n_levels = len(ch_mult)
+        self.ch = ch
+        self.num_resolutions = n_levels
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.in_channels = in_channels
+        self.ffactor = 2 ** (n_levels - 1)
+
+        # Channel width and spatial size at the lowest-resolution level.
+        block_in = ch * ch_mult[n_levels - 1]
+        lowest_res = resolution // 2 ** (n_levels - 1)
+        self.z_shape = (1, z_channels, lowest_res, lowest_res)
+
+        # z to block_in
+        self.conv_in = nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1)
+
+        # middle
+        self.mid = nn.Module()
+        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+        self.mid.attn_1 = AttnBlock(block_in)
+        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
+
+        # upsampling: levels are built from the lowest resolution upwards
+        self.up = nn.ModuleList()
+        for i_level in range(n_levels - 1, -1, -1):
+            block_out = ch * ch_mult[i_level]
+            res_blocks = nn.ModuleList()
+            for _ in range(self.num_res_blocks + 1):
+                res_blocks.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
+                block_in = block_out
+            level = nn.Module()
+            level.block = res_blocks
+            # No per-level attention layers are created; forward() checks this (empty) list.
+            level.attn = nn.ModuleList()
+            if i_level != 0:
+                level.upsample = Upsample(block_in)
+            # Prepend so that self.up[i] is the module for level i.
+            self.up.insert(0, level)
+
+        # end
+        self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
+        self.conv_out = nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
+
+    def forward(self, z: Tensor) -> Tensor:
+        """Decode latent `z` and return the output of the final convolution."""
+        # z to block_in, then the middle section
+        h = self.conv_in(z)
+        h = self.mid.block_2(self.mid.attn_1(self.mid.block_1(h)))
+
+        # upsampling, walking from the highest level index down to level 0
+        for i_level in range(self.num_resolutions - 1, -1, -1):
+            level = self.up[i_level]
+            has_attn = len(level.attn) > 0
+            for i_block in range(self.num_res_blocks + 1):
+                h = level.block[i_block](h)
+                if has_attn:
+                    h = level.attn[i_block](h)
+            if i_level != 0:
+                h = level.upsample(h)
+
+        # end
+        return self.conv_out(torch.nn.functional.silu(self.norm_out(h)))
+
+
+class DiagonalGaussian(nn.Module):
+    """Split a tensor into mean and log-variance halves and return either the mean or a sample."""
+
+    def __init__(self, chunk_dim: int = 1):
+        super().__init__()
+        self.chunk_dim = chunk_dim
+
+    def forward(self, z: Tensor, sample: bool = True, generator: torch.Generator | None = None) -> Tensor:
+        """Return `mean + std * noise` when `sample` is True, otherwise the mean half of `z`.
+
+        Args:
+            z: Tensor whose `chunk_dim` axis holds the mean half followed by the log-variance half.
+            sample: Whether to draw a sample instead of returning the mean.
+            generator: Optional random number generator used for the noise.
+        """
+        mean, logvar = torch.chunk(z, 2, dim=self.chunk_dim)
+        if not sample:
+            return mean
+
+        std = torch.exp(0.5 * logvar)
+        # torch.randn_like(...) does not accept a generator argument at the time of writing, so the noise is
+        # drawn with torch.randn(...) using the mean's size, dtype and device.
+        noise = torch.randn(size=mean.size(), generator=generator, dtype=mean.dtype, device=mean.device)
+        return mean + std * noise
+
+
+class AutoEncoder(nn.Module):
+    """Encoder/decoder pair with a diagonal-Gaussian sampler and an affine rescaling of the latents."""
+
+    def __init__(self, params: AutoEncoderParams):
+        super().__init__()
+        # Arguments that the encoder and the decoder have in common.
+        shared_kwargs = {
+            "resolution": params.resolution,
+            "in_channels": params.in_channels,
+            "ch": params.ch,
+            "ch_mult": params.ch_mult,
+            "num_res_blocks": params.num_res_blocks,
+            "z_channels": params.z_channels,
+        }
+        self.encoder = Encoder(**shared_kwargs)
+        self.decoder = Decoder(out_ch=params.out_ch, **shared_kwargs)
+        self.reg = DiagonalGaussian()
+
+        self.scale_factor = params.scale_factor
+        self.shift_factor = params.shift_factor
+
+    def encode(self, x: Tensor, sample: bool = True, generator: torch.Generator | None = None) -> Tensor:
+        """Encode an image tensor into a scaled and shifted latent.
+
+        Args:
+            x (Tensor): Input image tensor. Shape: (batch_size, in_channels, height, width).
+            sample (bool, optional): If True, sample from the encoded distribution, otherwise return the
+                distribution mean. Defaults to True.
+            generator (torch.Generator | None, optional): Optional random number generator for reproducibility.
+                Defaults to None.
+
+        Returns:
+            Tensor: Encoded latent tensor. Shape: (batch_size, z_channels, latent_height, latent_width).
+        """
+        latent = self.reg(self.encoder(x), sample=sample, generator=generator)
+        return self.scale_factor * (latent - self.shift_factor)
+
+    def decode(self, z: Tensor) -> Tensor:
+        """Undo the latent scaling/shift applied by `encode` and run the decoder."""
+        unscaled = z / self.scale_factor + self.shift_factor
+        return self.decoder(unscaled)
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Encode `x` (with the default sampling behaviour) and decode the result."""
+        latent = self.encode(x)
+        return self.decode(latent)
--- a/invokeai/backend/flux/modules/conditioner.py
+++ b/invokeai/backend/flux/modules/conditioner.py
@@ -0,0 +1,33 @@
+# Initially pulled from https://github.com/black-forest-labs/flux
+
+from torch import Tensor, nn
+from transformers import PreTrainedModel, PreTrainedTokenizer
+
+
+class HFEncoder(nn.Module):
+    """Wrap a tokenizer and a frozen text encoder and return one selected field of the encoder output."""
+
+    def __init__(self, encoder: PreTrainedModel, tokenizer: PreTrainedTokenizer, is_clip: bool, max_length: int):
+        super().__init__()
+        self.max_length = max_length
+        self.is_clip = is_clip
+        # Which entry of the encoder output forward() returns.
+        if self.is_clip:
+            self.output_key = "pooler_output"
+        else:
+            self.output_key = "last_hidden_state"
+        self.tokenizer = tokenizer
+        # The encoder is used in eval mode with gradients disabled.
+        self.hf_module = encoder.eval().requires_grad_(False)
+
+    def forward(self, text: list[str]) -> Tensor:
+        """Tokenize `text` (truncated/padded to `max_length`) and return `outputs[self.output_key]`."""
+        tokenized = self.tokenizer(
+            text,
+            truncation=True,
+            max_length=self.max_length,
+            return_length=False,
+            return_overflowing_tokens=False,
+            padding="max_length",
+            return_tensors="pt",
+        )
+        input_ids = tokenized["input_ids"].to(self.hf_module.device)
+
+        # No attention mask is passed to the encoder.
+        encoded = self.hf_module(
+            input_ids=input_ids,
+            attention_mask=None,
+            output_hidden_states=False,
+        )
+        return encoded[self.output_key]
--- a/invokeai/backend/flux/modules/layers.py
+++ b/invokeai/backend/flux/modules/layers.py
@@ -0,0 +1,253 @@
+# Initially pulled from https://github.com/black-forest-labs/flux
+
+import math
+from dataclasses import dataclass
+
+import torch
+from einops import rearrange
+from torch import Tensor, nn
+
+from invokeai.backend.flux.math import attention, rope
+
+
+class EmbedND(nn.Module):
+    """Build a positional embedding by applying `rope` to each axis of the ids and concatenating."""
+
+    def __init__(self, dim: int, theta: int, axes_dim: list[int]):
+        super().__init__()
+        self.dim = dim
+        self.theta = theta
+        self.axes_dim = axes_dim
+
+    def forward(self, ids: Tensor) -> Tensor:
+        """Return the per-axis `rope` outputs concatenated on dim -3, with a new axis inserted at dim 1."""
+        per_axis = [rope(ids[..., axis], self.axes_dim[axis], self.theta) for axis in range(ids.shape[-1])]
+        return torch.cat(per_axis, dim=-3).unsqueeze(1)
+
+
+def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
+    """
+    Create sinusoidal timestep embeddings.
+    :param t: a 1-D Tensor of N indices, one per batch element.
+                      These may be fractional.
+    :param dim: the dimension of the output.
+    :param max_period: controls the minimum frequency of the embeddings.
+    :param time_factor: multiplier applied to t before the embedding is computed.
+    :return: an (N, D) Tensor of positional embeddings.
+    """
+    t = time_factor * t
+    half = dim // 2
+    steps = torch.arange(start=0, end=half, dtype=torch.float32)
+    freqs = torch.exp(-math.log(max_period) * steps / half).to(t.device)
+
+    args = t[:, None].float() * freqs[None]
+    # Cosine half first, then sine half; an odd `dim` gets one trailing zero column.
+    pieces = [torch.cos(args), torch.sin(args)]
+    if dim % 2:
+        pieces.append(torch.zeros_like(args[:, :1]))
+    embedding = torch.cat(pieces, dim=-1)
+
+    # Match the dtype/device of a floating-point input; otherwise keep float32.
+    return embedding.to(t) if torch.is_floating_point(t) else embedding
+
+
+class MLPEmbedder(nn.Module):
+    """Two-layer MLP (Linear -> SiLU -> Linear) mapping `in_dim` features to `hidden_dim` features."""
+
+    def __init__(self, in_dim: int, hidden_dim: int):
+        super().__init__()
+        self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True)
+        self.silu = nn.SiLU()
+        self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
+
+    def forward(self, x: Tensor) -> Tensor:
+        hidden = self.in_layer(x)
+        hidden = self.silu(hidden)
+        return self.out_layer(hidden)
+
+
+class RMSNorm(torch.nn.Module):
+    """Root-mean-square normalization over the last dimension with a learned per-feature scale."""
+
+    def __init__(self, dim: int):
+        super().__init__()
+        self.scale = nn.Parameter(torch.ones(dim))
+
+    def forward(self, x: Tensor):
+        input_dtype = x.dtype
+        # The statistics are computed in float32; the result is cast back before scaling.
+        x_f32 = x.float()
+        mean_square = torch.mean(x_f32**2, dim=-1, keepdim=True)
+        normalized = x_f32 * torch.rsqrt(mean_square + 1e-6)
+        return normalized.to(dtype=input_dtype) * self.scale
+
+
+class QKNorm(torch.nn.Module):
+    """Apply separate RMSNorm layers to queries and keys, then cast both to match the values tensor."""
+
+    def __init__(self, dim: int):
+        super().__init__()
+        self.query_norm = RMSNorm(dim)
+        self.key_norm = RMSNorm(dim)
+
+    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple[Tensor, Tensor]:
+        # `v` is only used as the dtype/device target for the normalized q and k.
+        normed_q = self.query_norm(q)
+        normed_k = self.key_norm(k)
+        return normed_q.to(v), normed_k.to(v)
+
+
+class SelfAttention(nn.Module):
+    """Multi-head self-attention: fused qkv projection, q/k normalization, `attention`, output projection."""
+
+    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False):
+        super().__init__()
+        self.num_heads = num_heads
+
+        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+        # q and k are normalized per head, over the head dimension.
+        self.norm = QKNorm(dim // num_heads)
+        self.proj = nn.Linear(dim, dim)
+
+    def forward(self, x: Tensor, pe: Tensor) -> Tensor:
+        # Split the fused projection into q, k, v, each laid out as (B, H, L, D).
+        q, k, v = rearrange(self.qkv(x), "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+        q, k = self.norm(q, k, v)
+        attended = attention(q, k, v, pe=pe)
+        return self.proj(attended)
+
+
+@dataclass
+class ModulationOut:
+    """One (shift, scale, gate) triple produced by `Modulation`."""
+
+    # Added after scaling: `(1 + scale) * norm(x) + shift`.
+    shift: Tensor
+    # Multiplies the normalized input as `(1 + scale)`.
+    scale: Tensor
+    # Multiplies a branch output before it is added back to the residual stream.
+    gate: Tensor
+
+
+class Modulation(nn.Module):
+    """Project a conditioning vector to one (or, when `double`, two) shift/scale/gate triples."""
+
+    def __init__(self, dim: int, double: bool):
+        super().__init__()
+        self.is_double = double
+        # Three chunks per triple: shift, scale, gate.
+        self.multiplier = 6 if double else 3
+        self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
+
+    def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]:
+        """Return `(first, second)`; `second` is None unless the module was built with `double=True`."""
+        projected = self.lin(nn.functional.silu(vec))
+        # Insert a broadcast axis at dim 1, then split the feature axis into equal chunks.
+        chunks = projected[:, None, :].chunk(self.multiplier, dim=-1)
+
+        first = ModulationOut(*chunks[:3])
+        second = ModulationOut(*chunks[3:]) if self.is_double else None
+        return first, second
+
+
+class DoubleStreamBlock(nn.Module):
+    """Transformer block with separate image and text weights that share one joint attention call.
+
+    Each stream has its own modulation, norms, attention projections and MLP; queries, keys and values
+    of both streams are concatenated (text first) for a single `attention` call.
+    """
+
+    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False):
+        super().__init__()
+
+        mlp_hidden_dim = int(hidden_size * mlp_ratio)
+        self.num_heads = num_heads
+        self.hidden_size = hidden_size
+
+        # image stream
+        self.img_mod = Modulation(hidden_size, double=True)
+        self.img_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+        self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.img_mlp = nn.Sequential(
+            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+            nn.GELU(approximate="tanh"),
+            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+        )
+
+        # text stream
+        self.txt_mod = Modulation(hidden_size, double=True)
+        self.txt_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
+
+        self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.txt_mlp = nn.Sequential(
+            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
+            nn.GELU(approximate="tanh"),
+            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
+        )
+
+    def _project_qkv(self, x: Tensor, pre_norm: nn.Module, mod: ModulationOut, attn_module: nn.Module):
+        """Normalize and modulate `x`, then return its normalized q, k and its v for one stream."""
+        modulated = (1 + mod.scale) * pre_norm(x) + mod.shift
+        q, k, v = rearrange(attn_module.qkv(modulated), "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+        q, k = attn_module.norm(q, k, v)
+        return q, k, v
+
+    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor) -> tuple[Tensor, Tensor]:
+        """Update both streams and return `(img, txt)`."""
+        img_mod1, img_mod2 = self.img_mod(vec)
+        txt_mod1, txt_mod2 = self.txt_mod(vec)
+
+        # prepare each stream for attention
+        img_q, img_k, img_v = self._project_qkv(img, self.img_norm1, img_mod1, self.img_attn)
+        txt_q, txt_k, txt_v = self._project_qkv(txt, self.txt_norm1, txt_mod1, self.txt_attn)
+
+        # run attention over the joint sequence, text tokens first
+        q = torch.cat((txt_q, img_q), dim=2)
+        k = torch.cat((txt_k, img_k), dim=2)
+        v = torch.cat((txt_v, img_v), dim=2)
+        attn = attention(q, k, v, pe=pe)
+
+        n_txt = txt.shape[1]
+        txt_attn = attn[:, :n_txt]
+        img_attn = attn[:, n_txt:]
+
+        # calculate the img blocks
+        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
+        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
+
+        # calculate the txt blocks
+        txt = txt + txt_mod1.gate * self.txt_attn.proj(txt_attn)
+        txt = txt + txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
+        return img, txt
+
+
+class SingleStreamBlock(nn.Module):
+    """
+    A DiT block with parallel linear layers as described in
+    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
+
+    One fused linear layer produces both the qkv projection and the MLP input; a second fused linear
+    layer consumes the concatenated attention output and activated MLP stream.
+    """
+
+    def __init__(
+        self,
+        hidden_size: int,
+        num_heads: int,
+        mlp_ratio: float = 4.0,
+        qk_scale: float | None = None,
+    ):
+        super().__init__()
+        head_dim = hidden_size // num_heads
+
+        self.hidden_dim = hidden_size
+        self.hidden_size = hidden_size
+        self.num_heads = num_heads
+        self.scale = qk_scale or head_dim**-0.5
+        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
+
+        # qkv and mlp_in
+        self.linear1 = nn.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim)
+        # proj and mlp_out
+        self.linear2 = nn.Linear(hidden_size + self.mlp_hidden_dim, hidden_size)
+
+        self.norm = QKNorm(head_dim)
+        self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+
+        self.mlp_act = nn.GELU(approximate="tanh")
+        self.modulation = Modulation(hidden_size, double=False)
+
+    def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor:
+        """Return `x` plus the gated output of the fused attention/MLP branch."""
+        mod, _ = self.modulation(vec)
+        modulated = (1 + mod.scale) * self.pre_norm(x) + mod.shift
+
+        # Split the fused projection into the attention part and the MLP part.
+        fused_in = self.linear1(modulated)
+        qkv, mlp = torch.split(fused_in, [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
+
+        q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
+        q, k = self.norm(q, k, v)
+
+        # compute attention
+        attn = attention(q, k, v, pe=pe)
+        # activate the mlp stream, concatenate with attention and run the second linear layer
+        fused_out = torch.cat((attn, self.mlp_act(mlp)), 2)
+        return x + mod.gate * self.linear2(fused_out)
+
+
+class LastLayer(nn.Module):
+    """Output head: LayerNorm, shift/scale modulation from `vec`, then a linear projection."""
+
+    def __init__(self, hidden_size: int, patch_size: int, out_channels: int):
+        super().__init__()
+        self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
+        self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True)
+        self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True))
+
+    def forward(self, x: Tensor, vec: Tensor) -> Tensor:
+        # The modulation output holds the shift half followed by the scale half along dim 1.
+        shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
+        modulated = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
+        return self.linear(modulated)
--- a/Show More
+++ b/Show More