chore: bump version to v5.4.0a1

2026-01-22 12:58:22 -05:00 · 2024-10-30 11:06:01 +11:00
472 changed files with 10879 additions and 27498 deletions
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -19,4 +19,3 @@
 - [ ] _The PR has a short but descriptive title, suitable for a changelog_
 - [ ] _Tests added / updated (if applicable)_
 - [ ] _Documentation added / updated (if applicable)_
- [ ] _Updated `What's New` copy (if doing a release after this PR)_
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,14 +0,0 @@
-# Security Policy
-
-## Supported Versions
-
-Only the latest version of Invoke will receive security updates. 
-We do not currently maintain multiple versions of the application with updates.
-
-## Reporting a Vulnerability
-
-To report a vulnerability, contact the Invoke team directly at security@invoke.ai
-
-At this time, we do not maintain a formal bug bounty program. 
-
-You can also share identified security issues with our team on huntr.com
--- a/docs/contributing/MODEL_MANAGER.md
+++ b/docs/contributing/MODEL_MANAGER.md
@@ -1364,6 +1364,7 @@ the in-memory loaded model:
 |----------------|-----------------|------------------|
 | `config`       | AnyModelConfig         | A copy of the model's configuration record for retrieving base type, etc. |
 | `model`        | AnyModel               | The instantiated model (details below) |
+| `locker`       | ModelLockerBase        | A context manager that mediates the movement of the model into VRAM |

 ### get_model_by_key(key, [submodel]) -> LoadedModel

--- a/docs/contributing/contribution_guides/newContributorChecklist.md
+++ b/docs/contributing/contribution_guides/newContributorChecklist.md
@@ -5,7 +5,7 @@ If you're a new contributor to InvokeAI or Open Source Projects, this is the gui
 ## New Contributor Checklist

 - [x] Set up your local development environment & fork of InvokAI by following [the steps outlined here](../dev-environment.md)
- [x] Set up your local tooling with [this guide](../LOCAL_DEVELOPMENT.md). Feel free to skip this step if you already have tooling you're comfortable with.
+- [x] Set up your local tooling with [this guide](../LOCAL_DEVELOPMENT.md#developing-invokeai-in-vscode). Feel free to skip this step if you already have tooling you're comfortable with.
 - [x] Familiarize yourself with [Git](https://www.atlassian.com/git) & our project structure by reading through the [development documentation](development.md)
 - [x] Join the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord
 - [x] Choose an issue to work on! This can be achieved by asking in the #dev-chat channel, tackling a [good first issue](https://github.com/invoke-ai/InvokeAI/contribute) or finding an item on the [roadmap](https://github.com/orgs/invoke-ai/projects/7). If nothing in any of those places catches your eye, feel free to work on something of interest to you!
--- a/docs/contributing/index.md
+++ b/docs/contributing/index.md
@@ -38,7 +38,7 @@ This project is a combined effort of dedicated people from across the world. [C

 ## Code of Conduct

-The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](https://github.com/invoke-ai/InvokeAI/blob/main/docs/CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.
+The InvokeAI community is a welcoming place, and we want your help in maintaining that. Please review our [Code of Conduct](https://github.com/invoke-ai/InvokeAI/blob/main/CODE_OF_CONDUCT.md) to learn more - it's essential to maintaining a respectful and inclusive environment.

 By making a contribution to this project, you certify that:

--- a/docs/faq.md
+++ b/docs/faq.md
@@ -209,7 +209,7 @@ checkpoint models.

 To solve this, go to the Model Manager tab (the cube), select the
 checkpoint model that's giving you trouble, and press the "Convert"
-button in the upper right of your browser window. This will convert the
+button in the upper right of your browser window. This will convert the
 checkpoint into a diffusers model, after which loading should be
 faster and less memory-intensive.

--- a/docs/installation/patchmatch.md
+++ b/docs/installation/patchmatch.md
@@ -97,16 +97,16 @@ Prior to installing PyPatchMatch, you need to take the following steps:
   sudo pacman -S --needed base-devel
   ```

-2. Install `opencv`, `blas`, and required dependencies:
+2. Install `opencv` and `blas`:

   ```sh
-   sudo pacman -S opencv blas fmt glew vtk hdf5
+   sudo pacman -S opencv blas
   ```

   or for CUDA support

   ```sh
-   sudo pacman -S opencv-cuda blas fmt glew vtk hdf5
+   sudo pacman -S opencv-cuda blas
   ```

 3. Fix the naming of the `opencv` package configuration file:
--- a/docs/nodes/defaultNodes.md
+++ b/docs/nodes/defaultNodes.md
@@ -99,6 +99,7 @@ their descriptions.
 | Scale Latents                                                 | Scales latents by a given factor.                                                                                                                    |
 | Segment Anything Processor                                    | Applies segment anything processing to image                                                                                                         |
 | Show Image                                                    | Displays a provided image, and passes it forward in the pipeline.                                                                                    |
+| Step Param Easing                                             | Experimental per-step parameter easing for denoising steps                                                                                           |
 | String Primitive Collection                                   | A collection of string primitive values                                                                                                              |
 | String Primitive                                              | A string primitive value                                                                                                                             |
 | Subtract Integers                                             | Subtracts two numbers                                                                                                                                |
--- a/installer/lib/messages.py
+++ b/installer/lib/messages.py
@@ -259,7 +259,7 @@ def select_gpu() -> GpuType:
                    [
                        f"Detected the [gold1]{OS}-{ARCH}[/] platform",
                        "",
-                        "See [deep_sky_blue1]https://invoke-ai.github.io/InvokeAI/installation/requirements/[/] to ensure your system meets the minimum requirements.",
+                        "See [deep_sky_blue1]https://invoke-ai.github.io/InvokeAI/#system[/] to ensure your system meets the minimum requirements.",
                        "",
                        "[red3]🠶[/] [b]Your GPU drivers must be correctly installed before using InvokeAI![/] [red3]🠴[/]",
                    ]
--- a/installer/templates/invoke.sh.in
+++ b/installer/templates/invoke.sh.in
@@ -68,7 +68,7 @@ do_line_input() {
    printf "2: Open the developer console\n"
    printf "3: Command-line help\n"
    printf "Q: Quit\n\n"
-    printf "To update, download and run the installer from https://github.com/invoke-ai/InvokeAI/releases/latest\n\n"
+    printf "To update, download and run the installer from https://github.com/invoke-ai/InvokeAI/releases/latest.\n\n"
    read -p "Please enter 1-4, Q: [1] " yn
    choice=${yn:='1'}
    do_choice $choice
--- a/invokeai/app/api/routers/app_info.py
+++ b/invokeai/app/api/routers/app_info.py
@@ -40,8 +40,6 @@ class AppVersion(BaseModel):

    version: str = Field(description="App version")

-    highlights: Optional[list[str]] = Field(default=None, description="Highlights of release")
-

 class AppDependencyVersions(BaseModel):
    """App depencency Versions Response"""
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2023 Lincoln D. Stein
 """FastAPI route for model configuration records."""

-import contextlib
 import io
 import pathlib
 import shutil
@@ -11,7 +10,6 @@ from enum import Enum
 from tempfile import TemporaryDirectory
 from typing import List, Optional, Type

-import huggingface_hub
 from fastapi import Body, Path, Query, Response, UploadFile
 from fastapi.responses import FileResponse, HTMLResponse
 from fastapi.routing import APIRouter
@@ -29,7 +27,6 @@ from invokeai.app.services.model_records import (
    ModelRecordChanges,
    UnknownModelException,
 )
-from invokeai.app.util.suppress_output import SuppressOutput
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
    BaseModelType,
@@ -37,7 +34,7 @@ from invokeai.backend.model_manager.config import (
    ModelFormat,
    ModelType,
 )
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats
 from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch
 from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException
 from invokeai.backend.model_manager.search import ModelSearch
@@ -926,51 +923,3 @@ async def get_stats() -> Optional[CacheStats]:
    """Return performance statistics on the model manager's RAM cache. Will return null if no models have been loaded."""

    return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats
-
-
-class HFTokenStatus(str, Enum):
-    VALID = "valid"
-    INVALID = "invalid"
-    UNKNOWN = "unknown"
-
-
-class HFTokenHelper:
-    @classmethod
-    def get_status(cls) -> HFTokenStatus:
-        try:
-            if huggingface_hub.get_token_permission(huggingface_hub.get_token()):
-                # Valid token!
-                return HFTokenStatus.VALID
-            # No token set
-            return HFTokenStatus.INVALID
-        except Exception:
-            return HFTokenStatus.UNKNOWN
-
-    @classmethod
-    def set_token(cls, token: str) -> HFTokenStatus:
-        with SuppressOutput(), contextlib.suppress(Exception):
-            huggingface_hub.login(token=token, add_to_git_credential=False)
-        return cls.get_status()
-
-
-@model_manager_router.get("/hf_login", operation_id="get_hf_login_status", response_model=HFTokenStatus)
-async def get_hf_login_status() -> HFTokenStatus:
-    token_status = HFTokenHelper.get_status()
-
-    if token_status is HFTokenStatus.UNKNOWN:
-        ApiDependencies.invoker.services.logger.warning("Unable to verify HF token")
-
-    return token_status
-
-
-@model_manager_router.post("/hf_login", operation_id="do_hf_login", response_model=HFTokenStatus)
-async def do_hf_login(
-    token: str = Body(description="Hugging Face token to use for login", embed=True),
-) -> HFTokenStatus:
-    HFTokenHelper.set_token(token)
-    token_status = HFTokenHelper.get_status()
-
-    if token_status is HFTokenStatus.UNKNOWN:
-        ApiDependencies.invoker.services.logger.warning("Unable to verify HF token")
-
-    return token_status
--- a/invokeai/app/api/routers/session_queue.py
+++ b/invokeai/app/api/routers/session_queue.py
@@ -110,7 +110,7 @@ async def cancel_by_batch_ids(
@session_queue_router.put(
    "/{queue_id}/cancel_by_destination",
    operation_id="cancel_by_destination",
-    responses={200: {"model": CancelByDestinationResult}},
+    responses={200: {"model": CancelByBatchIDsResult}},
 )
 async def cancel_by_destination(
    queue_id: str = Path(description="The queue id to perform this operation on"),
--- a/invokeai/app/invocations/baseinvocation.py
+++ b/invokeai/app/invocations/baseinvocation.py
@@ -4,7 +4,6 @@ from __future__ import annotations

 import inspect
 import re
-import sys
 import warnings
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -63,7 +62,6 @@ class Classification(str, Enum, metaclass=MetaEnum):
    - `Prototype`: The invocation is not yet stable and may be removed from the application at any time. Workflows built around this invocation may break, and we are *not* committed to supporting this invocation.
    - `Deprecated`: The invocation is deprecated and may be removed in a future version.
    - `Internal`: The invocation is not intended for use by end-users. It may be changed or removed at any time, but is exposed for users to play with.
-    - `Special`: The invocation is a special case and does not fit into any of the other classifications.
    """

    Stable = "stable"
@@ -71,7 +69,6 @@ class Classification(str, Enum, metaclass=MetaEnum):
    Prototype = "prototype"
    Deprecated = "deprecated"
    Internal = "internal"
-    Special = "special"


 class UIConfigBase(BaseModel):
@@ -195,19 +192,12 @@ class BaseInvocation(ABC, BaseModel):
        """Gets a pydantc TypeAdapter for the union of all invocation types."""
        if not cls._typeadapter or cls._typeadapter_needs_update:
            AnyInvocation = TypeAliasType(
-                "AnyInvocation", Annotated[Union[tuple(cls.get_invocations())], Field(discriminator="type")]
+                "AnyInvocation", Annotated[Union[tuple(cls._invocation_classes)], Field(discriminator="type")]
            )
            cls._typeadapter = TypeAdapter(AnyInvocation)
            cls._typeadapter_needs_update = False
        return cls._typeadapter

-    @classmethod
-    def invalidate_typeadapter(cls) -> None:
-        """Invalidates the typeadapter, forcing it to be rebuilt on next access. If the invocation allowlist or
-        denylist is changed, this should be called to ensure the typeadapter is updated and validation respects
-        the updated allowlist and denylist."""
-        cls._typeadapter_needs_update = True
-
    @classmethod
    def get_invocations(cls) -> Iterable[BaseInvocation]:
        """Gets all invocations, respecting the allowlist and denylist."""
@@ -489,26 +479,6 @@ def invocation(
            title="type", default=invocation_type, json_schema_extra={"field_kind": FieldKind.NodeAttribute}
        )

-        # Validate the `invoke()` method is implemented
-        if "invoke" in cls.__abstractmethods__:
-            raise ValueError(f'Invocation "{invocation_type}" must implement the "invoke" method')
-
-        # And validate that `invoke()` returns a subclass of `BaseInvocationOutput
-        invoke_return_annotation = signature(cls.invoke).return_annotation
-
-        try:
-            # TODO(psyche): If `invoke()` is not defined, `return_annotation` ends up as the string "BaseInvocationOutput"
-            # instead of the class `BaseInvocationOutput`. This may be a pydantic bug: https://github.com/pydantic/pydantic/issues/7978
-            if isinstance(invoke_return_annotation, str):
-                invoke_return_annotation = getattr(sys.modules[cls.__module__], invoke_return_annotation)
-
-            assert invoke_return_annotation is not BaseInvocationOutput
-            assert issubclass(invoke_return_annotation, BaseInvocationOutput)
-        except Exception:
-            raise ValueError(
-                f'Invocation "{invocation_type}" must have a return annotation of a subclass of BaseInvocationOutput (got "{invoke_return_annotation}")'
-            )
-
        docstring = cls.__doc__
        cls = create_model(
            cls.__qualname__,
--- a/invokeai/app/invocations/blend_latents.py
+++ b/invokeai/app/invocations/blend_latents.py
@@ -1,120 +1,98 @@
-from typing import Optional, Union
+from typing import Any, Union

 import numpy as np
+import numpy.typing as npt
 import torch
-import torchvision.transforms as T
-from PIL import Image
-from torchvision.transforms.functional import resize as tv_resize

 from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
-from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField, LatentsField
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, LatentsField
 from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
 from invokeai.backend.util.devices import TorchDevice


-def slerp(
-    t: Union[float, np.ndarray],
-    v0: Union[torch.Tensor, np.ndarray],
-    v1: Union[torch.Tensor, np.ndarray],
-    device: torch.device,
-    DOT_THRESHOLD: float = 0.9995,
-):
-    """
-    Spherical linear interpolation
-    Args:
-        t (float/np.ndarray): Float value between 0.0 and 1.0
-        v0 (np.ndarray): Starting vector
-        v1 (np.ndarray): Final vector
-        DOT_THRESHOLD (float): Threshold for considering the two vectors as
-                            colineal. Not recommended to alter this.
-    Returns:
-        v2 (np.ndarray): Interpolation vector between v0 and v1
-    """
-    inputs_are_torch = False
-    if not isinstance(v0, np.ndarray):
-        inputs_are_torch = True
-        v0 = v0.detach().cpu().numpy()
-    if not isinstance(v1, np.ndarray):
-        inputs_are_torch = True
-        v1 = v1.detach().cpu().numpy()
-
-    dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
-    if np.abs(dot) > DOT_THRESHOLD:
-        v2 = (1 - t) * v0 + t * v1
-    else:
-        theta_0 = np.arccos(dot)
-        sin_theta_0 = np.sin(theta_0)
-        theta_t = theta_0 * t
-        sin_theta_t = np.sin(theta_t)
-        s0 = np.sin(theta_0 - theta_t) / sin_theta_0
-        s1 = sin_theta_t / sin_theta_0
-        v2 = s0 * v0 + s1 * v1
-
-    if inputs_are_torch:
-        v2 = torch.from_numpy(v2).to(device)
-
-    return v2
-
-
@invocation(
    "lblend",
    title="Blend Latents",
-    tags=["latents", "blend", "mask"],
+    tags=["latents", "blend"],
    category="latents",
-    version="1.1.0",
+    version="1.0.3",
 )
 class BlendLatentsInvocation(BaseInvocation):
-    """Blend two latents using a given alpha. If a mask is provided, the second latents will be masked before blending.
-    Latents must have same size. Masking functionality added by @dwringer."""
+    """Blend two latents using a given alpha. Latents must have same size."""

-    latents_a: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
-    latents_b: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
-    mask: Optional[ImageField] = InputField(default=None, description="Mask for blending in latents B")
-    alpha: float = InputField(ge=0, default=0.5, description=FieldDescriptions.blend_alpha)
-
-    def prep_mask_tensor(self, mask_image: Image.Image) -> torch.Tensor:
-        if mask_image.mode != "L":
-            mask_image = mask_image.convert("L")
-        mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
-        if mask_tensor.dim() == 3:
-            mask_tensor = mask_tensor.unsqueeze(0)
-        return mask_tensor
-
-    def replace_tensor_from_masked_tensor(
-        self, tensor: torch.Tensor, other_tensor: torch.Tensor, mask_tensor: torch.Tensor
-    ):
-        output = tensor.clone()
-        mask_tensor = mask_tensor.expand(output.shape)
-        if output.dtype != torch.float16:
-            output = torch.add(output, mask_tensor * torch.sub(other_tensor, tensor))
-        else:
-            output = torch.add(output, mask_tensor.half() * torch.sub(other_tensor, tensor))
-        return output
+    latents_a: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    latents_b: LatentsField = InputField(
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    alpha: float = InputField(default=0.5, description=FieldDescriptions.blend_alpha)

    def invoke(self, context: InvocationContext) -> LatentsOutput:
        latents_a = context.tensors.load(self.latents_a.latents_name)
        latents_b = context.tensors.load(self.latents_b.latents_name)
-        if self.mask is None:
-            mask_tensor = torch.zeros(latents_a.shape[-2:])
-        else:
-            mask_tensor = self.prep_mask_tensor(context.images.get_pil(self.mask.image_name))
-            mask_tensor = tv_resize(mask_tensor, latents_a.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
-
-        latents_b = self.replace_tensor_from_masked_tensor(latents_b, latents_a, mask_tensor)

        if latents_a.shape != latents_b.shape:
-            raise ValueError("Latents to blend must be the same size.")
+            raise Exception("Latents to blend must be the same size.")

        device = TorchDevice.choose_torch_device()

+        def slerp(
+            t: Union[float, npt.NDArray[Any]],  # FIXME: maybe use np.float32 here?
+            v0: Union[torch.Tensor, npt.NDArray[Any]],
+            v1: Union[torch.Tensor, npt.NDArray[Any]],
+            DOT_THRESHOLD: float = 0.9995,
+        ) -> Union[torch.Tensor, npt.NDArray[Any]]:
+            """
+            Spherical linear interpolation
+            Args:
+                t (float/np.ndarray): Float value between 0.0 and 1.0
+                v0 (np.ndarray): Starting vector
+                v1 (np.ndarray): Final vector
+                DOT_THRESHOLD (float): Threshold for considering the two vectors as
+                                    collinear. Not recommended to alter this.
+            Returns:
+                v2 (np.ndarray): Interpolation vector between v0 and v1
+            """
+            inputs_are_torch = False
+            if not isinstance(v0, np.ndarray):
+                inputs_are_torch = True
+                v0 = v0.detach().cpu().numpy()
+            if not isinstance(v1, np.ndarray):
+                inputs_are_torch = True
+                v1 = v1.detach().cpu().numpy()
+
+            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
+            if np.abs(dot) > DOT_THRESHOLD:
+                v2 = (1 - t) * v0 + t * v1
+            else:
+                theta_0 = np.arccos(dot)
+                sin_theta_0 = np.sin(theta_0)
+                theta_t = theta_0 * t
+                sin_theta_t = np.sin(theta_t)
+                s0 = np.sin(theta_0 - theta_t) / sin_theta_0
+                s1 = sin_theta_t / sin_theta_0
+                v2 = s0 * v0 + s1 * v1
+
+            if inputs_are_torch:
+                v2_torch: torch.Tensor = torch.from_numpy(v2).to(device)
+                return v2_torch
+            else:
+                assert isinstance(v2, np.ndarray)
+                return v2
+
        # blend
-        blended_latents = slerp(self.alpha, latents_a, latents_b, device)
+        bl = slerp(self.alpha, latents_a, latents_b)
+        assert isinstance(bl, torch.Tensor)
+        blended_latents: torch.Tensor = bl  # for type checking convenience

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        blended_latents = blended_latents.to("cpu")
-        torch.cuda.empty_cache()
+
+        TorchDevice.empty_cache()

        name = context.tensors.save(tensor=blended_latents)
-        return LatentsOutput.build(latents_name=name, latents=blended_latents)
+        return LatentsOutput.build(latents_name=name, latents=blended_latents, seed=self.latents_a.seed)
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -82,11 +82,10 @@ class CompelInvocation(BaseInvocation):
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_smart_lora_patches(
+            LoRAPatcher.apply_lora_patches(
                model=text_encoder,
                patches=_lora_loader(),
                prefix="lora_te_",
-                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
@@ -96,7 +95,6 @@ class CompelInvocation(BaseInvocation):
                ti_manager,
            ),
        ):
-            context.util.signal_progress("Building conditioning")
            assert isinstance(text_encoder, CLIPTextModel)
            assert isinstance(tokenizer, CLIPTokenizer)
            compel = Compel(
@@ -180,11 +178,10 @@ class SDXLPromptInvocationBase:
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_smart_lora_patches(
+            LoRAPatcher.apply_lora_patches(
                text_encoder,
                patches=_lora_loader(),
                prefix=lora_prefix,
-                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
@@ -194,7 +191,6 @@ class SDXLPromptInvocationBase:
                ti_manager,
            ),
        ):
-            context.util.signal_progress("Building conditioning")
            assert isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection))
            assert isinstance(tokenizer, CLIPTokenizer)

--- a/invokeai/app/invocations/composition-nodes.py
+++ b/invokeai/app/invocations/composition-nodes.py
--- a/invokeai/app/invocations/create_denoise_mask.py
+++ b/invokeai/app/invocations/create_denoise_mask.py
@@ -65,7 +65,6 @@ class CreateDenoiseMaskInvocation(BaseInvocation):
            img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
            masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
            # TODO:
-            context.util.signal_progress("Running VAE encoder")
            masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone())

            masked_latents_name = context.tensors.save(tensor=masked_latents)
--- a/invokeai/app/invocations/create_gradient_mask.py
+++ b/invokeai/app/invocations/create_gradient_mask.py
@@ -131,7 +131,6 @@ class CreateGradientMaskInvocation(BaseInvocation):
                    image_tensor = image_tensor.unsqueeze(0)
                img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
                masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
-                context.util.signal_progress("Running VAE encoder")
                masked_latents = ImageToLatentsInvocation.vae_encode(
                    vae_info, self.fp32, self.tiled, masked_image.clone()
                )
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -622,7 +622,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
        for t2i_adapter_field in t2i_adapter:
            t2i_adapter_model_config = context.models.get_config(t2i_adapter_field.t2i_adapter_model.key)
            t2i_adapter_loaded_model = context.models.load(t2i_adapter_field.t2i_adapter_model)
-            image = context.images.get_pil(t2i_adapter_field.image.image_name, mode="RGB")
+            image = context.images.get_pil(t2i_adapter_field.image.image_name)

            # The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
            if t2i_adapter_model_config.base == BaseModelType.StableDiffusion1:
@@ -640,39 +640,29 @@ class DenoiseLatentsInvocation(BaseInvocation):
            with t2i_adapter_loaded_model as t2i_adapter_model:
                total_downscale_factor = t2i_adapter_model.total_downscale_factor

+                # Resize the T2I-Adapter input image.
+                # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
+                # result will match the latent image's dimensions after max_unet_downscale is applied.
+                t2i_input_height = latents_shape[2] // max_unet_downscale * total_downscale_factor
+                t2i_input_width = latents_shape[3] // max_unet_downscale * total_downscale_factor
+
                # Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
                # a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
                # T2I-Adapter model.
                #
                # Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
                # of the same requirements (e.g. preserving binary masks during resize).
-
-                # Assuming fixed dimensional scaling of LATENT_SCALE_FACTOR.
-                _, _, latent_height, latent_width = latents_shape
-                control_height_resize = latent_height * LATENT_SCALE_FACTOR
-                control_width_resize = latent_width * LATENT_SCALE_FACTOR
                t2i_image = prepare_control_image(
                    image=image,
                    do_classifier_free_guidance=False,
-                    width=control_width_resize,
-                    height=control_height_resize,
+                    width=t2i_input_width,
+                    height=t2i_input_height,
                    num_channels=t2i_adapter_model.config["in_channels"],  # mypy treats this as a FrozenDict
                    device=t2i_adapter_model.device,
                    dtype=t2i_adapter_model.dtype,
                    resize_mode=t2i_adapter_field.resize_mode,
                )

-                # Resize the T2I-Adapter input image.
-                # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
-                # result will match the latent image's dimensions after max_unet_downscale is applied.
-                # We crop the image to this size so that the positions match the input image on non-standard resolutions
-                t2i_input_height = latents_shape[2] // max_unet_downscale * total_downscale_factor
-                t2i_input_width = latents_shape[3] // max_unet_downscale * total_downscale_factor
-                if t2i_image.shape[2] > t2i_input_height or t2i_image.shape[3] > t2i_input_width:
-                    t2i_image = t2i_image[
-                        :, :, : min(t2i_image.shape[2], t2i_input_height), : min(t2i_image.shape[3], t2i_input_width)
-                    ]
-
                adapter_state = t2i_adapter_model(t2i_image)

            if do_classifier_free_guidance:
@@ -1003,11 +993,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
-            LoRAPatcher.apply_smart_lora_patches(
+            LoRAPatcher.apply_lora_patches(
                model=unet,
                patches=_lora_loader(),
                prefix="lora_unet_",
-                dtype=unet.dtype,
                cached_weights=cached_weights,
            ),
        ):
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -41,7 +41,6 @@ class UIType(str, Enum, metaclass=MetaEnum):
    # region Model Field Types
    MainModel = "MainModelField"
    FluxMainModel = "FluxMainModelField"
-    SD3MainModel = "SD3MainModelField"
    SDXLMainModel = "SDXLMainModelField"
    SDXLRefinerModel = "SDXLRefinerModelField"
    ONNXModel = "ONNXModelField"
@@ -53,8 +52,6 @@ class UIType(str, Enum, metaclass=MetaEnum):
    T2IAdapterModel = "T2IAdapterModelField"
    T5EncoderModel = "T5EncoderModelField"
    CLIPEmbedModel = "CLIPEmbedModelField"
-    CLIPLEmbedModel = "CLIPLEmbedModelField"
-    CLIPGEmbedModel = "CLIPGEmbedModelField"
    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
    # endregion

@@ -134,10 +131,8 @@ class FieldDescriptions:
    clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
    t5_encoder = "T5 tokenizer and text encoder"
    clip_embed_model = "CLIP Embed loader"
-    clip_g_model = "CLIP-G Embed loader"
    unet = "UNet (scheduler, LoRAs)"
    transformer = "Transformer"
-    mmditx = "MMDiTX"
    vae = "VAE"
    cond = "Conditioning tensor"
    controlnet_model = "ControlNet model to load"
@@ -145,7 +140,6 @@ class FieldDescriptions:
    lora_model = "LoRA model to load"
    main_model = "Main model (UNet, VAE, CLIP) to load"
    flux_model = "Flux model (Transformer) to load"
-    sd3_model = "SD3 model (MMDiTX) to load"
    sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
    sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
    onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -250,17 +244,6 @@ class FluxConditioningField(BaseModel):
    """A conditioning tensor primitive value"""

    conditioning_name: str = Field(description="The name of conditioning tensor")
-    mask: Optional[TensorField] = Field(
-        default=None,
-        description="The mask associated with this conditioning tensor. Excluded regions should be set to False, "
-        "included regions should be set to True.",
-    )
-
-
-class SD3ConditioningField(BaseModel):
-    """A conditioning tensor primitive value"""
-
-    conditioning_name: str = Field(description="The name of conditioning tensor")


 class ConditioningField(BaseModel):
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -30,7 +30,6 @@ from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlN
 from invokeai.backend.flux.denoise import denoise
 from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
-from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
 from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
 from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension
 from invokeai.backend.flux.ip_adapter.xlabs_ip_adapter_flux import XlabsIpAdapterFlux
@@ -43,7 +42,6 @@ from invokeai.backend.flux.sampling_utils import (
    pack,
    unpack,
 )
-from invokeai.backend.flux.text_conditioning import FluxTextConditioning
 from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
@@ -58,7 +56,7 @@ from invokeai.backend.util.devices import TorchDevice
    title="FLUX Denoise",
    tags=["image", "flux"],
    category="image",
-    version="3.2.2",
+    version="3.2.0",
    classification=Classification.Prototype,
 )
 class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
@@ -83,16 +81,15 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        description=FieldDescriptions.denoising_start,
    )
    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
    transformer: TransformerField = InputField(
        description=FieldDescriptions.flux_model,
        input=Input.Connection,
        title="Transformer",
    )
-    positive_text_conditioning: FluxConditioningField | list[FluxConditioningField] = InputField(
+    positive_text_conditioning: FluxConditioningField = InputField(
        description=FieldDescriptions.positive_cond, input=Input.Connection
    )
-    negative_text_conditioning: FluxConditioningField | list[FluxConditioningField] | None = InputField(
+    negative_text_conditioning: FluxConditioningField | None = InputField(
        default=None,
        description="Negative conditioning tensor. Can be None if cfg_scale is 1.0.",
        input=Input.Connection,
@@ -141,12 +138,36 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        name = context.tensors.save(tensor=latents)
        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)

+    def _load_text_conditioning(
+        self, context: InvocationContext, conditioning_name: str, dtype: torch.dtype
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        # Load the conditioning data.
+        cond_data = context.conditioning.load(conditioning_name)
+        assert len(cond_data.conditionings) == 1
+        flux_conditioning = cond_data.conditionings[0]
+        assert isinstance(flux_conditioning, FLUXConditioningInfo)
+        flux_conditioning = flux_conditioning.to(dtype=dtype)
+        t5_embeddings = flux_conditioning.t5_embeds
+        clip_embeddings = flux_conditioning.clip_embeds
+        return t5_embeddings, clip_embeddings
+
    def _run_diffusion(
        self,
        context: InvocationContext,
    ):
        inference_dtype = torch.bfloat16

+        # Load the conditioning data.
+        pos_t5_embeddings, pos_clip_embeddings = self._load_text_conditioning(
+            context, self.positive_text_conditioning.conditioning_name, inference_dtype
+        )
+        neg_t5_embeddings: torch.Tensor | None = None
+        neg_clip_embeddings: torch.Tensor | None = None
+        if self.negative_text_conditioning is not None:
+            neg_t5_embeddings, neg_clip_embeddings = self._load_text_conditioning(
+                context, self.negative_text_conditioning.conditioning_name, inference_dtype
+            )
+
        # Load the input latents, if provided.
        init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
        if init_latents is not None:
@@ -161,45 +182,15 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
            dtype=inference_dtype,
            seed=self.seed,
        )
-        b, _c, latent_h, latent_w = noise.shape
-        packed_h = latent_h // 2
-        packed_w = latent_w // 2
-
-        # Load the conditioning data.
-        pos_text_conditionings = self._load_text_conditioning(
-            context=context,
-            cond_field=self.positive_text_conditioning,
-            packed_height=packed_h,
-            packed_width=packed_w,
-            dtype=inference_dtype,
-            device=TorchDevice.choose_torch_device(),
-        )
-        neg_text_conditionings: list[FluxTextConditioning] | None = None
-        if self.negative_text_conditioning is not None:
-            neg_text_conditionings = self._load_text_conditioning(
-                context=context,
-                cond_field=self.negative_text_conditioning,
-                packed_height=packed_h,
-                packed_width=packed_w,
-                dtype=inference_dtype,
-                device=TorchDevice.choose_torch_device(),
-            )
-        pos_regional_prompting_extension = RegionalPromptingExtension.from_text_conditioning(
-            pos_text_conditionings, img_seq_len=packed_h * packed_w
-        )
-        neg_regional_prompting_extension = (
-            RegionalPromptingExtension.from_text_conditioning(neg_text_conditionings, img_seq_len=packed_h * packed_w)
-            if neg_text_conditionings
-            else None
-        )

        transformer_info = context.models.load(self.transformer.transformer)
        is_schnell = "schnell" in transformer_info.config.config_path

        # Calculate the timestep schedule.
+        image_seq_len = noise.shape[-1] * noise.shape[-2] // 4
        timesteps = get_schedule(
            num_steps=self.num_steps,
-            image_seq_len=packed_h * packed_w,
+            image_seq_len=image_seq_len,
            shift=not is_schnell,
        )

@@ -216,12 +207,9 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                    "to be poor. Consider using a FLUX dev model instead."
                )

-            if self.add_noise:
-                # Noise the orig_latents by the appropriate amount for the first timestep.
-                t_0 = timesteps[0]
-                x = t_0 * noise + (1.0 - t_0) * init_latents
-            else:
-                x = init_latents
+            # Noise the orig_latents by the appropriate amount for the first timestep.
+            t_0 = timesteps[0]
+            x = t_0 * noise + (1.0 - t_0) * init_latents
        else:
            # init_latents are not provided, so we are not doing image-to-image (i.e. we are starting from pure noise).
            if self.denoising_start > 1e-5:
@@ -236,17 +224,28 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):

        inpaint_mask = self._prep_inpaint_mask(context, x)

+        b, _c, latent_h, latent_w = x.shape
        img_ids = generate_img_ids(h=latent_h, w=latent_w, batch_size=b, device=x.device, dtype=x.dtype)

+        pos_bs, pos_t5_seq_len, _ = pos_t5_embeddings.shape
+        pos_txt_ids = torch.zeros(
+            pos_bs, pos_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()
+        )
+        neg_txt_ids: torch.Tensor | None = None
+        if neg_t5_embeddings is not None:
+            neg_bs, neg_t5_seq_len, _ = neg_t5_embeddings.shape
+            neg_txt_ids = torch.zeros(
+                neg_bs, neg_t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device()
+            )
+
        # Pack all latent tensors.
        init_latents = pack(init_latents) if init_latents is not None else None
        inpaint_mask = pack(inpaint_mask) if inpaint_mask is not None else None
        noise = pack(noise)
        x = pack(x)

-        # Now that we have 'packed' the latent tensors, verify that we calculated the image_seq_len, packed_h, and
-        # packed_w correctly.
-        assert packed_h * packed_w == x.shape[1]
+        # Now that we have 'packed' the latent tensors, verify that we calculated the image_seq_len correctly.
+        assert image_seq_len == x.shape[1]

        # Prepare inpaint extension.
        inpaint_extension: InpaintExtension | None = None
@@ -296,11 +295,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
            if config.format in [ModelFormat.Checkpoint]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_smart_lora_patches(
+                    LoRAPatcher.apply_lora_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
-                        dtype=inference_dtype,
                        cached_weights=cached_weights,
                    )
                )
@@ -312,7 +310,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                # The model is quantized, so apply the LoRA weights as sidecar layers. This results in slower inference,
                # than directly patching the weights, but is agnostic to the quantization format.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_wrapper_patches(
+                    LoRAPatcher.apply_lora_sidecar_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
@@ -336,8 +334,12 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                model=transformer,
                img=x,
                img_ids=img_ids,
-                pos_regional_prompting_extension=pos_regional_prompting_extension,
-                neg_regional_prompting_extension=neg_regional_prompting_extension,
+                txt=pos_t5_embeddings,
+                txt_ids=pos_txt_ids,
+                vec=pos_clip_embeddings,
+                neg_txt=neg_t5_embeddings,
+                neg_txt_ids=neg_txt_ids,
+                neg_vec=neg_clip_embeddings,
                timesteps=timesteps,
                step_callback=self._build_step_callback(context),
                guidance=self.guidance,
@@ -351,43 +353,6 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        x = unpack(x.float(), self.height, self.width)
        return x

-    def _load_text_conditioning(
-        self,
-        context: InvocationContext,
-        cond_field: FluxConditioningField | list[FluxConditioningField],
-        packed_height: int,
-        packed_width: int,
-        dtype: torch.dtype,
-        device: torch.device,
-    ) -> list[FluxTextConditioning]:
-        """Load text conditioning data from a FluxConditioningField or a list of FluxConditioningFields."""
-        # Normalize to a list of FluxConditioningFields.
-        cond_list = [cond_field] if isinstance(cond_field, FluxConditioningField) else cond_field
-
-        text_conditionings: list[FluxTextConditioning] = []
-        for cond_field in cond_list:
-            # Load the text embeddings.
-            cond_data = context.conditioning.load(cond_field.conditioning_name)
-            assert len(cond_data.conditionings) == 1
-            flux_conditioning = cond_data.conditionings[0]
-            assert isinstance(flux_conditioning, FLUXConditioningInfo)
-            flux_conditioning = flux_conditioning.to(dtype=dtype, device=device)
-            t5_embeddings = flux_conditioning.t5_embeds
-            clip_embeddings = flux_conditioning.clip_embeds
-
-            # Load the mask, if provided.
-            mask: Optional[torch.Tensor] = None
-            if cond_field.mask is not None:
-                mask = context.tensors.load(cond_field.mask.tensor_name)
-                mask = mask.to(device=device)
-                mask = RegionalPromptingExtension.preprocess_regional_prompt_mask(
-                    mask, packed_height, packed_width, dtype, device
-                )
-
-            text_conditionings.append(FluxTextConditioning(t5_embeddings, clip_embeddings, mask))
-
-        return text_conditionings
-
    @classmethod
    def prep_cfg_scale(
        cls, cfg_scale: float | list[float], timesteps: list[float], cfg_scale_start_step: int, cfg_scale_end_step: int
--- a/invokeai/app/invocations/flux_model_loader.py
+++ b/invokeai/app/invocations/flux_model_loader.py
@@ -1,89 +0,0 @@
-from typing import Literal
-
-from invokeai.app.invocations.baseinvocation import (
-    BaseInvocation,
-    BaseInvocationOutput,
-    Classification,
-    invocation,
-    invocation_output,
-)
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
-from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.util import max_seq_lengths
-from invokeai.backend.model_manager.config import (
-    CheckpointConfigBase,
-    SubModelType,
-)
-
-
-@invocation_output("flux_model_loader_output")
-class FluxModelLoaderOutput(BaseInvocationOutput):
-    """Flux base model loader output"""
-
-    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
-    clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP")
-    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
-    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
-    max_seq_len: Literal[256, 512] = OutputField(
-        description="The max sequence length to used for the T5 encoder. (256 for schnell transformer, 512 for dev transformer)",
-        title="Max Seq Length",
-    )
-
-
-@invocation(
-    "flux_model_loader",
-    title="Flux Main Model",
-    tags=["model", "flux"],
-    category="model",
-    version="1.0.4",
-    classification=Classification.Prototype,
-)
-class FluxModelLoaderInvocation(BaseInvocation):
-    """Loads a flux base model, outputting its submodels."""
-
-    model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.flux_model,
-        ui_type=UIType.FluxMainModel,
-        input=Input.Direct,
-    )
-
-    t5_encoder_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.t5_encoder, ui_type=UIType.T5EncoderModel, input=Input.Direct, title="T5 Encoder"
-    )
-
-    clip_embed_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.clip_embed_model,
-        ui_type=UIType.CLIPEmbedModel,
-        input=Input.Direct,
-        title="CLIP Embed",
-    )
-
-    vae_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.vae_model, ui_type=UIType.FluxVAEModel, title="VAE"
-    )
-
-    def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput:
-        for key in [self.model.key, self.t5_encoder_model.key, self.clip_embed_model.key, self.vae_model.key]:
-            if not context.models.exists(key):
-                raise ValueError(f"Unknown model: {key}")
-
-        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
-        vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
-
-        tokenizer = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
-        clip_encoder = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
-
-        tokenizer2 = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer2})
-        t5_encoder = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
-
-        transformer_config = context.models.get_config(transformer)
-        assert isinstance(transformer_config, CheckpointConfigBase)
-
-        return FluxModelLoaderOutput(
-            transformer=TransformerField(transformer=transformer, loras=[]),
-            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
-            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
-            vae=VAEField(vae=vae),
-            max_seq_len=max_seq_lengths[transformer_config.config_path],
-        )
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -1,18 +1,11 @@
 from contextlib import ExitStack
-from typing import Iterator, Literal, Optional, Tuple
+from typing import Iterator, Literal, Tuple

 import torch
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer

 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import (
-    FieldDescriptions,
-    FluxConditioningField,
-    Input,
-    InputField,
-    TensorField,
-    UIComponent,
-)
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
 from invokeai.app.invocations.model import CLIPField, T5EncoderField
 from invokeai.app.invocations.primitives import FluxConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
@@ -22,7 +15,6 @@ from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo
-from invokeai.backend.util.devices import TorchDevice


@invocation(
@@ -30,7 +22,7 @@ from invokeai.backend.util.devices import TorchDevice
    title="FLUX Text Encoding",
    tags=["prompt", "conditioning", "flux"],
    category="conditioning",
-    version="1.1.1",
+    version="1.1.0",
    classification=Classification.Prototype,
 )
 class FluxTextEncoderInvocation(BaseInvocation):
@@ -49,10 +41,7 @@ class FluxTextEncoderInvocation(BaseInvocation):
    t5_max_seq_len: Literal[256, 512] = InputField(
        description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models."
    )
-    prompt: str = InputField(description="Text prompt to encode.", ui_component=UIComponent.Textarea)
-    mask: Optional[TensorField] = InputField(
-        default=None, description="A mask defining the region that this conditioning prompt applies to."
-    )
+    prompt: str = InputField(description="Text prompt to encode.")

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> FluxConditioningOutput:
@@ -65,9 +54,7 @@ class FluxTextEncoderInvocation(BaseInvocation):
        )

        conditioning_name = context.conditioning.save(conditioning_data)
-        return FluxConditioningOutput(
-            conditioning=FluxConditioningField(conditioning_name=conditioning_name, mask=self.mask)
-        )
+        return FluxConditioningOutput.build(conditioning_name)

    def _t5_encode(self, context: InvocationContext) -> torch.Tensor:
        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
@@ -84,7 +71,6 @@ class FluxTextEncoderInvocation(BaseInvocation):

            t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len)

-            context.util.signal_progress("Running T5 encoder")
            prompt_embeds = t5_encoder(prompt)

        assert isinstance(prompt_embeds, torch.Tensor)
@@ -112,11 +98,10 @@ class FluxTextEncoderInvocation(BaseInvocation):
            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_smart_lora_patches(
+                    LoRAPatcher.apply_lora_patches(
                        model=clip_text_encoder,
                        patches=self._clip_lora_iterator(context),
                        prefix=FLUX_LORA_CLIP_PREFIX,
-                        dtype=TorchDevice.choose_torch_dtype(),
                        cached_weights=cached_weights,
                    )
                )
@@ -126,7 +111,6 @@ class FluxTextEncoderInvocation(BaseInvocation):

            clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77)

-            context.util.signal_progress("Running CLIP encoder")
            pooled_prompt_embeds = clip_encoder(prompt)

        assert isinstance(pooled_prompt_embeds, torch.Tensor)
--- a/invokeai/app/invocations/flux_vae_decode.py
+++ b/invokeai/app/invocations/flux_vae_decode.py
@@ -41,8 +41,7 @@ class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
    def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
        with vae_info as vae:
            assert isinstance(vae, AutoEncoder)
-            vae_dtype = next(iter(vae.parameters())).dtype
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype())
            img = vae.decode(latents)

        img = img.clamp(-1, 1)
@@ -54,7 +53,6 @@ class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
    def invoke(self, context: InvocationContext) -> ImageOutput:
        latents = context.tensors.load(self.latents.latents_name)
        vae_info = context.models.load(self.vae.vae)
-        context.util.signal_progress("Running VAE")
        image = self._vae_decode(vae_info=vae_info, latents=latents)

        TorchDevice.empty_cache()
--- a/invokeai/app/invocations/flux_vae_encode.py
+++ b/invokeai/app/invocations/flux_vae_encode.py
@@ -44,8 +44,9 @@ class FluxVaeEncodeInvocation(BaseInvocation):
        generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0)
        with vae_info as vae:
            assert isinstance(vae, AutoEncoder)
-            vae_dtype = next(iter(vae.parameters())).dtype
-            image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+            image_tensor = image_tensor.to(
+                device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
+            )
            latents = vae.encode(image_tensor, sample=True, generator=generator)
            return latents

@@ -59,7 +60,6 @@ class FluxVaeEncodeInvocation(BaseInvocation):
        if image_tensor.dim() == 3:
            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")

-        context.util.signal_progress("Running VAE")
        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)

        latents = latents.to("cpu")
--- a/invokeai/app/invocations/image_panels.py
+++ b/invokeai/app/invocations/image_panels.py
@@ -1,59 +0,0 @@
-from pydantic import ValidationInfo, field_validator
-
-from invokeai.app.invocations.baseinvocation import (
-    BaseInvocation,
-    BaseInvocationOutput,
-    Classification,
-    invocation,
-    invocation_output,
-)
-from invokeai.app.invocations.fields import InputField, OutputField
-from invokeai.app.services.shared.invocation_context import InvocationContext
-
-
-@invocation_output("image_panel_coordinate_output")
-class ImagePanelCoordinateOutput(BaseInvocationOutput):
-    x_left: int = OutputField(description="The left x-coordinate of the panel.")
-    y_top: int = OutputField(description="The top y-coordinate of the panel.")
-    width: int = OutputField(description="The width of the panel.")
-    height: int = OutputField(description="The height of the panel.")
-
-
-@invocation(
-    "image_panel_layout",
-    title="Image Panel Layout",
-    tags=["image", "panel", "layout"],
-    category="image",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class ImagePanelLayoutInvocation(BaseInvocation):
-    """Get the coordinates of a single panel in a grid. (If the full image shape cannot be divided evenly into panels,
-    then the grid may not cover the entire image.)
-    """
-
-    width: int = InputField(description="The width of the entire grid.")
-    height: int = InputField(description="The height of the entire grid.")
-    num_cols: int = InputField(ge=1, default=1, description="The number of columns in the grid.")
-    num_rows: int = InputField(ge=1, default=1, description="The number of rows in the grid.")
-    panel_col_idx: int = InputField(ge=0, default=0, description="The column index of the panel to be processed.")
-    panel_row_idx: int = InputField(ge=0, default=0, description="The row index of the panel to be processed.")
-
-    @field_validator("panel_col_idx")
-    def validate_panel_col_idx(cls, v: int, info: ValidationInfo) -> int:
-        if v < 0 or v >= info.data["num_cols"]:
-            raise ValueError(f"panel_col_idx must be between 0 and {info.data['num_cols'] - 1}")
-        return v
-
-    @field_validator("panel_row_idx")
-    def validate_panel_row_idx(cls, v: int, info: ValidationInfo) -> int:
-        if v < 0 or v >= info.data["num_rows"]:
-            raise ValueError(f"panel_row_idx must be between 0 and {info.data['num_rows'] - 1}")
-        return v
-
-    def invoke(self, context: InvocationContext) -> ImagePanelCoordinateOutput:
-        x_left = self.panel_col_idx * (self.width // self.num_cols)
-        y_top = self.panel_row_idx * (self.height // self.num_rows)
-        width = self.width // self.num_cols
-        height = self.height // self.num_rows
-        return ImagePanelCoordinateOutput(x_left=x_left, y_top=y_top, width=width, height=height)
--- a/invokeai/app/invocations/image_to_latents.py
+++ b/invokeai/app/invocations/image_to_latents.py
@@ -117,7 +117,6 @@ class ImageToLatentsInvocation(BaseInvocation):
        if image_tensor.dim() == 3:
            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")

-        context.util.signal_progress("Running VAE encoder")
        latents = self.vae_encode(
            vae_info=vae_info, upcast=self.fp32, tiled=self.tiled, image_tensor=image_tensor, tile_size=self.tile_size
        )
--- a/invokeai/app/invocations/latents_to_image.py
+++ b/invokeai/app/invocations/latents_to_image.py
@@ -60,7 +60,6 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
        vae_info = context.models.load(self.vae.vae)
        assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
-            context.util.signal_progress("Running VAE decoder")
            assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
            latents = latents.to(vae.device)
            if self.fp32:
--- a/invokeai/app/invocations/metadata.py
+++ b/invokeai/app/invocations/metadata.py
@@ -147,10 +147,6 @@ GENERATION_MODES = Literal[
    "flux_img2img",
    "flux_inpaint",
    "flux_outpaint",
-    "sd3_txt2img",
-    "sd3_img2img",
-    "sd3_inpaint",
-    "sd3_outpaint",
 ]


--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@@ -1,5 +1,5 @@
 import copy
-from typing import List, Optional
+from typing import List, Literal, Optional

 from pydantic import BaseModel, Field

@@ -13,9 +13,11 @@ from invokeai.app.invocations.baseinvocation import (
 from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
+from invokeai.backend.flux.util import max_seq_lengths
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
    BaseModelType,
+    CheckpointConfigBase,
    ModelType,
    SubModelType,
 )
@@ -137,6 +139,78 @@ class ModelIdentifierInvocation(BaseInvocation):
        return ModelIdentifierOutput(model=self.model)


+@invocation_output("flux_model_loader_output")
+class FluxModelLoaderOutput(BaseInvocationOutput):
+    """Flux base model loader output"""
+
+    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
+    clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP")
+    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
+    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
+    max_seq_len: Literal[256, 512] = OutputField(
+        description="The max sequence length to used for the T5 encoder. (256 for schnell transformer, 512 for dev transformer)",
+        title="Max Seq Length",
+    )
+
+
+@invocation(
+    "flux_model_loader",
+    title="Flux Main Model",
+    tags=["model", "flux"],
+    category="model",
+    version="1.0.4",
+    classification=Classification.Prototype,
+)
+class FluxModelLoaderInvocation(BaseInvocation):
+    """Loads a flux base model, outputting its submodels."""
+
+    model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.flux_model,
+        ui_type=UIType.FluxMainModel,
+        input=Input.Direct,
+    )
+
+    t5_encoder_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.t5_encoder, ui_type=UIType.T5EncoderModel, input=Input.Direct, title="T5 Encoder"
+    )
+
+    clip_embed_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.clip_embed_model,
+        ui_type=UIType.CLIPEmbedModel,
+        input=Input.Direct,
+        title="CLIP Embed",
+    )
+
+    vae_model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.vae_model, ui_type=UIType.FluxVAEModel, title="VAE"
+    )
+
+    def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput:
+        for key in [self.model.key, self.t5_encoder_model.key, self.clip_embed_model.key, self.vae_model.key]:
+            if not context.models.exists(key):
+                raise ValueError(f"Unknown model: {key}")
+
+        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
+        vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
+
+        tokenizer = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
+        clip_encoder = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
+
+        tokenizer2 = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer2})
+        t5_encoder = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
+
+        transformer_config = context.models.get_config(transformer)
+        assert isinstance(transformer_config, CheckpointConfigBase)
+
+        return FluxModelLoaderOutput(
+            transformer=TransformerField(transformer=transformer, loras=[]),
+            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
+            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
+            vae=VAEField(vae=vae),
+            max_seq_len=max_seq_lengths[transformer_config.config_path],
+        )
+
+
@invocation(
    "main_model_loader",
    title="Main Model",
--- a/invokeai/app/invocations/param_easing.py
+++ b/invokeai/app/invocations/param_easing.py
@@ -1,4 +1,43 @@
+import io
+from typing import Literal, Optional
+
+import matplotlib.pyplot as plt
 import numpy as np
+import PIL.Image
+from easing_functions import (
+    BackEaseIn,
+    BackEaseInOut,
+    BackEaseOut,
+    BounceEaseIn,
+    BounceEaseInOut,
+    BounceEaseOut,
+    CircularEaseIn,
+    CircularEaseInOut,
+    CircularEaseOut,
+    CubicEaseIn,
+    CubicEaseInOut,
+    CubicEaseOut,
+    ElasticEaseIn,
+    ElasticEaseInOut,
+    ElasticEaseOut,
+    ExponentialEaseIn,
+    ExponentialEaseInOut,
+    ExponentialEaseOut,
+    LinearInOut,
+    QuadEaseIn,
+    QuadEaseInOut,
+    QuadEaseOut,
+    QuarticEaseIn,
+    QuarticEaseInOut,
+    QuarticEaseOut,
+    QuinticEaseIn,
+    QuinticEaseInOut,
+    QuinticEaseOut,
+    SineEaseIn,
+    SineEaseInOut,
+    SineEaseOut,
+)
+from matplotlib.ticker import MaxNLocator

 from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
 from invokeai.app.invocations.fields import InputField
@@ -26,3 +65,191 @@ class FloatLinearRangeInvocation(BaseInvocation):
    def invoke(self, context: InvocationContext) -> FloatCollectionOutput:
        param_list = list(np.linspace(self.start, self.stop, self.steps))
        return FloatCollectionOutput(collection=param_list)
+
+
+EASING_FUNCTIONS_MAP = {
+    "Linear": LinearInOut,
+    "QuadIn": QuadEaseIn,
+    "QuadOut": QuadEaseOut,
+    "QuadInOut": QuadEaseInOut,
+    "CubicIn": CubicEaseIn,
+    "CubicOut": CubicEaseOut,
+    "CubicInOut": CubicEaseInOut,
+    "QuarticIn": QuarticEaseIn,
+    "QuarticOut": QuarticEaseOut,
+    "QuarticInOut": QuarticEaseInOut,
+    "QuinticIn": QuinticEaseIn,
+    "QuinticOut": QuinticEaseOut,
+    "QuinticInOut": QuinticEaseInOut,
+    "SineIn": SineEaseIn,
+    "SineOut": SineEaseOut,
+    "SineInOut": SineEaseInOut,
+    "CircularIn": CircularEaseIn,
+    "CircularOut": CircularEaseOut,
+    "CircularInOut": CircularEaseInOut,
+    "ExponentialIn": ExponentialEaseIn,
+    "ExponentialOut": ExponentialEaseOut,
+    "ExponentialInOut": ExponentialEaseInOut,
+    "ElasticIn": ElasticEaseIn,
+    "ElasticOut": ElasticEaseOut,
+    "ElasticInOut": ElasticEaseInOut,
+    "BackIn": BackEaseIn,
+    "BackOut": BackEaseOut,
+    "BackInOut": BackEaseInOut,
+    "BounceIn": BounceEaseIn,
+    "BounceOut": BounceEaseOut,
+    "BounceInOut": BounceEaseInOut,
+}
+
+EASING_FUNCTION_KEYS = Literal[tuple(EASING_FUNCTIONS_MAP.keys())]
+
+
+# actually I think for now could just use CollectionOutput (which is list[Any])
+@invocation(
+    "step_param_easing",
+    title="Step Param Easing",
+    tags=["step", "easing"],
+    category="step",
+    version="1.0.2",
+)
+class StepParamEasingInvocation(BaseInvocation):
+    """Experimental per-step parameter easing for denoising steps"""
+
+    easing: EASING_FUNCTION_KEYS = InputField(default="Linear", description="The easing function to use")
+    num_steps: int = InputField(default=20, description="number of denoising steps")
+    start_value: float = InputField(default=0.0, description="easing starting value")
+    end_value: float = InputField(default=1.0, description="easing ending value")
+    start_step_percent: float = InputField(default=0.0, description="fraction of steps at which to start easing")
+    end_step_percent: float = InputField(default=1.0, description="fraction of steps after which to end easing")
+    # if None, then start_value is used prior to easing start
+    pre_start_value: Optional[float] = InputField(default=None, description="value before easing start")
+    # if None, then end_value is used after easing end
+    post_end_value: Optional[float] = InputField(default=None, description="value after easing end")
+    mirror: bool = InputField(default=False, description="include mirror of easing function")
+    # FIXME: add alt_mirror option (alternative to default or mirror), or remove entirely
+    # alt_mirror: bool = InputField(default=False, description="alternative mirroring by dual easing")
+    show_easing_plot: bool = InputField(default=False, description="show easing plot")
+
+    def invoke(self, context: InvocationContext) -> FloatCollectionOutput:
+        log_diagnostics = False
+        # convert start_step_percent to the nearest step index: round(steps * start_step_percent)
+        # start_step = int(np.floor(self.num_steps * self.start_step_percent))
+        start_step = int(np.round(self.num_steps * self.start_step_percent))
+        # convert end_step_percent to the nearest step index: round((steps - 1) * end_step_percent)
+        # end_step = int(np.ceil((self.num_steps - 1) * self.end_step_percent))
+        end_step = int(np.round((self.num_steps - 1) * self.end_step_percent))
+
+        # end_step = int(np.ceil(self.num_steps * self.end_step_percent))
+        num_easing_steps = end_step - start_step + 1
+
+        # num_presteps = max(start_step - 1, 0)
+        num_presteps = start_step
+        num_poststeps = self.num_steps - (num_presteps + num_easing_steps)
+        prelist = list(num_presteps * [self.pre_start_value])
+        postlist = list(num_poststeps * [self.post_end_value])
+
+        if log_diagnostics:
+            context.logger.debug("start_step: " + str(start_step))
+            context.logger.debug("end_step: " + str(end_step))
+            context.logger.debug("num_easing_steps: " + str(num_easing_steps))
+            context.logger.debug("num_presteps: " + str(num_presteps))
+            context.logger.debug("num_poststeps: " + str(num_poststeps))
+            context.logger.debug("prelist size: " + str(len(prelist)))
+            context.logger.debug("postlist size: " + str(len(postlist)))
+            context.logger.debug("prelist: " + str(prelist))
+            context.logger.debug("postlist: " + str(postlist))
+
+        easing_class = EASING_FUNCTIONS_MAP[self.easing]
+        if log_diagnostics:
+            context.logger.debug("easing class: " + str(easing_class))
+        easing_list = []
+        if self.mirror:  # "expected" mirroring
+            # if number of steps is even, squeeze duration down to (number_of_steps)/2
+            # and create reverse copy of list to append
+            # if number of steps is odd, squeeze duration down to ceil(number_of_steps/2)
+            # and create reverse copy of list[0:-1] (all but the last value, so the peak is not repeated)
+            # but if even then number_of_steps/2 === ceil(number_of_steps/2), so can just use ceil always
+
+            base_easing_duration = int(np.ceil(num_easing_steps / 2.0))
+            if log_diagnostics:
+                context.logger.debug("base easing duration: " + str(base_easing_duration))
+            even_num_steps = num_easing_steps % 2 == 0  # even number of steps
+            easing_function = easing_class(
+                start=self.start_value,
+                end=self.end_value,
+                duration=base_easing_duration - 1,
+            )
+            base_easing_vals = []
+            for step_index in range(base_easing_duration):
+                easing_val = easing_function.ease(step_index)
+                base_easing_vals.append(easing_val)
+                if log_diagnostics:
+                    context.logger.debug("step_index: " + str(step_index) + ", easing_val: " + str(easing_val))
+            if even_num_steps:
+                mirror_easing_vals = list(reversed(base_easing_vals))
+            else:
+                mirror_easing_vals = list(reversed(base_easing_vals[0:-1]))
+            if log_diagnostics:
+                context.logger.debug("base easing vals: " + str(base_easing_vals))
+                context.logger.debug("mirror easing vals: " + str(mirror_easing_vals))
+            easing_list = base_easing_vals + mirror_easing_vals
+
+        # FIXME: add alt_mirror option (alternative to default or mirror), or remove entirely
+        # elif self.alt_mirror:  # function mirroring (unintuitive behavior (at least to me))
+        #     # half_ease_duration = round(num_easing_steps - 1 / 2)
+        #     half_ease_duration = round((num_easing_steps - 1) / 2)
+        #     easing_function = easing_class(start=self.start_value,
+        #                                    end=self.end_value,
+        #                                    duration=half_ease_duration,
+        #                                    )
+        #
+        #     mirror_function = easing_class(start=self.end_value,
+        #                                    end=self.start_value,
+        #                                    duration=half_ease_duration,
+        #                                    )
+        #     for step_index in range(num_easing_steps):
+        #         if step_index <= half_ease_duration:
+        #             step_val = easing_function.ease(step_index)
+        #         else:
+        #             step_val = mirror_function.ease(step_index - half_ease_duration)
+        #         easing_list.append(step_val)
+        #         if log_diagnostics: logger.debug(step_index, step_val)
+        #
+
+        else:  # no mirroring (default)
+            easing_function = easing_class(
+                start=self.start_value,
+                end=self.end_value,
+                duration=num_easing_steps - 1,
+            )
+            for step_index in range(num_easing_steps):
+                step_val = easing_function.ease(step_index)
+                easing_list.append(step_val)
+                if log_diagnostics:
+                    context.logger.debug("step_index: " + str(step_index) + ", easing_val: " + str(step_val))
+
+        if log_diagnostics:
+            context.logger.debug("prelist size: " + str(len(prelist)))
+            context.logger.debug("easing_list size: " + str(len(easing_list)))
+            context.logger.debug("postlist size: " + str(len(postlist)))
+
+        param_list = prelist + easing_list + postlist
+
+        if self.show_easing_plot:
+            plt.figure()
+            plt.xlabel("Step")
+            plt.ylabel("Param Value")
+            plt.title("Per-Step Values Based On Easing: " + self.easing)
+            plt.bar(range(len(param_list)), param_list)
+            # plt.plot(param_list)
+            ax = plt.gca()
+            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+            buf = io.BytesIO()
+            plt.savefig(buf, format="png")
+            buf.seek(0)
+            im = PIL.Image.open(buf)
+            im.show()
+            buf.close()
+
+        # output array of size steps, each entry list[i] is param value for step i
+        return FloatCollectionOutput(collection=param_list)
--- a/invokeai/app/invocations/primitives.py
+++ b/invokeai/app/invocations/primitives.py
@@ -4,13 +4,7 @@ from typing import Optional

 import torch

-from invokeai.app.invocations.baseinvocation import (
-    BaseInvocation,
-    BaseInvocationOutput,
-    Classification,
-    invocation,
-    invocation_output,
-)
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
 from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
 from invokeai.app.invocations.fields import (
    BoundingBoxField,
@@ -24,7 +18,6 @@ from invokeai.app.invocations.fields import (
    InputField,
    LatentsField,
    OutputField,
-    SD3ConditioningField,
    TensorField,
    UIComponent,
 )
@@ -433,17 +426,6 @@ class FluxConditioningOutput(BaseInvocationOutput):
        return cls(conditioning=FluxConditioningField(conditioning_name=conditioning_name))


-@invocation_output("sd3_conditioning_output")
-class SD3ConditioningOutput(BaseInvocationOutput):
-    """Base class for nodes that output a single SD3 conditioning tensor"""
-
-    conditioning: SD3ConditioningField = OutputField(description=FieldDescriptions.cond)
-
-    @classmethod
-    def build(cls, conditioning_name: str) -> "SD3ConditioningOutput":
-        return cls(conditioning=SD3ConditioningField(conditioning_name=conditioning_name))
-
-
@invocation_output("conditioning_output")
 class ConditioningOutput(BaseInvocationOutput):
    """Base class for nodes that output a single conditioning tensor"""
@@ -539,23 +521,3 @@ class BoundingBoxInvocation(BaseInvocation):


 # endregion
-
-
-@invocation(
-    "image_batch",
-    title="Image Batch",
-    tags=["primitives", "image", "batch", "internal"],
-    category="primitives",
-    version="1.0.0",
-    classification=Classification.Special,
-)
-class ImageBatchInvocation(BaseInvocation):
-    """Create a batched generation, where the workflow is executed once for each image in the batch."""
-
-    images: list[ImageField] = InputField(min_length=1, description="The images to batch over", input=Input.Direct)
-
-    def __init__(self):
-        raise NotImplementedError("This class should never be executed or instantiated directly.")
-
-    def invoke(self, context: InvocationContext) -> ImageOutput:
-        raise NotImplementedError("This class should never be executed or instantiated directly.")
--- a/invokeai/app/invocations/sd3_denoise.py
+++ b/invokeai/app/invocations/sd3_denoise.py
@@ -1,338 +0,0 @@
-from typing import Callable, Optional, Tuple
-
-import torch
-import torchvision.transforms as tv_transforms
-from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel
-from torchvision.transforms.functional import resize as tv_resize
-from tqdm import tqdm
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
-from invokeai.app.invocations.fields import (
-    DenoiseMaskField,
-    FieldDescriptions,
-    Input,
-    InputField,
-    LatentsField,
-    SD3ConditioningField,
-    WithBoard,
-    WithMetadata,
-)
-from invokeai.app.invocations.model import TransformerField
-from invokeai.app.invocations.primitives import LatentsOutput
-from invokeai.app.invocations.sd3_text_encoder import SD3_T5_MAX_SEQ_LEN
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional
-from invokeai.backend.model_manager.config import BaseModelType
-from invokeai.backend.sd3.extensions.inpaint_extension import InpaintExtension
-from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import SD3ConditioningInfo
-from invokeai.backend.util.devices import TorchDevice
-
-
-@invocation(
-    "sd3_denoise",
-    title="SD3 Denoise",
-    tags=["image", "sd3"],
-    category="image",
-    version="1.1.0",
-    classification=Classification.Prototype,
-)
-class SD3DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Run denoising process with a SD3 model."""
-
-    # If latents is provided, this means we are doing image-to-image.
-    latents: Optional[LatentsField] = InputField(
-        default=None, description=FieldDescriptions.latents, input=Input.Connection
-    )
-    # denoise_mask is used for image-to-image inpainting. Only the masked region is modified.
-    denoise_mask: Optional[DenoiseMaskField] = InputField(
-        default=None, description=FieldDescriptions.denoise_mask, input=Input.Connection
-    )
-    denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
-    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    transformer: TransformerField = InputField(
-        description=FieldDescriptions.sd3_model, input=Input.Connection, title="Transformer"
-    )
-    positive_conditioning: SD3ConditioningField = InputField(
-        description=FieldDescriptions.positive_cond, input=Input.Connection
-    )
-    negative_conditioning: SD3ConditioningField = InputField(
-        description=FieldDescriptions.negative_cond, input=Input.Connection
-    )
-    cfg_scale: float | list[float] = InputField(default=3.5, description=FieldDescriptions.cfg_scale, title="CFG Scale")
-    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
-    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
-    steps: int = InputField(default=10, gt=0, description=FieldDescriptions.steps)
-    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents = self._run_diffusion(context)
-        latents = latents.detach().to("cpu")
-
-        name = context.tensors.save(tensor=latents)
-        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
-
-    def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None:
-        """Prepare the inpaint mask.
-        - Loads the mask
-        - Resizes if necessary
-        - Casts to same device/dtype as latents
-
-        Args:
-            context (InvocationContext): The invocation context, for loading the inpaint mask.
-            latents (torch.Tensor): A latent image tensor. Used to determine the target shape, device, and dtype for the
-                inpaint mask.
-
-        Returns:
-            torch.Tensor | None: Inpaint mask. Values of 0.0 represent the regions to be fully denoised, and 1.0
-                represent the regions to be preserved.
-        """
-        if self.denoise_mask is None:
-            return None
-        mask = context.tensors.load(self.denoise_mask.mask_name)
-
-        # The input denoise_mask contains values in [0, 1], where 0.0 represents the regions to be fully denoised, and
-        # 1.0 represents the regions to be preserved.
-        # We invert the mask so that the regions to be preserved are 0.0 and the regions to be denoised are 1.0.
-        mask = 1.0 - mask
-
-        _, _, latent_height, latent_width = latents.shape
-        mask = tv_resize(
-            img=mask,
-            size=[latent_height, latent_width],
-            interpolation=tv_transforms.InterpolationMode.BILINEAR,
-            antialias=False,
-        )
-
-        mask = mask.to(device=latents.device, dtype=latents.dtype)
-        return mask
-
-    def _load_text_conditioning(
-        self,
-        context: InvocationContext,
-        conditioning_name: str,
-        joint_attention_dim: int,
-        dtype: torch.dtype,
-        device: torch.device,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        # Load the conditioning data.
-        cond_data = context.conditioning.load(conditioning_name)
-        assert len(cond_data.conditionings) == 1
-        sd3_conditioning = cond_data.conditionings[0]
-        assert isinstance(sd3_conditioning, SD3ConditioningInfo)
-        sd3_conditioning = sd3_conditioning.to(dtype=dtype, device=device)
-
-        t5_embeds = sd3_conditioning.t5_embeds
-        if t5_embeds is None:
-            t5_embeds = torch.zeros(
-                (1, SD3_T5_MAX_SEQ_LEN, joint_attention_dim),
-                device=device,
-                dtype=dtype,
-            )
-
-        clip_prompt_embeds = torch.cat([sd3_conditioning.clip_l_embeds, sd3_conditioning.clip_g_embeds], dim=-1)
-        clip_prompt_embeds = torch.nn.functional.pad(
-            clip_prompt_embeds, (0, t5_embeds.shape[-1] - clip_prompt_embeds.shape[-1])
-        )
-
-        prompt_embeds = torch.cat([clip_prompt_embeds, t5_embeds], dim=-2)
-        pooled_prompt_embeds = torch.cat(
-            [sd3_conditioning.clip_l_pooled_embeds, sd3_conditioning.clip_g_pooled_embeds], dim=-1
-        )
-
-        return prompt_embeds, pooled_prompt_embeds
-
-    def _get_noise(
-        self,
-        num_samples: int,
-        num_channels_latents: int,
-        height: int,
-        width: int,
-        dtype: torch.dtype,
-        device: torch.device,
-        seed: int,
-    ) -> torch.Tensor:
-        # We always generate noise on the same device and dtype then cast to ensure consistency across devices/dtypes.
-        rand_device = "cpu"
-        rand_dtype = torch.float16
-
-        return torch.randn(
-            num_samples,
-            num_channels_latents,
-            int(height) // LATENT_SCALE_FACTOR,
-            int(width) // LATENT_SCALE_FACTOR,
-            device=rand_device,
-            dtype=rand_dtype,
-            generator=torch.Generator(device=rand_device).manual_seed(seed),
-        ).to(device=device, dtype=dtype)
-
-    def _prepare_cfg_scale(self, num_timesteps: int) -> list[float]:
-        """Prepare the CFG scale list.
-
-        Args:
-            num_timesteps (int): The number of timesteps in the scheduler. Could be different from num_steps depending
-            on the scheduler used (e.g. higher order schedulers).
-
-        Returns:
-            list[float]: _description_
-        """
-        if isinstance(self.cfg_scale, float):
-            cfg_scale = [self.cfg_scale] * num_timesteps
-        elif isinstance(self.cfg_scale, list):
-            assert len(self.cfg_scale) == num_timesteps
-            cfg_scale = self.cfg_scale
-        else:
-            raise ValueError(f"Invalid CFG scale type: {type(self.cfg_scale)}")
-
-        return cfg_scale
-
-    def _run_diffusion(
-        self,
-        context: InvocationContext,
-    ):
-        inference_dtype = TorchDevice.choose_torch_dtype()
-        device = TorchDevice.choose_torch_device()
-
-        transformer_info = context.models.load(self.transformer.transformer)
-
-        # Load/process the conditioning data.
-        # TODO(ryand): Make CFG optional.
-        do_classifier_free_guidance = True
-        pos_prompt_embeds, pos_pooled_prompt_embeds = self._load_text_conditioning(
-            context=context,
-            conditioning_name=self.positive_conditioning.conditioning_name,
-            joint_attention_dim=transformer_info.model.config.joint_attention_dim,
-            dtype=inference_dtype,
-            device=device,
-        )
-        neg_prompt_embeds, neg_pooled_prompt_embeds = self._load_text_conditioning(
-            context=context,
-            conditioning_name=self.negative_conditioning.conditioning_name,
-            joint_attention_dim=transformer_info.model.config.joint_attention_dim,
-            dtype=inference_dtype,
-            device=device,
-        )
-        # TODO(ryand): Support both sequential and batched CFG inference.
-        prompt_embeds = torch.cat([neg_prompt_embeds, pos_prompt_embeds], dim=0)
-        pooled_prompt_embeds = torch.cat([neg_pooled_prompt_embeds, pos_pooled_prompt_embeds], dim=0)
-
-        # Prepare the timestep schedule.
-        # We add an extra step to the end to account for the final timestep of 0.0.
-        timesteps: list[float] = torch.linspace(1, 0, self.steps + 1).tolist()
-        # Clip the timesteps schedule based on denoising_start and denoising_end.
-        timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)
-        total_steps = len(timesteps) - 1
-
-        # Prepare the CFG scale list.
-        cfg_scale = self._prepare_cfg_scale(total_steps)
-
-        # Load the input latents, if provided.
-        init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
-        if init_latents is not None:
-            init_latents = init_latents.to(device=device, dtype=inference_dtype)
-
-        # Generate initial latent noise.
-        num_channels_latents = transformer_info.model.config.in_channels
-        assert isinstance(num_channels_latents, int)
-        noise = self._get_noise(
-            num_samples=1,
-            num_channels_latents=num_channels_latents,
-            height=self.height,
-            width=self.width,
-            dtype=inference_dtype,
-            device=device,
-            seed=self.seed,
-        )
-
-        # Prepare input latent image.
-        if init_latents is not None:
-            # Noise the init_latents by the appropriate amount for the first timestep.
-            t_0 = timesteps[0]
-            latents = t_0 * noise + (1.0 - t_0) * init_latents
-        else:
-            # init_latents are not provided, so we are not doing image-to-image (i.e. we are starting from pure noise).
-            if self.denoising_start > 1e-5:
-                raise ValueError("denoising_start should be 0 when initial latents are not provided.")
-            latents = noise
-
-        # If len(timesteps) == 1, then short-circuit. We are just noising the input latents, but not taking any
-        # denoising steps.
-        if len(timesteps) <= 1:
-            return latents
-
-        # Prepare inpaint extension.
-        inpaint_mask = self._prep_inpaint_mask(context, latents)
-        inpaint_extension: InpaintExtension | None = None
-        if inpaint_mask is not None:
-            assert init_latents is not None
-            inpaint_extension = InpaintExtension(
-                init_latents=init_latents,
-                inpaint_mask=inpaint_mask,
-                noise=noise,
-            )
-
-        step_callback = self._build_step_callback(context)
-
-        step_callback(
-            PipelineIntermediateState(
-                step=0,
-                order=1,
-                total_steps=total_steps,
-                timestep=int(timesteps[0]),
-                latents=latents,
-            ),
-        )
-
-        with transformer_info.model_on_device() as (cached_weights, transformer):
-            assert isinstance(transformer, SD3Transformer2DModel)
-
-            # 6. Denoising loop
-            for step_idx, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
-                # Expand the latents if we are doing CFG.
-                latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
-                # Expand the timestep to match the latent model input.
-                # Multiply by 1000 to match the default FlowMatchEulerDiscreteScheduler num_train_timesteps.
-                timestep = torch.tensor([t_curr * 1000], device=device).expand(latent_model_input.shape[0])
-
-                noise_pred = transformer(
-                    hidden_states=latent_model_input,
-                    timestep=timestep,
-                    encoder_hidden_states=prompt_embeds,
-                    pooled_projections=pooled_prompt_embeds,
-                    joint_attention_kwargs=None,
-                    return_dict=False,
-                )[0]
-
-                # Apply CFG.
-                if do_classifier_free_guidance:
-                    noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
-                    noise_pred = noise_pred_uncond + cfg_scale[step_idx] * (noise_pred_cond - noise_pred_uncond)
-
-                # Compute the previous noisy sample x_t -> x_t-1.
-                latents_dtype = latents.dtype
-                latents = latents.to(dtype=torch.float32)
-                latents = latents + (t_prev - t_curr) * noise_pred
-                latents = latents.to(dtype=latents_dtype)
-
-                if inpaint_extension is not None:
-                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, t_prev)
-
-                step_callback(
-                    PipelineIntermediateState(
-                        step=step_idx + 1,
-                        order=1,
-                        total_steps=total_steps,
-                        timestep=int(t_curr),
-                        latents=latents,
-                    ),
-                )
-
-        return latents
-
-    def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
-        def step_callback(state: PipelineIntermediateState) -> None:
-            context.util.sd_step_callback(state, BaseModelType.StableDiffusion3)
-
-        return step_callback
--- a/invokeai/app/invocations/sd3_image_to_latents.py
+++ b/invokeai/app/invocations/sd3_image_to_latents.py
@@ -1,65 +0,0 @@
-import einops
-import torch
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import (
-    FieldDescriptions,
-    ImageField,
-    Input,
-    InputField,
-    WithBoard,
-    WithMetadata,
-)
-from invokeai.app.invocations.model import VAEField
-from invokeai.app.invocations.primitives import LatentsOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.model_manager.load.load_base import LoadedModel
-from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
-
-
-@invocation(
-    "sd3_i2l",
-    title="SD3 Image to Latents",
-    tags=["image", "latents", "vae", "i2l", "sd3"],
-    category="image",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class SD3ImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates latents from an image."""
-
-    image: ImageField = InputField(description="The image to encode")
-    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
-
-    @staticmethod
-    def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
-        with vae_info as vae:
-            assert isinstance(vae, AutoencoderKL)
-
-            vae.disable_tiling()
-
-            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
-            with torch.inference_mode():
-                image_tensor_dist = vae.encode(image_tensor).latent_dist
-                # TODO: Use seed to make sampling reproducible.
-                latents: torch.Tensor = image_tensor_dist.sample().to(dtype=vae.dtype)
-
-            latents = vae.config.scaling_factor * latents
-
-        return latents
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        image = context.images.get_pil(self.image.image_name)
-
-        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
-        if image_tensor.dim() == 3:
-            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
-
-        vae_info = context.models.load(self.vae.vae)
-        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
-
-        latents = latents.to("cpu")
-        name = context.tensors.save(tensor=latents)
-        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
--- a/invokeai/app/invocations/sd3_latents_to_image.py
+++ b/invokeai/app/invocations/sd3_latents_to_image.py
@@ -1,74 +0,0 @@
-from contextlib import nullcontext
-
-import torch
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-from einops import rearrange
-from PIL import Image
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
-from invokeai.app.invocations.fields import (
-    FieldDescriptions,
-    Input,
-    InputField,
-    LatentsField,
-    WithBoard,
-    WithMetadata,
-)
-from invokeai.app.invocations.model import VAEField
-from invokeai.app.invocations.primitives import ImageOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
-from invokeai.backend.util.devices import TorchDevice
-
-
-@invocation(
-    "sd3_l2i",
-    title="SD3 Latents to Image",
-    tags=["latents", "image", "vae", "l2i", "sd3"],
-    category="latents",
-    version="1.3.0",
-)
-class SD3LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates an image from latents."""
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    vae: VAEField = InputField(
-        description=FieldDescriptions.vae,
-        input=Input.Connection,
-    )
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> ImageOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-
-        vae_info = context.models.load(self.vae.vae)
-        assert isinstance(vae_info.model, (AutoencoderKL))
-        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
-            context.util.signal_progress("Running VAE")
-            assert isinstance(vae, (AutoencoderKL))
-            latents = latents.to(vae.device)
-
-            vae.disable_tiling()
-
-            tiling_context = nullcontext()
-
-            # clear memory as vae decode can request a lot
-            TorchDevice.empty_cache()
-
-            with torch.inference_mode(), tiling_context:
-                # copied from diffusers pipeline
-                latents = latents / vae.config.scaling_factor
-                img = vae.decode(latents, return_dict=False)[0]
-
-            img = img.clamp(-1, 1)
-            img = rearrange(img[0], "c h w -> h w c")  # noqa: F821
-            img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy())
-
-        TorchDevice.empty_cache()
-
-        image_dto = context.images.save(image=img_pil)
-
-        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/sd3_model_loader.py
+++ b/invokeai/app/invocations/sd3_model_loader.py
@@ -1,108 +0,0 @@
-from typing import Optional
-
-from invokeai.app.invocations.baseinvocation import (
-    BaseInvocation,
-    BaseInvocationOutput,
-    Classification,
-    invocation,
-    invocation_output,
-)
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
-from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.model_manager.config import SubModelType
-
-
-@invocation_output("sd3_model_loader_output")
-class Sd3ModelLoaderOutput(BaseInvocationOutput):
-    """SD3 base model loader output."""
-
-    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
-    clip_l: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP L")
-    clip_g: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP G")
-    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
-    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
-
-
-@invocation(
-    "sd3_model_loader",
-    title="SD3 Main Model",
-    tags=["model", "sd3"],
-    category="model",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class Sd3ModelLoaderInvocation(BaseInvocation):
-    """Loads a SD3 base model, outputting its submodels."""
-
-    model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.sd3_model,
-        ui_type=UIType.SD3MainModel,
-        input=Input.Direct,
-    )
-
-    t5_encoder_model: Optional[ModelIdentifierField] = InputField(
-        description=FieldDescriptions.t5_encoder,
-        ui_type=UIType.T5EncoderModel,
-        input=Input.Direct,
-        title="T5 Encoder",
-        default=None,
-    )
-
-    clip_l_model: Optional[ModelIdentifierField] = InputField(
-        description=FieldDescriptions.clip_embed_model,
-        ui_type=UIType.CLIPLEmbedModel,
-        input=Input.Direct,
-        title="CLIP L Encoder",
-        default=None,
-    )
-
-    clip_g_model: Optional[ModelIdentifierField] = InputField(
-        description=FieldDescriptions.clip_g_model,
-        ui_type=UIType.CLIPGEmbedModel,
-        input=Input.Direct,
-        title="CLIP G Encoder",
-        default=None,
-    )
-
-    vae_model: Optional[ModelIdentifierField] = InputField(
-        description=FieldDescriptions.vae_model, ui_type=UIType.VAEModel, title="VAE", default=None
-    )
-
-    def invoke(self, context: InvocationContext) -> Sd3ModelLoaderOutput:
-        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
-        vae = (
-            self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
-            if self.vae_model
-            else self.model.model_copy(update={"submodel_type": SubModelType.VAE})
-        )
-        tokenizer_l = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
-        clip_encoder_l = (
-            self.clip_l_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
-            if self.clip_l_model
-            else self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
-        )
-        tokenizer_g = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer2})
-        clip_encoder_g = (
-            self.clip_g_model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
-            if self.clip_g_model
-            else self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
-        )
-        tokenizer_t5 = (
-            self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer3})
-            if self.t5_encoder_model
-            else self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer3})
-        )
-        t5_encoder = (
-            self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder3})
-            if self.t5_encoder_model
-            else self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder3})
-        )
-
-        return Sd3ModelLoaderOutput(
-            transformer=TransformerField(transformer=transformer, loras=[]),
-            clip_l=CLIPField(tokenizer=tokenizer_l, text_encoder=clip_encoder_l, loras=[], skipped_layers=0),
-            clip_g=CLIPField(tokenizer=tokenizer_g, text_encoder=clip_encoder_g, loras=[], skipped_layers=0),
-            t5_encoder=T5EncoderField(tokenizer=tokenizer_t5, text_encoder=t5_encoder),
-            vae=VAEField(vae=vae),
-        )
--- a/invokeai/app/invocations/sd3_text_encoder.py
+++ b/invokeai/app/invocations/sd3_text_encoder.py
@@ -1,203 +0,0 @@
-from contextlib import ExitStack
-from typing import Iterator, Tuple
-
-import torch
-from transformers import (
-    CLIPTextModel,
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    T5EncoderModel,
-    T5Tokenizer,
-    T5TokenizerFast,
-)
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
-from invokeai.app.invocations.model import CLIPField, T5EncoderField
-from invokeai.app.invocations.primitives import SD3ConditioningOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_CLIP_PREFIX
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoRAPatcher
-from invokeai.backend.model_manager.config import ModelFormat
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, SD3ConditioningInfo
-from invokeai.backend.util.devices import TorchDevice
-
-# The SD3 T5 Max Sequence Length set based on the default in diffusers.
-SD3_T5_MAX_SEQ_LEN = 256
-
-
-@invocation(
-    "sd3_text_encoder",
-    title="SD3 Text Encoding",
-    tags=["prompt", "conditioning", "sd3"],
-    category="conditioning",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class Sd3TextEncoderInvocation(BaseInvocation):
-    """Encodes and preps a prompt for a SD3 image."""
-
-    clip_l: CLIPField = InputField(
-        title="CLIP L",
-        description=FieldDescriptions.clip,
-        input=Input.Connection,
-    )
-    clip_g: CLIPField = InputField(
-        title="CLIP G",
-        description=FieldDescriptions.clip,
-        input=Input.Connection,
-    )
-
-    # The SD3 models were trained with text encoder dropout, so the T5 encoder can be omitted to save time/memory.
-    t5_encoder: T5EncoderField | None = InputField(
-        title="T5Encoder",
-        default=None,
-        description=FieldDescriptions.t5_encoder,
-        input=Input.Connection,
-    )
-    prompt: str = InputField(description="Text prompt to encode.")
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> SD3ConditioningOutput:
-        # Note: The text encoding model are run in separate functions to ensure that all model references are locally
-        # scoped. This ensures that earlier models can be freed and gc'd before loading later models (if necessary).
-
-        clip_l_embeddings, clip_l_pooled_embeddings = self._clip_encode(context, self.clip_l)
-        clip_g_embeddings, clip_g_pooled_embeddings = self._clip_encode(context, self.clip_g)
-
-        t5_embeddings: torch.Tensor | None = None
-        if self.t5_encoder is not None:
-            t5_embeddings = self._t5_encode(context, SD3_T5_MAX_SEQ_LEN)
-
-        conditioning_data = ConditioningFieldData(
-            conditionings=[
-                SD3ConditioningInfo(
-                    clip_l_embeds=clip_l_embeddings,
-                    clip_l_pooled_embeds=clip_l_pooled_embeddings,
-                    clip_g_embeds=clip_g_embeddings,
-                    clip_g_pooled_embeds=clip_g_pooled_embeddings,
-                    t5_embeds=t5_embeddings,
-                )
-            ]
-        )
-
-        conditioning_name = context.conditioning.save(conditioning_data)
-        return SD3ConditioningOutput.build(conditioning_name)
-
-    def _t5_encode(self, context: InvocationContext, max_seq_len: int) -> torch.Tensor:
-        assert self.t5_encoder is not None
-        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
-        t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder)
-
-        prompt = [self.prompt]
-
-        with (
-            t5_text_encoder_info as t5_text_encoder,
-            t5_tokenizer_info as t5_tokenizer,
-        ):
-            context.util.signal_progress("Running T5 encoder")
-            assert isinstance(t5_text_encoder, T5EncoderModel)
-            assert isinstance(t5_tokenizer, (T5Tokenizer, T5TokenizerFast))
-
-            text_inputs = t5_tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=max_seq_len,
-                truncation=True,
-                add_special_tokens=True,
-                return_tensors="pt",
-            )
-            text_input_ids = text_inputs.input_ids
-            untruncated_ids = t5_tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
-            assert isinstance(text_input_ids, torch.Tensor)
-            assert isinstance(untruncated_ids, torch.Tensor)
-            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
-                text_input_ids, untruncated_ids
-            ):
-                removed_text = t5_tokenizer.batch_decode(untruncated_ids[:, max_seq_len - 1 : -1])
-                context.logger.warning(
-                    "The following part of your input was truncated because `max_sequence_length` is set to "
-                    f" {max_seq_len} tokens: {removed_text}"
-                )
-
-            prompt_embeds = t5_text_encoder(text_input_ids.to(t5_text_encoder.device))[0]
-
-        assert isinstance(prompt_embeds, torch.Tensor)
-        return prompt_embeds
-
-    def _clip_encode(
-        self, context: InvocationContext, clip_model: CLIPField, tokenizer_max_length: int = 77
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        clip_tokenizer_info = context.models.load(clip_model.tokenizer)
-        clip_text_encoder_info = context.models.load(clip_model.text_encoder)
-
-        prompt = [self.prompt]
-
-        with (
-            clip_text_encoder_info.model_on_device() as (cached_weights, clip_text_encoder),
-            clip_tokenizer_info as clip_tokenizer,
-            ExitStack() as exit_stack,
-        ):
-            context.util.signal_progress("Running CLIP encoder")
-            assert isinstance(clip_text_encoder, (CLIPTextModel, CLIPTextModelWithProjection))
-            assert isinstance(clip_tokenizer, CLIPTokenizer)
-
-            clip_text_encoder_config = clip_text_encoder_info.config
-            assert clip_text_encoder_config is not None
-
-            # Apply LoRA models to the CLIP encoder.
-            # Note: We apply the LoRA after the transformer has been moved to its target device for faster patching.
-            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
-                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
-                exit_stack.enter_context(
-                    LoRAPatcher.apply_smart_lora_patches(
-                        model=clip_text_encoder,
-                        patches=self._clip_lora_iterator(context, clip_model),
-                        prefix=FLUX_LORA_CLIP_PREFIX,
-                        dtype=TorchDevice.choose_torch_dtype(),
-                        cached_weights=cached_weights,
-                    )
-                )
-            else:
-                # There are currently no supported CLIP quantized models. Add support here if needed.
-                raise ValueError(f"Unsupported model format: {clip_text_encoder_config.format}")
-
-            clip_text_encoder = clip_text_encoder.eval().requires_grad_(False)
-
-            text_inputs = clip_tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=tokenizer_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-
-            text_input_ids = text_inputs.input_ids
-            untruncated_ids = clip_tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
-            assert isinstance(text_input_ids, torch.Tensor)
-            assert isinstance(untruncated_ids, torch.Tensor)
-            if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
-                text_input_ids, untruncated_ids
-            ):
-                removed_text = clip_tokenizer.batch_decode(untruncated_ids[:, tokenizer_max_length - 1 : -1])
-                context.logger.warning(
-                    "The following part of your input was truncated because CLIP can only handle sequences up to"
-                    f" {tokenizer_max_length} tokens: {removed_text}"
-                )
-            prompt_embeds = clip_text_encoder(
-                input_ids=text_input_ids.to(clip_text_encoder.device), output_hidden_states=True
-            )
-            pooled_prompt_embeds = prompt_embeds[0]
-            prompt_embeds = prompt_embeds.hidden_states[-2]
-
-            return prompt_embeds, pooled_prompt_embeds
-
-    def _clip_lora_iterator(
-        self, context: InvocationContext, clip_model: CLIPField
-    ) -> Iterator[Tuple[LoRAModelRaw, float]]:
-        for lora in clip_model.loras:
-            lora_info = context.models.load(lora.lora)
-            assert isinstance(lora_info.model, LoRAModelRaw)
-            yield (lora_info.model, lora.weight)
-            del lora_info
--- a/invokeai/app/invocations/segment_anything.py
+++ b/invokeai/app/invocations/segment_anything.py
@@ -5,7 +5,7 @@ from typing import Literal
 import numpy as np
 import torch
 from PIL import Image
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from transformers import AutoModelForMaskGeneration, AutoProcessor
 from transformers.models.sam import SamModel
 from transformers.models.sam.processing_sam import SamProcessor
@@ -77,14 +77,19 @@ class SegmentAnythingInvocation(BaseInvocation):
        default="all",
    )

+    @model_validator(mode="after")  # exactly one of point_lists / bounding_boxes must be non-None
+    def check_point_lists_or_bounding_box(self):
+        if self.point_lists is None and self.bounding_boxes is None:
+            raise ValueError("Either point_lists or bounding_boxes must be provided.")
+        elif self.point_lists is not None and self.bounding_boxes is not None:
+            raise ValueError("Only one of point_lists or bounding_boxes can be provided.")
+        return self
+
    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> MaskOutput:
        # The models expect a 3-channel RGB image.
        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")

-        if self.point_lists is not None and self.bounding_boxes is not None:
-            raise ValueError("Only one of point_lists or bounding_box can be provided.")
-
        if (not self.bounding_boxes or len(self.bounding_boxes) == 0) and (
            not self.point_lists or len(self.point_lists) == 0
        ):
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -207,9 +207,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
        with (
            ExitStack() as exit_stack,
            unet_info as unet,
-            LoRAPatcher.apply_smart_lora_patches(
-                model=unet, patches=_lora_loader(), prefix="lora_unet_", dtype=unet.dtype
-            ),
+            LoRAPatcher.apply_lora_patches(model=unet, patches=_lora_loader(), prefix="lora_unet_"),
        ):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/app/services/invocation_stats/invocation_stats_default.py
+++ b/invokeai/app/services/invocation_stats/invocation_stats_default.py
@@ -20,7 +20,7 @@ from invokeai.app.services.invocation_stats.invocation_stats_common import (
    NodeExecutionStatsSummary,
 )
 from invokeai.app.services.invoker import Invoker
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
+from invokeai.backend.model_manager.load.model_cache import CacheStats

 # Size of 1GB in bytes.
 GB = 2**30
--- a/invokeai/app/services/model_load/model_load_base.py
+++ b/invokeai/app/services/model_load/model_load_base.py
@@ -7,7 +7,7 @@ from typing import Callable, Optional

 from invokeai.backend.model_manager import AnyModel, AnyModelConfig, SubModelType
 from invokeai.backend.model_manager.load import LoadedModel, LoadedModelWithoutConfig
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase


 class ModelLoadServiceBase(ABC):
@@ -24,7 +24,7 @@ class ModelLoadServiceBase(ABC):

    @property
    @abstractmethod
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the RAM cache used by this loader."""

    @abstractmethod
--- a/invokeai/app/services/model_load/model_load_default.py
+++ b/invokeai/app/services/model_load/model_load_default.py
@@ -18,7 +18,7 @@ from invokeai.backend.model_manager.load import (
    ModelLoaderRegistry,
    ModelLoaderRegistryBase,
 )
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
 from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.logging import InvokeAILogger
@@ -30,7 +30,7 @@ class ModelLoadService(ModelLoadServiceBase):
    def __init__(
        self,
        app_config: InvokeAIAppConfig,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
        registry: Optional[Type[ModelLoaderRegistryBase]] = ModelLoaderRegistry,
    ):
        """Initialize the model load service."""
@@ -45,7 +45,7 @@ class ModelLoadService(ModelLoadServiceBase):
        self._invoker = invoker

    @property
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the RAM cache used by this loader."""
        return self._ram_cache

@@ -78,14 +78,15 @@ class ModelLoadService(ModelLoadServiceBase):
        self, model_path: Path, loader: Optional[Callable[[Path], AnyModel]] = None
    ) -> LoadedModelWithoutConfig:
        cache_key = str(model_path)
+        ram_cache = self.ram_cache
        try:
-            return LoadedModelWithoutConfig(cache_record=self._ram_cache.get(key=cache_key), cache=self._ram_cache)
+            return LoadedModelWithoutConfig(_locker=ram_cache.get(key=cache_key))
        except IndexError:
            pass

        def torch_load_file(checkpoint: Path) -> AnyModel:
            scan_result = scan_file_path(checkpoint)
-            if scan_result.infected_files != 0 or scan_result.scan_err:
+            if scan_result.infected_files != 0 or scan_result.scan_err:  # treat a failed scan as unsafe
                raise Exception("The model at {checkpoint} is potentially infected by malware. Aborting load.")
            result = torch_load(checkpoint, map_location="cpu")
            return result
@@ -108,5 +109,5 @@ class ModelLoadService(ModelLoadServiceBase):
        )
        assert loader is not None
        raw_model = loader(model_path)
-        self._ram_cache.put(key=cache_key, model=raw_model)
-        return LoadedModelWithoutConfig(cache_record=self._ram_cache.get(key=cache_key), cache=self._ram_cache)
+        ram_cache.put(key=cache_key, model=raw_model)
+        return LoadedModelWithoutConfig(_locker=ram_cache.get(key=cache_key))
--- a/invokeai/app/services/model_manager/model_manager_default.py
+++ b/invokeai/app/services/model_manager/model_manager_default.py
@@ -16,8 +16,7 @@ from invokeai.app.services.model_load.model_load_base import ModelLoadServiceBas
 from invokeai.app.services.model_load.model_load_default import ModelLoadService
 from invokeai.app.services.model_manager.model_manager_base import ModelManagerServiceBase
 from invokeai.app.services.model_records.model_records_base import ModelRecordServiceBase
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
-from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
+from invokeai.backend.model_manager.load import ModelCache, ModelLoaderRegistry
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.logging import InvokeAILogger

--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -15,7 +15,6 @@ from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
    BaseModelType,
-    ClipVariantType,
    ControlAdapterDefaultSettings,
    MainModelDefaultSettings,
    ModelFormat,
@@ -86,7 +85,7 @@ class ModelRecordChanges(BaseModelExcludeNull):

    # Checkpoint-specific changes
    # TODO(MM2): Should we expose these? Feels footgun-y...
-    variant: Optional[ModelVariantType | ClipVariantType] = Field(description="The variant of the model.", default=None)
+    variant: Optional[ModelVariantType] = Field(description="The variant of the model.", default=None)
    prediction_type: Optional[SchedulerPredictionType] = Field(
        description="The prediction type of the model.", default=None
    )
--- a/invokeai/app/services/session_queue/session_queue_common.py
+++ b/invokeai/app/services/session_queue/session_queue_common.py
@@ -16,7 +16,6 @@ from pydantic import (
 from pydantic_core import to_jsonable_python

 from invokeai.app.invocations.baseinvocation import BaseInvocation
-from invokeai.app.invocations.fields import ImageField
 from invokeai.app.services.shared.graph import Graph, GraphExecutionState, NodeNotFoundError
 from invokeai.app.services.workflow_records.workflow_records_common import (
    WorkflowWithoutID,
@@ -52,7 +51,11 @@ class SessionQueueItemNotFoundError(ValueError):

 # region Batch

-BatchDataType = Union[StrictStr, float, int, ImageField]
+BatchDataType = Union[
+    StrictStr,
+    float,
+    int,
+]


 class NodeFieldValue(BaseModel):
--- a/invokeai/app/services/shared/invocation_context.py
+++ b/invokeai/app/services/shared/invocation_context.py
@@ -160,10 +160,6 @@ class LoggerInterface(InvocationContextInterface):


 class ImagesInterface(InvocationContextInterface):
-    def __init__(self, services: InvocationServices, data: InvocationContextData, util: "UtilInterface") -> None:
-        super().__init__(services, data)
-        self._util = util
-
    def save(
        self,
        image: Image,
@@ -190,8 +186,6 @@ class ImagesInterface(InvocationContextInterface):
            The saved image DTO.
        """

-        self._util.signal_progress("Saving image")
-
        # If `metadata` is provided directly, use that. Else, use the metadata provided by `WithMetadata`, falling back to None.
        metadata_ = None
        if metadata:
@@ -342,10 +336,6 @@ class ConditioningInterface(InvocationContextInterface):
 class ModelsInterface(InvocationContextInterface):
    """Common API for loading, downloading and managing models."""

-    def __init__(self, services: InvocationServices, data: InvocationContextData, util: "UtilInterface") -> None:
-        super().__init__(services, data)
-        self._util = util
-
    def exists(self, identifier: Union[str, "ModelIdentifierField"]) -> bool:
        """Check if a model exists.

@@ -378,15 +368,11 @@ class ModelsInterface(InvocationContextInterface):

        if isinstance(identifier, str):
            model = self._services.model_manager.store.get_model(identifier)
+            return self._services.model_manager.load.load_model(model, submodel_type)
        else:
-            submodel_type = submodel_type or identifier.submodel_type
+            _submodel_type = submodel_type or identifier.submodel_type
            model = self._services.model_manager.store.get_model(identifier.key)
-
-        message = f"Loading model {model.name}"
-        if submodel_type:
-            message += f" ({submodel_type.value})"
-        self._util.signal_progress(message)
-        return self._services.model_manager.load.load_model(model, submodel_type)
+            return self._services.model_manager.load.load_model(model, _submodel_type)

    def load_by_attrs(
        self, name: str, base: BaseModelType, type: ModelType, submodel_type: Optional[SubModelType] = None
@@ -411,10 +397,6 @@ class ModelsInterface(InvocationContextInterface):
        if len(configs) > 1:
            raise ValueError(f"More than one model found with name {name}, base {base}, and type {type}")

-        message = f"Loading model {name}"
-        if submodel_type:
-            message += f" ({submodel_type.value})"
-        self._util.signal_progress(message)
        return self._services.model_manager.load.load_model(configs[0], submodel_type)

    def get_config(self, identifier: Union[str, "ModelIdentifierField"]) -> AnyModelConfig:
@@ -485,7 +467,6 @@ class ModelsInterface(InvocationContextInterface):
        Returns:
            Path to the downloaded model
        """
-        self._util.signal_progress(f"Downloading model {source}")
        return self._services.model_manager.install.download_and_cache_model(source=source)

    def load_local_model(
@@ -508,8 +489,6 @@ class ModelsInterface(InvocationContextInterface):
        Returns:
            A LoadedModelWithoutConfig object.
        """
-
-        self._util.signal_progress(f"Loading model {model_path.name}")
        return self._services.model_manager.load.load_model_from_path(model_path=model_path, loader=loader)

    def load_remote_model(
@@ -535,8 +514,6 @@ class ModelsInterface(InvocationContextInterface):
            A LoadedModelWithoutConfig object.
        """
        model_path = self._services.model_manager.install.download_and_cache_model(source=str(source))
-
-        self._util.signal_progress(f"Loading model {source}")
        return self._services.model_manager.load.load_model_from_path(model_path=model_path, loader=loader)


@@ -730,12 +707,12 @@ def build_invocation_context(
    """

    logger = LoggerInterface(services=services, data=data)
+    images = ImagesInterface(services=services, data=data)
    tensors = TensorsInterface(services=services, data=data)
+    models = ModelsInterface(services=services, data=data)
    config = ConfigInterface(services=services, data=data)
    util = UtilInterface(services=services, data=data, is_canceled=is_canceled)
    conditioning = ConditioningInterface(services=services, data=data)
-    models = ModelsInterface(services=services, data=data, util=util)
-    images = ImagesInterface(services=services, data=data, util=util)
    boards = BoardsInterface(services=services, data=data)

    ctx = InvocationContext(
--- a/invokeai/app/services/workflow_records/default_workflows/SD3.5
+++ b/invokeai/app/services/workflow_records/default_workflows/SD3.5
@@ -1,382 +0,0 @@
-{
-    "name": "SD3.5 Text to Image",
-   "author": "InvokeAI",
-    "description": "Sample text to image workflow for Stable Diffusion 3.5",
-    "version": "1.0.0",
-    "contact": "invoke@invoke.ai",
-    "tags": "text2image, SD3.5, default",
-  "notes": "",
-    "exposedFields": [
-      {
-        "nodeId": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "fieldName": "model"
-      },
-      {
-        "nodeId": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "fieldName": "prompt"
-      }
-    ],
-    "meta": {
-      "version": "3.0.0",
-      "category": "default"
-    },
-    "id": "e3a51d6b-8208-4d6d-b187-fcfe8b32934c",
-    "nodes": [
-      {
-        "id": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "type": "invocation",
-        "data": {
-          "id": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-          "type": "sd3_model_loader",
-          "version": "1.0.0",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": true,
-          "useCache": true,
-          "nodePack": "invokeai",
-          "inputs": {
-            "model": {
-              "name": "model",
-              "label": "",
-              "value": {
-                "key": "f7b20be9-92a8-4cfb-bca4-6c3b5535c10b",
-                "hash": "placeholder",
-                "name": "stable-diffusion-3.5-medium",
-                "base": "sd-3",
-                "type": "main"
-              }
-            },
-            "t5_encoder_model": {
-              "name": "t5_encoder_model",
-              "label": ""
-            },
-            "clip_l_model": {
-              "name": "clip_l_model",
-              "label": ""
-            },
-            "clip_g_model": {
-              "name": "clip_g_model",
-              "label": ""
-            },
-            "vae_model": {
-              "name": "vae_model",
-              "label": ""
-            }
-          }
-        },
-        "position": {
-          "x": -55.58689609637031,
-          "y": -111.53602444662268
-        }
-      },
-      {
-        "id": "f7e394ac-6394-4096-abcb-de0d346506b3",
-        "type": "invocation",
-        "data": {
-          "id": "f7e394ac-6394-4096-abcb-de0d346506b3",
-          "type": "rand_int",
-          "version": "1.0.1",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": true,
-          "useCache": false,
-          "nodePack": "invokeai",
-          "inputs": {
-            "low": {
-              "name": "low",
-              "label": "",
-              "value": 0
-            },
-            "high": {
-              "name": "high",
-              "label": "",
-              "value": 2147483647
-            }
-          }
-        },
-        "position": {
-          "x": 470.45870147220353,
-          "y": 350.3141781644303
-        }
-      },
-      {
-        "id": "9eb72af0-dd9e-4ec5-ad87-d65e3c01f48b",
-        "type": "invocation",
-        "data": {
-          "id": "9eb72af0-dd9e-4ec5-ad87-d65e3c01f48b",
-          "type": "sd3_l2i",
-          "version": "1.3.0",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": false,
-          "useCache": true,
-          "nodePack": "invokeai",
-          "inputs": {
-            "board": {
-              "name": "board",
-              "label": ""
-            },
-            "metadata": {
-              "name": "metadata",
-              "label": ""
-            },
-            "latents": {
-              "name": "latents",
-              "label": ""
-            },
-            "vae": {
-              "name": "vae",
-              "label": ""
-            }
-          }
-        },
-        "position": {
-          "x": 1192.3097009334897,
-          "y": -366.0994675072209
-        }
-      },
-      {
-        "id": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-        "type": "invocation",
-        "data": {
-          "id": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-          "type": "sd3_text_encoder",
-          "version": "1.0.0",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": true,
-          "useCache": true,
-          "nodePack": "invokeai",
-          "inputs": {
-            "clip_l": {
-              "name": "clip_l",
-              "label": ""
-            },
-            "clip_g": {
-              "name": "clip_g",
-              "label": ""
-            },
-            "t5_encoder": {
-              "name": "t5_encoder",
-              "label": ""
-            },
-            "prompt": {
-              "name": "prompt",
-              "label": "",
-              "value": ""
-            }
-          }
-        },
-        "position": {
-          "x": 408.16054647924784,
-          "y": 65.06415352118786
-        }
-      },
-      {
-        "id": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "type": "invocation",
-        "data": {
-          "id": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-          "type": "sd3_text_encoder",
-          "version": "1.0.0",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": true,
-          "useCache": true,
-          "nodePack": "invokeai",
-          "inputs": {
-            "clip_l": {
-              "name": "clip_l",
-              "label": ""
-            },
-            "clip_g": {
-              "name": "clip_g",
-              "label": ""
-            },
-            "t5_encoder": {
-              "name": "t5_encoder",
-              "label": ""
-            },
-            "prompt": {
-              "name": "prompt",
-              "label": "",
-              "value": ""
-            }
-          }
-        },
-        "position": {
-          "x": 378.9283412440941,
-          "y": -302.65777497352553
-        }
-      },
-      {
-        "id": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "type": "invocation",
-        "data": {
-          "id": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-          "type": "sd3_denoise",
-          "version": "1.0.0",
-          "label": "",
-          "notes": "",
-          "isOpen": true,
-          "isIntermediate": true,
-          "useCache": true,
-          "nodePack": "invokeai",
-          "inputs": {
-            "board": {
-              "name": "board",
-              "label": ""
-            },
-            "metadata": {
-              "name": "metadata",
-              "label": ""
-            },
-            "transformer": {
-              "name": "transformer",
-              "label": ""
-            },
-            "positive_conditioning": {
-              "name": "positive_conditioning",
-              "label": ""
-            },
-            "negative_conditioning": {
-              "name": "negative_conditioning",
-              "label": ""
-            },
-            "cfg_scale": {
-              "name": "cfg_scale",
-              "label": "",
-              "value": 3.5
-            },
-            "width": {
-              "name": "width",
-              "label": "",
-              "value": 1024
-            },
-            "height": {
-              "name": "height",
-              "label": "",
-              "value": 1024
-            },
-            "steps": {
-              "name": "steps",
-              "label": "",
-              "value": 30
-            },
-            "seed": {
-              "name": "seed",
-              "label": "",
-              "value": 0
-            }
-          }
-        },
-        "position": {
-          "x": 813.7814762740603,
-          "y": -142.20529727605867
-        }
-      }
-    ],
-    "edges": [
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4cvae-9eb72af0-dd9e-4ec5-ad87-d65e3c01f48bvae",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "9eb72af0-dd9e-4ec5-ad87-d65e3c01f48b",
-        "sourceHandle": "vae",
-        "targetHandle": "vae"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4ct5_encoder-3b4f7f27-cfc0-4373-a009-99c5290d0cd6t5_encoder",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-        "sourceHandle": "t5_encoder",
-        "targetHandle": "t5_encoder"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4ct5_encoder-e17d34e7-6ed1-493c-9a85-4fcd291cb084t5_encoder",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "sourceHandle": "t5_encoder",
-        "targetHandle": "t5_encoder"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4cclip_g-3b4f7f27-cfc0-4373-a009-99c5290d0cd6clip_g",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-        "sourceHandle": "clip_g",
-        "targetHandle": "clip_g"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4cclip_g-e17d34e7-6ed1-493c-9a85-4fcd291cb084clip_g",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "sourceHandle": "clip_g",
-        "targetHandle": "clip_g"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4cclip_l-3b4f7f27-cfc0-4373-a009-99c5290d0cd6clip_l",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-        "sourceHandle": "clip_l",
-        "targetHandle": "clip_l"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4cclip_l-e17d34e7-6ed1-493c-9a85-4fcd291cb084clip_l",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "sourceHandle": "clip_l",
-        "targetHandle": "clip_l"
-      },
-      {
-        "id": "reactflow__edge-3f22f668-0e02-4fde-a2bb-c339586ceb4ctransformer-c7539f7b-7ac5-49b9-93eb-87ede611409ftransformer",
-        "type": "default",
-        "source": "3f22f668-0e02-4fde-a2bb-c339586ceb4c",
-        "target": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "sourceHandle": "transformer",
-        "targetHandle": "transformer"
-      },
-      {
-        "id": "reactflow__edge-f7e394ac-6394-4096-abcb-de0d346506b3value-c7539f7b-7ac5-49b9-93eb-87ede611409fseed",
-        "type": "default",
-        "source": "f7e394ac-6394-4096-abcb-de0d346506b3",
-        "target": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "sourceHandle": "value",
-        "targetHandle": "seed"
-      },
-      {
-        "id": "reactflow__edge-c7539f7b-7ac5-49b9-93eb-87ede611409flatents-9eb72af0-dd9e-4ec5-ad87-d65e3c01f48blatents",
-        "type": "default",
-        "source": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "target": "9eb72af0-dd9e-4ec5-ad87-d65e3c01f48b",
-        "sourceHandle": "latents",
-        "targetHandle": "latents"
-      },
-      {
-        "id": "reactflow__edge-e17d34e7-6ed1-493c-9a85-4fcd291cb084conditioning-c7539f7b-7ac5-49b9-93eb-87ede611409fpositive_conditioning",
-        "type": "default",
-        "source": "e17d34e7-6ed1-493c-9a85-4fcd291cb084",
-        "target": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "sourceHandle": "conditioning",
-        "targetHandle": "positive_conditioning"
-      },
-      {
-        "id": "reactflow__edge-3b4f7f27-cfc0-4373-a009-99c5290d0cd6conditioning-c7539f7b-7ac5-49b9-93eb-87ede611409fnegative_conditioning",
-        "type": "default",
-        "source": "3b4f7f27-cfc0-4373-a009-99c5290d0cd6",
-        "target": "c7539f7b-7ac5-49b9-93eb-87ede611409f",
-        "sourceHandle": "conditioning",
-        "targetHandle": "negative_conditioning"
-      }
-    ]
-  }
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@@ -34,25 +34,6 @@ SD1_5_LATENT_RGB_FACTORS = [
    [-0.1307, -0.1874, -0.7445],  # L4
 ]

-SD3_5_LATENT_RGB_FACTORS = [
-    [-0.05240681, 0.03251581, 0.0749016],
-    [-0.0580572, 0.00759826, 0.05729818],
-    [0.16144888, 0.01270368, -0.03768577],
-    [0.14418615, 0.08460266, 0.15941818],
-    [0.04894035, 0.0056485, -0.06686988],
-    [0.05187166, 0.19222395, 0.06261094],
-    [0.1539433, 0.04818359, 0.07103094],
-    [-0.08601796, 0.09013458, 0.10893912],
-    [-0.12398469, -0.06766567, 0.0033688],
-    [-0.0439737, 0.07825329, 0.02258823],
-    [0.03101129, 0.06382551, 0.07753657],
-    [-0.01315361, 0.08554491, -0.08772475],
-    [0.06464487, 0.05914605, 0.13262741],
-    [-0.07863674, -0.02261737, -0.12761454],
-    [-0.09923835, -0.08010759, -0.06264447],
-    [-0.03392309, -0.0804029, -0.06078822],
-]
-
 FLUX_LATENT_RGB_FACTORS = [
    [-0.0412, 0.0149, 0.0521],
    [0.0056, 0.0291, 0.0768],
@@ -129,9 +110,6 @@ def stable_diffusion_step_callback(
        sdxl_latent_rgb_factors = torch.tensor(SDXL_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
        sdxl_smooth_matrix = torch.tensor(SDXL_SMOOTH_MATRIX, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
-    elif base_model == BaseModelType.StableDiffusion3:
-        sd3_latent_rgb_factors = torch.tensor(SD3_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
-        image = sample_to_lowres_estimated_image(sample, sd3_latent_rgb_factors)
    else:
        v1_5_latent_rgb_factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
--- a/invokeai/backend/flux/custom_block_processor.py
+++ b/invokeai/backend/flux/custom_block_processor.py
@@ -1,10 +1,9 @@
 import einops
 import torch

-from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
 from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension
 from invokeai.backend.flux.math import attention
-from invokeai.backend.flux.modules.layers import DoubleStreamBlock, SingleStreamBlock
+from invokeai.backend.flux.modules.layers import DoubleStreamBlock


 class CustomDoubleStreamBlockProcessor:
@@ -14,12 +13,7 @@ class CustomDoubleStreamBlockProcessor:

    @staticmethod
    def _double_stream_block_forward(
-        block: DoubleStreamBlock,
-        img: torch.Tensor,
-        txt: torch.Tensor,
-        vec: torch.Tensor,
-        pe: torch.Tensor,
-        attn_mask: torch.Tensor | None = None,
+        block: DoubleStreamBlock, img: torch.Tensor, txt: torch.Tensor, vec: torch.Tensor, pe: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """This function is a direct copy of DoubleStreamBlock.forward(), but it returns some of the intermediate
        values.
@@ -46,7 +40,7 @@ class CustomDoubleStreamBlockProcessor:
        k = torch.cat((txt_k, img_k), dim=2)
        v = torch.cat((txt_v, img_v), dim=2)

-        attn = attention(q, k, v, pe=pe, attn_mask=attn_mask)
+        attn = attention(q, k, v, pe=pe)
        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]

        # calculate the img bloks
@@ -69,15 +63,11 @@ class CustomDoubleStreamBlockProcessor:
        vec: torch.Tensor,
        pe: torch.Tensor,
        ip_adapter_extensions: list[XLabsIPAdapterExtension],
-        regional_prompting_extension: RegionalPromptingExtension,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        """A custom implementation of DoubleStreamBlock.forward() with additional features:
        - IP-Adapter support
        """
-        attn_mask = regional_prompting_extension.get_double_stream_attn_mask(block_index)
-        img, txt, img_q = CustomDoubleStreamBlockProcessor._double_stream_block_forward(
-            block, img, txt, vec, pe, attn_mask=attn_mask
-        )
+        img, txt, img_q = CustomDoubleStreamBlockProcessor._double_stream_block_forward(block, img, txt, vec, pe)

        # Apply IP-Adapter conditioning.
        for ip_adapter_extension in ip_adapter_extensions:
@@ -91,48 +81,3 @@ class CustomDoubleStreamBlockProcessor:
            )

        return img, txt
-
-
-class CustomSingleStreamBlockProcessor:
-    """A class containing a custom implementation of SingleStreamBlock.forward() with additional features (masking,
-    etc.)
-    """
-
-    @staticmethod
-    def _single_stream_block_forward(
-        block: SingleStreamBlock,
-        x: torch.Tensor,
-        vec: torch.Tensor,
-        pe: torch.Tensor,
-        attn_mask: torch.Tensor | None = None,
-    ) -> torch.Tensor:
-        """This function is a direct copy of SingleStreamBlock.forward()."""
-        mod, _ = block.modulation(vec)
-        x_mod = (1 + mod.scale) * block.pre_norm(x) + mod.shift
-        qkv, mlp = torch.split(block.linear1(x_mod), [3 * block.hidden_size, block.mlp_hidden_dim], dim=-1)
-
-        q, k, v = einops.rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=block.num_heads)
-        q, k = block.norm(q, k, v)
-
-        # compute attention
-        attn = attention(q, k, v, pe=pe, attn_mask=attn_mask)
-        # compute activation in mlp stream, cat again and run second linear layer
-        output = block.linear2(torch.cat((attn, block.mlp_act(mlp)), 2))
-        return x + mod.gate * output
-
-    @staticmethod
-    def custom_single_block_forward(
-        timestep_index: int,
-        total_num_timesteps: int,
-        block_index: int,
-        block: SingleStreamBlock,
-        img: torch.Tensor,
-        vec: torch.Tensor,
-        pe: torch.Tensor,
-        regional_prompting_extension: RegionalPromptingExtension,
-    ) -> torch.Tensor:
-        """A custom implementation of SingleStreamBlock.forward() with additional features:
-        - Masking
-        """
-        attn_mask = regional_prompting_extension.get_single_stream_attn_mask(block_index)
-        return CustomSingleStreamBlockProcessor._single_stream_block_forward(block, img, vec, pe, attn_mask=attn_mask)
--- a/invokeai/backend/flux/denoise.py
+++ b/invokeai/backend/flux/denoise.py
@@ -7,7 +7,6 @@ from tqdm import tqdm
 from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs
 from invokeai.backend.flux.extensions.inpaint_extension import InpaintExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
-from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
 from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
 from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension
 from invokeai.backend.flux.model import Flux
@@ -19,8 +18,14 @@ def denoise(
    # model input
    img: torch.Tensor,
    img_ids: torch.Tensor,
-    pos_regional_prompting_extension: RegionalPromptingExtension,
-    neg_regional_prompting_extension: RegionalPromptingExtension | None,
+    # positive text conditioning
+    txt: torch.Tensor,
+    txt_ids: torch.Tensor,
+    vec: torch.Tensor,
+    # negative text conditioning
+    neg_txt: torch.Tensor | None,
+    neg_txt_ids: torch.Tensor | None,
+    neg_vec: torch.Tensor | None,
    # sampling parameters
    timesteps: list[float],
    step_callback: Callable[[PipelineIntermediateState], None],
@@ -56,9 +61,9 @@ def denoise(
                    total_num_timesteps=total_steps,
                    img=img,
                    img_ids=img_ids,
-                    txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                    txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                    y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+                    txt=txt,
+                    txt_ids=txt_ids,
+                    y=vec,
                    timesteps=t_vec,
                    guidance=guidance_vec,
                )
@@ -73,9 +78,9 @@ def denoise(
        pred = model(
            img=img,
            img_ids=img_ids,
-            txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-            txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-            y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+            txt=txt,
+            txt_ids=txt_ids,
+            y=vec,
            timesteps=t_vec,
            guidance=guidance_vec,
            timestep_index=step_index,
@@ -83,7 +88,6 @@ def denoise(
            controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
            controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
            ip_adapter_extensions=pos_ip_adapter_extensions,
-            regional_prompting_extension=pos_regional_prompting_extension,
        )

        step_cfg_scale = cfg_scale[step_index]
@@ -93,15 +97,15 @@ def denoise(
            # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance
            # on systems with sufficient VRAM.

-            if neg_regional_prompting_extension is None:
+            if neg_txt is None or neg_txt_ids is None or neg_vec is None:
                raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")

            neg_pred = model(
                img=img,
                img_ids=img_ids,
-                txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+                txt=neg_txt,
+                txt_ids=neg_txt_ids,
+                y=neg_vec,
                timesteps=t_vec,
                guidance=guidance_vec,
                timestep_index=step_index,
@@ -109,7 +113,6 @@ def denoise(
                controlnet_double_block_residuals=None,
                controlnet_single_block_residuals=None,
                ip_adapter_extensions=neg_ip_adapter_extensions,
-                regional_prompting_extension=neg_regional_prompting_extension,
            )
            pred = neg_pred + step_cfg_scale * (pred - neg_pred)

--- a/invokeai/backend/flux/extensions/regional_prompting_extension.py
+++ b/invokeai/backend/flux/extensions/regional_prompting_extension.py
@@ -1,276 +0,0 @@
-from typing import Optional
-
-import torch
-import torchvision
-
-from invokeai.backend.flux.text_conditioning import FluxRegionalTextConditioning, FluxTextConditioning
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import Range
-from invokeai.backend.util.devices import TorchDevice
-from invokeai.backend.util.mask import to_standard_float_mask
-
-
-class RegionalPromptingExtension:
-    """A class for managing regional prompting with FLUX.
-
-    This implementation is inspired by https://arxiv.org/pdf/2411.02395 (though there are significant differences).
-    """
-
-    def __init__(
-        self,
-        regional_text_conditioning: FluxRegionalTextConditioning,
-        restricted_attn_mask: torch.Tensor | None = None,
-    ):
-        self.regional_text_conditioning = regional_text_conditioning
-        self.restricted_attn_mask = restricted_attn_mask
-
-    def get_double_stream_attn_mask(self, block_index: int) -> torch.Tensor | None:
-        order = [self.restricted_attn_mask, None]
-        return order[block_index % len(order)]
-
-    def get_single_stream_attn_mask(self, block_index: int) -> torch.Tensor | None:
-        order = [self.restricted_attn_mask, None]
-        return order[block_index % len(order)]
-
-    @classmethod
-    def from_text_conditioning(cls, text_conditioning: list[FluxTextConditioning], img_seq_len: int):
-        """Create a RegionalPromptingExtension from a list of text conditionings.
-
-        Args:
-            text_conditioning (list[FluxTextConditioning]): The text conditionings to use for regional prompting.
-            img_seq_len (int): The image sequence length (i.e. packed_height * packed_width).
-        """
-        regional_text_conditioning = cls._concat_regional_text_conditioning(text_conditioning)
-        attn_mask_with_restricted_img_self_attn = cls._prepare_restricted_attn_mask(
-            regional_text_conditioning, img_seq_len
-        )
-        return cls(
-            regional_text_conditioning=regional_text_conditioning,
-            restricted_attn_mask=attn_mask_with_restricted_img_self_attn,
-        )
-
-    # Keeping _prepare_unrestricted_attn_mask for reference as an alternative masking strategy:
-    #
-    # @classmethod
-    # def _prepare_unrestricted_attn_mask(
-    #     cls,
-    #     regional_text_conditioning: FluxRegionalTextConditioning,
-    #     img_seq_len: int,
-    # ) -> torch.Tensor:
-    #     """Prepare an 'unrestricted' attention mask. In this context, 'unrestricted' means that:
-    #     - img self-attention is not masked.
-    #     - img regions attend to both txt within their own region and to global prompts.
-    #     """
-    #     device = TorchDevice.choose_torch_device()
-
-    #     # Infer txt_seq_len from the t5_embeddings tensor.
-    #     txt_seq_len = regional_text_conditioning.t5_embeddings.shape[1]
-
-    #     # In the attention blocks, the txt seq and img seq are concatenated and then attention is applied.
-    #     # Concatenation happens in the following order: [txt_seq, img_seq].
-    #     # There are 4 portions of the attention mask to consider as we prepare it:
-    #     # 1. txt attends to itself
-    #     # 2. txt attends to corresponding regional img
-    #     # 3. regional img attends to corresponding txt
-    #     # 4. regional img attends to itself
-
-    #     # Initialize empty attention mask.
-    #     regional_attention_mask = torch.zeros(
-    #         (txt_seq_len + img_seq_len, txt_seq_len + img_seq_len), device=device, dtype=torch.float16
-    #     )
-
-    #     for image_mask, t5_embedding_range in zip(
-    #         regional_text_conditioning.image_masks, regional_text_conditioning.t5_embedding_ranges, strict=True
-    #     ):
-    #         # 1. txt attends to itself
-    #         regional_attention_mask[
-    #             t5_embedding_range.start : t5_embedding_range.end, t5_embedding_range.start : t5_embedding_range.end
-    #         ] = 1.0
-
-    #         # 2. txt attends to corresponding regional img
-    #         # Note that we reshape to (1, img_seq_len) to ensure broadcasting works as desired.
-    #         fill_value = image_mask.view(1, img_seq_len) if image_mask is not None else 1.0
-    #         regional_attention_mask[t5_embedding_range.start : t5_embedding_range.end, txt_seq_len:] = fill_value
-
-    #         # 3. regional img attends to corresponding txt
-    #         # Note that we reshape to (img_seq_len, 1) to ensure broadcasting works as desired.
-    #         fill_value = image_mask.view(img_seq_len, 1) if image_mask is not None else 1.0
-    #         regional_attention_mask[txt_seq_len:, t5_embedding_range.start : t5_embedding_range.end] = fill_value
-
-    #     # 4. regional img attends to itself
-    #     # Allow unrestricted img self attention.
-    #     regional_attention_mask[txt_seq_len:, txt_seq_len:] = 1.0
-
-    #     # Convert attention mask to boolean.
-    #     regional_attention_mask = regional_attention_mask > 0.5
-
-    #     return regional_attention_mask
-
-    @classmethod
-    def _prepare_restricted_attn_mask(
-        cls,
-        regional_text_conditioning: FluxRegionalTextConditioning,
-        img_seq_len: int,
-    ) -> torch.Tensor | None:
-        """Prepare a 'restricted' attention mask. In this context, 'restricted' means that:
-        - img self-attention is only allowed within regions.
-        - img regions only attend to txt within their own region, not to global prompts.
-        """
-        # Identify background region. I.e. the region that is not covered by any region masks.
-        background_region_mask: None | torch.Tensor = None
-        for image_mask in regional_text_conditioning.image_masks:
-            if image_mask is not None:
-                if background_region_mask is None:
-                    background_region_mask = torch.ones_like(image_mask)
-                background_region_mask *= 1 - image_mask
-
-        if background_region_mask is None:
-            # There are no region masks, short-circuit and return None.
-            # TODO(ryand): We could restrict txt-txt attention across multiple global prompts, but this would
-            # is a rare use case and would make the logic here significantly more complicated.
-            return None
-
-        device = TorchDevice.choose_torch_device()
-
-        # Infer txt_seq_len from the t5_embeddings tensor.
-        txt_seq_len = regional_text_conditioning.t5_embeddings.shape[1]
-
-        # In the attention blocks, the txt seq and img seq are concatenated and then attention is applied.
-        # Concatenation happens in the following order: [txt_seq, img_seq].
-        # There are 4 portions of the attention mask to consider as we prepare it:
-        # 1. txt attends to itself
-        # 2. txt attends to corresponding regional img
-        # 3. regional img attends to corresponding txt
-        # 4. regional img attends to itself
-
-        # Initialize empty attention mask.
-        regional_attention_mask = torch.zeros(
-            (txt_seq_len + img_seq_len, txt_seq_len + img_seq_len), device=device, dtype=torch.float16
-        )
-
-        for image_mask, t5_embedding_range in zip(
-            regional_text_conditioning.image_masks, regional_text_conditioning.t5_embedding_ranges, strict=True
-        ):
-            # 1. txt attends to itself
-            regional_attention_mask[
-                t5_embedding_range.start : t5_embedding_range.end, t5_embedding_range.start : t5_embedding_range.end
-            ] = 1.0
-
-            if image_mask is not None:
-                # 2. txt attends to corresponding regional img
-                # Note that we reshape to (1, img_seq_len) to ensure broadcasting works as desired.
-                regional_attention_mask[t5_embedding_range.start : t5_embedding_range.end, txt_seq_len:] = (
-                    image_mask.view(1, img_seq_len)
-                )
-
-                # 3. regional img attends to corresponding txt
-                # Note that we reshape to (img_seq_len, 1) to ensure broadcasting works as desired.
-                regional_attention_mask[txt_seq_len:, t5_embedding_range.start : t5_embedding_range.end] = (
-                    image_mask.view(img_seq_len, 1)
-                )
-
-                # 4. regional img attends to itself
-                image_mask = image_mask.view(img_seq_len, 1)
-                regional_attention_mask[txt_seq_len:, txt_seq_len:] += image_mask @ image_mask.T
-            else:
-                # We don't allow attention between non-background image regions and global prompts. This helps to ensure
-                # that regions focus on their local prompts. We do, however, allow attention between background regions
-                # and global prompts. If we didn't do this, then the background regions would not attend to any txt
-                # embeddings, which we found experimentally to cause artifacts.
-
-                # 2. global txt attends to background region
-                # Note that we reshape to (1, img_seq_len) to ensure broadcasting works as desired.
-                regional_attention_mask[t5_embedding_range.start : t5_embedding_range.end, txt_seq_len:] = (
-                    background_region_mask.view(1, img_seq_len)
-                )
-
-                # 3. background region attends to global txt
-                # Note that we reshape to (img_seq_len, 1) to ensure broadcasting works as desired.
-                regional_attention_mask[txt_seq_len:, t5_embedding_range.start : t5_embedding_range.end] = (
-                    background_region_mask.view(img_seq_len, 1)
-                )
-
-        # Allow background regions to attend to themselves.
-        regional_attention_mask[txt_seq_len:, txt_seq_len:] += background_region_mask.view(img_seq_len, 1)
-        regional_attention_mask[txt_seq_len:, txt_seq_len:] += background_region_mask.view(1, img_seq_len)
-
-        # Convert attention mask to boolean.
-        regional_attention_mask = regional_attention_mask > 0.5
-
-        return regional_attention_mask
-
-    @classmethod
-    def _concat_regional_text_conditioning(
-        cls,
-        text_conditionings: list[FluxTextConditioning],
-    ) -> FluxRegionalTextConditioning:
-        """Concatenate regional text conditioning data into a single conditioning tensor (with associated masks)."""
-        concat_t5_embeddings: list[torch.Tensor] = []
-        concat_t5_embedding_ranges: list[Range] = []
-        image_masks: list[torch.Tensor | None] = []
-
-        # Choose global CLIP embedding.
-        # Use the first global prompt's CLIP embedding as the global CLIP embedding. If there is no global prompt, use
-        # the first prompt's CLIP embedding.
-        global_clip_embedding: torch.Tensor = text_conditionings[0].clip_embeddings
-        for text_conditioning in text_conditionings:
-            if text_conditioning.mask is None:
-                global_clip_embedding = text_conditioning.clip_embeddings
-                break
-
-        cur_t5_embedding_len = 0
-        for text_conditioning in text_conditionings:
-            concat_t5_embeddings.append(text_conditioning.t5_embeddings)
-
-            concat_t5_embedding_ranges.append(
-                Range(start=cur_t5_embedding_len, end=cur_t5_embedding_len + text_conditioning.t5_embeddings.shape[1])
-            )
-
-            image_masks.append(text_conditioning.mask)
-
-            cur_t5_embedding_len += text_conditioning.t5_embeddings.shape[1]
-
-        t5_embeddings = torch.cat(concat_t5_embeddings, dim=1)
-
-        # Initialize the txt_ids tensor.
-        pos_bs, pos_t5_seq_len, _ = t5_embeddings.shape
-        t5_txt_ids = torch.zeros(
-            pos_bs, pos_t5_seq_len, 3, dtype=t5_embeddings.dtype, device=TorchDevice.choose_torch_device()
-        )
-
-        return FluxRegionalTextConditioning(
-            t5_embeddings=t5_embeddings,
-            clip_embeddings=global_clip_embedding,
-            t5_txt_ids=t5_txt_ids,
-            image_masks=image_masks,
-            t5_embedding_ranges=concat_t5_embedding_ranges,
-        )
-
-    @staticmethod
-    def preprocess_regional_prompt_mask(
-        mask: Optional[torch.Tensor], packed_height: int, packed_width: int, dtype: torch.dtype, device: torch.device
-    ) -> torch.Tensor:
-        """Preprocess a regional prompt mask to match the target height and width.
-        If mask is None, returns a mask of all ones with the target height and width.
-        If mask is not None, resizes the mask to the target height and width using 'nearest' interpolation.
-
-        packed_height and packed_width are the target height and width of the mask in the 'packed' latent space.
-
-        Returns:
-            torch.Tensor: The processed mask. shape: (1, 1, packed_height * packed_width).
-        """
-
-        if mask is None:
-            return torch.ones((1, 1, packed_height * packed_width), dtype=dtype, device=device)
-
-        mask = to_standard_float_mask(mask, out_dtype=dtype)
-
-        tf = torchvision.transforms.Resize(
-            (packed_height, packed_width), interpolation=torchvision.transforms.InterpolationMode.NEAREST
-        )
-
-        # Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
-        mask = mask.unsqueeze(0)  # Shape: (1, h, w) -> (1, 1, h, w)
-        resized_mask = tf(mask)
-
-        # Flatten the height and width dimensions into a single image_seq_len dimension.
-        return resized_mask.flatten(start_dim=2)
--- a/invokeai/backend/flux/ip_adapter/state_dict_utils.py
+++ b/invokeai/backend/flux/ip_adapter/state_dict_utils.py
@@ -41,12 +41,10 @@ def infer_xlabs_ip_adapter_params_from_state_dict(state_dict: dict[str, torch.Te
    hidden_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[0]
    context_dim = state_dict["double_blocks.0.processor.ip_adapter_double_stream_k_proj.weight"].shape[1]
    clip_embeddings_dim = state_dict["ip_adapter_proj_model.proj.weight"].shape[1]
-    clip_extra_context_tokens = state_dict["ip_adapter_proj_model.proj.weight"].shape[0] // context_dim

    return XlabsIpAdapterParams(
        num_double_blocks=num_double_blocks,
        context_dim=context_dim,
        hidden_dim=hidden_dim,
        clip_embeddings_dim=clip_embeddings_dim,
-        clip_extra_context_tokens=clip_extra_context_tokens,
    )
--- a/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py
+++ b/invokeai/backend/flux/ip_adapter/xlabs_ip_adapter_flux.py
@@ -31,16 +31,13 @@ class XlabsIpAdapterParams:
    hidden_dim: int

    clip_embeddings_dim: int
-    clip_extra_context_tokens: int


 class XlabsIpAdapterFlux(torch.nn.Module):
    def __init__(self, params: XlabsIpAdapterParams):
        super().__init__()
        self.image_proj = ImageProjModel(
-            cross_attention_dim=params.context_dim,
-            clip_embeddings_dim=params.clip_embeddings_dim,
-            clip_extra_context_tokens=params.clip_extra_context_tokens,
+            cross_attention_dim=params.context_dim, clip_embeddings_dim=params.clip_embeddings_dim
        )
        self.ip_adapter_double_blocks = IPAdapterDoubleBlocks(
            num_double_blocks=params.num_double_blocks, context_dim=params.context_dim, hidden_dim=params.hidden_dim
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -5,10 +5,10 @@ from einops import rearrange
 from torch import Tensor


-def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, attn_mask: Tensor | None = None) -> Tensor:
+def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
    q, k = apply_rope(q, k, pe)

-    x = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
+    x = torch.nn.functional.scaled_dot_product_attention(q, k, v)
    x = rearrange(x, "B H L D -> B L (H D)")

    return x
@@ -24,12 +24,12 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    out = torch.einsum("...n,d->...nd", pos, omega)
    out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
-    return out.to(dtype=pos.dtype, device=pos.device)
+    return out.float()


 def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
-    xq_ = xq.view(*xq.shape[:-1], -1, 1, 2)
-    xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
+    xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
+    xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.view(*xq.shape), xk_out.view(*xk.shape)
+    return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
--- a/invokeai/backend/flux/model.py
+++ b/invokeai/backend/flux/model.py
@@ -5,11 +5,7 @@ from dataclasses import dataclass
 import torch
 from torch import Tensor, nn

-from invokeai.backend.flux.custom_block_processor import (
-    CustomDoubleStreamBlockProcessor,
-    CustomSingleStreamBlockProcessor,
-)
-from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
+from invokeai.backend.flux.custom_block_processor import CustomDoubleStreamBlockProcessor
 from invokeai.backend.flux.extensions.xlabs_ip_adapter_extension import XLabsIPAdapterExtension
 from invokeai.backend.flux.modules.layers import (
    DoubleStreamBlock,
@@ -99,7 +95,6 @@ class Flux(nn.Module):
        controlnet_double_block_residuals: list[Tensor] | None,
        controlnet_single_block_residuals: list[Tensor] | None,
        ip_adapter_extensions: list[XLabsIPAdapterExtension],
-        regional_prompting_extension: RegionalPromptingExtension,
    ) -> Tensor:
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")
@@ -122,6 +117,7 @@ class Flux(nn.Module):
            assert len(controlnet_double_block_residuals) == len(self.double_blocks)
        for block_index, block in enumerate(self.double_blocks):
            assert isinstance(block, DoubleStreamBlock)
+
            img, txt = CustomDoubleStreamBlockProcessor.custom_double_block_forward(
                timestep_index=timestep_index,
                total_num_timesteps=total_num_timesteps,
@@ -132,7 +128,6 @@ class Flux(nn.Module):
                vec=vec,
                pe=pe,
                ip_adapter_extensions=ip_adapter_extensions,
-                regional_prompting_extension=regional_prompting_extension,
            )

            if controlnet_double_block_residuals is not None:
@@ -145,17 +140,7 @@ class Flux(nn.Module):
            assert len(controlnet_single_block_residuals) == len(self.single_blocks)

        for block_index, block in enumerate(self.single_blocks):
-            assert isinstance(block, SingleStreamBlock)
-            img = CustomSingleStreamBlockProcessor.custom_single_block_forward(
-                timestep_index=timestep_index,
-                total_num_timesteps=total_num_timesteps,
-                block_index=block_index,
-                block=block,
-                img=img,
-                vec=vec,
-                pe=pe,
-                regional_prompting_extension=regional_prompting_extension,
-            )
+            img = block(img, vec=vec, pe=pe)

            if controlnet_single_block_residuals is not None:
                img[:, txt.shape[1] :, ...] += controlnet_single_block_residuals[block_index]
--- a/invokeai/backend/flux/modules/layers.py
+++ b/invokeai/backend/flux/modules/layers.py
@@ -66,7 +66,10 @@ class RMSNorm(torch.nn.Module):
        self.scale = nn.Parameter(torch.ones(dim))

    def forward(self, x: Tensor):
-        return torch.nn.functional.rms_norm(x, self.scale.shape, self.scale, eps=1e-6)
+        x_dtype = x.dtype
+        x = x.float()
+        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
+        return (x * rrms).to(dtype=x_dtype) * self.scale


 class QKNorm(torch.nn.Module):
--- a/invokeai/backend/flux/text_conditioning.py
+++ b/invokeai/backend/flux/text_conditioning.py
@@ -1,36 +0,0 @@
-from dataclasses import dataclass
-
-import torch
-
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import Range
-
-
-@dataclass
-class FluxTextConditioning:
-    t5_embeddings: torch.Tensor
-    clip_embeddings: torch.Tensor
-    # If mask is None, the prompt is a global prompt.
-    mask: torch.Tensor | None
-
-
-@dataclass
-class FluxRegionalTextConditioning:
-    # Concatenated text embeddings.
-    # Shape: (1, concatenated_txt_seq_len, 4096)
-    t5_embeddings: torch.Tensor
-    # Shape: (1, concatenated_txt_seq_len, 3)
-    t5_txt_ids: torch.Tensor
-
-    # Global CLIP embeddings.
-    # Shape: (1, 768)
-    clip_embeddings: torch.Tensor
-
-    # A binary mask indicating the regions of the image that the prompt should be applied to. If None, the prompt is a
-    # global prompt.
-    # image_masks[i] is the mask for the ith prompt.
-    # image_masks[i] has shape (1, image_seq_len) and dtype torch.bool.
-    image_masks: list[torch.Tensor | None]
-
-    # List of ranges that represent the embedding ranges for each mask.
-    # t5_embedding_ranges[i] contains the range of the t5 embeddings that correspond to image_masks[i].
-    t5_embedding_ranges: list[Range]
--- a/invokeai/backend/image_util/assets/CIELab_to_UPLab.icc
+++ b/invokeai/backend/image_util/assets/CIELab_to_UPLab.icc
--- a/invokeai/backend/image_util/composition.py
+++ b/invokeai/backend/image_util/composition.py
--- a/invokeai/backend/lora/conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/lora/conversions/flux_diffusers_lora_conversion_utils.py
@@ -45,9 +45,8 @@ def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor
    # Constants for FLUX.1
    num_double_layers = 19
    num_single_layers = 38
-    hidden_size = 3072
-    mlp_ratio = 4.0
-    mlp_hidden_dim = int(hidden_size * mlp_ratio)
+    # inner_dim = 3072
+    # mlp_ratio = 4.0

    layers: dict[str, AnyLoRALayer] = {}

@@ -63,43 +62,30 @@ def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor
            layers[dst_key] = LoRALayer.from_state_dict_values(values=value)
            assert len(src_layer_dict) == 0

-    def add_qkv_lora_layer_if_present(
-        src_keys: list[str],
-        src_weight_shapes: list[tuple[int, int]],
-        dst_qkv_key: str,
-        allow_missing_keys: bool = False,
-    ) -> None:
+    def add_qkv_lora_layer_if_present(src_keys: list[str], dst_qkv_key: str) -> None:
        """Handle the Q, K, V matrices for a transformer block. We need special handling because the diffusers format
        stores them in separate matrices, whereas the BFL format used internally by InvokeAI concatenates them.
        """
-        # If none of the keys are present, return early.
+        # We expect that either all src keys are present or none of them are. Verify this.
        keys_present = [key in grouped_state_dict for key in src_keys]
+        assert all(keys_present) or not any(keys_present)
+
+        # If none of the keys are present, return early.
        if not any(keys_present):
            return

+        src_layer_dicts = [grouped_state_dict.pop(key) for key in src_keys]
        sub_layers: list[LoRALayer] = []
-        for src_key, src_weight_shape in zip(src_keys, src_weight_shapes, strict=True):
-            src_layer_dict = grouped_state_dict.pop(src_key, None)
-            if src_layer_dict is not None:
-                values = {
-                    "lora_down.weight": src_layer_dict.pop("lora_A.weight"),
-                    "lora_up.weight": src_layer_dict.pop("lora_B.weight"),
-                }
-                if alpha is not None:
-                    values["alpha"] = torch.tensor(alpha)
-                assert values["lora_down.weight"].shape[1] == src_weight_shape[1]
-                assert values["lora_up.weight"].shape[0] == src_weight_shape[0]
-                sub_layers.append(LoRALayer.from_state_dict_values(values=values))
-                assert len(src_layer_dict) == 0
-            else:
-                if not allow_missing_keys:
-                    raise ValueError(f"Missing LoRA layer: '{src_key}'.")
-                values = {
-                    "lora_up.weight": torch.zeros((src_weight_shape[0], 1)),
-                    "lora_down.weight": torch.zeros((1, src_weight_shape[1])),
-                }
-                sub_layers.append(LoRALayer.from_state_dict_values(values=values))
-        layers[dst_qkv_key] = ConcatenatedLoRALayer(lora_layers=sub_layers)
+        for src_layer_dict in src_layer_dicts:
+            values = {
+                "lora_down.weight": src_layer_dict.pop("lora_A.weight"),
+                "lora_up.weight": src_layer_dict.pop("lora_B.weight"),
+            }
+            if alpha is not None:
+                values["alpha"] = torch.tensor(alpha)
+            sub_layers.append(LoRALayer.from_state_dict_values(values=values))
+            assert len(src_layer_dict) == 0
+        layers[dst_qkv_key] = ConcatenatedLoRALayer(lora_layers=sub_layers, concat_axis=0)

    # time_text_embed.timestep_embedder -> time_in.
    add_lora_layer_if_present("time_text_embed.timestep_embedder.linear_1", "time_in.in_layer")
@@ -132,7 +118,6 @@ def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor
                f"transformer_blocks.{i}.attn.to_k",
                f"transformer_blocks.{i}.attn.to_v",
            ],
-            [(hidden_size, hidden_size), (hidden_size, hidden_size), (hidden_size, hidden_size)],
            f"double_blocks.{i}.img_attn.qkv",
        )
        add_qkv_lora_layer_if_present(
@@ -141,7 +126,6 @@ def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor
                f"transformer_blocks.{i}.attn.add_k_proj",
                f"transformer_blocks.{i}.attn.add_v_proj",
            ],
-            [(hidden_size, hidden_size), (hidden_size, hidden_size), (hidden_size, hidden_size)],
            f"double_blocks.{i}.txt_attn.qkv",
        )

@@ -191,14 +175,7 @@ def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor
                f"single_transformer_blocks.{i}.attn.to_v",
                f"single_transformer_blocks.{i}.proj_mlp",
            ],
-            [
-                (hidden_size, hidden_size),
-                (hidden_size, hidden_size),
-                (hidden_size, hidden_size),
-                (mlp_hidden_dim, hidden_size),
-            ],
            f"single_blocks.{i}.linear1",
-            allow_missing_keys=True,
        )

        # Output projections.
--- a/invokeai/backend/lora/lora_layer_wrappers.py
+++ b/invokeai/backend/lora/lora_layer_wrappers.py
@@ -1,133 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-
-
-class LoRASidecarWrapper(torch.nn.Module):
-    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[AnyLoRALayer], lora_weights: list[float]):
-        super().__init__()
-        self._orig_module = orig_module
-        self._lora_layers = lora_layers
-        self._lora_weights = lora_weights
-
-    @property
-    def orig_module(self) -> torch.nn.Module:
-        return self._orig_module
-
-    def add_lora_layer(self, lora_layer: AnyLoRALayer, lora_weight: float):
-        self._lora_layers.append(lora_layer)
-        self._lora_weights.append(lora_weight)
-
-    @torch.no_grad()
-    def _get_lora_patched_parameters(
-        self, orig_params: dict[str, torch.Tensor], lora_layers: list[AnyLoRALayer], lora_weights: list[float]
-    ) -> dict[str, torch.Tensor]:
-        params: dict[str, torch.Tensor] = {}
-        for lora_layer, lora_weight in zip(lora_layers, lora_weights, strict=True):
-            layer_params = lora_layer.get_parameters(self._orig_module)
-            for param_name, param_weight in layer_params.items():
-                if orig_params[param_name].shape != param_weight.shape:
-                    param_weight = param_weight.reshape(orig_params[param_name].shape)
-
-                if param_name not in params:
-                    params[param_name] = param_weight * (lora_layer.scale() * lora_weight)
-                else:
-                    params[param_name] += param_weight * (lora_layer.scale() * lora_weight)
-
-        return params
-
-
-class LoRALinearWrapper(LoRASidecarWrapper):
-    def _lora_linear_forward(self, input: torch.Tensor, lora_layer: LoRALayer, lora_weight: float) -> torch.Tensor:
-        """An optimized implementation of the residual calculation for a Linear LoRALayer."""
-        x = torch.nn.functional.linear(input, lora_layer.down)
-        if lora_layer.mid is not None:
-            x = torch.nn.functional.linear(x, lora_layer.mid)
-        x = torch.nn.functional.linear(x, lora_layer.up, bias=lora_layer.bias)
-        x *= lora_weight * lora_layer.scale()
-        return x
-
-    def _concatenated_lora_forward(
-        self, input: torch.Tensor, concatenated_lora_layer: ConcatenatedLoRALayer, lora_weight: float
-    ) -> torch.Tensor:
-        """An optimized implementation of the residual calculation for a Linear ConcatenatedLoRALayer."""
-        x_chunks: list[torch.Tensor] = []
-        for lora_layer in concatenated_lora_layer.lora_layers:
-            x_chunk = torch.nn.functional.linear(input, lora_layer.down)
-            if lora_layer.mid is not None:
-                x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid)
-            x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias)
-            x_chunk *= lora_weight * lora_layer.scale()
-            x_chunks.append(x_chunk)
-
-        # TODO(ryand): Generalize to support concat_axis != 0.
-        assert concatenated_lora_layer.concat_axis == 0
-        x = torch.cat(x_chunks, dim=-1)
-        return x
-
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        # Split the LoRA layers into those that have optimized implementations and those that don't.
-        optimized_layer_types = (LoRALayer, ConcatenatedLoRALayer)
-        optimized_layers = [
-            (layer, weight)
-            for layer, weight in zip(self._lora_layers, self._lora_weights, strict=True)
-            if isinstance(layer, optimized_layer_types)
-        ]
-        non_optimized_layers = [
-            (layer, weight)
-            for layer, weight in zip(self._lora_layers, self._lora_weights, strict=True)
-            if not isinstance(layer, optimized_layer_types)
-        ]
-
-        # First, calculate the residual for LoRA layers for which there is an optimized implementation.
-        residual = None
-        for lora_layer, lora_weight in optimized_layers:
-            if isinstance(lora_layer, LoRALayer):
-                added_residual = self._lora_linear_forward(input, lora_layer, lora_weight)
-            elif isinstance(lora_layer, ConcatenatedLoRALayer):
-                added_residual = self._concatenated_lora_forward(input, lora_layer, lora_weight)
-            else:
-                raise ValueError(f"Unsupported LoRA layer type: {type(lora_layer)}")
-
-            if residual is None:
-                residual = added_residual
-            else:
-                residual += added_residual
-
-        # Next, calculate the residuals for the LoRA layers for which there is no optimized implementation.
-        if non_optimized_layers:
-            unoptimized_layers, unoptimized_weights = zip(*non_optimized_layers, strict=True)
-            params = self._get_lora_patched_parameters(
-                orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
-                lora_layers=unoptimized_layers,
-                lora_weights=unoptimized_weights,
-            )
-            added_residual = torch.nn.functional.linear(input, params["weight"], params.get("bias", None))
-            if residual is None:
-                residual = added_residual
-            else:
-                residual += added_residual
-
-        return self.orig_module(input) + residual
-
-
-class LoRAConv1dWrapper(LoRASidecarWrapper):
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        params = self._get_lora_patched_parameters(
-            orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
-            lora_layers=self._lora_layers,
-            lora_weights=self._lora_weights,
-        )
-        return self.orig_module(input) + torch.nn.functional.conv1d(input, params["weight"], params.get("bias", None))
-
-
-class LoRAConv2dWrapper(LoRASidecarWrapper):
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        params = self._get_lora_patched_parameters(
-            orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
-            lora_layers=self._lora_layers,
-            lora_weights=self._lora_weights,
-        )
-        return self.orig_module(input) + torch.nn.functional.conv2d(input, params["weight"], params.get("bias", None))
--- a/invokeai/backend/lora/lora_patcher.py
+++ b/invokeai/backend/lora/lora_patcher.py
@@ -4,126 +4,19 @@ from typing import Dict, Iterable, Optional, Tuple
 import torch

 from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.lora_layer_wrappers import (
-    LoRAConv1dWrapper,
-    LoRAConv2dWrapper,
-    LoRALinearWrapper,
-    LoRASidecarWrapper,
-)
+from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
 from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
+from invokeai.backend.lora.sidecar_layers.concatenated_lora.concatenated_lora_linear_sidecar_layer import (
+    ConcatenatedLoRALinearSidecarLayer,
+)
+from invokeai.backend.lora.sidecar_layers.lora.lora_linear_sidecar_layer import LoRALinearSidecarLayer
+from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage


 class LoRAPatcher:
-    @staticmethod
-    @torch.no_grad()
-    @contextmanager
-    def apply_smart_lora_patches(
-        model: torch.nn.Module,
-        patches: Iterable[Tuple[LoRAModelRaw, float]],
-        prefix: str,
-        dtype: torch.dtype,
-        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
-    ):
-        """Apply 'smart' LoRA patching that chooses whether to use direct patching or a sidecar wrapper for each module."""
-
-        # original_weights are stored for unpatching layers that are directly patched.
-        original_weights = OriginalWeightsStorage(cached_weights)
-        # original_modules are stored for unpatching layers that are wrapped in a LoRASidecarWrapper.
-        original_modules: dict[str, torch.nn.Module] = {}
-        try:
-            for patch, patch_weight in patches:
-                LoRAPatcher._apply_smart_lora_patch(
-                    model=model,
-                    prefix=prefix,
-                    patch=patch,
-                    patch_weight=patch_weight,
-                    original_weights=original_weights,
-                    original_modules=original_modules,
-                    dtype=dtype,
-                )
-
-            yield
-        finally:
-            # Restore directly patched layers.
-            for param_key, weight in original_weights.get_changed_weights():
-                model.get_parameter(param_key).copy_(weight)
-
-            # Restore LoRASidecarWrapper modules.
-            # Note: This logic assumes no nested modules in original_modules.
-            for module_key, orig_module in original_modules.items():
-                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
-                parent_module = model.get_submodule(module_parent_key)
-                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)
-
-    @staticmethod
-    @torch.no_grad()
-    def _apply_smart_lora_patch(
-        model: torch.nn.Module,
-        prefix: str,
-        patch: LoRAModelRaw,
-        patch_weight: float,
-        original_weights: OriginalWeightsStorage,
-        original_modules: dict[str, torch.nn.Module],
-        dtype: torch.dtype,
-    ):
-        """Apply a single LoRA patch to a model using the 'smart' patching strategy that chooses whether to use direct
-        patching or a sidecar wrapper for each module.
-        """
-        if patch_weight == 0:
-            return
-
-        # If the layer keys contain a dot, then they are not flattened, and can be directly used to access model
-        # submodules. If the layer keys do not contain a dot, then they are flattened, meaning that all '.' have been
-        # replaced with '_'. Non-flattened keys are preferred, because they allow submodules to be accessed directly
-        # without searching, but some legacy code still uses flattened keys.
-        layer_keys_are_flattened = "." not in next(iter(patch.layers.keys()))
-
-        prefix_len = len(prefix)
-
-        for layer_key, layer in patch.layers.items():
-            if not layer_key.startswith(prefix):
-                continue
-
-            module_key, module = LoRAPatcher._get_submodule(
-                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
-            )
-
-            # Decide whether to use direct patching or a sidecar wrapper.
-            # Direct patching is preferred, because it results in better runtime speed.
-            # Reasons to use sidecar patching:
-            # - The module is already wrapped in a LoRASidecarWrapper.
-            # - The module is quantized.
-            # - The module is on the CPU (and we don't want to store a second full copy of the original weights on the
-            #   CPU, since this would double the RAM usage)
-            # NOTE: For now, we don't check if the layer is quantized here. We assume that this is checked in the caller
-            # and that the caller will use the 'apply_lora_wrapper_patches' method if the layer is quantized.
-            # TODO(ryand): Handle the case where we are running without a GPU. Should we set a config flag that allows
-            # forcing full patching even on the CPU?
-            if isinstance(module, LoRASidecarWrapper) or LoRAPatcher._is_any_part_of_layer_on_cpu(module):
-                LoRAPatcher._apply_lora_layer_wrapper_patch(
-                    model=model,
-                    module_to_patch=module,
-                    module_to_patch_key=module_key,
-                    patch=layer,
-                    patch_weight=patch_weight,
-                    original_modules=original_modules,
-                    dtype=dtype,
-                )
-            else:
-                LoRAPatcher._apply_lora_layer_patch(
-                    module_to_patch=module,
-                    module_to_patch_key=module_key,
-                    patch=layer,
-                    patch_weight=patch_weight,
-                    original_weights=original_weights,
-                )
-
-    @staticmethod
-    def _is_any_part_of_layer_on_cpu(layer: torch.nn.Module) -> bool:
-        return any(p.device.type == "cpu" for p in layer.parameters())
-
    @staticmethod
    @torch.no_grad()
    @contextmanager
@@ -147,7 +40,7 @@ class LoRAPatcher:
        original_weights = OriginalWeightsStorage(cached_weights)
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher._apply_lora_patch(
+                LoRAPatcher.apply_lora_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -163,7 +56,7 @@ class LoRAPatcher:

    @staticmethod
    @torch.no_grad()
-    def _apply_lora_patch(
+    def apply_lora_patch(
        model: torch.nn.Module,
        prefix: str,
        patch: LoRAModelRaw,
@@ -198,67 +91,48 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            LoRAPatcher._apply_lora_layer_patch(
-                module_to_patch=module,
-                module_to_patch_key=module_key,
-                patch=layer,
-                patch_weight=patch_weight,
-                original_weights=original_weights,
-            )
+            # All of the LoRA weight calculations will be done on the same device as the module weight.
+            # (Performance will be best if this is a CUDA device.)
+            device = module.weight.device
+            dtype = module.weight.dtype

-    @staticmethod
-    @torch.no_grad()
-    def _apply_lora_layer_patch(
-        module_to_patch: torch.nn.Module,
-        module_to_patch_key: str,
-        patch: AnyLoRALayer,
-        patch_weight: float,
-        original_weights: OriginalWeightsStorage,
-    ):
-        # All of the LoRA weight calculations will be done on the same device as the module weight.
-        # (Performance will be best if this is a CUDA device.)
-        device = module_to_patch.weight.device
-        dtype = module_to_patch.weight.dtype
+            layer_scale = layer.scale()

-        layer_scale = patch.scale()
+            # We intentionally move to the target device first, then cast. Experimentally, this was found to
+            # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
+            # same thing in a single call to '.to(...)'.
+            layer.to(device=device)
+            layer.to(dtype=torch.float32)

-        # We intentionally move to the target device first, then cast. Experimentally, this was found to
-        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
-        # same thing in a single call to '.to(...)'.
-        patch.to(device=device)
-        patch.to(dtype=torch.float32)
+            # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
+            # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
+            for param_name, lora_param_weight in layer.get_parameters(module).items():
+                param_key = module_key + "." + param_name
+                module_param = module.get_parameter(param_name)

-        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
-        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-        for param_name, lora_param_weight in patch.get_parameters(module_to_patch).items():
-            param_key = module_to_patch_key + "." + param_name
-            module_param = module_to_patch.get_parameter(param_name)
+                # Save original weight
+                original_weights.save(param_key, module_param)

-            # Save original weight
-            original_weights.save(param_key, module_param)
+                if module_param.shape != lora_param_weight.shape:
+                    lora_param_weight = lora_param_weight.reshape(module_param.shape)

-            if module_param.shape != lora_param_weight.shape:
-                lora_param_weight = lora_param_weight.reshape(module_param.shape)
+                lora_param_weight *= patch_weight * layer_scale
+                module_param += lora_param_weight.to(dtype=dtype)

-            lora_param_weight *= patch_weight * layer_scale
-            module_param += lora_param_weight.to(dtype=dtype)
-
-        patch.to(device=TorchDevice.CPU_DEVICE)
+            layer.to(device=TorchDevice.CPU_DEVICE)

    @staticmethod
    @torch.no_grad()
    @contextmanager
-    def apply_lora_wrapper_patches(
+    def apply_lora_sidecar_patches(
        model: torch.nn.Module,
        patches: Iterable[Tuple[LoRAModelRaw, float]],
        prefix: str,
        dtype: torch.dtype,
    ):
-        """Apply one or more LoRA wrapper patches to a model within a context manager. Wrapper patches incur some
-        runtime overhead compared to normal LoRA patching, but they enable:
-        - LoRA layers to be applied to quantized models
-        - LoRA layers to be applied to CPU layers without needing to store a full copy of the original weights (i.e.
-          avoid doubling the memory requirements).
+        """Apply one or more LoRA sidecar patches to a model within a context manager. Sidecar patches incur some
+        overhead compared to normal LoRA patching, but they allow for LoRA layers to be applied to base layers in any
+        quantization format.

        Args:
            model (torch.nn.Module): The model to patch.
@@ -266,11 +140,14 @@ class LoRAPatcher:
                associated weights. An iterator is used so that the LoRA patches do not need to be loaded into memory
                all at once.
            prefix (str): The keys in the patches will be filtered to only include weights with this prefix.
+            dtype (torch.dtype): The compute dtype of the sidecar layers. This cannot easily be inferred from the model,
+                since the sidecar layers are typically applied on top of quantized layers whose weight dtype is
+                different from their compute dtype.
        """
        original_modules: dict[str, torch.nn.Module] = {}
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher._apply_lora_wrapper_patch(
+                LoRAPatcher._apply_lora_sidecar_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -288,7 +165,7 @@ class LoRAPatcher:
                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)

    @staticmethod
-    def _apply_lora_wrapper_patch(
+    def _apply_lora_sidecar_patch(
        model: torch.nn.Module,
        patch: LoRAModelRaw,
        patch_weight: float,
@@ -296,7 +173,7 @@ class LoRAPatcher:
        original_modules: dict[str, torch.nn.Module],
        dtype: torch.dtype,
    ):
-        """Apply a single LoRA wrapper patch to a model."""
+        """Apply a single LoRA sidecar patch to a model."""

        if patch_weight == 0:
            return
@@ -317,47 +194,28 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            LoRAPatcher._apply_lora_layer_wrapper_patch(
-                model=model,
-                module_to_patch=module,
-                module_to_patch_key=module_key,
-                patch=layer,
-                patch_weight=patch_weight,
-                original_modules=original_modules,
-                dtype=dtype,
-            )
+            # Initialize the LoRA sidecar layer.
+            lora_sidecar_layer = LoRAPatcher._initialize_lora_sidecar_layer(module, layer, patch_weight)

-    @staticmethod
-    @torch.no_grad()
-    def _apply_lora_layer_wrapper_patch(
-        model: torch.nn.Module,
-        module_to_patch: torch.nn.Module,
-        module_to_patch_key: str,
-        patch: AnyLoRALayer,
-        patch_weight: float,
-        original_modules: dict[str, torch.nn.Module],
-        dtype: torch.dtype,
-    ):
-        """Apply a single LoRA wrapper patch to a model."""
+            # Replace the original module with a LoRASidecarModule if it has not already been done.
+            if module_key in original_modules:
+                # The module has already been patched with a LoRASidecarModule. Append to it.
+                assert isinstance(module, LoRASidecarModule)
+                lora_sidecar_module = module
+            else:
+                # The module has not yet been patched with a LoRASidecarModule. Create one.
+                lora_sidecar_module = LoRASidecarModule(module, [])
+                original_modules[module_key] = module
+                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
+                module_parent = model.get_submodule(module_parent_key)
+                LoRAPatcher._set_submodule(module_parent, module_name, lora_sidecar_module)

-        # Replace the original module with a LoRASidecarWrapper if it has not already been done.
-        if not isinstance(module_to_patch, LoRASidecarWrapper):
-            lora_wrapper_layer = LoRAPatcher._initialize_lora_wrapper_layer(module_to_patch)
-            original_modules[module_to_patch_key] = module_to_patch
-            module_parent_key, module_name = LoRAPatcher._split_parent_key(module_to_patch_key)
-            module_parent = model.get_submodule(module_parent_key)
-            LoRAPatcher._set_submodule(module_parent, module_name, lora_wrapper_layer)
-            orig_module = module_to_patch
-        else:
-            assert module_to_patch_key in original_modules
-            lora_wrapper_layer = module_to_patch
-            orig_module = module_to_patch.orig_module
+            # Move the LoRA sidecar layer to the same device/dtype as the orig module.
+            # TODO(ryand): Experiment with moving to the device first, then casting. This could be faster.
+            lora_sidecar_layer.to(device=lora_sidecar_module.orig_module.weight.device, dtype=dtype)

-        # Move the LoRA layer to the same device/dtype as the orig module.
-        patch.to(device=orig_module.weight.device, dtype=dtype)
-
-        # Add the LoRA wrapper layer to the LoRASidecarWrapper.
-        lora_wrapper_layer.add_lora_layer(patch, patch_weight)
+            # Add the LoRA sidecar layer to the LoRASidecarModule.
+            lora_sidecar_module.add_lora_layer(lora_sidecar_layer)

    @staticmethod
    def _split_parent_key(module_key: str) -> tuple[str, str]:
@@ -378,13 +236,17 @@ class LoRAPatcher:
            raise ValueError(f"Invalid module key: {module_key}")

    @staticmethod
-    def _initialize_lora_wrapper_layer(orig_layer: torch.nn.Module):
-        if isinstance(orig_layer, torch.nn.Linear):
-            return LoRALinearWrapper(orig_layer, [], [])
-        elif isinstance(orig_layer, torch.nn.Conv1d):
-            return LoRAConv1dWrapper(orig_layer, [], [])
-        elif isinstance(orig_layer, torch.nn.Conv2d):
-            return LoRAConv2dWrapper(orig_layer, [], [])
+    def _initialize_lora_sidecar_layer(orig_layer: torch.nn.Module, lora_layer: AnyLoRALayer, patch_weight: float):
+        # TODO(ryand): Add support for more original layer types and LoRA layer types.
+        if isinstance(orig_layer, torch.nn.Linear) or (
+            isinstance(orig_layer, LoRASidecarModule) and isinstance(orig_layer.orig_module, torch.nn.Linear)
+        ):
+            if isinstance(lora_layer, LoRALayer):
+                return LoRALinearSidecarLayer(lora_layer=lora_layer, weight=patch_weight)
+            elif isinstance(lora_layer, ConcatenatedLoRALayer):
+                return ConcatenatedLoRALinearSidecarLayer(concatenated_lora_layer=lora_layer, weight=patch_weight)
+            else:
+                raise ValueError(f"Unsupported Linear LoRA layer type: {type(lora_layer)}")
        else:
            raise ValueError(f"Unsupported layer type: {type(orig_layer)}")

--- a/invokeai/backend/model_manager/load/model_cache/cached_model/__init__.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/__init__.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/__init__.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/__init__.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
@@ -0,0 +1,34 @@
+import torch
+
+from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
+
+
+class ConcatenatedLoRALinearSidecarLayer(torch.nn.Module):
+    """Sidecar layer that evaluates every sub-layer of a ConcatenatedLoRALayer on a Linear module's input."""
+
+    def __init__(self, concatenated_lora_layer: ConcatenatedLoRALayer, weight: float):
+        super().__init__()
+        self._concatenated_lora_layer = concatenated_lora_layer
+        self._weight = weight
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Run each sub-layer on `input`, scale its result, and join the results along the last dimension."""
+        linear = torch.nn.functional.linear
+        # Every sub-layer receives the same input; its scaled result is one piece of the concatenated output.
+        pieces: list[torch.Tensor] = []
+        for sub_layer in self._concatenated_lora_layer.lora_layers:
+            hidden = linear(input, sub_layer.down)
+            if sub_layer.mid is not None:
+                hidden = linear(hidden, sub_layer.mid)
+            piece = linear(hidden, sub_layer.up, bias=sub_layer.bias)
+            piece *= self._weight * sub_layer.scale()
+            pieces.append(piece)
+
+        # TODO(ryand): Generalize to support concat_axis != 0.
+        assert self._concatenated_lora_layer.concat_axis == 0
+        return torch.cat(pieces, dim=-1)
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        """Move/cast the wrapped concatenated LoRA layer, then return this sidecar layer."""
+        self._concatenated_lora_layer.to(device=device, dtype=dtype)
+        return self
--- a/invokeai/backend/lora/sidecar_layers/lora/__init__.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/__init__.py
--- a/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
@@ -0,0 +1,27 @@
+import torch
+
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
+
+
+class LoRALinearSidecarLayer(torch.nn.Module):
+    """Sidecar layer that evaluates a single LoRALayer on the input of a Linear module."""
+
+    def __init__(self, lora_layer: LoRALayer, weight: float):
+        super().__init__()
+        self._lora_layer = lora_layer
+        self._weight = weight
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply down -> (optional mid) -> up (+ bias), then scale the result by weight * layer scale."""
+        layer = self._lora_layer
+        hidden = torch.nn.functional.linear(x, layer.down)
+        if layer.mid is not None:
+            hidden = torch.nn.functional.linear(hidden, layer.mid)
+        out = torch.nn.functional.linear(hidden, layer.up, bias=layer.bias)
+        out *= self._weight * layer.scale()
+        return out
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        """Move/cast the wrapped LoRA layer, then return this sidecar layer."""
+        self._lora_layer.to(device=device, dtype=dtype)
+        return self
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
@@ -0,0 +1,24 @@
+import torch
+
+
+class LoRASidecarModule(torch.nn.Module):
+    """Wraps an original module and adds the outputs of its attached LoRA sidecar layers to that module's output."""
+
+    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[torch.nn.Module]):
+        super().__init__()
+        self.orig_module = orig_module
+        self._lora_layers = lora_layers  # Stored by reference; add_lora_layer() appends to this same list.
+
+    def add_lora_layer(self, lora_layer: torch.nn.Module):
+        self._lora_layers.append(lora_layer)
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        x = self.orig_module(input)
+        for lora_layer in self._lora_layers:
+            x += lora_layer(input)  # Each sidecar layer receives the original input, not the running sum.
+        return x
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        for module in (self.orig_module, *self._lora_layers):  # The attribute is `orig_module` (no underscore).
+            module.to(device=device, dtype=dtype)
+        return self  # Same contract as torch.nn.Module.to(), so `m = m.to(...)` keeps working.
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -53,7 +53,6 @@ class BaseModelType(str, Enum):
    Any = "any"
    StableDiffusion1 = "sd-1"
    StableDiffusion2 = "sd-2"
-    StableDiffusion3 = "sd-3"
    StableDiffusionXL = "sdxl"
    StableDiffusionXLRefiner = "sdxl-refiner"
    Flux = "flux"
@@ -84,10 +83,8 @@ class SubModelType(str, Enum):
    Transformer = "transformer"
    TextEncoder = "text_encoder"
    TextEncoder2 = "text_encoder_2"
-    TextEncoder3 = "text_encoder_3"
    Tokenizer = "tokenizer"
    Tokenizer2 = "tokenizer_2"
-    Tokenizer3 = "tokenizer_3"
    VAE = "vae"
    VAEDecoder = "vae_decoder"
    VAEEncoder = "vae_encoder"
@@ -95,13 +92,6 @@ class SubModelType(str, Enum):
    SafetyChecker = "safety_checker"


-class ClipVariantType(str, Enum):
-    """Variant type."""
-
-    L = "large"
-    G = "gigantic"
-
-
 class ModelVariantType(str, Enum):
    """Variant type."""

@@ -157,17 +147,6 @@ class ModelSourceType(str, Enum):
 DEFAULTS_PRECISION = Literal["fp16", "fp32"]


-AnyVariant: TypeAlias = Union[ModelVariantType, ClipVariantType, None]
-
-
-class SubmodelDefinition(BaseModel):
-    path_or_prefix: str
-    model_type: ModelType
-    variant: AnyVariant = None
-
-    model_config = ConfigDict(protected_namespaces=())
-
-
 class MainModelDefaultSettings(BaseModel):
    vae: str | None = Field(default=None, description="Default VAE for this model (model key)")
    vae_precision: DEFAULTS_PRECISION | None = Field(default=None, description="Default VAE precision for this model")
@@ -214,9 +193,6 @@ class ModelConfigBase(BaseModel):
        schema["required"].extend(["key", "type", "format"])

    model_config = ConfigDict(validate_assignment=True, json_schema_extra=json_schema_extra)
-    submodels: Optional[Dict[SubModelType, SubmodelDefinition]] = Field(
-        description="Loadable submodels in this model", default=None
-    )


 class CheckpointConfigBase(ModelConfigBase):
@@ -359,7 +335,7 @@ class MainConfigBase(ModelConfigBase):
    default_settings: Optional[MainModelDefaultSettings] = Field(
        description="Default settings for this model", default=None
    )
-    variant: AnyVariant = ModelVariantType.Normal
+    variant: ModelVariantType = ModelVariantType.Normal


 class MainCheckpointConfig(CheckpointConfigBase, MainConfigBase):
@@ -443,33 +419,12 @@ class CLIPEmbedDiffusersConfig(DiffusersConfigBase):

    type: Literal[ModelType.CLIPEmbed] = ModelType.CLIPEmbed
    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
-    variant: ClipVariantType = ClipVariantType.L

    @staticmethod
    def get_tag() -> Tag:
        return Tag(f"{ModelType.CLIPEmbed.value}.{ModelFormat.Diffusers.value}")


-class CLIPGEmbedDiffusersConfig(CLIPEmbedDiffusersConfig):
-    """Model config for CLIP-G Embeddings."""
-
-    variant: ClipVariantType = ClipVariantType.G
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.CLIPEmbed.value}.{ModelFormat.Diffusers.value}.{ClipVariantType.G}")
-
-
-class CLIPLEmbedDiffusersConfig(CLIPEmbedDiffusersConfig):
-    """Model config for CLIP-L Embeddings."""
-
-    variant: ClipVariantType = ClipVariantType.L
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.CLIPEmbed.value}.{ModelFormat.Diffusers.value}.{ClipVariantType.L}")
-
-
 class CLIPVisionDiffusersConfig(DiffusersConfigBase):
    """Model config for CLIPVision."""

@@ -546,8 +501,6 @@ AnyModelConfig = Annotated[
        Annotated[SpandrelImageToImageConfig, SpandrelImageToImageConfig.get_tag()],
        Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()],
        Annotated[CLIPEmbedDiffusersConfig, CLIPEmbedDiffusersConfig.get_tag()],
-        Annotated[CLIPLEmbedDiffusersConfig, CLIPLEmbedDiffusersConfig.get_tag()],
-        Annotated[CLIPGEmbedDiffusersConfig, CLIPGEmbedDiffusersConfig.get_tag()],
    ],
    Discriminator(get_model_discriminator_value),
 ]
--- a/invokeai/backend/model_manager/load/__init__.py
+++ b/invokeai/backend/model_manager/load/__init__.py
@@ -8,7 +8,7 @@ from pathlib import Path

 from invokeai.backend.model_manager.load.load_base import LoadedModel, LoadedModelWithoutConfig, ModelLoaderBase
 from invokeai.backend.model_manager.load.load_default import ModelLoader
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_default import ModelCache
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry, ModelLoaderRegistryBase

 # This registers the subclasses that implement loaders of specific model types
--- a/invokeai/backend/model_manager/load/load_base.py
+++ b/invokeai/backend/model_manager/load/load_base.py
@@ -5,6 +5,7 @@ Base class for model loading in InvokeAI.

 from abc import ABC, abstractmethod
 from contextlib import contextmanager
+from dataclasses import dataclass
 from logging import Logger
 from pathlib import Path
 from typing import Any, Dict, Generator, Optional, Tuple
@@ -17,17 +18,19 @@ from invokeai.backend.model_manager.config import (
    AnyModelConfig,
    SubModelType,
 )
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase


+@dataclass
 class LoadedModelWithoutConfig:
-    """Context manager object that mediates transfer from RAM<->VRAM.
+    """
+    Context manager object that mediates transfer from RAM<->VRAM.

    This is a context manager object that has two distinct APIs:

    1. Older API (deprecated):
-    Use the LoadedModel object directly as a context manager.  It will move the model into VRAM (on CUDA devices), and
+    Use the LoadedModel object directly as a context manager.
+    It will move the model into VRAM (on CUDA devices), and
    return the model in a form suitable for passing to torch.
    Example:
    ```
@@ -37,9 +40,13 @@ class LoadedModelWithoutConfig:
    ```

    2. Newer API (recommended):
-    Call the LoadedModel's `model_on_device()` method in a context. It returns a tuple consisting of a copy of the
-    model's state dict in CPU RAM followed by a copy of the model in VRAM. The state dict is provided to allow LoRAs and
-    other model patchers to return the model to its unpatched state without expensive copy and restore operations.
+    Call the LoadedModel's `model_on_device()` method in a
+    context. It returns a tuple consisting of a copy of
+    the model's state dict in CPU RAM followed by a copy
+    of the model in VRAM. The state dict is provided to allow
+    LoRAs and other model patchers to return the model to
+    its unpatched state without expensive copy and restore
+    operations.

    Example:
    ```
@@ -48,42 +55,43 @@ class LoadedModelWithoutConfig:
        image = vae.decode(latents)[0]
    ```

-    The state_dict should be treated as a read-only object and never modified. Also be aware that some loadable models
-    do not have a state_dict, in which case this value will be None.
+    The state_dict should be treated as a read-only object and
+    never modified. Also be aware that some loadable models do
+    not have a state_dict, in which case this value will be None.
    """

-    def __init__(self, cache_record: CacheRecord, cache: ModelCache):
-        self._cache_record = cache_record
-        self._cache = cache
+    _locker: ModelLockerBase

    def __enter__(self) -> AnyModel:
-        self._cache.lock(self._cache_record.key)
+        """Context entry."""
+        self._locker.lock()
        return self.model

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
-        self._cache.unlock(self._cache_record.key)
+        """Context exit."""
+        self._locker.unlock()

    @contextmanager
    def model_on_device(self) -> Generator[Tuple[Optional[Dict[str, torch.Tensor]], AnyModel], None, None]:
        """Return a tuple consisting of the model's state dict (if it exists) and the locked model on execution device."""
-        self._cache.lock(self._cache_record.key)
+        locked_model = self._locker.lock()
        try:
-            yield (self._cache_record.cached_model.get_cpu_state_dict(), self._cache_record.cached_model.model)
+            state_dict = self._locker.get_state_dict()
+            yield (state_dict, locked_model)
        finally:
-            self._cache.unlock(self._cache_record.key)
+            self._locker.unlock()

    @property
    def model(self) -> AnyModel:
        """Return the model without locking it."""
-        return self._cache_record.cached_model.model
+        return self._locker.model


+@dataclass
 class LoadedModel(LoadedModelWithoutConfig):
    """Context manager object that mediates transfer from RAM<->VRAM."""

-    def __init__(self, config: Optional[AnyModelConfig], cache_record: CacheRecord, cache: ModelCache):
-        super().__init__(cache_record=cache_record, cache=cache)
-        self.config = config
+    config: Optional[AnyModelConfig] = None


 # TODO(MM2):
@@ -102,7 +110,7 @@ class ModelLoaderBase(ABC):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        pass
@@ -130,6 +138,6 @@ class ModelLoaderBase(ABC):

    @property
    @abstractmethod
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the ram cache associated with this loader."""
        pass
--- a/invokeai/backend/model_manager/load/load_default.py
+++ b/invokeai/backend/model_manager/load/load_default.py
@@ -14,8 +14,7 @@ from invokeai.backend.model_manager import (
 )
 from invokeai.backend.model_manager.config import DiffusersConfigBase
 from invokeai.backend.model_manager.load.load_base import LoadedModel, ModelLoaderBase
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache, get_model_cache_key
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase
 from invokeai.backend.model_manager.load.model_util import calc_model_size_by_fs
 from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
 from invokeai.backend.util.devices import TorchDevice
@@ -29,14 +28,13 @@ class ModelLoader(ModelLoaderBase):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        self._app_config = app_config
        self._logger = logger
        self._ram_cache = ram_cache
        self._torch_dtype = TorchDevice.choose_torch_dtype()
-        self._torch_device = TorchDevice.choose_torch_device()

    def load_model(self, model_config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> LoadedModel:
        """
@@ -55,11 +53,11 @@ class ModelLoader(ModelLoaderBase):
            raise InvalidModelConfigException(f"Files for model '{model_config.name}' not found at {model_path}")

        with skip_torch_weight_init():
-            cache_record = self._load_and_cache(model_config, submodel_type)
-        return LoadedModel(config=model_config, cache_record=cache_record, cache=self._ram_cache)
+            locker = self._load_and_cache(model_config, submodel_type)
+        return LoadedModel(config=model_config, _locker=locker)

    @property
-    def ram_cache(self) -> ModelCache:
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the ram cache associated with this loader."""
        return self._ram_cache

@@ -67,10 +65,10 @@ class ModelLoader(ModelLoaderBase):
        model_base = self._app_config.models_path
        return (model_base / config.path).resolve()

-    def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> CacheRecord:
+    def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> ModelLockerBase:
        stats_name = ":".join([config.base, config.type, config.name, (submodel_type or "")])
        try:
-            return self._ram_cache.get(key=get_model_cache_key(config.key, submodel_type), stats_name=stats_name)
+            return self._ram_cache.get(config.key, submodel_type, stats_name=stats_name)
        except IndexError:
            pass

@@ -79,11 +77,16 @@ class ModelLoader(ModelLoaderBase):
        loaded_model = self._load_model(config, submodel_type)

        self._ram_cache.put(
-            get_model_cache_key(config.key, submodel_type),
+            config.key,
+            submodel_type=submodel_type,
            model=loaded_model,
        )

-        return self._ram_cache.get(key=get_model_cache_key(config.key, submodel_type), stats_name=stats_name)
+        return self._ram_cache.get(
+            key=config.key,
+            submodel_type=submodel_type,
+            stats_name=stats_name,
+        )

    def get_size_fs(
        self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None
--- a/invokeai/backend/model_manager/load/model_cache/__init__.py
+++ b/invokeai/backend/model_manager/load/model_cache/__init__.py
@@ -0,0 +1,6 @@
+"""Init file for ModelCache."""
+
+from .model_cache_base import ModelCacheBase, CacheStats  # noqa F401
+from .model_cache_default import ModelCache  # noqa F401
+
+__all__ = ["ModelCacheBase", "ModelCache", "CacheStats"]  # Public names of this package (was misspelled `_all__`).
--- a/invokeai/backend/model_manager/load/model_cache/cache_record.py
+++ b/invokeai/backend/model_manager/load/model_cache/cache_record.py
@@ -1,31 +0,0 @@
-from dataclasses import dataclass
-
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_only_full_load import (
-    CachedModelOnlyFullLoad,
-)
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_with_partial_load import (
-    CachedModelWithPartialLoad,
-)
-
-
-@dataclass
-class CacheRecord:
-    """A class that represents a model in the model cache."""
-
-    # Cache key.
-    key: str
-    # Model in memory.
-    cached_model: CachedModelWithPartialLoad | CachedModelOnlyFullLoad
-    # If locks > 0, the model is actively being used, so we should do our best to keep it on the compute device.
-    _locks: int = 0
-
-    def lock(self) -> None:
-        self._locks += 1
-
-    def unlock(self) -> None:
-        self._locks -= 1
-        assert self._locks >= 0
-
-    @property
-    def is_locked(self) -> bool:
-        return self._locks > 0
--- a/invokeai/backend/model_manager/load/model_cache/cache_stats.py
+++ b/invokeai/backend/model_manager/load/model_cache/cache_stats.py
@@ -1,15 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Dict
-
-
-@dataclass
-class CacheStats(object):
-    """Collect statistics on cache performance."""
-
-    hits: int = 0  # cache hits
-    misses: int = 0  # cache misses
-    high_watermark: int = 0  # amount of cache used
-    in_cache: int = 0  # number of models in cache
-    cleared: int = 0  # number of models cleared to make space
-    cache_size: int = 0  # total size of cache
-    loaded_model_sizes: Dict[str, int] = field(default_factory=dict)
--- a/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_only_full_load.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_only_full_load.py
@@ -1,81 +0,0 @@
-from typing import Any
-
-import torch
-
-
-class CachedModelOnlyFullLoad:
-    """A wrapper around a PyTorch model to handle full loads and unloads between the CPU and the compute device.
-
-    Note: "VRAM" is used throughout this class to refer to the memory on the compute device. It could be CUDA memory,
-    MPS memory, etc.
-    """
-
-    def __init__(self, model: torch.nn.Module | Any, compute_device: torch.device, total_bytes: int):
-        """Initialize a CachedModelOnlyFullLoad.
-
-        Args:
-            model (torch.nn.Module | Any): The model to wrap. Should be on the CPU.
-            compute_device (torch.device): The compute device to move the model to.
-            total_bytes (int): The total size (in bytes) of all the weights in the model.
-        """
-        # model is often a torch.nn.Module, but could be any model type. Throughout this class, we handle both cases.
-        self._model = model
-        self._compute_device = compute_device
-        self._total_bytes = total_bytes
-        self._is_in_vram = False
-
-    @property
-    def model(self) -> torch.nn.Module:
-        return self._model
-
-    def get_cpu_state_dict(self) -> dict[str, torch.Tensor] | None:
-        """Get a read-only copy of the model's state dict in RAM."""
-        # TODO(ryand): Document this better and implement it.
-        return None
-
-    def total_bytes(self) -> int:
-        """Get the total size (in bytes) of all the weights in the model."""
-        return self._total_bytes
-
-    def cur_vram_bytes(self) -> int:
-        """Get the size (in bytes) of the weights that are currently in VRAM."""
-        if self._is_in_vram:
-            return self._total_bytes
-        else:
-            return 0
-
-    def is_in_vram(self) -> bool:
-        """Return true if the model is currently in VRAM."""
-        return self._is_in_vram
-
-    def full_load_to_vram(self) -> int:
-        """Load all weights into VRAM (if supported by the model).
-
-        Returns:
-            The number of bytes loaded into VRAM.
-        """
-        if self._is_in_vram:
-            # Already in VRAM.
-            return 0
-
-        if not hasattr(self._model, "to"):
-            # Model doesn't support moving to a device.
-            return 0
-
-        self._model.to(self._compute_device)
-        self._is_in_vram = True
-        return self._total_bytes
-
-    def full_unload_from_vram(self) -> int:
-        """Unload all weights from VRAM.
-
-        Returns:
-            The number of bytes unloaded from VRAM.
-        """
-        if not self._is_in_vram:
-            # Already in RAM.
-            return 0
-
-        self._model.to("cpu")
-        self._is_in_vram = False
-        return self._total_bytes
--- a/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_with_partial_load.py
+++ b/invokeai/backend/model_manager/load/model_cache/cached_model/cached_model_with_partial_load.py
@@ -1,150 +0,0 @@
-import itertools
-
-import torch
-
-from invokeai.backend.model_manager.load.model_cache.torch_function_autocast_context import (
-    add_autocast_to_module_forward,
-)
-from invokeai.backend.util.calc_tensor_size import calc_tensor_size
-
-
-def set_nested_attr(obj: object, attr: str, value: object):
-    """A helper function that extends setattr() to support nested attributes.
-
-    Example:
-        set_nested_attr(model, "module.encoder.conv1.weight", new_conv1_weight)
-    """
-    attrs = attr.split(".")
-    for attr in attrs[:-1]:
-        obj = getattr(obj, attr)
-    setattr(obj, attrs[-1], value)
-
-
-class CachedModelWithPartialLoad:
-    """A wrapper around a PyTorch model to handle partial loads and unloads between the CPU and the compute device.
-
-    Note: "VRAM" is used throughout this class to refer to the memory on the compute device. It could be CUDA memory,
-    MPS memory, etc.
-    """
-
-    def __init__(self, model: torch.nn.Module, compute_device: torch.device):
-        self._model = model
-        self._compute_device = compute_device
-
-        # A CPU read-only copy of the model's state dict.
-        self._cpu_state_dict: dict[str, torch.Tensor] = model.state_dict()
-
-        # Monkey-patch the model to add autocasting to the model's forward method.
-        add_autocast_to_module_forward(model, compute_device)
-
-        self._total_bytes = sum(
-            calc_tensor_size(p) for p in itertools.chain(self._model.parameters(), self._model.buffers())
-        )
-        self._cur_vram_bytes: int | None = None
-
-    @property
-    def model(self) -> torch.nn.Module:
-        return self._model
-
-    def get_cpu_state_dict(self) -> dict[str, torch.Tensor] | None:
-        """Get a read-only copy of the model's state dict in RAM."""
-        # TODO(ryand): Document this better.
-        return self._cpu_state_dict
-
-    def total_bytes(self) -> int:
-        """Get the total size (in bytes) of all the weights in the model."""
-        return self._total_bytes
-
-    def cur_vram_bytes(self) -> int:
-        """Get the size (in bytes) of the weights that are currently in VRAM."""
-        if self._cur_vram_bytes is None:
-            self._cur_vram_bytes = sum(
-                calc_tensor_size(p)
-                for p in itertools.chain(self._model.parameters(), self._model.buffers())
-                if p.device.type == self._compute_device.type
-            )
-        return self._cur_vram_bytes
-
-    def full_load_to_vram(self) -> int:
-        """Load all weights into VRAM."""
-        return self.partial_load_to_vram(self.total_bytes())
-
-    def full_unload_from_vram(self) -> int:
-        """Unload all weights from VRAM."""
-        return self.partial_unload_from_vram(self.total_bytes())
-
-    @torch.no_grad()
-    def partial_load_to_vram(self, vram_bytes_to_load: int) -> int:
-        """Load more weights into VRAM without exceeding vram_bytes_to_load.
-
-        Returns:
-            The number of bytes loaded into VRAM.
-        """
-        vram_bytes_loaded = 0
-
-        for key, param in itertools.chain(self._model.named_parameters(), self._model.named_buffers()):
-            # Skip parameters that are already on the compute device.
-            if param.device.type == self._compute_device.type:
-                continue
-
-            # Check the size of the parameter.
-            param_size = calc_tensor_size(param)
-            if vram_bytes_loaded + param_size > vram_bytes_to_load:
-                # TODO(ryand): Should we just break here? If we couldn't fit this parameter into VRAM, is it really
-                # worth continuing to search for a smaller parameter that would fit?
-                continue
-
-            # Copy the parameter to the compute device.
-            # We use the 'overwrite' strategy from torch.nn.Module._apply().
-            # TODO(ryand): For some edge cases (e.g. quantized models?), we may need to support other strategies (e.g.
-            # swap).
-            if isinstance(param, torch.nn.Parameter):
-                assert param.is_leaf
-                out_param = torch.nn.Parameter(
-                    param.to(self._compute_device, copy=True), requires_grad=param.requires_grad
-                )
-                set_nested_attr(self._model, key, out_param)
-                # We did not port the param.grad handling from torch.nn.Module._apply(), because we do not expect to be
-                # handling gradients. We assert that this assumption is true.
-                assert param.grad is None
-            else:
-                # Handle buffers.
-                set_nested_attr(self._model, key, param.to(self._compute_device, copy=True))
-
-            vram_bytes_loaded += param_size
-
-        if self._cur_vram_bytes is not None:
-            self._cur_vram_bytes += vram_bytes_loaded
-
-        return vram_bytes_loaded
-
-    @torch.no_grad()
-    def partial_unload_from_vram(self, vram_bytes_to_free: int) -> int:
-        """Unload weights from VRAM until vram_bytes_to_free bytes are freed. Or the entire model is unloaded.
-
-        Returns:
-            The number of bytes unloaded from VRAM.
-        """
-        vram_bytes_freed = 0
-
-        for key, param in itertools.chain(self._model.named_parameters(), self._model.named_buffers()):
-            if vram_bytes_freed >= vram_bytes_to_free:
-                break
-
-            if param.device.type != self._compute_device.type:
-                continue
-
-            if isinstance(param, torch.nn.Parameter):
-                # Create a new parameter, but inject the existing CPU tensor into it.
-                out_param = torch.nn.Parameter(self._cpu_state_dict[key], requires_grad=param.requires_grad)
-                set_nested_attr(self._model, key, out_param)
-            else:
-                # Handle buffers.
-                set_nested_attr(self._model, key, self._cpu_state_dict[key])
-
-            vram_bytes_freed += calc_tensor_size(param)
-
-        if self._cur_vram_bytes is not None:
-            self._cur_vram_bytes -= vram_bytes_freed
-
-        return vram_bytes_freed
--- a/invokeai/backend/model_manager/load/model_cache/model_cache.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache.py
@@ -1,538 +0,0 @@
-import gc
-from logging import Logger
-from typing import Dict, List, Optional
-
-import torch
-
-from invokeai.backend.model_manager import AnyModel, SubModelType
-from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot
-from invokeai.backend.model_manager.load.model_cache.cache_record import CacheRecord
-from invokeai.backend.model_manager.load.model_cache.cache_stats import CacheStats
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_only_full_load import (
-    CachedModelOnlyFullLoad,
-)
-from invokeai.backend.model_manager.load.model_cache.cached_model.cached_model_with_partial_load import (
-    CachedModelWithPartialLoad,
-)
-from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
-from invokeai.backend.util.devices import TorchDevice
-from invokeai.backend.util.logging import InvokeAILogger
-from invokeai.backend.util.prefix_logger_adapter import PrefixedLoggerAdapter
-
-# Size of a GB in bytes.
-GB = 2**30
-
-# Size of a MB in bytes.
-MB = 2**20
-
-
-# TODO(ryand): Where should this go? The ModelCache shouldn't be concerned with submodels.
-def get_model_cache_key(model_key: str, submodel_type: Optional[SubModelType] = None) -> str:
-    """Get the cache key for a model based on the optional submodel type."""
-    if submodel_type:
-        return f"{model_key}:{submodel_type.value}"
-    else:
-        return model_key
-
-
-class ModelCache:
-    """A cache for managing models in memory.
-
-    The cache is based on two levels of model storage:
-    - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu").
-    - storage_device: The device where models are offloaded when not in active use (typically "cpu").
-
-    The model cache is based on the following assumptions:
-    - storage_device_mem_size > execution_device_mem_size
-    - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time
-
-    A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on
-    the execution_device.
-
-    Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced
-    on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload
-    policy. The storage_device cache uses a least-recently-used (LRU) offload policy.
-
-    Note: Neither of these offload policies has really been compared against alternatives. It's likely that different
-    policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW
-    configuration.
-
-    The cache returns context manager generators designed to load the model into the execution device (often GPU) within
-    the context, and unload outside the context.
-
-    Example usage:
-    ```
-    cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
-    with cache.get_model('runwayml/stable-diffusion-1-5') as SD1:
-        do_something_on_gpu(SD1)
-    ```
-    """
-
-    def __init__(
-        self,
-        max_cache_size: float,
-        max_vram_cache_size: float,
-        execution_device: torch.device = torch.device("cuda"),
-        storage_device: torch.device = torch.device("cpu"),
-        lazy_offloading: bool = True,
-        log_memory_usage: bool = False,
-        logger: Optional[Logger] = None,
-    ):
-        """
-        Initialize the model RAM cache.
-
-        :param max_cache_size: Maximum size of the storage_device cache in GBs.
-        :param max_vram_cache_size: Maximum size of the execution_device cache in GBs.
-        :param execution_device: Torch device to load active model into [torch.device('cuda')]
-        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
-        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
-        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
-            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
-            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
-            behaviour.
-        :param logger: InvokeAILogger to use (otherwise creates one)
-        """
-        # allow lazy offloading only when vram cache enabled
-        # TODO(ryand): Think about what lazy_offloading should mean in the new model cache.
-        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
-        self._max_cache_size: float = max_cache_size
-        self._max_vram_cache_size: float = max_vram_cache_size
-        self._execution_device: torch.device = execution_device
-        self._storage_device: torch.device = storage_device
-        self._logger = PrefixedLoggerAdapter(
-            logger or InvokeAILogger.get_logger(self.__class__.__name__), "MODEL CACHE"
-        )
-        self._log_memory_usage = log_memory_usage
-        self._stats: Optional[CacheStats] = None
-
-        self._cached_models: Dict[str, CacheRecord] = {}
-        self._cache_stack: List[str] = []
-
-    @property
-    def max_cache_size(self) -> float:
-        """Return the cap on cache size."""
-        return self._max_cache_size
-
-    @max_cache_size.setter
-    def max_cache_size(self, value: float) -> None:
-        """Set the cap on cache size."""
-        self._max_cache_size = value
-
-    @property
-    def max_vram_cache_size(self) -> float:
-        """Return the cap on vram cache size."""
-        return self._max_vram_cache_size
-
-    @max_vram_cache_size.setter
-    def max_vram_cache_size(self, value: float) -> None:
-        """Set the cap on vram cache size."""
-        self._max_vram_cache_size = value
-
-    @property
-    def stats(self) -> Optional[CacheStats]:
-        """Return collected CacheStats object."""
-        return self._stats
-
-    @stats.setter
-    def stats(self, stats: CacheStats) -> None:
-        """Set the CacheStats object for collecting cache statistics."""
-        self._stats = stats
-
-    def put(self, key: str, model: AnyModel) -> None:
-        """Add a model to the cache."""
-        if key in self._cached_models:
-            self._logger.debug(
-                f"Attempted to add model {key} ({model.__class__.__name__}), but it already exists in the cache. No action necessary."
-            )
-            return
-
-        size = calc_model_size_by_data(self._logger, model)
-        self.make_room(size)
-
-        # Wrap model.
-        if isinstance(model, torch.nn.Module):
-            wrapped_model = CachedModelWithPartialLoad(model, self._execution_device)
-        else:
-            wrapped_model = CachedModelOnlyFullLoad(model, self._execution_device, size)
-
-        # running_on_cpu = self._execution_device == torch.device("cpu")
-        # state_dict = model.state_dict() if isinstance(model, torch.nn.Module) and not running_on_cpu else None
-        cache_record = CacheRecord(key=key, cached_model=wrapped_model)
-        self._cached_models[key] = cache_record
-        self._cache_stack.append(key)
-        self._logger.debug(
-            f"Added model {key} (Type: {model.__class__.__name__}, Wrap mode: {wrapped_model.__class__.__name__}, Model size: {size/MB:.2f}MB)"
-        )
-
-    def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
-        """Retrieve a model from the cache.
-
-        :param key: Model key
-        :param stats_name: A human-readable id for the model for the purposes of stats reporting.
-
-        Raises IndexError if the model is not in the cache.
-        """
-        if key in self._cached_models:
-            if self.stats:
-                self.stats.hits += 1
-        else:
-            if self.stats:
-                self.stats.misses += 1
-            self._logger.debug(f"Cache miss: {key}")
-            raise IndexError(f"The model with key {key} is not in the cache.")
-
-        cache_entry = self._cached_models[key]
-
-        # more stats
-        if self.stats:
-            stats_name = stats_name or key
-            self.stats.cache_size = int(self._max_cache_size * GB)
-            self.stats.high_watermark = max(self.stats.high_watermark, self._get_ram_in_use())
-            self.stats.in_cache = len(self._cached_models)
-            self.stats.loaded_model_sizes[stats_name] = max(
-                self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.cached_model.total_bytes()
-            )
-
-        # this moves the entry to the top (right end) of the stack
-        self._cache_stack = [k for k in self._cache_stack if k != key]
-        self._cache_stack.append(key)
-
-        self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-        return cache_entry
-
-    def lock(self, key: str) -> None:
-        """Lock a model for use and move it into VRAM."""
-        cache_entry = self._cached_models[key]
-        cache_entry.lock()
-
-        self._logger.debug(f"Locking model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-        try:
-            self._load_locked_model(cache_entry)
-            self._logger.debug(
-                f"Finished locking model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})"
-            )
-        except torch.cuda.OutOfMemoryError:
-            self._logger.warning("Insufficient GPU memory to load model. Aborting")
-            cache_entry.unlock()
-            raise
-        except Exception:
-            cache_entry.unlock()
-            raise
-
-        self._log_cache_state()
-
-    def unlock(self, key: str) -> None:
-        """Unlock a model."""
-        cache_entry = self._cached_models[key]
-        cache_entry.unlock()
-        self._logger.debug(f"Unlocked model {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
-
-    def _load_locked_model(self, cache_entry: CacheRecord) -> None:
-        """Helper function for self.lock(). Loads a locked model into VRAM."""
-        vram_available = self._get_vram_available()
-
-        # Calculate model_vram_needed, the amount of additional VRAM that will be used if we fully load the model into
-        # VRAM.
-        model_cur_vram_bytes = cache_entry.cached_model.cur_vram_bytes()
-        model_total_bytes = cache_entry.cached_model.total_bytes()
-        model_vram_needed = model_total_bytes - model_cur_vram_bytes
-
-        # The amount of VRAM that must be freed to make room for model_vram_needed.
-        vram_bytes_to_free = max(0, model_vram_needed - vram_available)
-
-        self._logger.debug(
-            f"Before unloading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-        # Make room for the model in VRAM.
-        # 1. If the model can fit entirely in VRAM, then make enough room for it to be loaded fully.
-        # 2. If the model can't fit fully into VRAM, then unload all other models and load as much of the model as
-        #    possible.
-        vram_bytes_freed = self._offload_unlocked_models(vram_bytes_to_free)
-        self._logger.debug(f"Unloaded models (if necessary): vram_bytes_freed={(vram_bytes_freed/MB):.2f}MB")
-
-        # Check the updated vram_available after offloading.
-        vram_available = self._get_vram_available()
-        self._logger.debug(
-            f"After unloading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-        # Move as much of the model as possible into VRAM.
-        model_bytes_loaded = 0
-        if isinstance(cache_entry.cached_model, CachedModelWithPartialLoad):
-            model_bytes_loaded = cache_entry.cached_model.partial_load_to_vram(vram_available)
-        elif isinstance(cache_entry.cached_model, CachedModelOnlyFullLoad):  # type: ignore
-            # Partial load is not supported, so we have not choice but to try and fit it all into VRAM.
-            model_bytes_loaded = cache_entry.cached_model.full_load_to_vram()
-        else:
-            raise ValueError(f"Unsupported cached model type: {type(cache_entry.cached_model)}")
-
-        model_cur_vram_bytes = cache_entry.cached_model.cur_vram_bytes()
-        vram_available = self._get_vram_available()
-        self._logger.debug(f"Loaded model onto execution device: model_bytes_loaded={(model_bytes_loaded/MB):.2f}MB, ")
-        self._logger.debug(
-            f"After loading: {self._get_vram_state_str(model_cur_vram_bytes, model_total_bytes, vram_available)}"
-        )
-
-    def _get_vram_available(self) -> int:
-        """Get the amount of VRAM available in the cache."""
-        return int(self._max_vram_cache_size * GB) - self._get_vram_in_use()
-
-    def _get_vram_in_use(self) -> int:
-        """Get the amount of VRAM currently in use."""
-        return sum(ce.cached_model.cur_vram_bytes() for ce in self._cached_models.values())
-
-    def _get_ram_available(self) -> int:
-        """Get the amount of RAM available in the cache."""
-        return int(self._max_cache_size * GB) - self._get_ram_in_use()
-
-    def _get_ram_in_use(self) -> int:
-        """Get the amount of RAM currently in use."""
-        return sum(ce.cached_model.total_bytes() for ce in self._cached_models.values())
-
-    def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
-        if self._log_memory_usage:
-            return MemorySnapshot.capture()
-        return None
-
-    def _get_vram_state_str(self, model_cur_vram_bytes: int, model_total_bytes: int, vram_available: int) -> str:
-        """Helper function for preparing a VRAM state log string."""
-        model_cur_vram_bytes_percent = model_cur_vram_bytes / model_total_bytes if model_total_bytes > 0 else 0
-        return (
-            f"model_total={model_total_bytes/MB:.0f} MB, "
-            + f"model_vram={model_cur_vram_bytes/MB:.0f} MB ({model_cur_vram_bytes_percent:.1%} %), "
-            + f"vram_total={int(self._max_vram_cache_size * GB)/MB:.0f} MB, "
-            + f"vram_available={(vram_available/MB):.0f} MB, "
-        )
-
-    def _offload_unlocked_models(self, vram_bytes_to_free: int) -> int:
-        """Offload models from the execution_device until vram_bytes_to_free bytes are freed, or all models are
-        offloaded. Of course, locked models are not offloaded.
-
-        Returns:
-            int: The number of bytes freed.
-        """
-        self._logger.debug(f"Offloading unlocked models with goal of freeing {vram_bytes_to_free/MB:.2f}MB of VRAM.")
-        vram_bytes_freed = 0
-        # TODO(ryand): Give more thought to the offloading policy used here.
-        cache_entries_increasing_size = sorted(self._cached_models.values(), key=lambda x: x.cached_model.total_bytes())
-        for cache_entry in cache_entries_increasing_size:
-            if vram_bytes_freed >= vram_bytes_to_free:
-                break
-            if cache_entry.is_locked:
-                continue
-
-            if isinstance(cache_entry.cached_model, CachedModelWithPartialLoad):
-                cache_entry_bytes_freed = cache_entry.cached_model.partial_unload_from_vram(
-                    vram_bytes_to_free - vram_bytes_freed
-                )
-            elif isinstance(cache_entry.cached_model, CachedModelOnlyFullLoad):  # type: ignore
-                cache_entry_bytes_freed = cache_entry.cached_model.full_unload_from_vram()
-            else:
-                raise ValueError(f"Unsupported cached model type: {type(cache_entry.cached_model)}")
-            if cache_entry_bytes_freed > 0:
-                self._logger.debug(
-                    f"Unloaded {cache_entry.key} from VRAM to free {(cache_entry_bytes_freed/MB):.0f} MB."
-                )
-            vram_bytes_freed += cache_entry_bytes_freed
-
-        TorchDevice.empty_cache()
-        return vram_bytes_freed
-
-    # def _move_model_to_device(self, cache_entry: CacheRecord, target_device: torch.device) -> None:
-    #     """Move model into the indicated device.
-
-    #     :param cache_entry: The CacheRecord for the model
-    #     :param target_device: The torch.device to move the model into
-
-    #     May raise a torch.cuda.OutOfMemoryError
-    #     """
-    #     self._logger.debug(f"Called to move {cache_entry.key} to {target_device}")
-    #     source_device = cache_entry.device
-
-    #     # Note: We compare device types only so that 'cuda' == 'cuda:0'.
-    #     # This would need to be revised to support multi-GPU.
-    #     if torch.device(source_device).type == torch.device(target_device).type:
-    #         return
-
-    #     # Some models don't have a `to` method, in which case they run in RAM/CPU.
-    #     if not hasattr(cache_entry.model, "to"):
-    #         return
-
-    #     # This roundabout method for moving the model around is done to avoid
-    #     # the cost of moving the model from RAM to VRAM and then back from VRAM to RAM.
-    #     # When moving to VRAM, we copy (not move) each element of the state dict from
-    #     # RAM to a new state dict in VRAM, and then inject it into the model.
-    #     # This operation is slightly faster than running `to()` on the whole model.
-    #     #
-    #     # When the model needs to be removed from VRAM we simply delete the copy
-    #     # of the state dict in VRAM, and reinject the state dict that is cached
-    #     # in RAM into the model. So this operation is very fast.
-    #     start_model_to_time = time.time()
-    #     snapshot_before = self._capture_memory_snapshot()
-
-    #     try:
-    #         if cache_entry.state_dict is not None:
-    #             assert hasattr(cache_entry.model, "load_state_dict")
-    #             if target_device == self._storage_device:
-    #                 cache_entry.model.load_state_dict(cache_entry.state_dict, assign=True)
-    #             else:
-    #                 new_dict: Dict[str, torch.Tensor] = {}
-    #                 for k, v in cache_entry.state_dict.items():
-    #                     new_dict[k] = v.to(target_device, copy=True)
-    #                 cache_entry.model.load_state_dict(new_dict, assign=True)
-    #         cache_entry.model.to(target_device)
-    #         cache_entry.device = target_device
-    #     except Exception as e:  # blow away cache entry
-    #         self._delete_cache_entry(cache_entry)
-    #         raise e
-
-    #     snapshot_after = self._capture_memory_snapshot()
-    #     end_model_to_time = time.time()
-    #     self._logger.debug(
-    #         f"Moved model '{cache_entry.key}' from {source_device} to"
-    #         f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
-    #         f"Estimated model size: {(cache_entry.size/GB):.3f} GB."
-    #         f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
-    #     )
-
-    #     if (
-    #         snapshot_before is not None
-    #         and snapshot_after is not None
-    #         and snapshot_before.vram is not None
-    #         and snapshot_after.vram is not None
-    #     ):
-    #         vram_change = abs(snapshot_before.vram - snapshot_after.vram)
-
-    #         # If the estimated model size does not match the change in VRAM, log a warning.
-    #         if not math.isclose(
-    #             vram_change,
-    #             cache_entry.size,
-    #             rel_tol=0.1,
-    #             abs_tol=10 * MB,
-    #         ):
-    #             self._logger.debug(
-    #                 f"Moving model '{cache_entry.key}' from {source_device} to"
-    #                 f" {target_device} caused an unexpected change in VRAM usage. The model's"
-    #                 " estimated size may be incorrect. Estimated model size:"
-    #                 f" {(cache_entry.size/GB):.3f} GB.\n"
-    #                 f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
-    #             )
-
-    def _log_cache_state(self, title: str = "Model cache state:", include_entry_details: bool = True):
-        ram_size_bytes = self._max_cache_size * GB
-        ram_in_use_bytes = self._get_ram_in_use()
-        ram_in_use_bytes_percent = ram_in_use_bytes / ram_size_bytes if ram_size_bytes > 0 else 0
-        ram_available_bytes = self._get_ram_available()
-        ram_available_bytes_percent = ram_available_bytes / ram_size_bytes if ram_size_bytes > 0 else 0
-
-        vram_size_bytes = self._max_vram_cache_size * GB
-        vram_in_use_bytes = self._get_vram_in_use()
-        vram_in_use_bytes_percent = vram_in_use_bytes / vram_size_bytes if vram_size_bytes > 0 else 0
-        vram_available_bytes = self._get_vram_available()
-        vram_available_bytes_percent = vram_available_bytes / vram_size_bytes if vram_size_bytes > 0 else 0
-
-        log = f"{title}\n"
-
-        log_format = "  {:<30} Limit: {:>7.1f} MB, Used: {:>7.1f} MB ({:>5.1%}), Available: {:>7.1f} MB ({:>5.1%})\n"
-        log += log_format.format(
-            f"Storage Device ({self._storage_device.type})",
-            ram_size_bytes / MB,
-            ram_in_use_bytes / MB,
-            ram_in_use_bytes_percent,
-            ram_available_bytes / MB,
-            ram_available_bytes_percent,
-        )
-        log += log_format.format(
-            f"Compute Device ({self._execution_device.type})",
-            vram_size_bytes / MB,
-            vram_in_use_bytes / MB,
-            vram_in_use_bytes_percent,
-            vram_available_bytes / MB,
-            vram_available_bytes_percent,
-        )
-
-        if torch.cuda.is_available():
-            log += "  {:<30} {} MB\n".format("CUDA Memory Allocated:", torch.cuda.memory_allocated() / MB)
-        log += "  {:<30} {}\n".format("Total models:", len(self._cached_models))
-
-        if include_entry_details and len(self._cached_models) > 0:
-            log += "  Models:\n"
-            log_format = (
-                "    {:<80} total={:>7.1f} MB, vram={:>7.1f} MB ({:>5.1%}), ram={:>7.1f} MB ({:>5.1%}), locked={}\n"
-            )
-            for cache_record in self._cached_models.values():
-                total_bytes = cache_record.cached_model.total_bytes()
-                cur_vram_bytes = cache_record.cached_model.cur_vram_bytes()
-                cur_vram_bytes_percent = cur_vram_bytes / total_bytes if total_bytes > 0 else 0
-                cur_ram_bytes = total_bytes - cur_vram_bytes
-                cur_ram_bytes_percent = cur_ram_bytes / total_bytes if total_bytes > 0 else 0
-
-                log += log_format.format(
-                    f"{cache_record.key} ({cache_record.cached_model.model.__class__.__name__}):",
-                    total_bytes / MB,
-                    cur_vram_bytes / MB,
-                    cur_vram_bytes_percent,
-                    cur_ram_bytes / MB,
-                    cur_ram_bytes_percent,
-                    cache_record.is_locked,
-                )
-
-        self._logger.debug(log)
-
-    def make_room(self, bytes_needed: int) -> None:
-        """Make enough room in the cache to accommodate a new model of indicated size.
-
-        Note: This function deletes all of the cache's internal references to a model in order to free it. If there are
-        external references to the model, there's nothing that the cache can do about it, and those models will not be
-        garbage-collected.
-        """
-        self._logger.debug(f"Making room for {bytes_needed/MB:.2f}MB of RAM.")
-        self._log_cache_state(title="Before dropping models:")
-
-        ram_bytes_available = self._get_ram_available()
-        ram_bytes_to_free = max(0, bytes_needed - ram_bytes_available)
-
-        ram_bytes_freed = 0
-        pos = 0
-        models_cleared = 0
-        while ram_bytes_freed < ram_bytes_to_free and pos < len(self._cache_stack):
-            model_key = self._cache_stack[pos]
-            cache_entry = self._cached_models[model_key]
-
-            if not cache_entry.is_locked:
-                ram_bytes_freed += cache_entry.cached_model.total_bytes()
-                self._logger.debug(
-                    f"Dropping {model_key} from RAM cache to free {(cache_entry.cached_model.total_bytes()/MB):.2f}MB."
-                )
-                self._delete_cache_entry(cache_entry)
-                del cache_entry
-                models_cleared += 1
-            else:
-                pos += 1
-
-        if models_cleared > 0:
-            # There would likely be some 'garbage' to be collected regardless of whether a model was cleared or not, but
-            # there is a significant time cost to calling `gc.collect()`, so we want to use it sparingly. (The time cost
-            # is high even if no garbage gets collected.)
-            #
-            # Calling gc.collect(...) when a model is cleared seems like a good middle-ground:
-            # - If models had to be cleared, it's a signal that we are close to our memory limit.
-            # - If models were cleared, there's a good chance that there's a significant amount of garbage to be
-            #   collected.
-            #
-            # Keep in mind that gc is only responsible for handling reference cycles. Most objects should be cleaned up
-            # immediately when their reference count hits 0.
-            if self.stats:
-                self.stats.cleared = models_cleared
-            gc.collect()
-
-        TorchDevice.empty_cache()
-        self._logger.debug(f"Dropped {models_cleared} models to free {ram_bytes_freed/MB:.2f}MB of RAM.")
-        self._log_cache_state(title="After dropping models:")
-
-    def _delete_cache_entry(self, cache_entry: CacheRecord) -> None:
-        self._cache_stack.remove(cache_entry.key)
-        del self._cached_models[cache_entry.key]
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
@@ -0,0 +1,221 @@
+# Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team
+# TODO: Add Stalker's proper name to copyright
+"""
+Manage a RAM cache of diffusion/transformer models for fast switching.
+They are moved between GPU VRAM and CPU RAM as necessary. If the cache
+grows larger than a preset maximum, then the least recently used
+model will be cleared and (re)loaded from disk when next needed.
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from logging import Logger
+from typing import Dict, Generic, Optional, TypeVar
+
+import torch
+
+from invokeai.backend.model_manager.config import AnyModel, SubModelType
+
+
+class ModelLockerBase(ABC):
+    """Base class for the model locker used by the loader."""
+
+    @abstractmethod
+    def lock(self) -> AnyModel:
+        """Lock the contained model and move it into VRAM."""
+        pass
+
+    @abstractmethod
+    def unlock(self) -> None:
+        """Unlock the contained model, and remove it from VRAM."""
+        pass
+
+    @abstractmethod
+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the state dict (if any) for the cached model."""
+        pass
+
+    @property
+    @abstractmethod
+    def model(self) -> AnyModel:
+        """Return the model."""
+        pass
+
+
+T = TypeVar("T")
+
+
+@dataclass
+class CacheRecord(Generic[T]):
+    """
+    Elements of the cache:
+
+    key: Unique key for each model, same as used in the models database.
+    model: Model in memory.
+    state_dict: A read-only copy of the model's state dict in RAM. It will be
+                used as a template for creating a copy in the VRAM.
+    size: Size of the model
+    loaded: True if the model's state dict is currently in VRAM
+
+    Before a model is executed, the state_dict template is copied into VRAM,
+    and then injected into the model. When the model is finished, the VRAM
+    copy of the state dict is deleted, and the RAM version is reinjected
+    into the model.
+
+    The state_dict should be treated as a read-only attribute. Do not attempt
+    to patch or otherwise modify it. Instead, patch the copy of the state_dict
+    after it is loaded into the execution device (e.g. CUDA) using the `LoadedModel`
+    context manager call `model_on_device()`.
+    """
+
+    key: str
+    model: T
+    device: torch.device
+    state_dict: Optional[Dict[str, torch.Tensor]]
+    size: int
+    loaded: bool = False
+    _locks: int = 0
+
+    def lock(self) -> None:
+        """Lock this record."""
+        self._locks += 1
+
+    def unlock(self) -> None:
+        """Unlock this record."""
+        self._locks -= 1
+        assert self._locks >= 0
+
+    @property
+    def locked(self) -> bool:
+        """Return true if record is locked."""
+        return self._locks > 0
+
+
+@dataclass
+class CacheStats(object):
+    """Collect statistics on cache performance."""
+
+    hits: int = 0  # cache hits
+    misses: int = 0  # cache misses
+    high_watermark: int = 0  # amount of cache used
+    in_cache: int = 0  # number of models in cache
+    cleared: int = 0  # number of models cleared to make space
+    cache_size: int = 0  # total size of cache
+    loaded_model_sizes: Dict[str, int] = field(default_factory=dict)
+
+
+class ModelCacheBase(ABC, Generic[T]):
+    """Virtual base class for RAM model cache."""
+
+    @property
+    @abstractmethod
+    def storage_device(self) -> torch.device:
+        """Return the storage device (e.g. "CPU" for RAM)."""
+        pass
+
+    @property
+    @abstractmethod
+    def execution_device(self) -> torch.device:
+        """Return the execution device (e.g. "cuda" for VRAM)."""
+        pass
+
+    @property
+    @abstractmethod
+    def lazy_offloading(self) -> bool:
+        """Return true if the cache is configured to lazily offload models in VRAM."""
+        pass
+
+    @property
+    @abstractmethod
+    def max_cache_size(self) -> float:
+        """Return the maximum size the RAM cache can grow to."""
+        pass
+
+    @max_cache_size.setter
+    @abstractmethod
+    def max_cache_size(self, value: float) -> None:
+        """Set the maximum size the RAM cache can grow to."""
+
+    @property
+    @abstractmethod
+    def max_vram_cache_size(self) -> float:
+        """Return the maximum size the VRAM cache can grow to."""
+        pass
+
+    @max_vram_cache_size.setter
+    @abstractmethod
+    def max_vram_cache_size(self, value: float) -> None:
+        """Set the maximum size the VRAM cache can grow to."""
+        pass
+
+    @abstractmethod
+    def offload_unlocked_models(self, size_required: int) -> None:
+        """Offload from VRAM any models not actively in use."""
+        pass
+
+    @abstractmethod
+    def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
+        """Move model into the indicated device."""
+        pass
+
+    @property
+    @abstractmethod
+    def stats(self) -> Optional[CacheStats]:
+        """Return collected CacheStats object."""
+        pass
+
+    @stats.setter
+    @abstractmethod
+    def stats(self, stats: CacheStats) -> None:
+        """Set the CacheStats object for collecting cache statistics."""
+        pass
+
+    @property
+    @abstractmethod
+    def logger(self) -> Logger:
+        """Return the logger used by the cache."""
+        pass
+
+    @abstractmethod
+    def make_room(self, size: int) -> None:
+        """Make enough room in the cache to accommodate a new model of indicated size."""
+        pass
+
+    @abstractmethod
+    def put(
+        self,
+        key: str,
+        model: T,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> None:
+        """Store model under key and optional submodel_type."""
+        pass
+
+    @abstractmethod
+    def get(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+        stats_name: Optional[str] = None,
+    ) -> ModelLockerBase:
+        """
+        Retrieve model using key and optional submodel_type.
+
+        :param key: Opaque model key
+        :param submodel_type: Type of the submodel to fetch
+        :param stats_name: A human-readable id for the model for the purposes of
+        stats reporting.
+
+        This may raise an IndexError if the model is not in the cache.
+        """
+        pass
+
+    @abstractmethod
+    def cache_size(self) -> int:
+        """Get the total size of the models currently cached."""
+        pass
+
+    @abstractmethod
+    def print_cuda_stats(self) -> None:
+        """Log debugging information on CUDA usage."""
+        pass
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -0,0 +1,426 @@
+# Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team
+# TODO: Add Stalker's proper name to copyright
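+"""Default implementation of the model cache, which keeps models in RAM and moves them in and out of VRAM."""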
+
+import gc
+import math
+import time
+from contextlib import suppress
+from logging import Logger
+from typing import Dict, List, Optional
+
+import torch
+
+from invokeai.backend.model_manager import AnyModel, SubModelType
+from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import (
+    CacheRecord,
+    CacheStats,
+    ModelCacheBase,
+    ModelLockerBase,
+)
+from invokeai.backend.model_manager.load.model_cache.model_locker import ModelLocker
+from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data
+from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.util.logging import InvokeAILogger
+
+# Size of a GB in bytes.
+GB = 2**30
+
+# Size of a MB in bytes.
+MB = 2**20
+
+
+class ModelCache(ModelCacheBase[AnyModel]):
+    """A cache for managing models in memory.
+
+    The cache is based on two levels of model storage:
+    - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu").
+    - storage_device: The device where models are offloaded when not in active use (typically "cpu").
+
+    The model cache is based on the following assumptions:
+    - storage_device_mem_size > execution_device_mem_size
+    - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time
+
+    A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on
+    the execution_device.
+
+    Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced
+    on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload
+    policy. The storage_device cache uses a least-recently-used (LRU) offload policy.
+
+    Note: Neither of these offload policies has really been compared against alternatives. It's likely that different
+    policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW
+    configuration.
+
+    The cache's `get()` returns a ModelLocker. Its `lock()` method loads the model into the execution device (often
+    GPU) and returns it; `unlock()` releases the model so that it can be offloaded again.
+
+    Example usage:
+    ```
+    cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
+    locker = cache.get('runwayml/stable-diffusion-1-5')  # the model must already have been stored with put()
+    SD1 = locker.lock()  # do_something_on_gpu(SD1), then call locker.unlock() when finished
+    ```
+    """
+
+    def __init__(
+        self,
+        max_cache_size: float,
+        max_vram_cache_size: float,
+        execution_device: torch.device = torch.device("cuda"),
+        storage_device: torch.device = torch.device("cpu"),
+        precision: torch.dtype = torch.float16,
+        lazy_offloading: bool = True,
+        log_memory_usage: bool = False,
+        logger: Optional[Logger] = None,
+    ):
+        """
+        Initialize the model RAM cache.
+
+        :param max_cache_size: Maximum size of the storage_device cache in GBs.
+        :param max_vram_cache_size: Maximum size of the execution_device cache in GBs.
+        :param execution_device: Torch device to load active model into [torch.device('cuda')]
+        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
+        :param precision: Precision for loaded models [torch.float16]
+        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
+        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
+            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
+            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
+            behaviour.
+        :param logger: InvokeAILogger to use (otherwise creates one)
+        """
+        # allow lazy offloading only when vram cache enabled
+        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
+        self._max_cache_size: float = max_cache_size
+        self._max_vram_cache_size: float = max_vram_cache_size
+        self._execution_device: torch.device = execution_device
+        self._storage_device: torch.device = storage_device
+        self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
+        self._log_memory_usage = log_memory_usage
+        self._stats: Optional[CacheStats] = None
+
+        self._cached_models: Dict[str, CacheRecord[AnyModel]] = {}
+        self._cache_stack: List[str] = []
+
+    @property
+    def logger(self) -> Logger:
+        """Return the logger used by the cache."""
+        return self._logger
+
+    @property
+    def lazy_offloading(self) -> bool:
+        """Return true if the cache is configured to lazily offload models in VRAM."""
+        return self._lazy_offloading
+
+    @property
+    def storage_device(self) -> torch.device:
+        """Return the storage device (e.g. "CPU" for RAM)."""
+        return self._storage_device
+
+    @property
+    def execution_device(self) -> torch.device:
+        """Return the execution device (e.g. "cuda" for VRAM)."""
+        return self._execution_device
+
+    @property
+    def max_cache_size(self) -> float:
+        """Return the cap on cache size."""
+        return self._max_cache_size
+
+    @max_cache_size.setter
+    def max_cache_size(self, value: float) -> None:
+        """Set the cap on cache size."""
+        self._max_cache_size = value
+
+    @property
+    def max_vram_cache_size(self) -> float:
+        """Return the cap on vram cache size."""
+        return self._max_vram_cache_size
+
+    @max_vram_cache_size.setter
+    def max_vram_cache_size(self, value: float) -> None:
+        """Set the cap on vram cache size."""
+        self._max_vram_cache_size = value
+
+    @property
+    def stats(self) -> Optional[CacheStats]:
+        """Return collected CacheStats object."""
+        return self._stats
+
+    @stats.setter
+    def stats(self, stats: CacheStats) -> None:
+        """Set the CacheStats object for collecting cache statistics."""
+        self._stats = stats
+
+    def cache_size(self) -> int:
+        """Get the total size of the models currently cached."""
+        total = 0
+        for cache_record in self._cached_models.values():
+            total += cache_record.size
+        return total
+
+    def put(
+        self,
+        key: str,
+        model: AnyModel,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> None:
+        """Store model under key and optional submodel_type."""
+        key = self._make_cache_key(key, submodel_type)
+        if key in self._cached_models:
+            return
+        size = calc_model_size_by_data(self.logger, model)
+        self.make_room(size)
+
+        running_on_cpu = self.execution_device == torch.device("cpu")
+        state_dict = model.state_dict() if isinstance(model, torch.nn.Module) and not running_on_cpu else None
+        cache_record = CacheRecord(key=key, model=model, device=self.storage_device, state_dict=state_dict, size=size)
+        self._cached_models[key] = cache_record
+        self._cache_stack.append(key)
+
+    def get(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+        stats_name: Optional[str] = None,
+    ) -> ModelLockerBase:
+        """
+        Retrieve model using key and optional submodel_type.
+
+        :param key: Opaque model key
+        :param submodel_type: Type of the submodel to fetch
+        :param stats_name: A human-readable id for the model for the purposes of
+        stats reporting.
+
+        This may raise an IndexError if the model is not in the cache.
+        """
+        key = self._make_cache_key(key, submodel_type)
+        if key in self._cached_models:
+            if self.stats:
+                self.stats.hits += 1
+        else:
+            if self.stats:
+                self.stats.misses += 1
+            raise IndexError(f"The model with key {key} is not in the cache.")
+
+        cache_entry = self._cached_models[key]
+
+        # more stats
+        if self.stats:
+            stats_name = stats_name or key
+            self.stats.cache_size = int(self._max_cache_size * GB)
+            self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size())
+            self.stats.in_cache = len(self._cached_models)
+            self.stats.loaded_model_sizes[stats_name] = max(
+                self.stats.loaded_model_sizes.get(stats_name, 0), cache_entry.size
+            )
+
+        # this moves the entry to the top (right end) of the stack
+        with suppress(Exception):
+            self._cache_stack.remove(key)
+        self._cache_stack.append(key)
+        return ModelLocker(
+            cache=self,
+            cache_entry=cache_entry,
+        )
+
+    def _capture_memory_snapshot(self) -> Optional[MemorySnapshot]:
+        if self._log_memory_usage:
+            return MemorySnapshot.capture()
+        return None
+
+    def _make_cache_key(self, model_key: str, submodel_type: Optional[SubModelType] = None) -> str:
+        if submodel_type:
+            return f"{model_key}:{submodel_type.value}"
+        else:
+            return model_key
+
+    def offload_unlocked_models(self, size_required: int) -> None:
+        """Offload unlocked models from the execution_device, smallest first, to make room for size_required.
+
+        :param size_required: The amount of space to clear in the execution_device cache, in bytes.
+        """
+        reserved = self._max_vram_cache_size * GB
+        vram_in_use = torch.cuda.memory_allocated() + size_required
+        self.logger.debug(f"{(vram_in_use/GB):.2f}GB VRAM needed for models; max allowed={(reserved/GB):.2f}GB")
+        for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size):
+            if vram_in_use <= reserved:
+                break
+            # Only entries that are currently in VRAM and not locked by a caller may be offloaded.
+            if cache_entry.loaded and not cache_entry.locked:
+                self.move_model_to_device(cache_entry, self.storage_device)
+                cache_entry.loaded = False
+                vram_allocated = torch.cuda.memory_allocated()  # re-measure instead of trusting the size estimate
+                vram_in_use = vram_allocated + size_required
+                self.logger.debug(
+                    f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GB):.2f}GB; vram in use = {(vram_allocated/GB):.2f}GB"
+                )
+
+        TorchDevice.empty_cache()
+
+    def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
+        """Move model into the indicated device.
+
+        :param cache_entry: The CacheRecord for the model
+        :param target_device: The torch.device to move the model into
+
+        May raise a torch.cuda.OutOfMemoryError
+        """
+        self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
+        source_device = cache_entry.device
+
+        # Note: We compare device types only so that 'cuda' == 'cuda:0'.
+        # This would need to be revised to support multi-GPU.
+        if torch.device(source_device).type == torch.device(target_device).type:
+            return
+
+        # Some models don't have a `to` method, in which case they run in RAM/CPU.
+        if not hasattr(cache_entry.model, "to"):
+            return
+
+        # This roundabout method for moving the model around is done to avoid
+        # the cost of moving the model from RAM to VRAM and then back from VRAM to RAM.
+        # When moving to VRAM, we copy (not move) each element of the state dict from
+        # RAM to a new state dict in VRAM, and then inject it into the model.
+        # This operation is slightly faster than running `to()` on the whole model.
+        #
+        # When the model needs to be removed from VRAM we simply delete the copy
+        # of the state dict in VRAM, and reinject the state dict that is cached
+        # in RAM into the model. So this operation is very fast.
+        start_model_to_time = time.time()
+        snapshot_before = self._capture_memory_snapshot()
+
+        try:
+            if cache_entry.state_dict is not None:
+                assert hasattr(cache_entry.model, "load_state_dict")
+                if target_device == self.storage_device:
+                    cache_entry.model.load_state_dict(cache_entry.state_dict, assign=True)
+                else:
+                    new_dict: Dict[str, torch.Tensor] = {}
+                    for k, v in cache_entry.state_dict.items():
+                        new_dict[k] = v.to(target_device, copy=True)
+                    cache_entry.model.load_state_dict(new_dict, assign=True)
+            cache_entry.model.to(target_device)
+            cache_entry.device = target_device
+        except Exception as e:  # blow away cache entry
+            self._delete_cache_entry(cache_entry)
+            raise e
+
+        snapshot_after = self._capture_memory_snapshot()
+        end_model_to_time = time.time()
+        self.logger.debug(
+            f"Moved model '{cache_entry.key}' from {source_device} to"
+            f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
+            f"Estimated model size: {(cache_entry.size/GB):.3f} GB."
+            f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
+        )
+
+        if (
+            snapshot_before is not None
+            and snapshot_after is not None
+            and snapshot_before.vram is not None
+            and snapshot_after.vram is not None
+        ):
+            vram_change = abs(snapshot_before.vram - snapshot_after.vram)
+
+            # If the estimated model size does not match the change in VRAM, log a warning.
+            if not math.isclose(
+                vram_change,
+                cache_entry.size,
+                rel_tol=0.1,
+                abs_tol=10 * MB,
+            ):
+                self.logger.debug(
+                    f"Moving model '{cache_entry.key}' from {source_device} to"
+                    f" {target_device} caused an unexpected change in VRAM usage. The model's"
+                    " estimated size may be incorrect. Estimated model size:"
+                    f" {(cache_entry.size/GB):.3f} GB.\n"
+                    f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
+                )
+
+    def print_cuda_stats(self) -> None:
+        """Log (at debug level) current VRAM/RAM usage and how many cached models are in RAM, in VRAM and locked."""
+        vram = "%4.2fG" % (torch.cuda.memory_allocated() / GB)
+        ram = "%4.2fG" % (self.cache_size() / GB)
+
+        in_ram_models = 0
+        in_vram_models = 0
+        locked_in_vram_models = 0
+        for cache_record in self._cached_models.values():
+            if hasattr(cache_record.model, "device"):  # models without a `device` attribute are not tallied
+                if cache_record.model.device == self.storage_device:
+                    in_ram_models += 1
+                else:
+                    in_vram_models += 1
+                if cache_record.locked:
+                    locked_in_vram_models += 1
+
+        self.logger.debug(  # emitted once, after the tallies are complete, rather than once per record
+            f"Current VRAM/RAM usage: {vram}/{ram}; models_in_ram/models_in_vram(locked) ="
+            f" {in_ram_models}/{in_vram_models}({locked_in_vram_models})"
+        )
+
+    def make_room(self, size: int) -> None:
+        """Make enough room in the cache to accommodate a new model of indicated size.
+
+        Note: This function deletes all of the cache's internal references to a model in order to free it. If there are
+        external references to the model, there's nothing that the cache can do about it, and those models will not be
+        garbage-collected.
+        """
+        bytes_needed = size
+        maximum_size = self.max_cache_size * GB  # stored in GB, convert to bytes
+        current_size = self.cache_size()
+
+        if current_size + bytes_needed > maximum_size:
+            self.logger.debug(
+                f"Max cache size exceeded: {(current_size/GB):.2f}/{self.max_cache_size:.2f} GB, need an additional"
+                f" {(bytes_needed/GB):.2f} GB"
+            )
+
+        self.logger.debug(f"Before making_room: cached_models={len(self._cached_models)}")
+
+        pos = 0
+        models_cleared = 0
+        while current_size + bytes_needed > maximum_size and pos < len(self._cache_stack):
+            model_key = self._cache_stack[pos]
+            cache_entry = self._cached_models[model_key]
+            device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
+            self.logger.debug(
+                f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}"
+            )
+
+            if not cache_entry.locked:
+                self.logger.debug(
+                    f"Removing {model_key} from RAM cache to free at least {(size/GB):.2f} GB (-{(cache_entry.size/GB):.2f} GB)"
+                )
+                current_size -= cache_entry.size
+                models_cleared += 1
+                self._delete_cache_entry(cache_entry)
+                del cache_entry
+
+            else:
+                pos += 1
+
+        if models_cleared > 0:
+            # There would likely be some 'garbage' to be collected regardless of whether a model was cleared or not, but
+            # there is a significant time cost to calling `gc.collect()`, so we want to use it sparingly. (The time cost
+            # is high even if no garbage gets collected.)
+            #
+            # Calling gc.collect(...) when a model is cleared seems like a good middle-ground:
+            # - If models had to be cleared, it's a signal that we are close to our memory limit.
+            # - If models were cleared, there's a good chance that there's a significant amount of garbage to be
+            #   collected.
+            #
+            # Keep in mind that gc is only responsible for handling reference cycles. Most objects should be cleaned up
+            # immediately when their reference count hits 0.
+            if self.stats:
+                self.stats.cleared = models_cleared
+            gc.collect()
+
+        TorchDevice.empty_cache()
+        self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
+
+    def _delete_cache_entry(self, cache_entry: CacheRecord[AnyModel]) -> None:
+        self._cache_stack.remove(cache_entry.key)
+        del self._cached_models[cache_entry.key]
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -0,0 +1,64 @@
+"""
+Base class and implementation of a class that moves models in and out of VRAM.
+"""
+
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.model_manager import AnyModel
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import (
+    CacheRecord,
+    ModelCacheBase,
+    ModelLockerBase,
+)
+
+
+class ModelLocker(ModelLockerBase):
+    """Internal class that mediates movement in and out of GPU."""
+
+    def __init__(self, cache: ModelCacheBase[AnyModel], cache_entry: CacheRecord[AnyModel]):
+        """
+        Initialize the model locker.
+
+        :param cache: The ModelCache object
+        :param cache_entry: The entry in the model cache
+        """
+        self._cache = cache
+        self._cache_entry = cache_entry
+
+    @property
+    def model(self) -> AnyModel:
+        """Return the model without moving it around."""
+        return self._cache_entry.model
+
+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the state dict (if any) for the cached model."""
+        return self._cache_entry.state_dict
+
+    def lock(self) -> AnyModel:
+        """Move the model into the execution device (GPU) and lock it."""
+        self._cache_entry.lock()
+        try:
+            if self._cache.lazy_offloading:
+                self._cache.offload_unlocked_models(self._cache_entry.size)
+            self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
+            self._cache_entry.loaded = True
+            self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
+            self._cache.print_cuda_stats()
+        except torch.cuda.OutOfMemoryError:
+            self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
+            self._cache_entry.unlock()
+            raise
+        except Exception:
+            self._cache_entry.unlock()
+            raise
+
+        return self.model
+
+    def unlock(self) -> None:
+        """Call upon exit from context."""
+        self._cache_entry.unlock()
+        if not self._cache.lazy_offloading:
+            self._cache.offload_unlocked_models(0)
+            self._cache.print_cuda_stats()
--- a/invokeai/backend/model_manager/load/model_cache/torch_function_autocast_context.py
+++ b/invokeai/backend/model_manager/load/model_cache/torch_function_autocast_context.py
@@ -1,33 +0,0 @@
-from typing import Any, Callable
-
-import torch
-from torch.overrides import TorchFunctionMode
-
-
-def add_autocast_to_module_forward(m: torch.nn.Module, to_device: torch.device):
-    """Monkey-patch m.forward(...) with a new forward(...) method that activates device autocasting for its duration."""
-    old_forward = m.forward
-
-    def new_forward(*args: Any, **kwargs: Any):
-        with TorchFunctionAutocastDeviceContext(to_device):
-            return old_forward(*args, **kwargs)
-
-    m.forward = new_forward
-
-
-def _cast_to_device_and_run(
-    func: Callable[..., Any], args: tuple[Any, ...], kwargs: dict[str, Any], to_device: torch.device
-):
-    args_on_device = [a.to(to_device) if isinstance(a, torch.Tensor) else a for a in args]
-    kwargs_on_device = {k: v.to(to_device) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
-    return func(*args_on_device, **kwargs_on_device)
-
-
-class TorchFunctionAutocastDeviceContext(TorchFunctionMode):
-    def __init__(self, to_device: torch.device):
-        self._to_device = to_device
-
-    def __torch_function__(
-        self, func: Callable[..., Any], types, args: tuple[Any, ...] = (), kwargs: dict[str, Any] | None = None
-    ):
-        return _cast_to_device_and_run(func, args, kwargs or {}, self._to_device)
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -84,15 +84,7 @@ class FluxVAELoader(ModelLoader):
            model = AutoEncoder(ae_params[config.config_path])
            sd = load_file(model_path)
            model.load_state_dict(sd, assign=True)
-            # VAE is broken in float16, which mps defaults to
-            if self._torch_dtype == torch.float16:
-                try:
-                    vae_dtype = torch.tensor([1.0], dtype=torch.bfloat16, device=self._torch_device).dtype
-                except TypeError:
-                    vae_dtype = torch.float32
-            else:
-                vae_dtype = self._torch_dtype
-            model.to(vae_dtype)
+            model.to(dtype=self._torch_dtype)

        return model

@@ -136,9 +128,9 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
                "The bnb modules are not available. Please install bitsandbytes if available on your platform."
            )
        match submodel_type:
-            case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
+            case SubModelType.Tokenizer2:
                return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
-            case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
+            case SubModelType.TextEncoder2:
                te2_model_path = Path(config.path) / "text_encoder_2"
                model_config = AutoConfig.from_pretrained(te2_model_path)
                with accelerate.init_empty_weights():
@@ -180,9 +172,9 @@ class T5EncoderCheckpointModel(ModelLoader):
            raise ValueError("Only T5EncoderConfig models are currently supported here.")

        match submodel_type:
-            case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
+            case SubModelType.Tokenizer2:
                return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
-            case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
+            case SubModelType.TextEncoder2:
                return T5EncoderModel.from_pretrained(Path(config.path) / "text_encoder_2", torch_dtype="auto")

        raise ValueError(
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -26,7 +26,7 @@ from invokeai.backend.model_manager import (
    SubModelType,
 )
 from invokeai.backend.model_manager.load.load_default import ModelLoader
-from invokeai.backend.model_manager.load.model_cache.model_cache import ModelCache
+from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry


@@ -40,7 +40,7 @@ class LoRALoader(ModelLoader):
        self,
        app_config: InvokeAIAppConfig,
        logger: Logger,
-        ram_cache: ModelCache,
+        ram_cache: ModelCacheBase[AnyModel],
    ):
        """Initialize the loader."""
        super().__init__(app_config, logger, ram_cache)
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -25,7 +25,6 @@ from invokeai.backend.model_manager.config import (
    DiffusersConfigBase,
    MainCheckpointConfig,
 )
-from invokeai.backend.model_manager.load.model_cache.model_cache import get_model_cache_key
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
 from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
 from invokeai.backend.util.silence_warnings import SilenceWarnings
@@ -43,7 +42,6 @@ VARIANT_TO_IN_CHANNEL_MAP = {
@ModelLoaderRegistry.register(
    base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Diffusers
 )
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion3, type=ModelType.Main, format=ModelFormat.Diffusers)
@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Checkpoint)
@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Checkpoint)
@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Checkpoint)
@@ -53,6 +51,13 @@ VARIANT_TO_IN_CHANNEL_MAP = {
 class StableDiffusionDiffusersModel(GenericDiffusersLoader):
    """Class to load main models."""

+    model_base_to_model_type = {
+        BaseModelType.StableDiffusion1: "FrozenCLIPEmbedder",
+        BaseModelType.StableDiffusion2: "FrozenOpenCLIPEmbedder",
+        BaseModelType.StableDiffusionXL: "SDXL",
+        BaseModelType.StableDiffusionXLRefiner: "SDXL-Refiner",
+    }
+
    def _load_model(
        self,
        config: AnyModelConfig,
@@ -133,5 +138,5 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
            if subtype == submodel_type:
                continue
            if submodel := getattr(pipeline, subtype.value, None):
-                self._ram_cache.put(get_model_cache_key(config.key, subtype), model=submodel)
+                self._ram_cache.put(config.key, submodel_type=subtype, model=submodel)
        return getattr(pipeline, submodel_type.value)
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -1,7 +1,7 @@
 import json
 import re
 from pathlib import Path
-from typing import Any, Callable, Dict, Literal, Optional, Union
+from typing import Any, Dict, Literal, Optional, Union

 import safetensors.torch
 import spandrel
@@ -22,7 +22,6 @@ from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import i
 from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
-    AnyVariant,
    BaseModelType,
    ControlAdapterDefaultSettings,
    InvalidModelConfigException,
@@ -34,15 +33,8 @@ from invokeai.backend.model_manager.config import (
    ModelType,
    ModelVariantType,
    SchedulerPredictionType,
-    SubmodelDefinition,
-    SubModelType,
-)
-from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import ConfigLoader
-from invokeai.backend.model_manager.util.model_util import (
-    get_clip_variant_type,
-    lora_token_vector_length,
-    read_checkpoint_meta,
 )
+from invokeai.backend.model_manager.util.model_util import lora_token_vector_length, read_checkpoint_meta
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader
 from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
@@ -120,7 +112,6 @@ class ModelProbe(object):
        "StableDiffusionXLPipeline": ModelType.Main,
        "StableDiffusionXLImg2ImgPipeline": ModelType.Main,
        "StableDiffusionXLInpaintPipeline": ModelType.Main,
-        "StableDiffusion3Pipeline": ModelType.Main,
        "LatentConsistencyModelPipeline": ModelType.Main,
        "AutoencoderKL": ModelType.VAE,
        "AutoencoderTiny": ModelType.VAE,
@@ -131,12 +122,8 @@ class ModelProbe(object):
        "CLIPTextModel": ModelType.CLIPEmbed,
        "T5EncoderModel": ModelType.T5Encoder,
        "FluxControlNetModel": ModelType.ControlNet,
-        "SD3Transformer2DModel": ModelType.Main,
-        "CLIPTextModelWithProjection": ModelType.CLIPEmbed,
    }

-    TYPE2VARIANT: Dict[ModelType, Callable[[str], Optional[AnyVariant]]] = {ModelType.CLIPEmbed: get_clip_variant_type}
-
    @classmethod
    def register_probe(
        cls, format: Literal["diffusers", "checkpoint", "onnx"], model_type: ModelType, probe_class: type[ProbeBase]
@@ -183,10 +170,7 @@ class ModelProbe(object):
        fields["path"] = model_path.as_posix()
        fields["type"] = fields.get("type") or model_type
        fields["base"] = fields.get("base") or probe.get_base_type()
-        variant_func = cls.TYPE2VARIANT.get(fields["type"], None)
-        fields["variant"] = (
-            fields.get("variant") or (variant_func and variant_func(model_path.as_posix())) or probe.get_variant_type()
-        )
+        fields["variant"] = fields.get("variant") or probe.get_variant_type()
        fields["prediction_type"] = fields.get("prediction_type") or probe.get_scheduler_prediction_type()
        fields["image_encoder_model_id"] = fields.get("image_encoder_model_id") or probe.get_image_encoder_model_id()
        fields["name"] = fields.get("name") or cls.get_model_name(model_path)
@@ -233,10 +217,6 @@ class ModelProbe(object):
                and fields["prediction_type"] == SchedulerPredictionType.VPrediction
            )

-        get_submodels = getattr(probe, "get_submodels", None)
-        if fields["base"] == BaseModelType.StableDiffusion3 and callable(get_submodels):
-            fields["submodels"] = get_submodels()
-
        model_info = ModelConfigFactory.make_config(fields)  # , key=fields.get("key", None))
        return model_info

@@ -469,7 +449,7 @@ class ModelProbe(object):
        """
        # scan model
        scan_result = scan_file_path(checkpoint)
-        if scan_result.infected_files != 0 or scan_result.scan_err:
+        if scan_result.infected_files != 0:
            raise Exception("The model {model_name} is potentially infected by malware. Aborting import.")


@@ -485,7 +465,6 @@ MODEL_NAME_TO_PREPROCESSOR = {
    "lineart anime": "lineart_anime_image_processor",
    "lineart_anime": "lineart_anime_image_processor",
    "lineart": "lineart_image_processor",
-    "soft": "hed_image_processor",
    "softedge": "hed_image_processor",
    "hed": "hed_image_processor",
    "shuffle": "content_shuffle_image_processor",
@@ -768,33 +747,18 @@ class FolderProbeBase(ProbeBase):

 class PipelineFolderProbe(FolderProbeBase):
    def get_base_type(self) -> BaseModelType:
-        # Handle pipelines with a UNet (i.e SD 1.x, SD2, SDXL).
-        config_path = self.model_path / "unet" / "config.json"
-        if config_path.exists():
-            with open(config_path) as file:
-                unet_conf = json.load(file)
-            if unet_conf["cross_attention_dim"] == 768:
-                return BaseModelType.StableDiffusion1
-            elif unet_conf["cross_attention_dim"] == 1024:
-                return BaseModelType.StableDiffusion2
-            elif unet_conf["cross_attention_dim"] == 1280:
-                return BaseModelType.StableDiffusionXLRefiner
-            elif unet_conf["cross_attention_dim"] == 2048:
-                return BaseModelType.StableDiffusionXL
-            else:
-                raise InvalidModelConfigException(f"Unknown base model for {self.model_path}")
-
-        # Handle pipelines with a transformer (i.e. SD3).
-        config_path = self.model_path / "transformer" / "config.json"
-        if config_path.exists():
-            with open(config_path) as file:
-                transformer_conf = json.load(file)
-            if transformer_conf["_class_name"] == "SD3Transformer2DModel":
-                return BaseModelType.StableDiffusion3
-            else:
-                raise InvalidModelConfigException(f"Unknown base model for {self.model_path}")
-
-        raise InvalidModelConfigException(f"Unknown base model for {self.model_path}")
+        with open(self.model_path / "unet" / "config.json", "r") as file:
+            unet_conf = json.load(file)
+        if unet_conf["cross_attention_dim"] == 768:
+            return BaseModelType.StableDiffusion1
+        elif unet_conf["cross_attention_dim"] == 1024:
+            return BaseModelType.StableDiffusion2
+        elif unet_conf["cross_attention_dim"] == 1280:
+            return BaseModelType.StableDiffusionXLRefiner
+        elif unet_conf["cross_attention_dim"] == 2048:
+            return BaseModelType.StableDiffusionXL
+        else:
+            raise InvalidModelConfigException(f"Unknown base model for {self.model_path}")

    def get_scheduler_prediction_type(self) -> SchedulerPredictionType:
        with open(self.model_path / "scheduler" / "scheduler_config.json", "r") as file:
@@ -806,23 +770,6 @@ class PipelineFolderProbe(FolderProbeBase):
        else:
            raise InvalidModelConfigException("Unknown scheduler prediction type: {scheduler_conf['prediction_type']}")

-    def get_submodels(self) -> Dict[SubModelType, SubmodelDefinition]:
-        config = ConfigLoader.load_config(self.model_path, config_name="model_index.json")
-        submodels: Dict[SubModelType, SubmodelDefinition] = {}
-        for key, value in config.items():
-            if key.startswith("_") or not (isinstance(value, list) and len(value) == 2):
-                continue
-            model_loader = str(value[1])
-            if model_type := ModelProbe.CLASS2TYPE.get(model_loader):
-                variant_func = ModelProbe.TYPE2VARIANT.get(model_type, None)
-                submodels[SubModelType(key)] = SubmodelDefinition(
-                    path_or_prefix=(self.model_path / key).resolve().as_posix(),
-                    model_type=model_type,
-                    variant=variant_func and variant_func((self.model_path / key).as_posix()),
-                )
-
-        return submodels
-
    def get_variant_type(self) -> ModelVariantType:
        # This only works for pipelines! Any kind of
        # exception results in our returning the
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -140,22 +140,6 @@ flux_dev = StarterModel(
    type=ModelType.Main,
    dependencies=[t5_base_encoder, flux_vae, clip_l_encoder],
 )
-sd35_medium = StarterModel(
-    name="SD3.5 Medium",
-    base=BaseModelType.StableDiffusion3,
-    source="stabilityai/stable-diffusion-3.5-medium",
-    description="Medium SD3.5 Model: ~15GB",
-    type=ModelType.Main,
-    dependencies=[],
-)
-sd35_large = StarterModel(
-    name="SD3.5 Large",
-    base=BaseModelType.StableDiffusion3,
-    source="stabilityai/stable-diffusion-3.5-large",
-    description="Large SD3.5 Model: ~19G",
-    type=ModelType.Main,
-    dependencies=[],
-)
 cyberrealistic_sd1 = StarterModel(
    name="CyberRealistic v4.1",
    base=BaseModelType.StableDiffusion1,
@@ -298,12 +282,13 @@ ip_adapter_sdxl = StarterModel(
    previous_names=["IP Adapter SDXL"],
 )
 ip_adapter_flux = StarterModel(
-    name="Standard Reference (XLabs FLUX IP-Adapter v2)",
+    name="Standard Reference (XLabs FLUX IP-Adapter)",
    base=BaseModelType.Flux,
-    source="https://huggingface.co/XLabs-AI/flux-ip-adapter-v2/resolve/main/ip_adapter.safetensors",
+    source="https://huggingface.co/XLabs-AI/flux-ip-adapter/resolve/main/flux-ip-adapter.safetensors",
    description="References images with a more generalized/looser degree of precision.",
    type=ModelType.IPAdapter,
    dependencies=[clip_vit_l_image_encoder],
+    previous_names=["XLabs FLUX IP-Adapter"],
 )
 # endregion
 # region ControlNet
@@ -585,8 +570,6 @@ STARTER_MODELS: list[StarterModel] = [
    flux_dev_quantized,
    flux_schnell,
    flux_dev,
-    sd35_medium,
-    sd35_large,
    cyberrealistic_sd1,
    rev_animated_sd1,
    dreamshaper_8_sd1,
--- a/invokeai/backend/model_manager/util/model_util.py
+++ b/invokeai/backend/model_manager/util/model_util.py
@@ -8,7 +8,6 @@ import safetensors
 import torch
 from picklescan.scanner import scan_file_path

-from invokeai.backend.model_manager.config import ClipVariantType
 from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader


@@ -44,7 +43,7 @@ def _fast_safetensors_reader(path: str) -> Dict[str, torch.Tensor]:
    return checkpoint


-def read_checkpoint_meta(path: Union[str, Path], scan: bool = True) -> Dict[str, torch.Tensor]:
+def read_checkpoint_meta(path: Union[str, Path], scan: bool = False) -> Dict[str, torch.Tensor]:
    if str(path).endswith(".safetensors"):
        try:
            path_str = path.as_posix() if isinstance(path, Path) else path
@@ -55,7 +54,7 @@ def read_checkpoint_meta(path: Union[str, Path], scan: bool = True) -> Dict[str,
    else:
        if scan:
            scan_result = scan_file_path(path)
-            if scan_result.infected_files != 0 or scan_result.scan_err:
+            if scan_result.infected_files != 0:
                raise Exception(f'The model file "{path}" is potentially infected by malware. Aborting import.')
        if str(path).endswith(".gguf"):
            # The GGUF reader used here uses numpy memmap, so these tensors are not loaded into memory during this function
@@ -166,25 +165,3 @@ def convert_bundle_to_flux_transformer_checkpoint(
        del transformer_state_dict[k]

    return original_state_dict
-
-
-def get_clip_variant_type(location: str) -> Optional[ClipVariantType]:
-    try:
-        path = Path(location)
-        config_path = path / "config.json"
-        if not config_path.exists():
-            config_path = path / "text_encoder" / "config.json"
-        if not config_path.exists():
-            return ClipVariantType.L
-        with open(config_path) as file:
-            clip_conf = json.load(file)
-            hidden_size = clip_conf.get("hidden_size", -1)
-            match hidden_size:
-                case 1280:
-                    return ClipVariantType.G
-                case 768:
-                    return ClipVariantType.L
-                case _:
-                    return ClipVariantType.L
-    except Exception:
-        return ClipVariantType.L
--- a/invokeai/backend/model_manager/util/select_hf_files.py
+++ b/invokeai/backend/model_manager/util/select_hf_files.py
@@ -85,7 +85,6 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
    """Select the proper variant files from a list of HuggingFace repo_id paths."""
    result: set[Path] = set()
    subfolder_weights: dict[Path, list[SubfolderCandidate]] = {}
-    safetensors_detected = False
    for path in files:
        if path.suffix in [".onnx", ".pb", ".onnx_data"]:
            if variant == ModelRepoVariant.ONNX:
@@ -120,27 +119,19 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
            # We prefer safetensors over other file formats and an exact variant match. We'll score each file based on
            # variant and format and select the best one.

-            if safetensors_detected and path.suffix == ".bin":
-                continue
-
            parent = path.parent
            score = 0

            if path.suffix == ".safetensors":
-                safetensors_detected = True
-                if parent in subfolder_weights:
-                    subfolder_weights[parent] = [sfc for sfc in subfolder_weights[parent] if sfc.path.suffix != ".bin"]
                score += 1

            candidate_variant_label = path.suffixes[0] if len(path.suffixes) == 2 else None

            # Some special handling is needed here if there is not an exact match and if we cannot infer the variant
            # from the file name. In this case, we only give this file a point if the requested variant is FP32 or DEFAULT.
-            if (
-                variant is not ModelRepoVariant.Default
-                and candidate_variant_label
-                and candidate_variant_label.startswith(f".{variant.value}")
-            ) or (not candidate_variant_label and variant in [ModelRepoVariant.FP32, ModelRepoVariant.Default]):
+            if candidate_variant_label == f".{variant}" or (
+                not candidate_variant_label and variant in [ModelRepoVariant.FP32, ModelRepoVariant.Default]
+            ):
                score += 1

            if parent not in subfolder_weights:
@@ -155,7 +146,7 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
        # Check if at least one of the files has the explicit fp16 variant.
        at_least_one_fp16 = False
        for candidate in candidate_list:
-            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0].startswith(".fp16"):
+            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16":
                at_least_one_fp16 = True
                break

@@ -171,16 +162,7 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
        # candidate.
        highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score)
        if highest_score_candidate:
-            pattern = r"^(.*?)-\d+-of-\d+(\.\w+)$"
-            match = re.match(pattern, highest_score_candidate.path.as_posix())
-            if match:
-                for candidate in candidate_list:
-                    if candidate.path.as_posix().startswith(match.group(1)) and candidate.path.as_posix().endswith(
-                        match.group(2)
-                    ):
-                        result.add(candidate.path)
-            else:
-                result.add(highest_score_candidate.path)
+            result.add(highest_score_candidate.path)

    # If one of the architecture-related variants was specified and no files matched other than
    # config and text files then we return an empty list
--- a/invokeai/backend/sd3/extensions/inpaint_extension.py
+++ b/invokeai/backend/sd3/extensions/inpaint_extension.py
@@ -1,58 +0,0 @@
-import torch
-
-
-class InpaintExtension:
-    """A class for managing inpainting with SD3."""
-
-    def __init__(self, init_latents: torch.Tensor, inpaint_mask: torch.Tensor, noise: torch.Tensor):
-        """Initialize InpaintExtension.
-
-        Args:
-            init_latents (torch.Tensor): The initial latents (i.e. un-noised at timestep 0).
-            inpaint_mask (torch.Tensor): A mask specifying which elements to inpaint. Range [0, 1]. Values of 1 will be
-                re-generated. Values of 0 will remain unchanged. Values between 0 and 1 can be used to blend the
-                inpainted region with the background.
-            noise (torch.Tensor): The noise tensor used to noise the init_latents.
-        """
-        assert init_latents.dim() == inpaint_mask.dim() == noise.dim() == 4
-        assert init_latents.shape[-2:] == inpaint_mask.shape[-2:] == noise.shape[-2:]
-
-        self._init_latents = init_latents
-        self._inpaint_mask = inpaint_mask
-        self._noise = noise
-
-    def _apply_mask_gradient_adjustment(self, t_prev: float) -> torch.Tensor:
-        """Applies inpaint mask gradient adjustment and returns the inpaint mask to be used at the current timestep."""
-        # As we progress through the denoising process, we promote gradient regions of the mask to have a full weight of
-        # 1.0. This helps to produce more coherent seams around the inpainted region. We experimented with a (small)
-        # number of promotion strategies (e.g. gradual promotion based on timestep), but found that a simple cutoff
-        # threshold worked well.
-        # We use a small epsilon to avoid any potential issues with floating point precision.
-        eps = 1e-4
-        mask_gradient_t_cutoff = 0.5
-        if t_prev > mask_gradient_t_cutoff:
-            # Early in the denoising process, use the inpaint mask as-is.
-            return self._inpaint_mask
-        else:
-            # After the cut-off, promote all non-zero mask values to 1.0.
-            mask = self._inpaint_mask.where(self._inpaint_mask <= (0.0 + eps), 1.0)
-
-        return mask
-
-    def merge_intermediate_latents_with_init_latents(
-        self, intermediate_latents: torch.Tensor, t_prev: float
-    ) -> torch.Tensor:
-        """Merge the intermediate latents with the initial latents for the current timestep using the inpaint mask. I.e.
-        update the intermediate latents to keep the regions that are not being inpainted on the correct noise
-        trajectory.
-
-        This function should be called after each denoising step.
-        """
-
-        mask = self._apply_mask_gradient_adjustment(t_prev)
-
-        # Noise the init latents for the current timestep.
-        noised_init_latents = self._noise * t_prev + (1.0 - t_prev) * self._init_latents
-
-        # Merge the intermediate latents with the noised_init_latents using the inpaint_mask.
-        return intermediate_latents * mask + noised_init_latents * (1.0 - mask)
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -499,22 +499,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                    for idx, value in enumerate(single_t2i_adapter_data.adapter_state):
                        accum_adapter_state[idx] += value * t2i_adapter_weight

-            # Hack: force compatibility with irregular resolutions by padding the feature map with zeros
-            for idx, tensor in enumerate(accum_adapter_state):
-                # The tensor size is supposed to be some integer downscale factor of the latents size.
-                # Internally, the unet will pad the latents before downscaling between levels when it is no longer divisible by its downscale factor.
-                # If the latent size does not scale down evenly, we need to pad the tensor so that it matches the the downscaled padded latents later on.
-                scale_factor = latents.size()[-1] // tensor.size()[-1]
-                required_padding_width = math.ceil(latents.size()[-1] / scale_factor) - tensor.size()[-1]
-                required_padding_height = math.ceil(latents.size()[-2] / scale_factor) - tensor.size()[-2]
-                tensor = torch.nn.functional.pad(
-                    tensor,
-                    (0, required_padding_width, 0, required_padding_height, 0, 0, 0, 0),
-                    mode="constant",
-                    value=0,
-                )
-                accum_adapter_state[idx] = tensor
-
            down_intrablock_additional_residuals = accum_adapter_state

        # Handle inpainting models.
--- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
+++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
@@ -49,32 +49,9 @@ class FLUXConditioningInfo:
        return self


-@dataclass
-class SD3ConditioningInfo:
-    clip_l_pooled_embeds: torch.Tensor
-    clip_l_embeds: torch.Tensor
-    clip_g_pooled_embeds: torch.Tensor
-    clip_g_embeds: torch.Tensor
-    t5_embeds: torch.Tensor | None
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self.clip_l_pooled_embeds = self.clip_l_pooled_embeds.to(device=device, dtype=dtype)
-        self.clip_l_embeds = self.clip_l_embeds.to(device=device, dtype=dtype)
-        self.clip_g_pooled_embeds = self.clip_g_pooled_embeds.to(device=device, dtype=dtype)
-        self.clip_g_embeds = self.clip_g_embeds.to(device=device, dtype=dtype)
-        if self.t5_embeds is not None:
-            self.t5_embeds = self.t5_embeds.to(device=device, dtype=dtype)
-        return self
-
-
@dataclass
 class ConditioningFieldData:
-    conditionings: (
-        List[BasicConditioningInfo]
-        | List[SDXLConditioningInfo]
-        | List[FLUXConditioningInfo]
-        | List[SD3ConditioningInfo]
-    )
+    conditionings: List[BasicConditioningInfo] | List[SDXLConditioningInfo] | List[FLUXConditioningInfo]


@dataclass
--- a/invokeai/backend/util/prefix_logger_adapter.py
+++ b/invokeai/backend/util/prefix_logger_adapter.py
@@ -1,12 +0,0 @@
-import logging
-from typing import Any, MutableMapping
-
-
-# Issue with type hints related to LoggerAdapter: https://github.com/python/typeshed/issues/7855
-class PrefixedLoggerAdapter(logging.LoggerAdapter):  # type: ignore
-    def __init__(self, logger: logging.Logger, prefix: str):
-        super().__init__(logger, {})
-        self.prefix = prefix
-
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, MutableMapping[str, Any]]:
-        return f"[{self.prefix}] {msg}", kwargs
--- a/invokeai/frontend/web/knip.ts
+++ b/invokeai/frontend/web/knip.ts
@@ -9,7 +9,6 @@ const config: KnipConfig = {
    'src/services/api/schema.ts',
    'src/features/nodes/types/v1/**',
    'src/features/nodes/types/v2/**',
-    'src/features/parameters/types/parameterSchemas.ts',
    // TODO(psyche): maybe we can clean up these utils after canvas v2 release
    'src/features/controlLayers/konva/util.ts',
    // TODO(psyche): restore HRF functionality?
--- a/invokeai/frontend/web/package.json
+++ b/invokeai/frontend/web/package.json
@@ -52,13 +52,13 @@
    }
  },
  "dependencies": {
-    "@atlaskit/pragmatic-drag-and-drop": "^1.4.0",
-    "@atlaskit/pragmatic-drag-and-drop-auto-scroll": "^1.4.0",
-    "@atlaskit/pragmatic-drag-and-drop-hitbox": "^1.0.3",
    "@dagrejs/dagre": "^1.1.4",
    "@dagrejs/graphlib": "^2.2.4",
+    "@dnd-kit/core": "^6.1.0",
+    "@dnd-kit/sortable": "^8.0.0",
+    "@dnd-kit/utilities": "^3.2.2",
    "@fontsource-variable/inter": "^5.1.0",
-    "@invoke-ai/ui-library": "^0.0.44",
+    "@invoke-ai/ui-library": "^0.0.43",
    "@nanostores/react": "^0.7.3",
    "@reduxjs/toolkit": "2.2.3",
    "@roarr/browser-log-writer": "^1.3.0",
--- a/invokeai/frontend/web/pnpm-lock.yaml
+++ b/invokeai/frontend/web/pnpm-lock.yaml
@@ -5,27 +5,27 @@ settings:
  excludeLinksFromLockfile: false

 dependencies:
-  '@atlaskit/pragmatic-drag-and-drop':
-    specifier: ^1.4.0
-    version: 1.4.0
-  '@atlaskit/pragmatic-drag-and-drop-auto-scroll':
-    specifier: ^1.4.0
-    version: 1.4.0
-  '@atlaskit/pragmatic-drag-and-drop-hitbox':
-    specifier: ^1.0.3
-    version: 1.0.3
  '@dagrejs/dagre':
    specifier: ^1.1.4
    version: 1.1.4
  '@dagrejs/graphlib':
    specifier: ^2.2.4
    version: 2.2.4
+  '@dnd-kit/core':
+    specifier: ^6.1.0
+    version: 6.1.0(react-dom@18.3.1)(react@18.3.1)
+  '@dnd-kit/sortable':
+    specifier: ^8.0.0
+    version: 8.0.0(@dnd-kit/core@6.1.0)(react@18.3.1)
+  '@dnd-kit/utilities':
+    specifier: ^3.2.2
+    version: 3.2.2(react@18.3.1)
  '@fontsource-variable/inter':
    specifier: ^5.1.0
    version: 5.1.0
  '@invoke-ai/ui-library':
-    specifier: ^0.0.44
-    version: 0.0.44(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.1.0)(@types/react@18.3.11)(i18next@23.15.1)(react-dom@18.3.1)(react@18.3.1)
+    specifier: ^0.0.43
+    version: 0.0.43(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.1.0)(@types/react@18.3.11)(i18next@23.15.1)(react-dom@18.3.1)(react@18.3.1)
  '@nanostores/react':
    specifier: ^0.7.3
    version: 0.7.3(nanostores@0.11.3)(react@18.3.1)
@@ -319,28 +319,6 @@ packages:
      '@jridgewell/trace-mapping': 0.3.25
    dev: true

-  /@atlaskit/pragmatic-drag-and-drop-auto-scroll@1.4.0:
-    resolution: {integrity: sha512-5GoikoTSW13UX76F9TDeWB8x3jbbGlp/Y+3aRkHe1MOBMkrWkwNpJ42MIVhhX/6NSeaZiPumP0KbGJVs2tOWSQ==}
-    dependencies:
-      '@atlaskit/pragmatic-drag-and-drop': 1.4.0
-      '@babel/runtime': 7.25.7
-    dev: false
-
-  /@atlaskit/pragmatic-drag-and-drop-hitbox@1.0.3:
-    resolution: {integrity: sha512-/Sbu/HqN2VGLYBhnsG7SbRNg98XKkbF6L7XDdBi+izRybfaK1FeMfodPpm/xnBHPJzwYMdkE0qtLyv6afhgMUA==}
-    dependencies:
-      '@atlaskit/pragmatic-drag-and-drop': 1.4.0
-      '@babel/runtime': 7.25.7
-    dev: false
-
-  /@atlaskit/pragmatic-drag-and-drop@1.4.0:
-    resolution: {integrity: sha512-qRY3PTJIcxfl/QB8Gwswz+BRvlmgAC5pB+J2hL6dkIxgqAgVwOhAamMUKsrOcFU/axG2Q7RbNs1xfoLKDuhoPg==}
-    dependencies:
-      '@babel/runtime': 7.25.7
-      bind-event-listener: 3.0.0
-      raf-schd: 4.0.3
-    dev: false
-
  /@babel/code-frame@7.25.7:
    resolution: {integrity: sha512-0xZJFNE5XMpENsgfHYTw8FbX4kv53mFLn2i3XPoq69LyhYSCBJtitaHx9QnsVTrsogI4Z3+HtEfZ2/GFPOtf5g==}
    engines: {node: '>=6.9.0'}
@@ -515,8 +493,8 @@ packages:
    resolution: {integrity: sha512-MV6D4VLRIHr4PkW4zMyqfrNS1mPlCTiCXwvYGtDFQYr+xHFfonhAuf9WjsSc0nyp2m0OdkSLnzmVKkZFLo25Tg==}
    dev: false

-  /@chakra-ui/anatomy@2.3.5:
-    resolution: {integrity: sha512-3im33cUOxCbISjaBlINE2u8BOwJSCdzpjCX0H+0JxK2xz26UaVA5xeI3NYHUoxDnr/QIrgfrllGxS0szYwOcyg==}
+  /@chakra-ui/anatomy@2.3.4:
+    resolution: {integrity: sha512-fFIYN7L276gw0Q7/ikMMlZxP7mvnjRaWJ7f3Jsf9VtDOi6eAYIBRrhQe6+SZ0PGmoOkRaBc7gSE5oeIbgFFyrw==}
    dev: false

  /@chakra-ui/breakpoint-utils@2.0.8:
@@ -573,12 +551,12 @@ packages:
      react: 18.3.1
    dev: false

-  /@chakra-ui/hooks@2.4.3(react@18.3.1):
-    resolution: {integrity: sha512-Sr2zsoTZw3p7HbrUy4aLpTIkE2XXUelAUgg3NGwMzrmx75bE0qVyiuuTFOuyEzGxYVV2Fe8QtcKKilm6RwzTGg==}
+  /@chakra-ui/hooks@2.4.2(react@18.3.1):
+    resolution: {integrity: sha512-LRKiVE1oA7afT5tbbSKAy7Uas2xFHE6IkrQdbhWCHmkHBUtPvjQQDgwtnd4IRZPmoEfNGwoJ/MQpwOM/NRTTwA==}
    peerDependencies:
      react: '>=18'
    dependencies:
-      '@chakra-ui/utils': 2.2.3(react@18.3.1)
+      '@chakra-ui/utils': 2.2.2(react@18.3.1)
      '@zag-js/element-size': 0.31.1
      copy-to-clipboard: 3.3.3
      framesync: 6.1.2
@@ -596,13 +574,13 @@ packages:
      react: 18.3.1
    dev: false

-  /@chakra-ui/icons@2.2.4(@chakra-ui/react@2.10.4)(react@18.3.1):
+  /@chakra-ui/icons@2.2.4(@chakra-ui/react@2.10.2)(react@18.3.1):
    resolution: {integrity: sha512-l5QdBgwrAg3Sc2BRqtNkJpfuLw/pWRDwwT58J6c4PqQT6wzXxyNa8Q0PForu1ltB5qEiFb1kxr/F/HO1EwNa6g==}
    peerDependencies:
      '@chakra-ui/react': '>=2.0.0'
      react: '>=18'
    dependencies:
-      '@chakra-ui/react': 2.10.4(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1)
+      '@chakra-ui/react': 2.10.2(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1)
      react: 18.3.1
    dev: false

@@ -825,8 +803,8 @@ packages:
      react: 18.3.1
    dev: false

-  /@chakra-ui/react@2.10.4(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-XyRWnuZ1Uw7Mlj5pKUGO5/WhnIHP/EOrpy6lGZC1yWlkd0eIfIpYMZ1ALTZx4KPEdbBaes48dgiMT2ROCqLhkA==}
+  /@chakra-ui/react@2.10.2(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1):
+    resolution: {integrity: sha512-TfIHTqTlxTHYJZBtpiR5EZasPUrLYKJxdbHkdOJb5G1OQ+2c5kKl5XA7c2pMtsEptzb7KxAAIB62t3hxdfWp1w==}
    peerDependencies:
      '@emotion/react': '>=11'
      '@emotion/styled': '>=11'
@@ -834,10 +812,10 @@ packages:
      react: '>=18'
      react-dom: '>=18'
    dependencies:
-      '@chakra-ui/hooks': 2.4.3(react@18.3.1)
-      '@chakra-ui/styled-system': 2.12.1(react@18.3.1)
-      '@chakra-ui/theme': 3.4.7(@chakra-ui/styled-system@2.12.1)(react@18.3.1)
-      '@chakra-ui/utils': 2.2.3(react@18.3.1)
+      '@chakra-ui/hooks': 2.4.2(react@18.3.1)
+      '@chakra-ui/styled-system': 2.11.2(react@18.3.1)
+      '@chakra-ui/theme': 3.4.6(@chakra-ui/styled-system@2.11.2)(react@18.3.1)
+      '@chakra-ui/utils': 2.2.2(react@18.3.1)
      '@emotion/react': 11.13.3(@types/react@18.3.11)(react@18.3.1)
      '@emotion/styled': 11.13.0(@emotion/react@11.13.3)(@types/react@18.3.11)(react@18.3.1)
      '@popperjs/core': 2.11.8
@@ -868,10 +846,10 @@ packages:
      react: 18.3.1
    dev: false

-  /@chakra-ui/styled-system@2.12.1(react@18.3.1):
-    resolution: {integrity: sha512-DQph1nDiCPtgze7nDe0a36530ByXb5VpPosKGyWMvKocVeZJcDtYG6XM0+V5a0wKuFBXsViBBRIFUTiUesJAcg==}
+  /@chakra-ui/styled-system@2.11.2(react@18.3.1):
+    resolution: {integrity: sha512-y++z2Uop+hjfZX9mbH88F1ikazPv32asD2er56zMJBemUAzweXnHTpiCQbluEDSUDhqmghVZAdb+5L4XLbsRxA==}
    dependencies:
-      '@chakra-ui/utils': 2.2.3(react@18.3.1)
+      '@chakra-ui/utils': 2.2.2(react@18.3.1)
      csstype: 3.1.3
    transitivePeerDependencies:
      - react
@@ -915,14 +893,14 @@ packages:
      color2k: 2.0.3
    dev: false

-  /@chakra-ui/theme-tools@2.2.7(@chakra-ui/styled-system@2.12.1)(react@18.3.1):
-    resolution: {integrity: sha512-K/VJd0QcnKik7m+qZTkggqNLep6+MPUu8IP5TUpHsnSM5R/RVjsJIR7gO8IZVAIMIGLLTIhGshHxeMekqv6LcQ==}
+  /@chakra-ui/theme-tools@2.2.6(@chakra-ui/styled-system@2.11.2)(react@18.3.1):
+    resolution: {integrity: sha512-3UhKPyzKbV3l/bg1iQN9PBvffYp+EBOoYMUaeTUdieQRPFzo2jbYR0lNCxqv8h5aGM/k54nCHU2M/GStyi9F2A==}
    peerDependencies:
      '@chakra-ui/styled-system': '>=2.0.0'
    dependencies:
-      '@chakra-ui/anatomy': 2.3.5
-      '@chakra-ui/styled-system': 2.12.1(react@18.3.1)
-      '@chakra-ui/utils': 2.2.3(react@18.3.1)
+      '@chakra-ui/anatomy': 2.3.4
+      '@chakra-ui/styled-system': 2.11.2(react@18.3.1)
+      '@chakra-ui/utils': 2.2.2(react@18.3.1)
      color2k: 2.0.3
    transitivePeerDependencies:
      - react
@@ -948,15 +926,15 @@ packages:
      '@chakra-ui/theme-tools': 2.1.2(@chakra-ui/styled-system@2.9.2)
    dev: false

-  /@chakra-ui/theme@3.4.7(@chakra-ui/styled-system@2.12.1)(react@18.3.1):
-    resolution: {integrity: sha512-pfewthgZTFNUYeUwGvhPQO/FTIyf375cFV1AT8N1y0aJiw4KDe7YTGm7p0aFy4AwAjH2ydMgeEx/lua4tx8qyQ==}
+  /@chakra-ui/theme@3.4.6(@chakra-ui/styled-system@2.11.2)(react@18.3.1):
+    resolution: {integrity: sha512-ZwFBLfiMC3URwaO31ONXoKH9k0TX0OW3UjdPF3EQkQpYyrk/fm36GkkzajjtdpWEd7rzDLRsQjPmvwNaSoNDtg==}
    peerDependencies:
      '@chakra-ui/styled-system': '>=2.8.0'
    dependencies:
-      '@chakra-ui/anatomy': 2.3.5
-      '@chakra-ui/styled-system': 2.12.1(react@18.3.1)
-      '@chakra-ui/theme-tools': 2.2.7(@chakra-ui/styled-system@2.12.1)(react@18.3.1)
-      '@chakra-ui/utils': 2.2.3(react@18.3.1)
+      '@chakra-ui/anatomy': 2.3.4
+      '@chakra-ui/styled-system': 2.11.2(react@18.3.1)
+      '@chakra-ui/theme-tools': 2.2.6(@chakra-ui/styled-system@2.11.2)(react@18.3.1)
+      '@chakra-ui/utils': 2.2.2(react@18.3.1)
    transitivePeerDependencies:
      - react
    dev: false
@@ -981,8 +959,8 @@ packages:
      lodash.mergewith: 4.6.2
    dev: false

-  /@chakra-ui/utils@2.2.3(react@18.3.1):
-    resolution: {integrity: sha512-cldoCQuexZ6e07/9hWHKD4l1QXXlM1Nax9tuQOBvVf/EgwNZt3nZu8zZRDFlhAOKCTQDkmpLTTu+eXXjChNQOw==}
+  /@chakra-ui/utils@2.2.2(react@18.3.1):
+    resolution: {integrity: sha512-jUPLT0JzRMWxpdzH6c+t0YMJYrvc5CLericgITV3zDSXblkfx3DsYXqU11DJTSGZI9dUKzM1Wd0Wswn4eJwvFQ==}
    peerDependencies:
      react: '>=16.8.0'
    dependencies:
@@ -1002,6 +980,49 @@ packages:
    engines: {node: '>17.0.0'}
    dev: false

+  /@dnd-kit/accessibility@3.1.0(react@18.3.1):
+    resolution: {integrity: sha512-ea7IkhKvlJUv9iSHJOnxinBcoOI3ppGnnL+VDJ75O45Nss6HtZd8IdN8touXPDtASfeI2T2LImb8VOZcL47wjQ==}
+    peerDependencies:
+      react: '>=16.8.0'
+    dependencies:
+      react: 18.3.1
+      tslib: 2.7.0
+    dev: false
+
+  /@dnd-kit/core@6.1.0(react-dom@18.3.1)(react@18.3.1):
+    resolution: {integrity: sha512-J3cQBClB4TVxwGo3KEjssGEXNJqGVWx17aRTZ1ob0FliR5IjYgTxl5YJbKTzA6IzrtelotH19v6y7uoIRUZPSg==}
+    peerDependencies:
+      react: '>=16.8.0'
+      react-dom: '>=16.8.0'
+    dependencies:
+      '@dnd-kit/accessibility': 3.1.0(react@18.3.1)
+      '@dnd-kit/utilities': 3.2.2(react@18.3.1)
+      react: 18.3.1
+      react-dom: 18.3.1(react@18.3.1)
+      tslib: 2.7.0
+    dev: false
+
+  /@dnd-kit/sortable@8.0.0(@dnd-kit/core@6.1.0)(react@18.3.1):
+    resolution: {integrity: sha512-U3jk5ebVXe1Lr7c2wU7SBZjcWdQP+j7peHJfCspnA81enlu88Mgd7CC8Q+pub9ubP7eKVETzJW+IBAhsqbSu/g==}
+    peerDependencies:
+      '@dnd-kit/core': ^6.1.0
+      react: '>=16.8.0'
+    dependencies:
+      '@dnd-kit/core': 6.1.0(react-dom@18.3.1)(react@18.3.1)
+      '@dnd-kit/utilities': 3.2.2(react@18.3.1)
+      react: 18.3.1
+      tslib: 2.7.0
+    dev: false
+
+  /@dnd-kit/utilities@3.2.2(react@18.3.1):
+    resolution: {integrity: sha512-+MKAJEOfaBe5SmV6t34p80MMKhjvUz0vRrvVJbPT0WElzaOJ/1xs+D+KDv+tD/NE5ujfrChEcshd4fLn0wpiqg==}
+    peerDependencies:
+      react: '>=16.8.0'
+    dependencies:
+      react: 18.3.1
+      tslib: 2.7.0
+    dev: false
+
  /@emotion/babel-plugin@11.12.0:
    resolution: {integrity: sha512-y2WQb+oP8Jqvvclh8Q55gLUyb7UFvgv7eJfsj7td5TToBrIUtPay2kMrZi4xjq9qw2vD0ZR5fSho0yqoFgX7Rw==}
    dependencies:
@@ -1675,20 +1696,20 @@ packages:
      prettier: 3.3.3
    dev: true

-  /@invoke-ai/ui-library@0.0.44(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.1.0)(@types/react@18.3.11)(i18next@23.15.1)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-PDseHmdr8oi8cmrpx3UwIYHn4NduAJX2R0pM0pyM54xrCMPMgYiCbC/eOs8Gt4fBc2ziiPZ9UGoW4evnE3YJsg==}
+  /@invoke-ai/ui-library@0.0.43(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.1.0)(@types/react@18.3.11)(i18next@23.15.1)(react-dom@18.3.1)(react@18.3.1):
+    resolution: {integrity: sha512-t3fPYyks07ue3dEBPJuTHbeDLnDckDCOrtvc07mMDbLOnlPEZ0StaeiNGH+oO8qLzAuMAlSTdswgHfzTc2MmPw==}
    peerDependencies:
      '@fontsource-variable/inter': ^5.0.16
      react: ^18.2.0
      react-dom: ^18.2.0
    dependencies:
-      '@chakra-ui/anatomy': 2.2.2
-      '@chakra-ui/icons': 2.2.4(@chakra-ui/react@2.10.4)(react@18.3.1)
+      '@chakra-ui/anatomy': 2.3.4
+      '@chakra-ui/icons': 2.2.4(@chakra-ui/react@2.10.2)(react@18.3.1)
      '@chakra-ui/layout': 2.3.1(@chakra-ui/system@2.6.2)(react@18.3.1)
      '@chakra-ui/portal': 2.1.0(react-dom@18.3.1)(react@18.3.1)
-      '@chakra-ui/react': 2.10.4(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1)
-      '@chakra-ui/styled-system': 2.9.2
-      '@chakra-ui/theme-tools': 2.1.2(@chakra-ui/styled-system@2.9.2)
+      '@chakra-ui/react': 2.10.2(@emotion/react@11.13.3)(@emotion/styled@11.13.0)(@types/react@18.3.11)(framer-motion@11.10.0)(react-dom@18.3.1)(react@18.3.1)
+      '@chakra-ui/styled-system': 2.11.2(react@18.3.1)
+      '@chakra-ui/theme-tools': 2.2.6(@chakra-ui/styled-system@2.11.2)(react@18.3.1)
      '@emotion/react': 11.13.3(@types/react@18.3.11)(react@18.3.1)
      '@emotion/styled': 11.13.0(@emotion/react@11.13.3)(@types/react@18.3.11)(react@18.3.1)
      '@fontsource-variable/inter': 5.1.0
@@ -4292,10 +4313,6 @@ packages:
      open: 8.4.2
    dev: true

-  /bind-event-listener@3.0.0:
-    resolution: {integrity: sha512-PJvH288AWQhKs2v9zyfYdPzlPqf5bXbGMmhmUIY9x4dAUGIWgomO771oBQNwJnMQSnUIXhKu6sgzpBRXTlvb8Q==}
-    dev: false
-
  /bl@4.1.0:
    resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==}
    dependencies:
@@ -7540,10 +7557,6 @@ packages:
    resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
    dev: true

-  /raf-schd@4.0.3:
-    resolution: {integrity: sha512-tQkJl2GRWh83ui2DiPTJz9wEiMN20syf+5oKfB03yYP7ioZcJwsIK8FjrtLwH1m7C7e+Tt2yYBlrOpdT+dyeIQ==}
-    dev: false
-
  /raf-throttle@2.0.6:
    resolution: {integrity: sha512-C7W6hy78A+vMmk5a/B6C5szjBHrUzWJkVyakjKCK59Uy2CcA7KhO1JUvvH32IXYFIcyJ3FMKP3ZzCc2/71I6Vg==}
    dev: false
--- a/invokeai/frontend/web/public/assets/images/denoising-strength.png
+++ b/invokeai/frontend/web/public/assets/images/denoising-strength.png
--- a/invokeai/frontend/web/public/locales/de.json
+++ b/invokeai/frontend/web/public/locales/de.json
@@ -95,10 +95,7 @@
        "none": "Keine",
        "new": "Neu",
        "ok": "OK",
-        "close": "Schließen",
-        "clipboard": "Zwischenablage",
-        "generating": "Generieren",
-        "loadingModel": "Lade Modell"
+        "close": "Schließen"
    },
    "gallery": {
        "galleryImageSize": "Bildgröße",
@@ -538,12 +535,14 @@
        "addModels": "Model hinzufügen",
        "deleteModelImage": "Lösche Model Bild",
        "huggingFaceRepoID": "HuggingFace Repo ID",
+        "hfToken": "HuggingFace Schlüssel",
        "huggingFacePlaceholder": "besitzer/model-name",
        "modelSettings": "Modelleinstellungen",
        "typePhraseHere": "Phrase hier eingeben",
        "spandrelImageToImage": "Bild zu Bild (Spandrel)",
        "starterModels": "Einstiegsmodelle",
        "t5Encoder": "T5-Kodierer",
+        "useDefaultSettings": "Standardeinstellungen verwenden",
        "uploadImage": "Bild hochladen",
        "urlOrLocalPath": "URL oder lokaler Pfad",
        "install": "Installieren",
@@ -593,15 +592,7 @@
        "loraTriggerPhrases": "LoRA-Auslösephrasen",
        "installingBundle": "Bündel wird installiert",
        "triggerPhrases": "Auslösephrasen",
-        "mainModelTriggerPhrases": "Hauptmodell-Auslösephrasen",
-        "noDefaultSettings": "Für dieses Modell sind keine Standardeinstellungen konfiguriert. Besuchen Sie den Modell-Manager, um Standardeinstellungen hinzuzufügen.",
-        "defaultSettingsOutOfSync": "Einige Einstellungen stimmen nicht mit den Standardeinstellungen des Modells überein:",
-        "clipLEmbed": "CLIP-L einbetten",
-        "clipGEmbed": "CLIP-G einbetten",
-        "hfTokenLabel": "HuggingFace-Token (für einige Modelle erforderlich)",
-        "hfTokenHelperText": "Für die Nutzung einiger Modelle ist ein HF-Token erforderlich. Klicken Sie hier, um Ihr Token zu erstellen oder zu erhalten.",
-        "hfForbidden": "Sie haben keinen Zugriff auf dieses HF-Modell",
-        "hfTokenInvalid": "Ungültiges oder fehlendes HF-Token"
+        "mainModelTriggerPhrases": "Hauptmodell-Auslösephrasen"
    },
    "parameters": {
        "images": "Bilder",
@@ -687,41 +678,10 @@
    "toast": {
        "uploadFailed": "Hochladen fehlgeschlagen",
        "imageCopied": "Bild kopiert",
-        "parametersNotSet": "Parameter nicht zurückgerufen",
+        "parametersNotSet": "Parameter nicht festgelegt",
        "addedToBoard": "Dem Board hinzugefügt",
        "loadedWithWarnings": "Workflow mit Warnungen geladen",
-        "imageSaved": "Bild gespeichert",
-        "linkCopied": "Link kopiert",
-        "problemCopyingLayer": "Ebene kann nicht kopiert werden",
-        "problemSavingLayer": "Ebene kann nicht gespeichert werden",
-        "parameterSetDesc": "{{parameter}} zurückgerufen",
-        "imageUploaded": "Bild hochgeladen",
-        "problemCopyingImage": "Bild kann nicht kopiert werden",
-        "parameterNotSetDesc": "{{parameter}} kann nicht zurückgerufen werden",
-        "prunedQueue": "Warteschlange bereinigt",
-        "modelAddedSimple": "Modell zur Warteschlange hinzugefügt",
-        "parametersSet": "Parameter zurückgerufen",
-        "imageNotLoadedDesc": "Bild konnte nicht gefunden werden",
-        "setControlImage": "Als Kontrollbild festlegen",
-        "sentToUpscale": "An Vergrößerung gesendet",
-        "parameterNotSetDescWithMessage": "{{parameter}} kann nicht zurückgerufen werden: {{message}}",
-        "unableToLoadImageMetadata": "Bildmetadaten können nicht geladen werden",
-        "unableToLoadImage": "Bild kann nicht geladen werden",
-        "serverError": "Serverfehler",
-        "parameterNotSet": "Parameter nicht zurückgerufen",
-        "sessionRef": "Sitzung: {{sessionId}}",
-        "problemDownloadingImage": "Bild kann nicht heruntergeladen werden",
-        "parameters": "Parameter",
-        "parameterSet": "Parameter zurückgerufen",
-        "importFailed": "Import fehlgeschlagen",
-        "importSuccessful": "Import erfolgreich",
-        "setNodeField": "Als Knotenfeld festlegen",
-        "somethingWentWrong": "Etwas ist schief gelaufen",
-        "workflowLoaded": "Arbeitsablauf geladen",
-        "workflowDeleted": "Arbeitsablauf gelöscht",
-        "errorCopied": "Fehler kopiert",
-        "layerCopiedToClipboard": "Ebene in die Zwischenablage kopiert",
-        "sentToCanvas": "An Leinwand gesendet"
+        "imageSaved": "Bild gespeichert"
    },
    "accessibility": {
        "uploadImage": "Bild hochladen",
@@ -778,8 +738,7 @@
        "deletedPrivateBoardsCannotbeRestored": "Gelöschte Boards können nicht wiederhergestellt werden. Wenn Sie „Nur Board löschen“ wählen, werden die Bilder in einen privaten, nicht kategorisierten Status für den Ersteller des Bildes versetzt.",
        "assetsWithCount_one": "{{count}} in der Sammlung",
        "assetsWithCount_other": "{{count}} in der Sammlung",
-        "deletedBoardsCannotbeRestored": "Gelöschte Ordner können nicht wiederhergestellt werden. Die Auswahl von \"Nur Ordner löschen\" verschiebt Bilder in einen unkategorisierten Zustand.",
-        "updateBoardError": "Fehler beim Aktualisieren des Ordners"
+        "deletedBoardsCannotbeRestored": "Gelöschte Ordner können nicht wiederhergestellt werden. Die Auswahl von \"Nur Ordner löschen\" verschiebt Bilder in einen unkategorisierten Zustand."
    },
    "queue": {
        "status": "Status",
@@ -851,8 +810,7 @@
        "upscaling": "Hochskalierung",
        "canvas": "Leinwand",
        "prompts_one": "Prompt",
-        "prompts_other": "Prompts",
-        "batchSize": "Stapelgröße"
+        "prompts_other": "Prompts"
    },
    "metadata": {
        "negativePrompt": "Negativ Beschreibung",
@@ -867,6 +825,7 @@
        "width": "Breite",
        "createdBy": "Erstellt von",
        "steps": "Schritte",
+        "seamless": "Nahtlos",
        "positivePrompt": "Positiver Prompt",
        "generationMode": "Generierungsmodus",
        "Threshold": "Rauschen-Schwelle",
@@ -883,9 +842,7 @@
        "recallParameter": "{{label}} Abrufen",
        "parsingFailed": "Parsing Fehlgeschlagen",
        "canvasV2Metadata": "Leinwand",
-        "guidance": "Führung",
-        "seamlessXAxis": "Nahtlose X Achse",
-        "seamlessYAxis": "Nahtlose Y Achse"
+        "guidance": "Führung"
    },
    "popovers": {
        "noiseUseCPU": {
@@ -1092,21 +1049,6 @@
        },
        "patchmatchDownScaleSize": {
            "heading": "Herunterskalieren"
-        },
-        "paramHeight": {
-            "heading": "Höhe",
-            "paragraphs": [
-                "Höhe des generierten Bildes. Muss ein Vielfaches von 8 sein."
-            ]
-        },
-        "paramUpscaleMethod": {
-            "heading": "Vergrößerungsmethode",
-            "paragraphs": [
-                "Methode zum Hochskalieren des Bildes für High Resolution Fix."
-            ]
-        },
-        "paramHrf": {
-            "heading": "High Resolution Fix aktivieren"
        }
    },
    "invocationCache": {
@@ -1228,19 +1170,7 @@
        "workflowVersion": "Version",
        "saveToGallery": "In Galerie speichern",
        "noWorkflows": "Keine Arbeitsabläufe",
-        "noMatchingWorkflows": "Keine passenden Arbeitsabläufe",
-        "unknownErrorValidatingWorkflow": "Unbekannter Fehler beim Validieren des Arbeitsablaufes",
-        "inputFieldTypeParseError": "Typ des Eingabefelds {{node}}.{{field}} kann nicht analysiert werden ({{message}})",
-        "workflowSettings": "Arbeitsablauf Editor Einstellungen",
-        "unableToLoadWorkflow": "Arbeitsablauf kann nicht geladen werden",
-        "viewMode": "In linearen Ansicht verwenden",
-        "unableToValidateWorkflow": "Arbeitsablauf kann nicht validiert werden",
-        "outputFieldTypeParseError": "Typ des Ausgabefelds {{node}}.{{field}} kann nicht analysiert werden ({{message}})",
-        "unableToGetWorkflowVersion": "Version des Arbeitsablaufschemas kann nicht bestimmt werden",
-        "unknownFieldType": "$t(nodes.unknownField) Typ: {{type}}",
-        "unknownField": "Unbekanntes Feld",
-        "unableToUpdateNodes_one": "{{count}} Knoten kann nicht aktualisiert werden",
-        "unableToUpdateNodes_other": "{{count}} Knoten können nicht aktualisiert werden"
+        "noMatchingWorkflows": "Keine passenden Arbeitsabläufe"
    },
    "hrf": {
        "enableHrf": "Korrektur für hohe Auflösungen",
@@ -1370,7 +1300,15 @@
        "enableLogging": "Protokollierung aktivieren"
    },
    "whatsNew": {
-        "whatsNewInInvoke": "Was gibt's Neues"
+        "whatsNewInInvoke": "Was gibt's Neues",
+        "canvasV2Announcement": {
+            "fluxSupport": "Unterstützung für Flux-Modelle",
+            "newCanvas": "Eine leistungsstarke neue Kontrollfläche",
+            "newLayerTypes": "Neue Ebenentypen für noch mehr Kontrolle",
+            "readReleaseNotes": "Anmerkungen zu dieser Version lesen",
+            "watchReleaseVideo": "Video über diese Version anzeigen",
+            "watchUiUpdatesOverview": "Interface-Updates Übersicht"
+        }
    },
    "stylePresets": {
        "name": "Name",
@@ -1421,13 +1359,7 @@
        "pullBboxIntoLayerOk": "Bbox in die Ebene gezogen",
        "saveBboxToGallery": "Bbox in Galerie speichern",
        "tool": {
-            "bbox": "Bbox",
-            "brush": "Pinsel",
-            "eraser": "Radiergummi",
-            "colorPicker": "Farbwähler",
-            "view": "Ansicht",
-            "rectangle": "Rechteck",
-            "move": "Verschieben"
+            "bbox": "Bbox"
        },
        "transform": {
            "fitToBbox": "An Bbox anpassen",
@@ -1469,6 +1401,7 @@
        "deleteReferenceImage": "Referenzbild löschen",
        "referenceImage": "Referenzbild",
        "opacity": "Opazität",
+        "resetCanvas": "Leinwand zurücksetzen",
        "removeBookmark": "Lesezeichen entfernen",
        "rasterLayer": "Raster-Ebene",
        "rasterLayers_withCount_visible": "Raster-Ebenen ({{count}})",
@@ -1545,30 +1478,7 @@
        "layer_one": "Ebene",
        "layer_other": "Ebenen",
        "layer_withCount_one": "Ebene ({{count}})",
-        "layer_withCount_other": "Ebenen ({{count}})",
-        "fill": {
-            "fillStyle": "Füllstil",
-            "diagonal": "Diagonal",
-            "vertical": "Vertikal",
-            "fillColor": "Füllfarbe",
-            "grid": "Raster",
-            "solid": "Solide",
-            "crosshatch": "Kreuzschraffur",
-            "horizontal": "Horizontal"
-        },
-        "filter": {
-            "apply": "Anwenden",
-            "reset": "Zurücksetzen",
-            "cancel": "Abbrechen",
-            "spandrel_filter": {
-                "label": "Bild-zu-Bild Modell",
-                "description": "Ein Bild-zu-Bild Modell auf der ausgewählten Ebene ausführen.",
-                "model": "Modell"
-            },
-            "filters": "Filter",
-            "filterType": "Filtertyp",
-            "filter": "Filter"
-        }
+        "layer_withCount_other": "Ebenen ({{count}})"
    },
    "upsell": {
        "shareAccess": "Zugang teilen",
--- a/Show More
+++ b/Show More