chore(ui): lint

chore: release v4.2.9.dev4
Canvas dev build.
2026-01-23 02:48:11 -05:00 · 2024-08-26 22:57:56 +10:00 · 2024-08-26 22:46:07 +10:00 · 2024-08-26 22:45:11 +10:00 · 2024-08-26 22:44:47 +10:00 · 2024-08-26 22:29:28 +10:00
675 changed files with 11182 additions and 23938 deletions
--- a/.github/workflows/build-container.yml
+++ b/.github/workflows/build-container.yml
@@ -13,12 +13,6 @@ on:
    tags:
      - 'v*.*.*'
  workflow_dispatch:
-    inputs:
-      push-to-registry:
-        description: Push the built image to the container registry
-        required: false
-        type: boolean
-        default: false

 permissions:
  contents: write
@@ -56,15 +50,16 @@ jobs:
          df -h

      - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v3

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v4
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          images: |
            ghcr.io/${{ github.repository }}
+            ${{ env.DOCKERHUB_REPOSITORY }}
          tags: |
            type=ref,event=branch
            type=ref,event=tag
@@ -77,33 +72,49 @@ jobs:
            suffix=-${{ matrix.gpu-driver }},onlatest=false

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v2
        with:
          platforms: ${{ env.PLATFORMS }}

      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
+        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

+      # - name: Login to Docker Hub
+      #   if: github.event_name != 'pull_request' && vars.DOCKERHUB_REPOSITORY != ''
+      #   uses: docker/login-action@v2
+      #   with:
+      #     username: ${{ secrets.DOCKERHUB_USERNAME }}
+      #     password: ${{ secrets.DOCKERHUB_TOKEN }}
+
      - name: Build container
        timeout-minutes: 40
        id: docker_build
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v4
        with:
          context: .
          file: docker/Dockerfile
          platforms: ${{ env.PLATFORMS }}
-          push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' || github.event.inputs.push-to-registry }}
+          push: ${{ github.ref == 'refs/heads/main' || github.ref_type == 'tag' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: |
            type=gha,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
            type=gha,scope=main-${{ matrix.gpu-driver }}
          cache-to: type=gha,mode=max,scope=${{ github.ref_name }}-${{ matrix.gpu-driver }}
+
+      # - name: Docker Hub Description
+      #   if: (github.ref == 'refs/heads/main' || github.ref_type == 'tag') && vars.DOCKERHUB_REPOSITORY != ''
+      #   uses: peter-evans/dockerhub-description@v3
+      #   with:
+      #     username: ${{ secrets.DOCKERHUB_USERNAME }}
+      #     password: ${{ secrets.DOCKERHUB_TOKEN }}
+      #     repository: ${{ vars.DOCKERHUB_REPOSITORY }}
+      #     short-description: ${{ github.event.repository.description }}
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -60,7 +60,7 @@ jobs:
            extra-index-url: 'https://download.pytorch.org/whl/cpu'
            github-env: $GITHUB_ENV
          - platform: macos-default
-            os: macOS-14
+            os: macOS-12
            github-env: $GITHUB_ENV
          - platform: windows-cpu
            os: windows-2022
--- a/docs/help/FAQ.md
+++ b/docs/help/FAQ.md
@@ -196,22 +196,6 @@ tips to reduce the problem:
    === "12GB VRAM GPU"

        This should be sufficient to generate larger images up to about 1280x1280.
-		
-## Checkpoint Models Load Slowly or Use Too Much RAM
-
-The difference between diffusers models (a folder containing multiple
-subfolders) and checkpoint models (a file ending with .safetensors or
-.ckpt) is that InvokeAI is able to load diffusers models into memory
-incrementally, while checkpoint models must be loaded all at
-once. With very large models, or systems with limited RAM, you may
-experience slowdowns and other memory-related issues when loading
-checkpoint models.
-
-To solve this, go to the Model Manager tab (the cube), select the
-checkpoint model that's giving you trouble, and press the "Convert"
-button in the upper right of your browser window. This will conver the
-checkpoint into a diffusers model, after which loading should be
-faster and less memory-intensive.

 ## Memory Leak (Linux)

--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -3,10 +3,8 @@

 import io
 import pathlib
-import shutil
 import traceback
 from copy import deepcopy
-from enum import Enum
 from tempfile import TemporaryDirectory
 from typing import List, Optional, Type

@@ -19,7 +17,6 @@ from starlette.exceptions import HTTPException
 from typing_extensions import Annotated

 from invokeai.app.api.dependencies import ApiDependencies
-from invokeai.app.services.config import get_config
 from invokeai.app.services.model_images.model_images_common import ModelImageFileNotFoundException
 from invokeai.app.services.model_install.model_install_common import ModelInstallJob
 from invokeai.app.services.model_records import (
@@ -34,7 +31,6 @@ from invokeai.backend.model_manager.config import (
    ModelFormat,
    ModelType,
 )
-from invokeai.backend.model_manager.load.model_cache.model_cache_base import CacheStats
 from invokeai.backend.model_manager.metadata.fetch.huggingface import HuggingFaceMetadataFetch
 from invokeai.backend.model_manager.metadata.metadata_base import ModelMetadataWithFiles, UnknownMetadataException
 from invokeai.backend.model_manager.search import ModelSearch
@@ -54,13 +50,6 @@ class ModelsList(BaseModel):
    model_config = ConfigDict(use_enum_values=True)


-class CacheType(str, Enum):
-    """Cache type - one of vram or ram."""
-
-    RAM = "RAM"
-    VRAM = "VRAM"
-
-
 def add_cover_image_to_model_config(config: AnyModelConfig, dependencies: Type[ApiDependencies]) -> AnyModelConfig:
    """Add a cover image URL to a model configuration."""
    cover_image = dependencies.invoker.services.model_images.get_url(config.key)
@@ -808,83 +797,3 @@ async def get_starter_models() -> list[StarterModel]:
        model.dependencies = missing_deps

    return starter_models
-
-
-@model_manager_router.get(
-    "/model_cache",
-    operation_id="get_cache_size",
-    response_model=float,
-    summary="Get maximum size of model manager RAM or VRAM cache.",
-)
-async def get_cache_size(cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM)) -> float:
-    """Return the current RAM or VRAM cache size setting (in GB)."""
-    cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
-    value = 0.0
-    if cache_type == CacheType.RAM:
-        value = cache.max_cache_size
-    elif cache_type == CacheType.VRAM:
-        value = cache.max_vram_cache_size
-    return value
-
-
-@model_manager_router.put(
-    "/model_cache",
-    operation_id="set_cache_size",
-    response_model=float,
-    summary="Set maximum size of model manager RAM or VRAM cache, optionally writing new value out to invokeai.yaml config file.",
-)
-async def set_cache_size(
-    value: float = Query(description="The new value for the maximum cache size"),
-    cache_type: CacheType = Query(description="The cache type", default=CacheType.RAM),
-    persist: bool = Query(description="Write new value out to invokeai.yaml", default=False),
-) -> float:
-    """Set the current RAM or VRAM cache size setting (in GB). ."""
-    cache = ApiDependencies.invoker.services.model_manager.load.ram_cache
-    app_config = get_config()
-    # Record initial state.
-    vram_old = app_config.vram
-    ram_old = app_config.ram
-
-    # Prepare target state.
-    vram_new = vram_old
-    ram_new = ram_old
-    if cache_type == CacheType.RAM:
-        ram_new = value
-    elif cache_type == CacheType.VRAM:
-        vram_new = value
-    else:
-        raise ValueError(f"Unexpected {cache_type=}.")
-
-    config_path = app_config.config_file_path
-    new_config_path = config_path.with_suffix(".yaml.new")
-
-    try:
-        # Try to apply the target state.
-        cache.max_vram_cache_size = vram_new
-        cache.max_cache_size = ram_new
-        app_config.ram = ram_new
-        app_config.vram = vram_new
-        if persist:
-            app_config.write_file(new_config_path)
-            shutil.move(new_config_path, config_path)
-    except Exception as e:
-        # If there was a failure, restore the initial state.
-        cache.max_cache_size = ram_old
-        cache.max_vram_cache_size = vram_old
-        app_config.ram = ram_old
-        app_config.vram = vram_old
-
-        raise RuntimeError("Failed to update cache size") from e
-    return value
-
-
-@model_manager_router.get(
-    "/stats",
-    operation_id="get_stats",
-    response_model=Optional[CacheStats],
-    summary="Get model manager RAM cache performance statistics.",
-)
-async def get_stats() -> Optional[CacheStats]:
-    """Return performance statistics on the model manager's RAM cache. Will return null if no models have been loaded."""
-
-    return ApiDependencies.invoker.services.model_manager.load.ram_cache.stats
--- a/invokeai/app/api/routers/session_queue.py
+++ b/invokeai/app/api/routers/session_queue.py
@@ -11,7 +11,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
-    CancelByDestinationResult,
+    CancelByOriginResult,
    ClearResult,
    EnqueueBatchResult,
    PruneResult,
@@ -107,18 +107,16 @@ async def cancel_by_batch_ids(


@session_queue_router.put(
-    "/{queue_id}/cancel_by_destination",
-    operation_id="cancel_by_destination",
+    "/{queue_id}/cancel_by_origin",
+    operation_id="cancel_by_origin",
    responses={200: {"model": CancelByBatchIDsResult}},
 )
-async def cancel_by_destination(
+async def cancel_by_origin(
    queue_id: str = Path(description="The queue id to perform this operation on"),
-    destination: str = Query(description="The destination to cancel all queue items for"),
-) -> CancelByDestinationResult:
+    origin: str = Query(description="The origin to cancel all queue items for"),
+) -> CancelByOriginResult:
    """Immediately cancels all queue items with the given origin"""
-    return ApiDependencies.invoker.services.session_queue.cancel_by_destination(
-        queue_id=queue_id, destination=destination
-    )
+    return ApiDependencies.invoker.services.session_queue.cancel_by_origin(queue_id=queue_id, origin=origin)


@session_queue_router.put(
--- a/invokeai/app/invocations/baseinvocation.py
+++ b/invokeai/app/invocations/baseinvocation.py
@@ -20,6 +20,7 @@ from typing import (
    Type,
    TypeVar,
    Union,
+    cast,
 )

 import semver
@@ -79,7 +80,7 @@ class UIConfigBase(BaseModel):
    version: str = Field(
        description='The node\'s version. Should be a valid semver string e.g. "1.0.0" or "3.8.13".',
    )
-    node_pack: str = Field(description="The node pack that this node belongs to, will be 'invokeai' for built-in nodes")
+    node_pack: Optional[str] = Field(default=None, description="The node pack this node belongs to, if any")
    classification: Classification = Field(default=Classification.Stable, description="The node's classification")

    model_config = ConfigDict(
@@ -229,16 +230,18 @@ class BaseInvocation(ABC, BaseModel):
    @staticmethod
    def json_schema_extra(schema: dict[str, Any], model_class: Type[BaseInvocation]) -> None:
        """Adds various UI-facing attributes to the invocation's OpenAPI schema."""
-        if title := model_class.UIConfig.title:
-            schema["title"] = title
-        if tags := model_class.UIConfig.tags:
-            schema["tags"] = tags
-        if category := model_class.UIConfig.category:
-            schema["category"] = category
-        if node_pack := model_class.UIConfig.node_pack:
-            schema["node_pack"] = node_pack
-        schema["classification"] = model_class.UIConfig.classification
-        schema["version"] = model_class.UIConfig.version
+        uiconfig = cast(UIConfigBase | None, getattr(model_class, "UIConfig", None))
+        if uiconfig is not None:
+            if uiconfig.title is not None:
+                schema["title"] = uiconfig.title
+            if uiconfig.tags is not None:
+                schema["tags"] = uiconfig.tags
+            if uiconfig.category is not None:
+                schema["category"] = uiconfig.category
+            if uiconfig.node_pack is not None:
+                schema["node_pack"] = uiconfig.node_pack
+            schema["classification"] = uiconfig.classification
+            schema["version"] = uiconfig.version
        if "required" not in schema or not isinstance(schema["required"], list):
            schema["required"] = []
        schema["class"] = "invocation"
@@ -309,7 +312,7 @@ class BaseInvocation(ABC, BaseModel):
        json_schema_extra={"field_kind": FieldKind.NodeAttribute},
    )

-    UIConfig: ClassVar[UIConfigBase]
+    UIConfig: ClassVar[Type[UIConfigBase]]

    model_config = ConfigDict(
        protected_namespaces=(),
@@ -438,25 +441,30 @@ def invocation(
        validate_fields(cls.model_fields, invocation_type)

        # Add OpenAPI schema extras
-        uiconfig: dict[str, Any] = {}
-        uiconfig["title"] = title
-        uiconfig["tags"] = tags
-        uiconfig["category"] = category
-        uiconfig["classification"] = classification
-        # The node pack is the module name - will be "invokeai" for built-in nodes
-        uiconfig["node_pack"] = cls.__module__.split(".")[0]
+        uiconfig_name = cls.__qualname__ + ".UIConfig"
+        if not hasattr(cls, "UIConfig") or cls.UIConfig.__qualname__ != uiconfig_name:
+            cls.UIConfig = type(uiconfig_name, (UIConfigBase,), {})
+        cls.UIConfig.title = title
+        cls.UIConfig.tags = tags
+        cls.UIConfig.category = category
+        cls.UIConfig.classification = classification
+
+        # Only custom nodes belong to a node pack. Built-in nodes live directly in
+        # the "invokeai.app.invocations" package, so any other parent package
+        # means the node came from a custom pack, which is named after the
+        # top-level module it was loaded from.
+        is_custom_node = cls.__module__.rsplit(".", 1)[0] != "invokeai.app.invocations"
+        cls.UIConfig.node_pack = cls.__module__.split(".")[0] if is_custom_node else None

        if version is not None:
            try:
                semver.Version.parse(version)
            except ValueError as e:
                raise InvalidVersionError(f'Invalid version string for node "{invocation_type}": "{version}"') from e
-            uiconfig["version"] = version
+            cls.UIConfig.version = version
        else:
            logger.warn(f'No version specified for node "{invocation_type}", using "1.0.0"')
-            uiconfig["version"] = "1.0.0"
-
-        cls.UIConfig = UIConfigBase(**uiconfig)
+            cls.UIConfig.version = "1.0.0"

        if use_cache is not None:
            cls.model_fields["use_cache"].default = use_cache
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -19,8 +19,7 @@ from invokeai.app.invocations.model import CLIPField
 from invokeai.app.invocations.primitives import ConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.ti_utils import generate_ti_list
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoraPatcher
+from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
    BasicConditioningInfo,
@@ -83,10 +82,9 @@ class CompelInvocation(BaseInvocation):
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoraPatcher.apply_lora_patches(
-                model=text_encoder,
-                patches=_lora_loader(),
-                prefix="lora_te_",
+            ModelPatcher.apply_lora_text_encoder(
+                text_encoder,
+                loras=_lora_loader(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
@@ -179,9 +177,9 @@ class SDXLPromptInvocationBase:
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoraPatcher.apply_lora_patches(
+            ModelPatcher.apply_lora(
                text_encoder,
-                patches=_lora_loader(),
+                loras=_lora_loader(),
                prefix=lora_prefix,
                cached_weights=cached_weights,
            ),
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -36,8 +36,7 @@ from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoraPatcher
+from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState
@@ -186,7 +185,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
    )
    denoise_mask: Optional[DenoiseMaskField] = InputField(
        default=None,
-        description=FieldDescriptions.denoise_mask,
+        description=FieldDescriptions.mask,
        input=Input.Connection,
        ui_order=8,
    )
@@ -980,10 +979,9 @@ class DenoiseLatentsInvocation(BaseInvocation):
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
-            LoraPatcher.apply_lora_patches(
-                model=unet,
-                patches=_lora_loader(),
-                prefix="lora_unet_",
+            ModelPatcher.apply_lora_unet(
+                unet,
+                loras=_lora_loader(),
                cached_weights=cached_weights,
            ),
        ):
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -40,18 +40,14 @@ class UIType(str, Enum, metaclass=MetaEnum):

    # region Model Field Types
    MainModel = "MainModelField"
-    FluxMainModel = "FluxMainModelField"
    SDXLMainModel = "SDXLMainModelField"
    SDXLRefinerModel = "SDXLRefinerModelField"
    ONNXModel = "ONNXModelField"
    VAEModel = "VAEModelField"
-    FluxVAEModel = "FluxVAEModelField"
    LoRAModel = "LoRAModelField"
    ControlNetModel = "ControlNetModelField"
    IPAdapterModel = "IPAdapterModelField"
    T2IAdapterModel = "T2IAdapterModelField"
-    T5EncoderModel = "T5EncoderModelField"
-    CLIPEmbedModel = "CLIPEmbedModelField"
    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
    # endregion

@@ -129,17 +125,13 @@ class FieldDescriptions:
    negative_cond = "Negative conditioning tensor"
    noise = "Noise tensor"
    clip = "CLIP (tokenizer, text encoder, LoRAs) and skipped layer count"
-    t5_encoder = "T5 tokenizer and text encoder"
-    clip_embed_model = "CLIP Embed loader"
    unet = "UNet (scheduler, LoRAs)"
-    transformer = "Transformer"
    vae = "VAE"
    cond = "Conditioning tensor"
    controlnet_model = "ControlNet model to load"
    vae_model = "VAE model to load"
    lora_model = "LoRA model to load"
    main_model = "Main model (UNet, VAE, CLIP) to load"
-    flux_model = "Flux model (Transformer) to load"
    sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
    sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
    onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
@@ -181,7 +173,7 @@ class FieldDescriptions:
    )
    num_1 = "The first number"
    num_2 = "The second number"
-    denoise_mask = "A mask of the region to apply the denoising process to."
+    mask = "The mask to use for the operation"
    board = "The board to save the image to"
    image = "The image to process"
    tile_size = "Tile size"
@@ -239,12 +231,6 @@ class ColorField(BaseModel):
        return (self.r, self.g, self.b, self.a)


-class FluxConditioningField(BaseModel):
-    """A conditioning tensor primitive value"""
-
-    conditioning_name: str = Field(description="The name of conditioning tensor")
-
-
 class ConditioningField(BaseModel):
    """A conditioning tensor primitive value"""

--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -1,267 +0,0 @@
-from typing import Callable, Iterator, Optional, Tuple
-
-import torch
-import torchvision.transforms as tv_transforms
-from torchvision.transforms.functional import resize as tv_resize
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import (
-    DenoiseMaskField,
-    FieldDescriptions,
-    FluxConditioningField,
-    Input,
-    InputField,
-    LatentsField,
-    WithBoard,
-    WithMetadata,
-)
-from invokeai.app.invocations.model import TransformerField
-from invokeai.app.invocations.primitives import LatentsOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.denoise import denoise
-from invokeai.backend.flux.inpaint_extension import InpaintExtension
-from invokeai.backend.flux.model import Flux
-from invokeai.backend.flux.sampling_utils import (
-    clip_timestep_schedule,
-    generate_img_ids,
-    get_noise,
-    get_schedule,
-    pack,
-    unpack,
-)
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoraPatcher
-from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo
-from invokeai.backend.util.devices import TorchDevice
-
-
-@invocation(
-    "flux_denoise",
-    title="FLUX Denoise",
-    tags=["image", "flux"],
-    category="image",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Run denoising process with a FLUX transformer model."""
-
-    # If latents is provided, this means we are doing image-to-image.
-    latents: Optional[LatentsField] = InputField(
-        default=None,
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    # denoise_mask is used for image-to-image inpainting. Only the masked region is modified.
-    denoise_mask: Optional[DenoiseMaskField] = InputField(
-        default=None,
-        description=FieldDescriptions.denoise_mask,
-        input=Input.Connection,
-    )
-    denoising_start: float = InputField(
-        default=0.0,
-        ge=0,
-        le=1,
-        description=FieldDescriptions.denoising_start,
-    )
-    denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    transformer: TransformerField = InputField(
-        description=FieldDescriptions.flux_model,
-        input=Input.Connection,
-        title="Transformer",
-    )
-    positive_text_conditioning: FluxConditioningField = InputField(
-        description=FieldDescriptions.positive_cond, input=Input.Connection
-    )
-    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
-    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
-    num_steps: int = InputField(
-        default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
-    )
-    guidance: float = InputField(
-        default=4.0,
-        description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
-    )
-    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        latents = self._run_diffusion(context)
-        latents = latents.detach().to("cpu")
-
-        name = context.tensors.save(tensor=latents)
-        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
-
-    def _run_diffusion(
-        self,
-        context: InvocationContext,
-    ):
-        inference_dtype = torch.bfloat16
-
-        # Load the conditioning data.
-        cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name)
-        assert len(cond_data.conditionings) == 1
-        flux_conditioning = cond_data.conditionings[0]
-        assert isinstance(flux_conditioning, FLUXConditioningInfo)
-        flux_conditioning = flux_conditioning.to(dtype=inference_dtype)
-        t5_embeddings = flux_conditioning.t5_embeds
-        clip_embeddings = flux_conditioning.clip_embeds
-
-        # Load the input latents, if provided.
-        init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
-        if init_latents is not None:
-            init_latents = init_latents.to(device=TorchDevice.choose_torch_device(), dtype=inference_dtype)
-
-        # Prepare input noise.
-        noise = get_noise(
-            num_samples=1,
-            height=self.height,
-            width=self.width,
-            device=TorchDevice.choose_torch_device(),
-            dtype=inference_dtype,
-            seed=self.seed,
-        )
-
-        transformer_info = context.models.load(self.transformer.transformer)
-        is_schnell = "schnell" in transformer_info.config.config_path
-
-        # Calculate the timestep schedule.
-        image_seq_len = noise.shape[-1] * noise.shape[-2] // 4
-        timesteps = get_schedule(
-            num_steps=self.num_steps,
-            image_seq_len=image_seq_len,
-            shift=not is_schnell,
-        )
-
-        # Clip the timesteps schedule based on denoising_start and denoising_end.
-        timesteps = clip_timestep_schedule(timesteps, self.denoising_start, self.denoising_end)
-
-        # Prepare input latent image.
-        if init_latents is not None:
-            # If init_latents is provided, we are doing image-to-image.
-
-            if is_schnell:
-                context.logger.warning(
-                    "Running image-to-image with a FLUX schnell model. This is not recommended. The results are likely "
-                    "to be poor. Consider using a FLUX dev model instead."
-                )
-
-            # Noise the orig_latents by the appropriate amount for the first timestep.
-            t_0 = timesteps[0]
-            x = t_0 * noise + (1.0 - t_0) * init_latents
-        else:
-            # init_latents are not provided, so we are not doing image-to-image (i.e. we are starting from pure noise).
-            if self.denoising_start > 1e-5:
-                raise ValueError("denoising_start should be 0 when initial latents are not provided.")
-
-            x = noise
-
-        # If len(timesteps) == 1, then short-circuit. We are just noising the input latents, but not taking any
-        # denoising steps.
-        if len(timesteps) <= 1:
-            return x
-
-        inpaint_mask = self._prep_inpaint_mask(context, x)
-
-        b, _c, h, w = x.shape
-        img_ids = generate_img_ids(h=h, w=w, batch_size=b, device=x.device, dtype=x.dtype)
-
-        bs, t5_seq_len, _ = t5_embeddings.shape
-        txt_ids = torch.zeros(bs, t5_seq_len, 3, dtype=inference_dtype, device=TorchDevice.choose_torch_device())
-
-        # Pack all latent tensors.
-        init_latents = pack(init_latents) if init_latents is not None else None
-        inpaint_mask = pack(inpaint_mask) if inpaint_mask is not None else None
-        noise = pack(noise)
-        x = pack(x)
-
-        # Now that we have 'packed' the latent tensors, verify that we calculated the image_seq_len correctly.
-        assert image_seq_len == x.shape[1]
-
-        # Prepare inpaint extension.
-        inpaint_extension: InpaintExtension | None = None
-        if inpaint_mask is not None:
-            assert init_latents is not None
-            inpaint_extension = InpaintExtension(
-                init_latents=init_latents,
-                inpaint_mask=inpaint_mask,
-                noise=noise,
-            )
-
-        with (
-            transformer_info.model_on_device() as (cached_weights, transformer),
-            # Apply the LoRA after transformer has been moved to its target device for faster patching.
-            LoraPatcher.apply_lora_patches(
-                model=transformer,
-                patches=self._lora_iterator(context),
-                prefix="",
-                cached_weights=cached_weights,
-            ),
-        ):
-            assert isinstance(transformer, Flux)
-
-            x = denoise(
-                model=transformer,
-                img=x,
-                img_ids=img_ids,
-                txt=t5_embeddings,
-                txt_ids=txt_ids,
-                vec=clip_embeddings,
-                timesteps=timesteps,
-                step_callback=self._build_step_callback(context),
-                guidance=self.guidance,
-                inpaint_extension=inpaint_extension,
-            )
-
-        x = unpack(x.float(), self.height, self.width)
-        return x
-
-    def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> torch.Tensor | None:
-        """Prepare the inpaint mask.
-
-        - Loads the mask
-        - Resizes if necessary
-        - Casts to same device/dtype as latents
-        - Expands mask to the same shape as latents so that they line up after 'packing'
-
-        Args:
-            context (InvocationContext): The invocation context, for loading the inpaint mask.
-            latents (torch.Tensor): A latent image tensor. In 'unpacked' format. Used to determine the target shape,
-                device, and dtype for the inpaint mask.
-
-        Returns:
-            torch.Tensor | None: Inpaint mask.
-        """
-        if self.denoise_mask is None:
-            return None
-
-        mask = context.tensors.load(self.denoise_mask.mask_name)
-
-        _, _, latent_height, latent_width = latents.shape
-        mask = tv_resize(
-            img=mask,
-            size=[latent_height, latent_width],
-            interpolation=tv_transforms.InterpolationMode.BILINEAR,
-            antialias=False,
-        )
-
-        mask = mask.to(device=latents.device, dtype=latents.dtype)
-
-        # Expand the inpaint mask to the same shape as `latents` so that when we 'pack' `mask` it lines up with
-        # `latents`.
-        return mask.expand_as(latents)
-
-    def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[LoRAModelRaw, float]]:
-        for lora in self.transformer.loras:
-            lora_info = context.models.load(lora.lora)
-            assert isinstance(lora_info.model, LoRAModelRaw)
-            yield (lora_info.model, lora.weight)
-            del lora_info
-
-    def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
-        def step_callback(state: PipelineIntermediateState) -> None:
-            state.latents = unpack(state.latents.float(), self.height, self.width).squeeze()
-            context.util.flux_step_callback(state)
-
-        return step_callback
--- a/invokeai/app/invocations/flux_lora_loader.py
+++ b/invokeai/app/invocations/flux_lora_loader.py
@@ -1,53 +0,0 @@
-from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
-from invokeai.app.invocations.model import LoRAField, ModelIdentifierField, TransformerField
-from invokeai.app.services.shared.invocation_context import InvocationContext
-
-
-@invocation_output("flux_lora_loader_output")
-class FluxLoRALoaderOutput(BaseInvocationOutput):
-    """FLUX LoRA Loader Output"""
-
-    transformer: TransformerField = OutputField(
-        default=None, description=FieldDescriptions.transformer, title="FLUX Transformer"
-    )
-
-
-@invocation(
-    "flux_lora_loader",
-    title="FLUX LoRA",
-    tags=["lora", "model", "flux"],
-    category="model",
-    version="1.0.0",
-)
-class FluxLoRALoaderInvocation(BaseInvocation):
-    """Apply a LoRA model to a FLUX transformer."""
-
-    lora: ModelIdentifierField = InputField(
-        description=FieldDescriptions.lora_model, title="LoRA", ui_type=UIType.LoRAModel
-    )
-    weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight)
-    transformer: TransformerField = InputField(
-        description=FieldDescriptions.transformer,
-        input=Input.Connection,
-        title="FLUX Transformer",
-    )
-
-    def invoke(self, context: InvocationContext) -> FluxLoRALoaderOutput:
-        lora_key = self.lora.key
-
-        if not context.models.exists(lora_key):
-            raise ValueError(f"Unknown lora: {lora_key}!")
-
-        if any(lora.lora.key == lora_key for lora in self.transformer.loras):
-            raise Exception(f'LoRA "{lora_key}" already applied to transformer.')
-
-        transformer = self.transformer.model_copy(deep=True)
-        transformer.loras.append(
-            LoRAField(
-                lora=self.lora,
-                weight=self.weight,
-            )
-        )
-
-        return FluxLoRALoaderOutput(transformer=transformer)
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -1,92 +0,0 @@
-from typing import Literal
-
-import torch
-from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
-from invokeai.app.invocations.model import CLIPField, T5EncoderField
-from invokeai.app.invocations.primitives import FluxConditioningOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.modules.conditioner import HFEncoder
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo
-
-
-@invocation(
-    "flux_text_encoder",
-    title="FLUX Text Encoding",
-    tags=["prompt", "conditioning", "flux"],
-    category="conditioning",
-    version="1.0.0",
-    classification=Classification.Prototype,
-)
-class FluxTextEncoderInvocation(BaseInvocation):
-    """Encodes and preps a prompt for a flux image."""
-
-    clip: CLIPField = InputField(
-        title="CLIP",
-        description=FieldDescriptions.clip,
-        input=Input.Connection,
-    )
-    t5_encoder: T5EncoderField = InputField(
-        title="T5Encoder",
-        description=FieldDescriptions.t5_encoder,
-        input=Input.Connection,
-    )
-    t5_max_seq_len: Literal[256, 512] = InputField(
-        description="Max sequence length for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models."
-    )
-    prompt: str = InputField(description="Text prompt to encode.")
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> FluxConditioningOutput:
-        # Note: The T5 and CLIP encoding are done in separate functions to ensure that all model references are locally
-        # scoped. This ensures that the T5 model can be freed and gc'd before loading the CLIP model (if necessary).
-        t5_embeddings = self._t5_encode(context)
-        clip_embeddings = self._clip_encode(context)
-        conditioning_data = ConditioningFieldData(
-            conditionings=[FLUXConditioningInfo(clip_embeds=clip_embeddings, t5_embeds=t5_embeddings)]
-        )
-
-        conditioning_name = context.conditioning.save(conditioning_data)
-        return FluxConditioningOutput.build(conditioning_name)
-
-    def _t5_encode(self, context: InvocationContext) -> torch.Tensor:
-        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
-        t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder)
-
-        prompt = [self.prompt]
-
-        with (
-            t5_text_encoder_info as t5_text_encoder,
-            t5_tokenizer_info as t5_tokenizer,
-        ):
-            assert isinstance(t5_text_encoder, T5EncoderModel)
-            assert isinstance(t5_tokenizer, T5Tokenizer)
-
-            t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len)
-
-            prompt_embeds = t5_encoder(prompt)
-
-        assert isinstance(prompt_embeds, torch.Tensor)
-        return prompt_embeds
-
-    def _clip_encode(self, context: InvocationContext) -> torch.Tensor:
-        clip_tokenizer_info = context.models.load(self.clip.tokenizer)
-        clip_text_encoder_info = context.models.load(self.clip.text_encoder)
-
-        prompt = [self.prompt]
-
-        with (
-            clip_text_encoder_info as clip_text_encoder,
-            clip_tokenizer_info as clip_tokenizer,
-        ):
-            assert isinstance(clip_text_encoder, CLIPTextModel)
-            assert isinstance(clip_tokenizer, CLIPTokenizer)
-
-            clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77)
-
-            pooled_prompt_embeds = clip_encoder(prompt)
-
-        assert isinstance(pooled_prompt_embeds, torch.Tensor)
-        return pooled_prompt_embeds
--- a/invokeai/app/invocations/flux_vae_decode.py
+++ b/invokeai/app/invocations/flux_vae_decode.py
@@ -1,60 +0,0 @@
-import torch
-from einops import rearrange
-from PIL import Image
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
-from invokeai.app.invocations.fields import (
-    FieldDescriptions,
-    Input,
-    InputField,
-    LatentsField,
-    WithBoard,
-    WithMetadata,
-)
-from invokeai.app.invocations.model import VAEField
-from invokeai.app.invocations.primitives import ImageOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.modules.autoencoder import AutoEncoder
-from invokeai.backend.model_manager.load.load_base import LoadedModel
-from invokeai.backend.util.devices import TorchDevice
-
-
-@invocation(
-    "flux_vae_decode",
-    title="FLUX Latents to Image",
-    tags=["latents", "image", "vae", "l2i", "flux"],
-    category="latents",
-    version="1.0.0",
-)
-class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
-    """Generates an image from latents."""
-
-    latents: LatentsField = InputField(
-        description=FieldDescriptions.latents,
-        input=Input.Connection,
-    )
-    vae: VAEField = InputField(
-        description=FieldDescriptions.vae,
-        input=Input.Connection,
-    )
-
-    def _vae_decode(self, vae_info: LoadedModel, latents: torch.Tensor) -> Image.Image:
-        with vae_info as vae:
-            assert isinstance(vae, AutoEncoder)
-            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype())
-            img = vae.decode(latents)
-
-        img = img.clamp(-1, 1)
-        img = rearrange(img[0], "c h w -> h w c")  # noqa: F821
-        img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy())
-        return img_pil
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> ImageOutput:
-        latents = context.tensors.load(self.latents.latents_name)
-        vae_info = context.models.load(self.vae.vae)
-        image = self._vae_decode(vae_info=vae_info, latents=latents)
-
-        TorchDevice.empty_cache()
-        image_dto = context.images.save(image=image)
-        return ImageOutput.build(image_dto)
--- a/invokeai/app/invocations/flux_vae_encode.py
+++ b/invokeai/app/invocations/flux_vae_encode.py
@@ -1,67 +0,0 @@
-import einops
-import torch
-
-from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
-from invokeai.app.invocations.fields import (
-    FieldDescriptions,
-    ImageField,
-    Input,
-    InputField,
-)
-from invokeai.app.invocations.model import VAEField
-from invokeai.app.invocations.primitives import LatentsOutput
-from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.modules.autoencoder import AutoEncoder
-from invokeai.backend.model_manager import LoadedModel
-from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
-from invokeai.backend.util.devices import TorchDevice
-
-
-@invocation(
-    "flux_vae_encode",
-    title="FLUX Image to Latents",
-    tags=["latents", "image", "vae", "i2l", "flux"],
-    category="latents",
-    version="1.0.0",
-)
-class FluxVaeEncodeInvocation(BaseInvocation):
-    """Encodes an image into latents."""
-
-    image: ImageField = InputField(
-        description="The image to encode.",
-    )
-    vae: VAEField = InputField(
-        description=FieldDescriptions.vae,
-        input=Input.Connection,
-    )
-
-    @staticmethod
-    def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
-        # TODO(ryand): Expose seed parameter at the invocation level.
-        # TODO(ryand): Write a util function for generating random tensors that is consistent across devices / dtypes.
-        # There's a starting point in get_noise(...), but it needs to be extracted and generalized. This function
-        # should be used for VAE encode sampling.
-        generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0)
-        with vae_info as vae:
-            assert isinstance(vae, AutoEncoder)
-            image_tensor = image_tensor.to(
-                device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
-            )
-            latents = vae.encode(image_tensor, sample=True, generator=generator)
-            return latents
-
-    @torch.no_grad()
-    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        image = context.images.get_pil(self.image.image_name)
-
-        vae_info = context.models.load(self.vae.vae)
-
-        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
-        if image_tensor.dim() == 3:
-            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
-
-        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
-
-        latents = latents.to("cpu")
-        name = context.tensors.save(tensor=latents)
-        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
--- a/invokeai/app/invocations/mask.py
+++ b/invokeai/app/invocations/mask.py
@@ -126,7 +126,7 @@ class ImageMaskToTensorInvocation(BaseInvocation, WithMetadata):
    title="Tensor Mask to Image",
    tags=["mask"],
    category="mask",
-    version="1.1.0",
+    version="1.0.0",
 )
 class MaskTensorToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Convert a mask tensor to an image."""
@@ -135,11 +135,6 @@ class MaskTensorToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

    def invoke(self, context: InvocationContext) -> ImageOutput:
        mask = context.tensors.load(self.mask.tensor_name)
-
-        # Squeeze the channel dimension if it exists.
-        if mask.dim() == 3:
-            mask = mask.squeeze(0)
-
        # Ensure that the mask is binary.
        if mask.dtype != torch.bool:
            mask = mask > 0.5
--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@@ -1,5 +1,5 @@
 import copy
-from typing import List, Literal, Optional
+from typing import List, Optional

 from pydantic import BaseModel, Field

@@ -13,14 +13,7 @@ from invokeai.app.invocations.baseinvocation import (
 from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
-from invokeai.backend.flux.util import max_seq_lengths
-from invokeai.backend.model_manager.config import (
-    AnyModelConfig,
-    BaseModelType,
-    CheckpointConfigBase,
-    ModelType,
-    SubModelType,
-)
+from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelType, SubModelType


 class ModelIdentifierField(BaseModel):
@@ -67,16 +60,6 @@ class CLIPField(BaseModel):
    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


-class TransformerField(BaseModel):
-    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
-    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
-
-
-class T5EncoderField(BaseModel):
-    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
-    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
-
-
 class VAEField(BaseModel):
    vae: ModelIdentifierField = Field(description="Info to load vae submodel")
    seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')
@@ -139,78 +122,6 @@ class ModelIdentifierInvocation(BaseInvocation):
        return ModelIdentifierOutput(model=self.model)


-@invocation_output("flux_model_loader_output")
-class FluxModelLoaderOutput(BaseInvocationOutput):
-    """Flux base model loader output"""
-
-    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
-    clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP")
-    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5_encoder, title="T5 Encoder")
-    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
-    max_seq_len: Literal[256, 512] = OutputField(
-        description="The max sequence length to used for the T5 encoder. (256 for schnell transformer, 512 for dev transformer)",
-        title="Max Seq Length",
-    )
-
-
-@invocation(
-    "flux_model_loader",
-    title="Flux Main Model",
-    tags=["model", "flux"],
-    category="model",
-    version="1.0.4",
-    classification=Classification.Prototype,
-)
-class FluxModelLoaderInvocation(BaseInvocation):
-    """Loads a flux base model, outputting its submodels."""
-
-    model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.flux_model,
-        ui_type=UIType.FluxMainModel,
-        input=Input.Direct,
-    )
-
-    t5_encoder_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.t5_encoder, ui_type=UIType.T5EncoderModel, input=Input.Direct, title="T5 Encoder"
-    )
-
-    clip_embed_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.clip_embed_model,
-        ui_type=UIType.CLIPEmbedModel,
-        input=Input.Direct,
-        title="CLIP Embed",
-    )
-
-    vae_model: ModelIdentifierField = InputField(
-        description=FieldDescriptions.vae_model, ui_type=UIType.FluxVAEModel, title="VAE"
-    )
-
-    def invoke(self, context: InvocationContext) -> FluxModelLoaderOutput:
-        for key in [self.model.key, self.t5_encoder_model.key, self.clip_embed_model.key, self.vae_model.key]:
-            if not context.models.exists(key):
-                raise ValueError(f"Unknown model: {key}")
-
-        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
-        vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
-
-        tokenizer = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
-        clip_encoder = self.clip_embed_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
-
-        tokenizer2 = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer2})
-        t5_encoder = self.t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder2})
-
-        transformer_config = context.models.get_config(transformer)
-        assert isinstance(transformer_config, CheckpointConfigBase)
-
-        return FluxModelLoaderOutput(
-            transformer=TransformerField(transformer=transformer, loras=[]),
-            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
-            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
-            vae=VAEField(vae=vae),
-            max_seq_len=max_seq_lengths[transformer_config.config_path],
-        )
-
-
@invocation(
    "main_model_loader",
    title="Main Model",
--- a/invokeai/app/invocations/primitives.py
+++ b/invokeai/app/invocations/primitives.py
@@ -12,7 +12,6 @@ from invokeai.app.invocations.fields import (
    ConditioningField,
    DenoiseMaskField,
    FieldDescriptions,
-    FluxConditioningField,
    ImageField,
    Input,
    InputField,
@@ -415,17 +414,6 @@ class MaskOutput(BaseInvocationOutput):
    height: int = OutputField(description="The height of the mask in pixels.")


-@invocation_output("flux_conditioning_output")
-class FluxConditioningOutput(BaseInvocationOutput):
-    """Base class for nodes that output a single conditioning tensor"""
-
-    conditioning: FluxConditioningField = OutputField(description=FieldDescriptions.cond)
-
-    @classmethod
-    def build(cls, conditioning_name: str) -> "FluxConditioningOutput":
-        return cls(conditioning=FluxConditioningField(conditioning_name=conditioning_name))
-
-
@invocation_output("conditioning_output")
 class ConditioningOutput(BaseInvocationOutput):
    """Base class for nodes that output a single conditioning tensor"""
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -22,8 +22,8 @@ from invokeai.app.invocations.fields import (
 from invokeai.app.invocations.model import UNetField
 from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoraPatcher
+from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion.diffusers_pipeline import ControlNetData, PipelineIntermediateState
 from invokeai.backend.stable_diffusion.multi_diffusion_pipeline import (
    MultiDiffusionPipeline,
@@ -204,11 +204,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
        # Load the UNet model.
        unet_info = context.models.load(self.unet.unet)

-        with (
-            ExitStack() as exit_stack,
-            unet_info as unet,
-            LoraPatcher.apply_lora_patches(model=unet, patches=_lora_loader(), prefix="lora_unet_"),
-        ):
+        with ExitStack() as exit_stack, unet_info as unet, ModelPatcher.apply_lora_unet(unet, _lora_loader()):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
            if noise is not None:
--- a/invokeai/app/services/events/events_common.py
+++ b/invokeai/app/services/events/events_common.py
@@ -88,8 +88,7 @@ class QueueItemEventBase(QueueEventBase):

    item_id: int = Field(description="The ID of the queue item")
    batch_id: str = Field(description="The ID of the queue batch")
-    origin: str | None = Field(default=None, description="The origin of the queue item")
-    destination: str | None = Field(default=None, description="The destination of the queue item")
+    origin: str | None = Field(default=None, description="The origin of the batch")


 class InvocationEventBase(QueueItemEventBase):
@@ -115,7 +114,6 @@ class InvocationStartedEvent(InvocationEventBase):
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
            origin=queue_item.origin,
-            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -150,7 +148,6 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
            origin=queue_item.origin,
-            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -189,7 +186,6 @@ class InvocationCompleteEvent(InvocationEventBase):
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
            origin=queue_item.origin,
-            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -223,7 +219,6 @@ class InvocationErrorEvent(InvocationEventBase):
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
            origin=queue_item.origin,
-            destination=queue_item.destination,
            session_id=queue_item.session_id,
            invocation=invocation,
            invocation_source_id=queue_item.session.prepared_source_mapping[invocation.id],
@@ -262,7 +257,6 @@ class QueueItemStatusChangedEvent(QueueItemEventBase):
            item_id=queue_item.item_id,
            batch_id=queue_item.batch_id,
            origin=queue_item.origin,
-            destination=queue_item.destination,
            session_id=queue_item.session_id,
            status=queue_item.status,
            error_type=queue_item.error_type,
--- a/invokeai/app/services/model_install/model_install_common.py
+++ b/invokeai/app/services/model_install/model_install_common.py
@@ -103,7 +103,7 @@ class HFModelSource(StringLikeSource):
        if self.variant:
            base += f":{self.variant or ''}"
        if self.subfolder:
-            base += f"::{self.subfolder.as_posix()}"
+            base += f":{self.subfolder}"
        return base


--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@@ -783,9 +783,8 @@ class ModelInstallService(ModelInstallServiceBase):
        # So what we do is to synthesize a folder named "sdxl-turbo_vae" here.
        if subfolder:
            top = Path(remote_files[0].path.parts[0])  # e.g. "sdxl-turbo/"
-            path_to_remove = top / subfolder  # sdxl-turbo/vae/
-            subfolder_rename = subfolder.name.replace("/", "_").replace("\\", "_")
-            path_to_add = Path(f"{top}_{subfolder_rename}")
+            path_to_remove = top / subfolder.parts[-1]  # sdxl-turbo/vae/
+            path_to_add = Path(f"{top}_{subfolder}")
        else:
            path_to_remove = Path(".")
            path_to_add = Path(".")
--- a/invokeai/app/services/model_records/model_records_base.py
+++ b/invokeai/app/services/model_records/model_records_base.py
@@ -77,7 +77,6 @@ class ModelRecordChanges(BaseModelExcludeNull):
    type: Optional[ModelType] = Field(description="Type of model", default=None)
    key: Optional[str] = Field(description="Database ID for this model", default=None)
    hash: Optional[str] = Field(description="hash of model file", default=None)
-    format: Optional[str] = Field(description="format of model file", default=None)
    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
    default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field(
        description="Default settings for this model", default=None
--- a/invokeai/app/services/session_queue/session_queue_base.py
+++ b/invokeai/app/services/session_queue/session_queue_base.py
@@ -6,7 +6,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
-    CancelByDestinationResult,
+    CancelByOriginResult,
    CancelByQueueIDResult,
    ClearResult,
    EnqueueBatchResult,
@@ -97,8 +97,8 @@ class SessionQueueBase(ABC):
        pass

    @abstractmethod
-    def cancel_by_destination(self, queue_id: str, destination: str) -> CancelByDestinationResult:
-        """Cancels all queue items with the given batch destination"""
+    def cancel_by_origin(self, queue_id: str, origin: str) -> CancelByOriginResult:
+        """Cancels all queue items with the given batch origin"""
        pass

    @abstractmethod
--- a/invokeai/app/services/session_queue/session_queue_common.py
+++ b/invokeai/app/services/session_queue/session_queue_common.py
@@ -77,14 +77,7 @@ BatchDataCollection: TypeAlias = list[list[BatchDatum]]

 class Batch(BaseModel):
    batch_id: str = Field(default_factory=uuid_string, description="The ID of the batch")
-    origin: str | None = Field(
-        default=None,
-        description="The origin of this queue item. This data is used by the frontend to determine how to handle results.",
-    )
-    destination: str | None = Field(
-        default=None,
-        description="The origin of this queue item. This data is used by the frontend to determine how to handle results",
-    )
+    origin: str | None = Field(default=None, description="The origin of this batch.")
    data: Optional[BatchDataCollection] = Field(default=None, description="The batch data collection.")
    graph: Graph = Field(description="The graph to initialize the session with")
    workflow: Optional[WorkflowWithoutID] = Field(
@@ -203,14 +196,7 @@ class SessionQueueItemWithoutGraph(BaseModel):
    status: QUEUE_ITEM_STATUS = Field(default="pending", description="The status of this queue item")
    priority: int = Field(default=0, description="The priority of this queue item")
    batch_id: str = Field(description="The ID of the batch associated with this queue item")
-    origin: str | None = Field(
-        default=None,
-        description="The origin of this queue item. This data is used by the frontend to determine how to handle results.",
-    )
-    destination: str | None = Field(
-        default=None,
-        description="The origin of this queue item. This data is used by the frontend to determine how to handle results",
-    )
+    origin: str | None = Field(default=None, description="The origin of this queue item. ")
    session_id: str = Field(
        description="The ID of the session associated with this queue item. The session doesn't exist in graph_executions until the queue item is executed."
    )
@@ -311,7 +297,6 @@ class BatchStatus(BaseModel):
    queue_id: str = Field(..., description="The ID of the queue")
    batch_id: str = Field(..., description="The ID of the batch")
    origin: str | None = Field(..., description="The origin of the batch")
-    destination: str | None = Field(..., description="The destination of the batch")
    pending: int = Field(..., description="Number of queue items with status 'pending'")
    in_progress: int = Field(..., description="Number of queue items with status 'in_progress'")
    completed: int = Field(..., description="Number of queue items with status 'complete'")
@@ -346,10 +331,10 @@ class CancelByBatchIDsResult(BaseModel):
    canceled: int = Field(..., description="Number of queue items canceled")


-class CancelByDestinationResult(CancelByBatchIDsResult):
-    """Result of canceling by a destination"""
+class CancelByOriginResult(BaseModel):
+    """Result of canceling by origin"""

-    pass
+    canceled: int = Field(..., description="Number of queue items canceled")


 class CancelByQueueIDResult(CancelByBatchIDsResult):
@@ -458,7 +443,6 @@ class SessionQueueValueToInsert(NamedTuple):
    priority: int  # priority
    workflow: Optional[str]  # workflow json
    origin: str | None
-    destination: str | None


 ValuesToInsert: TypeAlias = list[SessionQueueValueToInsert]
@@ -480,7 +464,6 @@ def prepare_values_to_insert(queue_id: str, batch: Batch, priority: int, max_new
                priority,  # priority
                json.dumps(workflow, default=to_jsonable_python) if workflow else None,  # workflow (json)
                batch.origin,  # origin
-                batch.destination,  # destination
            )
        )
    return values_to_insert
--- a/invokeai/app/services/session_queue/session_queue_sqlite.py
+++ b/invokeai/app/services/session_queue/session_queue_sqlite.py
@@ -10,7 +10,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
    Batch,
    BatchStatus,
    CancelByBatchIDsResult,
-    CancelByDestinationResult,
+    CancelByOriginResult,
    CancelByQueueIDResult,
    ClearResult,
    EnqueueBatchResult,
@@ -128,8 +128,8 @@ class SqliteSessionQueue(SessionQueueBase):

            self.__cursor.executemany(
                """--sql
-                INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow, origin, destination)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow, origin)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                values_to_insert,
            )
@@ -426,19 +426,19 @@ class SqliteSessionQueue(SessionQueueBase):
            self.__lock.release()
        return CancelByBatchIDsResult(canceled=count)

-    def cancel_by_destination(self, queue_id: str, destination: str) -> CancelByDestinationResult:
+    def cancel_by_origin(self, queue_id: str, origin: str) -> CancelByOriginResult:
        try:
            current_queue_item = self.get_current(queue_id)
            self.__lock.acquire()
            where = """--sql
                WHERE
                  queue_id == ?
-                  AND destination == ?
+                  AND origin == ?
                  AND status != 'canceled'
                  AND status != 'completed'
                  AND status != 'failed'
                """
-            params = (queue_id, destination)
+            params = (queue_id, origin)
            self.__cursor.execute(
                f"""--sql
                SELECT COUNT(*)
@@ -457,14 +457,14 @@ class SqliteSessionQueue(SessionQueueBase):
                params,
            )
            self.__conn.commit()
-            if current_queue_item is not None and current_queue_item.destination == destination:
+            if current_queue_item is not None and current_queue_item.origin == origin:
                self._set_queue_item_status(current_queue_item.item_id, "canceled")
        except Exception:
            self.__conn.rollback()
            raise
        finally:
            self.__lock.release()
-        return CancelByDestinationResult(canceled=count)
+        return CancelByOriginResult(canceled=count)

    def cancel_by_queue_id(self, queue_id: str) -> CancelByQueueIDResult:
        try:
@@ -579,8 +579,7 @@ class SqliteSessionQueue(SessionQueueBase):
                    session_id,
                    batch_id,
                    queue_id,
-                    origin,
-                    destination
+                    origin
                FROM session_queue
                WHERE queue_id = ?
            """
@@ -660,7 +659,7 @@ class SqliteSessionQueue(SessionQueueBase):
            self.__lock.acquire()
            self.__cursor.execute(
                """--sql
-                SELECT status, count(*), origin, destination
+                SELECT status, count(*), origin
                FROM session_queue
                WHERE
                  queue_id = ?
@@ -673,7 +672,6 @@ class SqliteSessionQueue(SessionQueueBase):
            total = sum(row[1] for row in result)
            counts: dict[str, int] = {row[0]: row[1] for row in result}
            origin = result[0]["origin"] if result else None
-            destination = result[0]["destination"] if result else None
        except Exception:
            self.__conn.rollback()
            raise
@@ -683,7 +681,6 @@ class SqliteSessionQueue(SessionQueueBase):
        return BatchStatus(
            batch_id=batch_id,
            origin=origin,
-            destination=destination,
            queue_id=queue_id,
            pending=counts.get("pending", 0),
            in_progress=counts.get("in_progress", 0),
--- a/invokeai/app/services/shared/invocation_context.py
+++ b/invokeai/app/services/shared/invocation_context.py
@@ -14,7 +14,7 @@ from invokeai.app.services.image_records.image_records_common import ImageCatego
 from invokeai.app.services.images.images_common import ImageDTO
 from invokeai.app.services.invocation_services import InvocationServices
 from invokeai.app.services.model_records.model_records_base import UnknownModelException
-from invokeai.app.util.step_callback import flux_step_callback, stable_diffusion_step_callback
+from invokeai.app.util.step_callback import stable_diffusion_step_callback
 from invokeai.backend.model_manager.config import (
    AnyModel,
    AnyModelConfig,
@@ -557,24 +557,6 @@ class UtilInterface(InvocationContextInterface):
            is_canceled=self.is_canceled,
        )

-    def flux_step_callback(self, intermediate_state: PipelineIntermediateState) -> None:
-        """
-        The step callback emits a progress event with the current step, the total number of
-        steps, a preview image, and some other internal metadata.
-
-        This should be called after each denoising step.
-
-        Args:
-            intermediate_state: The intermediate state of the diffusion pipeline.
-        """
-
-        flux_step_callback(
-            context_data=self._data,
-            intermediate_state=intermediate_state,
-            events=self._services.events,
-            is_canceled=self.is_canceled,
-        )
-

 class InvocationContext:
    """Provides access to various services and data for the current invocation.
--- a/invokeai/app/services/shared/sqlite_migrator/migrations/migration_15.py
+++ b/invokeai/app/services/shared/sqlite_migrator/migrations/migration_15.py
@@ -10,11 +10,9 @@ class Migration15Callback:
    def _add_origin_col(self, cursor: sqlite3.Cursor) -> None:
        """
        - Adds `origin` column to the session queue table.
-        - Adds `destination` column to the session queue table.
        """

        cursor.execute("ALTER TABLE session_queue ADD COLUMN origin TEXT;")
-        cursor.execute("ALTER TABLE session_queue ADD COLUMN destination TEXT;")


 def build_migration_15() -> Migration:
@@ -23,7 +21,6 @@ def build_migration_15() -> Migration:

    This migration does the following:
        - Adds `origin` column to the session queue table.
-        - Adds `destination` column to the session queue table.
    """
    migration_15 = Migration(
        from_version=14,
--- a/invokeai/app/services/workflow_records/default_workflows/FLUX
+++ b/invokeai/app/services/workflow_records/default_workflows/FLUX
@@ -1,407 +0,0 @@
-{
-  "name": "FLUX Image to Image",
-  "author": "InvokeAI",
-  "description": "A simple image-to-image workflow using a FLUX dev model. ",
-  "version": "1.0.4",
-  "contact": "",
-  "tags": "image2image, flux, image-to-image",
-  "notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend using FLUX dev models for image-to-image workflows. The image-to-image performance with FLUX schnell models is poor.",
-  "exposedFields": [
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "t5_encoder_model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "clip_embed_model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "vae_model"
-    },
-    {
-      "nodeId": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "fieldName": "denoising_start"
-    },
-    {
-      "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "fieldName": "prompt"
-    },
-    {
-      "nodeId": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "fieldName": "num_steps"
-    }
-  ],
-  "meta": {
-    "version": "3.0.0",
-    "category": "default"
-  },
-  "nodes": [
-    {
-      "id": "2981a67c-480f-4237-9384-26b68dbf912b",
-      "type": "invocation",
-      "data": {
-        "id": "2981a67c-480f-4237-9384-26b68dbf912b",
-        "type": "flux_vae_encode",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": true,
-        "inputs": {
-          "image": {
-            "name": "image",
-            "label": "",
-            "value": {
-              "image_name": "8a5c62aa-9335-45d2-9c71-89af9fc1f8d4.png"
-            }
-          },
-          "vae": {
-            "name": "vae",
-            "label": ""
-          }
-        }
-      },
-      "position": {
-        "x": 732.7680166609682,
-        "y": -24.37398171806909
-      }
-    },
-    {
-      "id": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "type": "invocation",
-      "data": {
-        "id": "ace0258f-67d7-4eee-a218-6fff27065214",
-        "type": "flux_denoise",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": true,
-        "inputs": {
-          "board": {
-            "name": "board",
-            "label": ""
-          },
-          "metadata": {
-            "name": "metadata",
-            "label": ""
-          },
-          "latents": {
-            "name": "latents",
-            "label": ""
-          },
-          "denoise_mask": {
-            "name": "denoise_mask",
-            "label": ""
-          },
-          "denoising_start": {
-            "name": "denoising_start",
-            "label": "",
-            "value": 0.04
-          },
-          "denoising_end": {
-            "name": "denoising_end",
-            "label": "",
-            "value": 1
-          },
-          "transformer": {
-            "name": "transformer",
-            "label": ""
-          },
-          "positive_text_conditioning": {
-            "name": "positive_text_conditioning",
-            "label": ""
-          },
-          "width": {
-            "name": "width",
-            "label": "",
-            "value": 1024
-          },
-          "height": {
-            "name": "height",
-            "label": "",
-            "value": 1024
-          },
-          "num_steps": {
-            "name": "num_steps",
-            "label": "Steps (Recommend 30 for Dev, 4 for Schnell)",
-            "value": 30
-          },
-          "guidance": {
-            "name": "guidance",
-            "label": "",
-            "value": 4
-          },
-          "seed": {
-            "name": "seed",
-            "label": "",
-            "value": 0
-          }
-        }
-      },
-      "position": {
-        "x": 1182.8836633018684,
-        "y": -251.38882958913183
-      }
-    },
-    {
-      "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "type": "invocation",
-      "data": {
-        "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-        "type": "flux_vae_decode",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": false,
-        "useCache": true,
-        "inputs": {
-          "board": {
-            "name": "board",
-            "label": ""
-          },
-          "metadata": {
-            "name": "metadata",
-            "label": ""
-          },
-          "latents": {
-            "name": "latents",
-            "label": ""
-          },
-          "vae": {
-            "name": "vae",
-            "label": ""
-          }
-        }
-      },
-      "position": {
-        "x": 1575.5797431839133,
-        "y": -209.00150975507415
-      }
-    },
-    {
-      "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "type": "invocation",
-      "data": {
-        "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-        "type": "flux_model_loader",
-        "version": "1.0.4",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": false,
-        "inputs": {
-          "model": {
-            "name": "model",
-            "label": "Model (dev variant recommended for Image-to-Image)"
-          },
-          "t5_encoder_model": {
-            "name": "t5_encoder_model",
-            "label": ""
-          },
-          "clip_embed_model": {
-            "name": "clip_embed_model",
-            "label": "",
-            "value": {
-              "key": "fa23a584-b623-415d-832a-21b5098ff1a1",
-              "hash": "blake3:17c19f0ef941c3b7609a9c94a659ca5364de0be364a91d4179f0e39ba17c3b70",
-              "name": "clip-vit-large-patch14",
-              "base": "any",
-              "type": "clip_embed"
-            }
-          },
-          "vae_model": {
-            "name": "vae_model",
-            "label": "",
-            "value": {
-              "key": "74fc82ba-c0a8-479d-a890-2126f82da758",
-              "hash": "blake3:ce21cb76364aa6e2421311cf4a4b5eb052a76c4f1cd207b50703d8978198a068",
-              "name": "FLUX.1-schnell_ae",
-              "base": "flux",
-              "type": "vae"
-            }
-          }
-        }
-      },
-      "position": {
-        "x": 328.1809894659957,
-        "y": -90.2241133566946
-      }
-    },
-    {
-      "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "type": "invocation",
-      "data": {
-        "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-        "type": "flux_text_encoder",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": true,
-        "inputs": {
-          "clip": {
-            "name": "clip",
-            "label": ""
-          },
-          "t5_encoder": {
-            "name": "t5_encoder",
-            "label": ""
-          },
-          "t5_max_seq_len": {
-            "name": "t5_max_seq_len",
-            "label": "T5 Max Seq Len",
-            "value": 256
-          },
-          "prompt": {
-            "name": "prompt",
-            "label": "",
-            "value": "a cat wearing a birthday hat"
-          }
-        }
-      },
-      "position": {
-        "x": 745.8823365057267,
-        "y": -299.60249175851914
-      }
-    },
-    {
-      "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
-      "type": "invocation",
-      "data": {
-        "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
-        "type": "rand_int",
-        "version": "1.0.1",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": false,
-        "inputs": {
-          "low": {
-            "name": "low",
-            "label": "",
-            "value": 0
-          },
-          "high": {
-            "name": "high",
-            "label": "",
-            "value": 2147483647
-          }
-        }
-      },
-      "position": {
-        "x": 725.834098928012,
-        "y": 496.2710031089931
-      }
-    }
-  ],
-  "edges": [
-    {
-      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912bheight-ace0258f-67d7-4eee-a218-6fff27065214height",
-      "type": "default",
-      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "height",
-      "targetHandle": "height"
-    },
-    {
-      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912bwidth-ace0258f-67d7-4eee-a218-6fff27065214width",
-      "type": "default",
-      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "width",
-      "targetHandle": "width"
-    },
-    {
-      "id": "reactflow__edge-2981a67c-480f-4237-9384-26b68dbf912blatents-ace0258f-67d7-4eee-a218-6fff27065214latents",
-      "type": "default",
-      "source": "2981a67c-480f-4237-9384-26b68dbf912b",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "latents",
-      "targetHandle": "latents"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-2981a67c-480f-4237-9384-26b68dbf912bvae",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "2981a67c-480f-4237-9384-26b68dbf912b",
-      "sourceHandle": "vae",
-      "targetHandle": "vae"
-    },
-    {
-      "id": "reactflow__edge-ace0258f-67d7-4eee-a218-6fff27065214latents-7e5172eb-48c1-44db-a770-8fd83e1435d1latents",
-      "type": "default",
-      "source": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "sourceHandle": "latents",
-      "targetHandle": "latents"
-    },
-    {
-      "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-ace0258f-67d7-4eee-a218-6fff27065214seed",
-      "type": "default",
-      "source": "4754c534-a5f3-4ad0-9382-7887985e668c",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "value",
-      "targetHandle": "seed"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90transformer-ace0258f-67d7-4eee-a218-6fff27065214transformer",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "transformer",
-      "targetHandle": "transformer"
-    },
-    {
-      "id": "reactflow__edge-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cconditioning-ace0258f-67d7-4eee-a218-6fff27065214positive_text_conditioning",
-      "type": "default",
-      "source": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "target": "ace0258f-67d7-4eee-a218-6fff27065214",
-      "sourceHandle": "conditioning",
-      "targetHandle": "positive_text_conditioning"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-7e5172eb-48c1-44db-a770-8fd83e1435d1vae",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "sourceHandle": "vae",
-      "targetHandle": "vae"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90max_seq_len-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_max_seq_len",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "max_seq_len",
-      "targetHandle": "t5_max_seq_len"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90t5_encoder-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_encoder",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "t5_encoder",
-      "targetHandle": "t5_encoder"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90clip-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cclip",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "clip",
-      "targetHandle": "clip"
-    }
-  ]
-}
--- a/invokeai/app/services/workflow_records/default_workflows/Flux
+++ b/invokeai/app/services/workflow_records/default_workflows/Flux
@@ -1,326 +0,0 @@
-{
-  "name": "FLUX Text to Image",
-  "author": "InvokeAI",
-  "description": "A simple text-to-image workflow using FLUX dev or schnell models.",
-  "version": "1.0.4",
-  "contact": "",
-  "tags": "text2image, flux",
-  "notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend 4 steps for FLUX schnell models and 30 steps for FLUX dev models.",
-  "exposedFields": [
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "t5_encoder_model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "clip_embed_model"
-    },
-    {
-      "nodeId": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "fieldName": "vae_model"
-    },
-    {
-      "nodeId": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "fieldName": "prompt"
-    },
-    {
-      "nodeId": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "fieldName": "num_steps"
-    }
-  ],
-  "meta": {
-    "version": "3.0.0",
-    "category": "default"
-  },
-  "nodes": [
-    {
-      "id": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "type": "invocation",
-      "data": {
-        "id": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-        "type": "flux_denoise",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": true,
-        "inputs": {
-          "board": {
-            "name": "board",
-            "label": ""
-          },
-          "metadata": {
-            "name": "metadata",
-            "label": ""
-          },
-          "latents": {
-            "name": "latents",
-            "label": ""
-          },
-          "denoise_mask": {
-            "name": "denoise_mask",
-            "label": ""
-          },
-          "denoising_start": {
-            "name": "denoising_start",
-            "label": "",
-            "value": 0
-          },
-          "denoising_end": {
-            "name": "denoising_end",
-            "label": "",
-            "value": 1
-          },
-          "transformer": {
-            "name": "transformer",
-            "label": ""
-          },
-          "positive_text_conditioning": {
-            "name": "positive_text_conditioning",
-            "label": ""
-          },
-          "width": {
-            "name": "width",
-            "label": "",
-            "value": 1024
-          },
-          "height": {
-            "name": "height",
-            "label": "",
-            "value": 1024
-          },
-          "num_steps": {
-            "name": "num_steps",
-            "label": "Steps (Recommend 30 for Dev, 4 for Schnell)",
-            "value": 30
-          },
-          "guidance": {
-            "name": "guidance",
-            "label": "",
-            "value": 4
-          },
-          "seed": {
-            "name": "seed",
-            "label": "",
-            "value": 0
-          }
-        }
-      },
-      "position": {
-        "x": 1186.1868226120378,
-        "y": -214.9459927686657
-      }
-    },
-    {
-      "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "type": "invocation",
-      "data": {
-        "id": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-        "type": "flux_vae_decode",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": false,
-        "useCache": true,
-        "inputs": {
-          "board": {
-            "name": "board",
-            "label": ""
-          },
-          "metadata": {
-            "name": "metadata",
-            "label": ""
-          },
-          "latents": {
-            "name": "latents",
-            "label": ""
-          },
-          "vae": {
-            "name": "vae",
-            "label": ""
-          }
-        }
-      },
-      "position": {
-        "x": 1575.5797431839133,
-        "y": -209.00150975507415
-      }
-    },
-    {
-      "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "type": "invocation",
-      "data": {
-        "id": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-        "type": "flux_model_loader",
-        "version": "1.0.4",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": false,
-        "inputs": {
-          "model": {
-            "name": "model",
-            "label": ""
-          },
-          "t5_encoder_model": {
-            "name": "t5_encoder_model",
-            "label": ""
-          },
-          "clip_embed_model": {
-            "name": "clip_embed_model",
-            "label": ""
-          },
-          "vae_model": {
-            "name": "vae_model",
-            "label": ""
-          }
-        }
-      },
-      "position": {
-        "x": 381.1882713063478,
-        "y": -95.89663532854017
-      }
-    },
-    {
-      "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "type": "invocation",
-      "data": {
-        "id": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-        "type": "flux_text_encoder",
-        "version": "1.0.0",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": true,
-        "inputs": {
-          "clip": {
-            "name": "clip",
-            "label": ""
-          },
-          "t5_encoder": {
-            "name": "t5_encoder",
-            "label": ""
-          },
-          "t5_max_seq_len": {
-            "name": "t5_max_seq_len",
-            "label": "T5 Max Seq Len",
-            "value": 256
-          },
-          "prompt": {
-            "name": "prompt",
-            "label": "",
-            "value": "a cat"
-          }
-        }
-      },
-      "position": {
-        "x": 778.4899149328337,
-        "y": -100.36469216659502
-      }
-    },
-    {
-      "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
-      "type": "invocation",
-      "data": {
-        "id": "4754c534-a5f3-4ad0-9382-7887985e668c",
-        "type": "rand_int",
-        "version": "1.0.1",
-        "label": "",
-        "notes": "",
-        "isOpen": true,
-        "isIntermediate": true,
-        "useCache": false,
-        "inputs": {
-          "low": {
-            "name": "low",
-            "label": "",
-            "value": 0
-          },
-          "high": {
-            "name": "high",
-            "label": "",
-            "value": 2147483647
-          }
-        }
-      },
-      "position": {
-        "x": 800.9667463219505,
-        "y": 285.8297267547506
-      }
-    }
-  ],
-  "edges": [
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90transformer-4fe24f07-f906-4f55-ab2c-9beee56ef5bdtransformer",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "sourceHandle": "transformer",
-      "targetHandle": "transformer"
-    },
-    {
-      "id": "reactflow__edge-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cconditioning-4fe24f07-f906-4f55-ab2c-9beee56ef5bdpositive_text_conditioning",
-      "type": "default",
-      "source": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "sourceHandle": "conditioning",
-      "targetHandle": "positive_text_conditioning"
-    },
-    {
-      "id": "reactflow__edge-4754c534-a5f3-4ad0-9382-7887985e668cvalue-4fe24f07-f906-4f55-ab2c-9beee56ef5bdseed",
-      "type": "default",
-      "source": "4754c534-a5f3-4ad0-9382-7887985e668c",
-      "target": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "sourceHandle": "value",
-      "targetHandle": "seed"
-    },
-    {
-      "id": "reactflow__edge-4fe24f07-f906-4f55-ab2c-9beee56ef5bdlatents-7e5172eb-48c1-44db-a770-8fd83e1435d1latents",
-      "type": "default",
-      "source": "4fe24f07-f906-4f55-ab2c-9beee56ef5bd",
-      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "sourceHandle": "latents",
-      "targetHandle": "latents"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90vae-7e5172eb-48c1-44db-a770-8fd83e1435d1vae",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "7e5172eb-48c1-44db-a770-8fd83e1435d1",
-      "sourceHandle": "vae",
-      "targetHandle": "vae"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90max_seq_len-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_max_seq_len",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "max_seq_len",
-      "targetHandle": "t5_max_seq_len"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90t5_encoder-01f674f8-b3d1-4df1-acac-6cb8e0bfb63ct5_encoder",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "t5_encoder",
-      "targetHandle": "t5_encoder"
-    },
-    {
-      "id": "reactflow__edge-f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90clip-01f674f8-b3d1-4df1-acac-6cb8e0bfb63cclip",
-      "type": "default",
-      "source": "f8d9d7c8-9ed7-4bd7-9e42-ab0e89bfac90",
-      "target": "01f674f8-b3d1-4df1-acac-6cb8e0bfb63c",
-      "sourceHandle": "clip",
-      "targetHandle": "clip"
-    }
-  ]
-}
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@@ -38,25 +38,6 @@ SD1_5_LATENT_RGB_FACTORS = [
    [-0.1307, -0.1874, -0.7445],  # L4
 ]

-FLUX_LATENT_RGB_FACTORS = [
-    [-0.0412, 0.0149, 0.0521],
-    [0.0056, 0.0291, 0.0768],
-    [0.0342, -0.0681, -0.0427],
-    [-0.0258, 0.0092, 0.0463],
-    [0.0863, 0.0784, 0.0547],
-    [-0.0017, 0.0402, 0.0158],
-    [0.0501, 0.1058, 0.1152],
-    [-0.0209, -0.0218, -0.0329],
-    [-0.0314, 0.0083, 0.0896],
-    [0.0851, 0.0665, -0.0472],
-    [-0.0534, 0.0238, -0.0024],
-    [0.0452, -0.0026, 0.0048],
-    [0.0892, 0.0831, 0.0881],
-    [-0.1117, -0.0304, -0.0789],
-    [0.0027, -0.0479, -0.0043],
-    [-0.1146, -0.0827, -0.0598],
-]
-

 def sample_to_lowres_estimated_image(
    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
@@ -113,32 +94,3 @@ def stable_diffusion_step_callback(
        intermediate_state,
        ProgressImage(dataURL=dataURL, width=width, height=height),
    )
-
-
-def flux_step_callback(
-    context_data: "InvocationContextData",
-    intermediate_state: PipelineIntermediateState,
-    events: "EventServiceBase",
-    is_canceled: Callable[[], bool],
-) -> None:
-    if is_canceled():
-        raise CanceledException
-    sample = intermediate_state.latents
-    latent_rgb_factors = torch.tensor(FLUX_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
-    latent_image_perm = sample.permute(1, 2, 0).to(dtype=sample.dtype, device=sample.device)
-    latent_image = latent_image_perm @ latent_rgb_factors
-    latents_ubyte = (
-        ((latent_image + 1) / 2).clamp(0, 1).mul(0xFF)  # change scale from -1..1 to 0..1  # to 0..255
-    ).to(device="cpu", dtype=torch.uint8)
-    image = Image.fromarray(latents_ubyte.cpu().numpy())
-    (width, height) = image.size
-    width *= 8
-    height *= 8
-    dataURL = image_to_dataURL(image, image_format="JPEG")
-
-    events.emit_invocation_denoise_progress(
-        context_data.queue_item,
-        context_data.invocation,
-        intermediate_state,
-        ProgressImage(dataURL=dataURL, width=width, height=height),
-    )
--- a/invokeai/backend/flux/denoise.py
+++ b/invokeai/backend/flux/denoise.py
@@ -1,56 +0,0 @@
-from typing import Callable
-
-import torch
-from tqdm import tqdm
-
-from invokeai.backend.flux.inpaint_extension import InpaintExtension
-from invokeai.backend.flux.model import Flux
-from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
-
-
-def denoise(
-    model: Flux,
-    # model input
-    img: torch.Tensor,
-    img_ids: torch.Tensor,
-    txt: torch.Tensor,
-    txt_ids: torch.Tensor,
-    vec: torch.Tensor,
-    # sampling parameters
-    timesteps: list[float],
-    step_callback: Callable[[PipelineIntermediateState], None],
-    guidance: float,
-    inpaint_extension: InpaintExtension | None,
-):
-    step = 0
-    # guidance_vec is ignored for schnell.
-    guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
-    for t_curr, t_prev in tqdm(list(zip(timesteps[:-1], timesteps[1:], strict=True))):
-        t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
-        pred = model(
-            img=img,
-            img_ids=img_ids,
-            txt=txt,
-            txt_ids=txt_ids,
-            y=vec,
-            timesteps=t_vec,
-            guidance=guidance_vec,
-        )
-        preview_img = img - t_curr * pred
-        img = img + (t_prev - t_curr) * pred
-
-        if inpaint_extension is not None:
-            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-
-        step_callback(
-            PipelineIntermediateState(
-                step=step,
-                order=1,
-                total_steps=len(timesteps),
-                timestep=int(t_curr),
-                latents=preview_img,
-            ),
-        )
-        step += 1
-
-    return img
--- a/invokeai/backend/flux/inpaint_extension.py
+++ b/invokeai/backend/flux/inpaint_extension.py
@@ -1,35 +0,0 @@
-import torch
-
-
-class InpaintExtension:
-    """A class for managing inpainting with FLUX."""
-
-    def __init__(self, init_latents: torch.Tensor, inpaint_mask: torch.Tensor, noise: torch.Tensor):
-        """Initialize InpaintExtension.
-
-        Args:
-            init_latents (torch.Tensor): The initial latents (i.e. un-noised at timestep 0). In 'packed' format.
-            inpaint_mask (torch.Tensor): A mask specifying which elements to inpaint. Range [0, 1]. Values of 1 will be
-                re-generated. Values of 0 will remain unchanged. Values between 0 and 1 can be used to blend the
-                inpainted region with the background. In 'packed' format.
-            noise (torch.Tensor): The noise tensor used to noise the init_latents. In 'packed' format.
-        """
-        assert init_latents.shape == inpaint_mask.shape == noise.shape
-        self._init_latents = init_latents
-        self._inpaint_mask = inpaint_mask
-        self._noise = noise
-
-    def merge_intermediate_latents_with_init_latents(
-        self, intermediate_latents: torch.Tensor, timestep: float
-    ) -> torch.Tensor:
-        """Merge the intermediate latents with the initial latents for the current timestep using the inpaint mask. I.e.
-        update the intermediate latents to keep the regions that are not being inpainted on the correct noise
-        trajectory.
-
-        This function should be called after each denoising step.
-        """
-        # Noise the init latents for the current timestep.
-        noised_init_latents = self._noise * timestep + (1.0 - timestep) * self._init_latents
-
-        # Merge the intermediate latents with the noised_init_latents using the inpaint_mask.
-        return intermediate_latents * self._inpaint_mask + noised_init_latents * (1.0 - self._inpaint_mask)
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -1,32 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-import torch
-from einops import rearrange
-from torch import Tensor
-
-
-def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
-    q, k = apply_rope(q, k, pe)
-
-    x = torch.nn.functional.scaled_dot_product_attention(q, k, v)
-    x = rearrange(x, "B H L D -> B L (H D)")
-
-    return x
-
-
-def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
-    assert dim % 2 == 0
-    scale = torch.arange(0, dim, 2, dtype=torch.float64, device=pos.device) / dim
-    omega = 1.0 / (theta**scale)
-    out = torch.einsum("...n,d->...nd", pos, omega)
-    out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
-    out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
-    return out.float()
-
-
-def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
-    xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2)
-    xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2)
-    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
-    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk)
--- a/invokeai/backend/flux/model.py
+++ b/invokeai/backend/flux/model.py
@@ -1,117 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-from dataclasses import dataclass
-
-import torch
-from torch import Tensor, nn
-
-from invokeai.backend.flux.modules.layers import (
-    DoubleStreamBlock,
-    EmbedND,
-    LastLayer,
-    MLPEmbedder,
-    SingleStreamBlock,
-    timestep_embedding,
-)
-
-
-@dataclass
-class FluxParams:
-    in_channels: int
-    vec_in_dim: int
-    context_in_dim: int
-    hidden_size: int
-    mlp_ratio: float
-    num_heads: int
-    depth: int
-    depth_single_blocks: int
-    axes_dim: list[int]
-    theta: int
-    qkv_bias: bool
-    guidance_embed: bool
-
-
-class Flux(nn.Module):
-    """
-    Transformer model for flow matching on sequences.
-    """
-
-    def __init__(self, params: FluxParams):
-        super().__init__()
-
-        self.params = params
-        self.in_channels = params.in_channels
-        self.out_channels = self.in_channels
-        if params.hidden_size % params.num_heads != 0:
-            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
-        pe_dim = params.hidden_size // params.num_heads
-        if sum(params.axes_dim) != pe_dim:
-            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
-        self.hidden_size = params.hidden_size
-        self.num_heads = params.num_heads
-        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
-        self.img_in = nn.Linear(self.in_channels, self.hidden_size, bias=True)
-        self.time_in = MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size)
-        self.vector_in = MLPEmbedder(params.vec_in_dim, self.hidden_size)
-        self.guidance_in = (
-            MLPEmbedder(in_dim=256, hidden_dim=self.hidden_size) if params.guidance_embed else nn.Identity()
-        )
-        self.txt_in = nn.Linear(params.context_in_dim, self.hidden_size)
-
-        self.double_blocks = nn.ModuleList(
-            [
-                DoubleStreamBlock(
-                    self.hidden_size,
-                    self.num_heads,
-                    mlp_ratio=params.mlp_ratio,
-                    qkv_bias=params.qkv_bias,
-                )
-                for _ in range(params.depth)
-            ]
-        )
-
-        self.single_blocks = nn.ModuleList(
-            [
-                SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio)
-                for _ in range(params.depth_single_blocks)
-            ]
-        )
-
-        self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels)
-
-    def forward(
-        self,
-        img: Tensor,
-        img_ids: Tensor,
-        txt: Tensor,
-        txt_ids: Tensor,
-        timesteps: Tensor,
-        y: Tensor,
-        guidance: Tensor | None = None,
-    ) -> Tensor:
-        if img.ndim != 3 or txt.ndim != 3:
-            raise ValueError("Input img and txt tensors must have 3 dimensions.")
-
-        # running on sequences img
-        img = self.img_in(img)
-        vec = self.time_in(timestep_embedding(timesteps, 256))
-        if self.params.guidance_embed:
-            if guidance is None:
-                raise ValueError("Didn't get guidance strength for guidance distilled model.")
-            vec = vec + self.guidance_in(timestep_embedding(guidance, 256))
-        vec = vec + self.vector_in(y)
-        txt = self.txt_in(txt)
-
-        ids = torch.cat((txt_ids, img_ids), dim=1)
-        pe = self.pe_embedder(ids)
-
-        for block in self.double_blocks:
-            img, txt = block(img=img, txt=txt, vec=vec, pe=pe)
-
-        img = torch.cat((txt, img), 1)
-        for block in self.single_blocks:
-            img = block(img, vec=vec, pe=pe)
-        img = img[:, txt.shape[1] :, ...]
-
-        img = self.final_layer(img, vec)  # (N, T, patch_size ** 2 * out_channels)
-        return img
--- a/invokeai/backend/flux/modules/autoencoder.py
+++ b/invokeai/backend/flux/modules/autoencoder.py
@@ -1,324 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-from dataclasses import dataclass
-
-import torch
-from einops import rearrange
-from torch import Tensor, nn
-
-
-@dataclass
-class AutoEncoderParams:
-    resolution: int
-    in_channels: int
-    ch: int
-    out_ch: int
-    ch_mult: list[int]
-    num_res_blocks: int
-    z_channels: int
-    scale_factor: float
-    shift_factor: float
-
-
-class AttnBlock(nn.Module):
-    def __init__(self, in_channels: int):
-        super().__init__()
-        self.in_channels = in_channels
-
-        self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
-
-        self.q = nn.Conv2d(in_channels, in_channels, kernel_size=1)
-        self.k = nn.Conv2d(in_channels, in_channels, kernel_size=1)
-        self.v = nn.Conv2d(in_channels, in_channels, kernel_size=1)
-        self.proj_out = nn.Conv2d(in_channels, in_channels, kernel_size=1)
-
-    def attention(self, h_: Tensor) -> Tensor:
-        h_ = self.norm(h_)
-        q = self.q(h_)
-        k = self.k(h_)
-        v = self.v(h_)
-
-        b, c, h, w = q.shape
-        q = rearrange(q, "b c h w -> b 1 (h w) c").contiguous()
-        k = rearrange(k, "b c h w -> b 1 (h w) c").contiguous()
-        v = rearrange(v, "b c h w -> b 1 (h w) c").contiguous()
-        h_ = nn.functional.scaled_dot_product_attention(q, k, v)
-
-        return rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b)
-
-    def forward(self, x: Tensor) -> Tensor:
-        return x + self.proj_out(self.attention(x))
-
-
-class ResnetBlock(nn.Module):
-    def __init__(self, in_channels: int, out_channels: int):
-        super().__init__()
-        self.in_channels = in_channels
-        out_channels = in_channels if out_channels is None else out_channels
-        self.out_channels = out_channels
-
-        self.norm1 = nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
-        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
-        self.norm2 = nn.GroupNorm(num_groups=32, num_channels=out_channels, eps=1e-6, affine=True)
-        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
-        if self.in_channels != self.out_channels:
-            self.nin_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
-
-    def forward(self, x):
-        h = x
-        h = self.norm1(h)
-        h = torch.nn.functional.silu(h)
-        h = self.conv1(h)
-
-        h = self.norm2(h)
-        h = torch.nn.functional.silu(h)
-        h = self.conv2(h)
-
-        if self.in_channels != self.out_channels:
-            x = self.nin_shortcut(x)
-
-        return x + h
-
-
-class Downsample(nn.Module):
-    def __init__(self, in_channels: int):
-        super().__init__()
-        # no asymmetric padding in torch conv, must do it ourselves
-        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
-
-    def forward(self, x: Tensor):
-        pad = (0, 1, 0, 1)
-        x = nn.functional.pad(x, pad, mode="constant", value=0)
-        x = self.conv(x)
-        return x
-
-
-class Upsample(nn.Module):
-    def __init__(self, in_channels: int):
-        super().__init__()
-        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
-
-    def forward(self, x: Tensor):
-        x = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
-        x = self.conv(x)
-        return x
-
-
-class Encoder(nn.Module):
-    def __init__(
-        self,
-        resolution: int,
-        in_channels: int,
-        ch: int,
-        ch_mult: list[int],
-        num_res_blocks: int,
-        z_channels: int,
-    ):
-        super().__init__()
-        self.ch = ch
-        self.num_resolutions = len(ch_mult)
-        self.num_res_blocks = num_res_blocks
-        self.resolution = resolution
-        self.in_channels = in_channels
-        # downsampling
-        self.conv_in = nn.Conv2d(in_channels, self.ch, kernel_size=3, stride=1, padding=1)
-
-        curr_res = resolution
-        in_ch_mult = (1,) + tuple(ch_mult)
-        self.in_ch_mult = in_ch_mult
-        self.down = nn.ModuleList()
-        block_in = self.ch
-        for i_level in range(self.num_resolutions):
-            block = nn.ModuleList()
-            attn = nn.ModuleList()
-            block_in = ch * in_ch_mult[i_level]
-            block_out = ch * ch_mult[i_level]
-            for _ in range(self.num_res_blocks):
-                block.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
-                block_in = block_out
-            down = nn.Module()
-            down.block = block
-            down.attn = attn
-            if i_level != self.num_resolutions - 1:
-                down.downsample = Downsample(block_in)
-                curr_res = curr_res // 2
-            self.down.append(down)
-
-        # middle
-        self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
-        self.mid.attn_1 = AttnBlock(block_in)
-        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
-
-        # end
-        self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
-        self.conv_out = nn.Conv2d(block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1)
-
-    def forward(self, x: Tensor) -> Tensor:
-        # downsampling
-        hs = [self.conv_in(x)]
-        for i_level in range(self.num_resolutions):
-            for i_block in range(self.num_res_blocks):
-                h = self.down[i_level].block[i_block](hs[-1])
-                if len(self.down[i_level].attn) > 0:
-                    h = self.down[i_level].attn[i_block](h)
-                hs.append(h)
-            if i_level != self.num_resolutions - 1:
-                hs.append(self.down[i_level].downsample(hs[-1]))
-
-        # middle
-        h = hs[-1]
-        h = self.mid.block_1(h)
-        h = self.mid.attn_1(h)
-        h = self.mid.block_2(h)
-        # end
-        h = self.norm_out(h)
-        h = torch.nn.functional.silu(h)
-        h = self.conv_out(h)
-        return h
-
-
-class Decoder(nn.Module):
-    def __init__(
-        self,
-        ch: int,
-        out_ch: int,
-        ch_mult: list[int],
-        num_res_blocks: int,
-        in_channels: int,
-        resolution: int,
-        z_channels: int,
-    ):
-        super().__init__()
-        self.ch = ch
-        self.num_resolutions = len(ch_mult)
-        self.num_res_blocks = num_res_blocks
-        self.resolution = resolution
-        self.in_channels = in_channels
-        self.ffactor = 2 ** (self.num_resolutions - 1)
-
-        # compute in_ch_mult, block_in and curr_res at lowest res
-        block_in = ch * ch_mult[self.num_resolutions - 1]
-        curr_res = resolution // 2 ** (self.num_resolutions - 1)
-        self.z_shape = (1, z_channels, curr_res, curr_res)
-
-        # z to block_in
-        self.conv_in = nn.Conv2d(z_channels, block_in, kernel_size=3, stride=1, padding=1)
-
-        # middle
-        self.mid = nn.Module()
-        self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in)
-        self.mid.attn_1 = AttnBlock(block_in)
-        self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in)
-
-        # upsampling
-        self.up = nn.ModuleList()
-        for i_level in reversed(range(self.num_resolutions)):
-            block = nn.ModuleList()
-            attn = nn.ModuleList()
-            block_out = ch * ch_mult[i_level]
-            for _ in range(self.num_res_blocks + 1):
-                block.append(ResnetBlock(in_channels=block_in, out_channels=block_out))
-                block_in = block_out
-            up = nn.Module()
-            up.block = block
-            up.attn = attn
-            if i_level != 0:
-                up.upsample = Upsample(block_in)
-                curr_res = curr_res * 2
-            self.up.insert(0, up)  # prepend to get consistent order
-
-        # end
-        self.norm_out = nn.GroupNorm(num_groups=32, num_channels=block_in, eps=1e-6, affine=True)
-        self.conv_out = nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1)
-
-    def forward(self, z: Tensor) -> Tensor:
-        # z to block_in
-        h = self.conv_in(z)
-
-        # middle
-        h = self.mid.block_1(h)
-        h = self.mid.attn_1(h)
-        h = self.mid.block_2(h)
-
-        # upsampling
-        for i_level in reversed(range(self.num_resolutions)):
-            for i_block in range(self.num_res_blocks + 1):
-                h = self.up[i_level].block[i_block](h)
-                if len(self.up[i_level].attn) > 0:
-                    h = self.up[i_level].attn[i_block](h)
-            if i_level != 0:
-                h = self.up[i_level].upsample(h)
-
-        # end
-        h = self.norm_out(h)
-        h = torch.nn.functional.silu(h)
-        h = self.conv_out(h)
-        return h
-
-
-class DiagonalGaussian(nn.Module):
-    def __init__(self, chunk_dim: int = 1):
-        super().__init__()
-        self.chunk_dim = chunk_dim
-
-    def forward(self, z: Tensor, sample: bool = True, generator: torch.Generator | None = None) -> Tensor:
-        mean, logvar = torch.chunk(z, 2, dim=self.chunk_dim)
-        if sample:
-            std = torch.exp(0.5 * logvar)
-            # Unfortunately, torch.randn_like(...) does not accept a generator argument at the time of writing, so we
-            # have to use torch.randn(...) instead.
-            return mean + std * torch.randn(size=mean.size(), generator=generator, dtype=mean.dtype, device=mean.device)
-        else:
-            return mean
-
-
-class AutoEncoder(nn.Module):
-    def __init__(self, params: AutoEncoderParams):
-        super().__init__()
-        self.encoder = Encoder(
-            resolution=params.resolution,
-            in_channels=params.in_channels,
-            ch=params.ch,
-            ch_mult=params.ch_mult,
-            num_res_blocks=params.num_res_blocks,
-            z_channels=params.z_channels,
-        )
-        self.decoder = Decoder(
-            resolution=params.resolution,
-            in_channels=params.in_channels,
-            ch=params.ch,
-            out_ch=params.out_ch,
-            ch_mult=params.ch_mult,
-            num_res_blocks=params.num_res_blocks,
-            z_channels=params.z_channels,
-        )
-        self.reg = DiagonalGaussian()
-
-        self.scale_factor = params.scale_factor
-        self.shift_factor = params.shift_factor
-
-    def encode(self, x: Tensor, sample: bool = True, generator: torch.Generator | None = None) -> Tensor:
-        """Run VAE encoding on input tensor x.
-
-        Args:
-            x (Tensor): Input image tensor. Shape: (batch_size, in_channels, height, width).
-            sample (bool, optional): If True, sample from the encoded distribution, else, return the distribution mean.
-                Defaults to True.
-            generator (torch.Generator | None, optional): Optional random number generator for reproducibility.
-                Defaults to None.
-
-        Returns:
-            Tensor: Encoded latent tensor. Shape: (batch_size, z_channels, latent_height, latent_width).
-        """
-
-        z = self.reg(self.encoder(x), sample=sample, generator=generator)
-        z = self.scale_factor * (z - self.shift_factor)
-        return z
-
-    def decode(self, z: Tensor) -> Tensor:
-        z = z / self.scale_factor + self.shift_factor
-        return self.decoder(z)
-
-    def forward(self, x: Tensor) -> Tensor:
-        return self.decode(self.encode(x))
--- a/invokeai/backend/flux/modules/conditioner.py
+++ b/invokeai/backend/flux/modules/conditioner.py
@@ -1,33 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-from torch import Tensor, nn
-from transformers import PreTrainedModel, PreTrainedTokenizer
-
-
-class HFEncoder(nn.Module):
-    def __init__(self, encoder: PreTrainedModel, tokenizer: PreTrainedTokenizer, is_clip: bool, max_length: int):
-        super().__init__()
-        self.max_length = max_length
-        self.is_clip = is_clip
-        self.output_key = "pooler_output" if self.is_clip else "last_hidden_state"
-        self.tokenizer = tokenizer
-        self.hf_module = encoder
-        self.hf_module = self.hf_module.eval().requires_grad_(False)
-
-    def forward(self, text: list[str]) -> Tensor:
-        batch_encoding = self.tokenizer(
-            text,
-            truncation=True,
-            max_length=self.max_length,
-            return_length=False,
-            return_overflowing_tokens=False,
-            padding="max_length",
-            return_tensors="pt",
-        )
-
-        outputs = self.hf_module(
-            input_ids=batch_encoding["input_ids"].to(self.hf_module.device),
-            attention_mask=None,
-            output_hidden_states=False,
-        )
-        return outputs[self.output_key]
--- a/invokeai/backend/flux/modules/layers.py
+++ b/invokeai/backend/flux/modules/layers.py
@@ -1,253 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-import math
-from dataclasses import dataclass
-
-import torch
-from einops import rearrange
-from torch import Tensor, nn
-
-from invokeai.backend.flux.math import attention, rope
-
-
-class EmbedND(nn.Module):
-    def __init__(self, dim: int, theta: int, axes_dim: list[int]):
-        super().__init__()
-        self.dim = dim
-        self.theta = theta
-        self.axes_dim = axes_dim
-
-    def forward(self, ids: Tensor) -> Tensor:
-        n_axes = ids.shape[-1]
-        emb = torch.cat(
-            [rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(n_axes)],
-            dim=-3,
-        )
-
-        return emb.unsqueeze(1)
-
-
-def timestep_embedding(t: Tensor, dim, max_period=10000, time_factor: float = 1000.0):
-    """
-    Create sinusoidal timestep embeddings.
-    :param t: a 1-D Tensor of N indices, one per batch element.
-                      These may be fractional.
-    :param dim: the dimension of the output.
-    :param max_period: controls the minimum frequency of the embeddings.
-    :return: an (N, D) Tensor of positional embeddings.
-    """
-    t = time_factor * t
-    half = dim // 2
-    freqs = torch.exp(-math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half).to(t.device)
-
-    args = t[:, None].float() * freqs[None]
-    embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
-    if dim % 2:
-        embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
-    if torch.is_floating_point(t):
-        embedding = embedding.to(t)
-    return embedding
-
-
-class MLPEmbedder(nn.Module):
-    def __init__(self, in_dim: int, hidden_dim: int):
-        super().__init__()
-        self.in_layer = nn.Linear(in_dim, hidden_dim, bias=True)
-        self.silu = nn.SiLU()
-        self.out_layer = nn.Linear(hidden_dim, hidden_dim, bias=True)
-
-    def forward(self, x: Tensor) -> Tensor:
-        return self.out_layer(self.silu(self.in_layer(x)))
-
-
-class RMSNorm(torch.nn.Module):
-    def __init__(self, dim: int):
-        super().__init__()
-        self.scale = nn.Parameter(torch.ones(dim))
-
-    def forward(self, x: Tensor):
-        x_dtype = x.dtype
-        x = x.float()
-        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
-        return (x * rrms).to(dtype=x_dtype) * self.scale
-
-
-class QKNorm(torch.nn.Module):
-    def __init__(self, dim: int):
-        super().__init__()
-        self.query_norm = RMSNorm(dim)
-        self.key_norm = RMSNorm(dim)
-
-    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple[Tensor, Tensor]:
-        q = self.query_norm(q)
-        k = self.key_norm(k)
-        return q.to(v), k.to(v)
-
-
-class SelfAttention(nn.Module):
-    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = False):
-        super().__init__()
-        self.num_heads = num_heads
-        head_dim = dim // num_heads
-
-        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
-        self.norm = QKNorm(head_dim)
-        self.proj = nn.Linear(dim, dim)
-
-    def forward(self, x: Tensor, pe: Tensor) -> Tensor:
-        qkv = self.qkv(x)
-        q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
-        q, k = self.norm(q, k, v)
-        x = attention(q, k, v, pe=pe)
-        x = self.proj(x)
-        return x
-
-
-@dataclass
-class ModulationOut:
-    shift: Tensor
-    scale: Tensor
-    gate: Tensor
-
-
-class Modulation(nn.Module):
-    def __init__(self, dim: int, double: bool):
-        super().__init__()
-        self.is_double = double
-        self.multiplier = 6 if double else 3
-        self.lin = nn.Linear(dim, self.multiplier * dim, bias=True)
-
-    def forward(self, vec: Tensor) -> tuple[ModulationOut, ModulationOut | None]:
-        out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1)
-
-        return (
-            ModulationOut(*out[:3]),
-            ModulationOut(*out[3:]) if self.is_double else None,
-        )
-
-
-class DoubleStreamBlock(nn.Module):
-    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False):
-        super().__init__()
-
-        mlp_hidden_dim = int(hidden_size * mlp_ratio)
-        self.num_heads = num_heads
-        self.hidden_size = hidden_size
-        self.img_mod = Modulation(hidden_size, double=True)
-        self.img_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
-
-        self.img_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-        self.img_mlp = nn.Sequential(
-            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
-            nn.GELU(approximate="tanh"),
-            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
-        )
-
-        self.txt_mod = Modulation(hidden_size, double=True)
-        self.txt_norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias)
-
-        self.txt_norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-        self.txt_mlp = nn.Sequential(
-            nn.Linear(hidden_size, mlp_hidden_dim, bias=True),
-            nn.GELU(approximate="tanh"),
-            nn.Linear(mlp_hidden_dim, hidden_size, bias=True),
-        )
-
-    def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor) -> tuple[Tensor, Tensor]:
-        img_mod1, img_mod2 = self.img_mod(vec)
-        txt_mod1, txt_mod2 = self.txt_mod(vec)
-
-        # prepare image for attention
-        img_modulated = self.img_norm1(img)
-        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
-        img_qkv = self.img_attn.qkv(img_modulated)
-        img_q, img_k, img_v = rearrange(img_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
-        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)
-
-        # prepare txt for attention
-        txt_modulated = self.txt_norm1(txt)
-        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
-        txt_qkv = self.txt_attn.qkv(txt_modulated)
-        txt_q, txt_k, txt_v = rearrange(txt_qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
-        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)
-
-        # run actual attention
-        q = torch.cat((txt_q, img_q), dim=2)
-        k = torch.cat((txt_k, img_k), dim=2)
-        v = torch.cat((txt_v, img_v), dim=2)
-
-        attn = attention(q, k, v, pe=pe)
-        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]
-
-        # calculate the img bloks
-        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
-        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)
-
-        # calculate the txt bloks
-        txt = txt + txt_mod1.gate * self.txt_attn.proj(txt_attn)
-        txt = txt + txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)
-        return img, txt
-
-
-class SingleStreamBlock(nn.Module):
-    """
-    A DiT block with parallel linear layers as described in
-    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
-    """
-
-    def __init__(
-        self,
-        hidden_size: int,
-        num_heads: int,
-        mlp_ratio: float = 4.0,
-        qk_scale: float | None = None,
-    ):
-        super().__init__()
-        self.hidden_dim = hidden_size
-        self.num_heads = num_heads
-        head_dim = hidden_size // num_heads
-        self.scale = qk_scale or head_dim**-0.5
-
-        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
-        # qkv and mlp_in
-        self.linear1 = nn.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim)
-        # proj and mlp_out
-        self.linear2 = nn.Linear(hidden_size + self.mlp_hidden_dim, hidden_size)
-
-        self.norm = QKNorm(head_dim)
-
-        self.hidden_size = hidden_size
-        self.pre_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-
-        self.mlp_act = nn.GELU(approximate="tanh")
-        self.modulation = Modulation(hidden_size, double=False)
-
-    def forward(self, x: Tensor, vec: Tensor, pe: Tensor) -> Tensor:
-        mod, _ = self.modulation(vec)
-        x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift
-        qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)
-
-        q, k, v = rearrange(qkv, "B L (K H D) -> K B H L D", K=3, H=self.num_heads)
-        q, k = self.norm(q, k, v)
-
-        # compute attention
-        attn = attention(q, k, v, pe=pe)
-        # compute activation in mlp stream, cat again and run second linear layer
-        output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
-        return x + mod.gate * output
-
-
-class LastLayer(nn.Module):
-    def __init__(self, hidden_size: int, patch_size: int, out_channels: int):
-        super().__init__()
-        self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6)
-        self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True)
-        self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(hidden_size, 2 * hidden_size, bias=True))
-
-    def forward(self, x: Tensor, vec: Tensor) -> Tensor:
-        shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1)
-        x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
-        x = self.linear(x)
-        return x
--- a/invokeai/backend/flux/sampling_utils.py
+++ b/invokeai/backend/flux/sampling_utils.py
@@ -1,135 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-import math
-from typing import Callable
-
-import torch
-from einops import rearrange, repeat
-
-
-def get_noise(
-    num_samples: int,
-    height: int,
-    width: int,
-    device: torch.device,
-    dtype: torch.dtype,
-    seed: int,
-):
-    # We always generate noise on the same device and dtype then cast to ensure consistency across devices/dtypes.
-    rand_device = "cpu"
-    rand_dtype = torch.float16
-    return torch.randn(
-        num_samples,
-        16,
-        # allow for packing
-        2 * math.ceil(height / 16),
-        2 * math.ceil(width / 16),
-        device=rand_device,
-        dtype=rand_dtype,
-        generator=torch.Generator(device=rand_device).manual_seed(seed),
-    ).to(device=device, dtype=dtype)
-
-
-def time_shift(mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor:
-    return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
-
-
-def get_lin_function(x1: float = 256, y1: float = 0.5, x2: float = 4096, y2: float = 1.15) -> Callable[[float], float]:
-    m = (y2 - y1) / (x2 - x1)
-    b = y1 - m * x1
-    return lambda x: m * x + b
-
-
-def get_schedule(
-    num_steps: int,
-    image_seq_len: int,
-    base_shift: float = 0.5,
-    max_shift: float = 1.15,
-    shift: bool = True,
-) -> list[float]:
-    # extra step for zero
-    timesteps = torch.linspace(1, 0, num_steps + 1)
-
-    # shifting the schedule to favor high timesteps for higher signal images
-    if shift:
-        # estimate mu based on linear estimation between two points
-        mu = get_lin_function(y1=base_shift, y2=max_shift)(image_seq_len)
-        timesteps = time_shift(mu, 1.0, timesteps)
-
-    return timesteps.tolist()
-
-
-def _find_last_index_ge_val(timesteps: list[float], val: float, eps: float = 1e-6) -> int:
-    """Find the last index in timesteps that is >= val.
-
-    We use epsilon-close equality to avoid potential floating point errors.
-    """
-    idx = len(list(filter(lambda t: t >= (val - eps), timesteps))) - 1
-    assert idx >= 0
-    return idx
-
-
-def clip_timestep_schedule(timesteps: list[float], denoising_start: float, denoising_end: float) -> list[float]:
-    """Clip the timestep schedule to the denoising range.
-
-    Args:
-        timesteps (list[float]): The original timestep schedule: [1.0, ..., 0.0].
-        denoising_start (float): A value in [0, 1] specifying the start of the denoising process. E.g. a value of 0.2
-            would mean that the denoising process start at the last timestep in the schedule >= 0.8.
-        denoising_end (float): A value in [0, 1] specifying the end of the denoising process. E.g. a value of 0.8 would
-            mean that the denoising process end at the last timestep in the schedule >= 0.2.
-
-    Returns:
-        list[float]: The clipped timestep schedule.
-    """
-    assert 0.0 <= denoising_start <= 1.0
-    assert 0.0 <= denoising_end <= 1.0
-    assert denoising_start <= denoising_end
-
-    t_start_val = 1.0 - denoising_start
-    t_end_val = 1.0 - denoising_end
-
-    t_start_idx = _find_last_index_ge_val(timesteps, t_start_val)
-    t_end_idx = _find_last_index_ge_val(timesteps, t_end_val)
-
-    clipped_timesteps = timesteps[t_start_idx : t_end_idx + 1]
-
-    return clipped_timesteps
-
-
-def unpack(x: torch.Tensor, height: int, width: int) -> torch.Tensor:
-    """Unpack flat array of patch embeddings to latent image."""
-    return rearrange(
-        x,
-        "b (h w) (c ph pw) -> b c (h ph) (w pw)",
-        h=math.ceil(height / 16),
-        w=math.ceil(width / 16),
-        ph=2,
-        pw=2,
-    )
-
-
-def pack(x: torch.Tensor) -> torch.Tensor:
-    """Pack latent image to flattented array of patch embeddings."""
-    # Pixel unshuffle with a scale of 2, and flatten the height/width dimensions to get an array of patches.
-    return rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
-
-
-def generate_img_ids(h: int, w: int, batch_size: int, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
-    """Generate tensor of image position ids.
-
-    Args:
-        h (int): Height of image in latent space.
-        w (int): Width of image in latent space.
-        batch_size (int): Batch size.
-        device (torch.device): Device.
-        dtype (torch.dtype): dtype.
-
-    Returns:
-        torch.Tensor: Image position ids.
-    """
-    img_ids = torch.zeros(h // 2, w // 2, 3, device=device, dtype=dtype)
-    img_ids[..., 1] = img_ids[..., 1] + torch.arange(h // 2, device=device, dtype=dtype)[:, None]
-    img_ids[..., 2] = img_ids[..., 2] + torch.arange(w // 2, device=device, dtype=dtype)[None, :]
-    img_ids = repeat(img_ids, "h w c -> b (h w) c", b=batch_size)
-    return img_ids
--- a/invokeai/backend/flux/util.py
+++ b/invokeai/backend/flux/util.py
@@ -1,71 +0,0 @@
-# Initially pulled from https://github.com/black-forest-labs/flux
-
-from dataclasses import dataclass
-from typing import Dict, Literal
-
-from invokeai.backend.flux.model import FluxParams
-from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams
-
-
-@dataclass
-class ModelSpec:
-    params: FluxParams
-    ae_params: AutoEncoderParams
-    ckpt_path: str | None
-    ae_path: str | None
-    repo_id: str | None
-    repo_flow: str | None
-    repo_ae: str | None
-
-
-max_seq_lengths: Dict[str, Literal[256, 512]] = {
-    "flux-dev": 512,
-    "flux-schnell": 256,
-}
-
-
-ae_params = {
-    "flux": AutoEncoderParams(
-        resolution=256,
-        in_channels=3,
-        ch=128,
-        out_ch=3,
-        ch_mult=[1, 2, 4, 4],
-        num_res_blocks=2,
-        z_channels=16,
-        scale_factor=0.3611,
-        shift_factor=0.1159,
-    )
-}
-
-
-params = {
-    "flux-dev": FluxParams(
-        in_channels=64,
-        vec_in_dim=768,
-        context_in_dim=4096,
-        hidden_size=3072,
-        mlp_ratio=4.0,
-        num_heads=24,
-        depth=19,
-        depth_single_blocks=38,
-        axes_dim=[16, 56, 56],
-        theta=10_000,
-        qkv_bias=True,
-        guidance_embed=True,
-    ),
-    "flux-schnell": FluxParams(
-        in_channels=64,
-        vec_in_dim=768,
-        context_in_dim=4096,
-        hidden_size=3072,
-        mlp_ratio=4.0,
-        num_heads=24,
-        depth=19,
-        depth_single_blocks=38,
-        axes_dim=[16, 56, 56],
-        theta=10_000,
-        qkv_bias=True,
-        guidance_embed=False,
-    ),
-}
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -0,0 +1,672 @@
+# Copyright (c) 2024 The InvokeAI Development team
+"""LoRA model support."""
+
+import bisect
+from pathlib import Path
+from typing import Dict, List, Optional, Set, Tuple, Union
+
+import torch
+from safetensors.torch import load_file
+from typing_extensions import Self
+
+import invokeai.backend.util.logging as logger
+from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.raw_model import RawModel
+
+
+class LoRALayerBase:
+    # rank: Optional[int]
+    # alpha: Optional[float]
+    # bias: Optional[torch.Tensor]
+    # layer_key: str
+
+    # @property
+    # def scale(self):
+    #    return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        if "alpha" in values:
+            self.alpha = values["alpha"].item()
+        else:
+            self.alpha = None
+
+        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
+            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
+                values["bias_indices"],
+                values["bias_values"],
+                tuple(values["bias_size"]),
+            )
+
+        else:
+            self.bias = None
+
+        self.rank = None  # set in layer implementation
+        self.layer_key = layer_key
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        raise NotImplementedError()
+
+    def get_bias(self, orig_bias: torch.Tensor) -> Optional[torch.Tensor]:
+        return self.bias
+
+    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
+        params = {"weight": self.get_weight(orig_module.weight)}
+        bias = self.get_bias(orig_module.bias)
+        if bias is not None:
+            params["bias"] = bias
+        return params
+
+    def calc_size(self) -> int:
+        """Return the byte size of the tensors owned by the base layer, i.e. only the optional bias."""
+        if self.bias is None:
+            return 0
+        # Element count times bytes per element.
+        return self.bias.nelement() * self.bias.element_size()
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        if self.bias is not None:
+            self.bias = self.bias.to(device=device, dtype=dtype)
+
+    def check_keys(self, values: Dict[str, torch.Tensor], known_keys: Set[str]):
+        """Log a warning if values contains unhandled keys."""
+        # {"alpha", "bias_indices", "bias_values", "bias_size"} are hard-coded, because they are handled by
+        # `LoRALayerBase`. Sub-classes should provide the known_keys that they handled.
+        all_known_keys = known_keys | {"alpha", "bias_indices", "bias_values", "bias_size"}
+        unknown_keys = set(values.keys()) - all_known_keys
+        if unknown_keys:
+            logger.warning(
+                f"Unexpected keys found in LoRA/LyCORIS layer, model might work incorrectly! Keys: {unknown_keys}"
+            )
+
+
+# TODO: find and debug lora/locon with bias
+class LoRALayer(LoRALayerBase):
+    # up: torch.Tensor
+    # mid: Optional[torch.Tensor]
+    # down: torch.Tensor
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.up = values["lora_up.weight"]
+        self.down = values["lora_down.weight"]
+        self.mid = values.get("lora_mid.weight", None)
+
+        self.rank = self.down.shape[0]
+        self.check_keys(
+            values,
+            {
+                "lora_up.weight",
+                "lora_down.weight",
+                "lora_mid.weight",
+            },
+        )
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Build the weight from up/down and the optional mid tensor; `orig_weight` is not read."""
+        if self.mid is None:
+            # Flatten both factors to 2-D and take their matrix product.
+            return self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
+        # reshape to the first two dims only succeeds when any trailing dims have size 1.
+        up_2d = self.up.reshape(self.up.shape[0], self.up.shape[1])
+        down_2d = self.down.reshape(self.down.shape[0], self.down.shape[1])
+        return torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up_2d, down_2d)
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.up, self.mid, self.down]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)
+
+        if self.mid is not None:
+            self.mid = self.mid.to(device=device, dtype=dtype)
+
+
+class LoHALayer(LoRALayerBase):
+    # w1_a: torch.Tensor
+    # w1_b: torch.Tensor
+    # w2_a: torch.Tensor
+    # w2_b: torch.Tensor
+    # t1: Optional[torch.Tensor] = None
+    # t2: Optional[torch.Tensor] = None
+
+    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
+        super().__init__(layer_key, values)
+
+        self.w1_a = values["hada_w1_a"]
+        self.w1_b = values["hada_w1_b"]
+        self.w2_a = values["hada_w2_a"]
+        self.w2_b = values["hada_w2_b"]
+        self.t1 = values.get("hada_t1", None)
+        self.t2 = values.get("hada_t2", None)
+
+        self.rank = self.w1_b.shape[0]
+        self.check_keys(
+            values,
+            {
+                "hada_w1_a",
+                "hada_w1_b",
+                "hada_w2_a",
+                "hada_w2_b",
+                "hada_t1",
+                "hada_t2",
+            },
+        )
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Return the element-wise product of the two rebuilt factors; `orig_weight` is not read."""
+        if self.t1 is not None:
+            # t1 selects this path; t2 is used without its own check, so it is presumably always paired with t1.
+            first = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
+            second = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
+            return first * second
+
+        # No t1: each factor is a plain matrix product.
+        return (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+        if self.t1 is not None:
+            self.t1 = self.t1.to(device=device, dtype=dtype)
+
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
+
+
+class LoKRLayer(LoRALayerBase):
+    # w1: Optional[torch.Tensor] = None
+    # w1_a: Optional[torch.Tensor] = None
+    # w1_b: Optional[torch.Tensor] = None
+    # w2: Optional[torch.Tensor] = None
+    # w2_a: Optional[torch.Tensor] = None
+    # w2_b: Optional[torch.Tensor] = None
+    # t2: Optional[torch.Tensor] = None
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.w1 = values.get("lokr_w1", None)
+        if self.w1 is None:
+            self.w1_a = values["lokr_w1_a"]
+            self.w1_b = values["lokr_w1_b"]
+        else:
+            self.w1_b = None
+            self.w1_a = None
+
+        self.w2 = values.get("lokr_w2", None)
+        if self.w2 is None:
+            self.w2_a = values["lokr_w2_a"]
+            self.w2_b = values["lokr_w2_b"]
+        else:
+            self.w2_a = None
+            self.w2_b = None
+
+        self.t2 = values.get("lokr_t2", None)
+
+        if self.w1_b is not None:
+            self.rank = self.w1_b.shape[0]
+        elif self.w2_b is not None:
+            self.rank = self.w2_b.shape[0]
+        else:
+            self.rank = None  # unscaled
+
+        self.check_keys(
+            values,
+            {
+                "lokr_w1",
+                "lokr_w1_a",
+                "lokr_w1_b",
+                "lokr_w2",
+                "lokr_w2_a",
+                "lokr_w2_b",
+                "lokr_t2",
+            },
+        )
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Return kron(w1, w2), rebuilding either factor from its parts if needed; `orig_weight` is not read."""
+        w1: Optional[torch.Tensor] = self.w1
+        if w1 is None:
+            assert self.w1_a is not None
+            assert self.w1_b is not None
+            w1 = self.w1_a @ self.w1_b
+
+        w2 = self.w2
+        if w2 is None:
+            # Both reconstruction paths read the w2_a/w2_b pair, so check it once before branching.
+            assert self.w2_a is not None
+            assert self.w2_b is not None
+            if self.t2 is None:
+                w2 = self.w2_a @ self.w2_b
+            else:
+                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
+
+        if len(w2.shape) == 4:
+            # Append two singleton dims to w1 when w2 is 4-D.
+            w1 = w1.unsqueeze(2).unsqueeze(2)
+        w2 = w2.contiguous()
+
+        return torch.kron(w1, w2)
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
+
+        if self.w1 is not None:
+            self.w1 = self.w1.to(device=device, dtype=dtype)
+        else:
+            assert self.w1_a is not None
+            assert self.w1_b is not None
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+
+        if self.w2 is not None:
+            self.w2 = self.w2.to(device=device, dtype=dtype)
+        else:
+            assert self.w2_a is not None
+            assert self.w2_b is not None
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
+
+
+class FullLayer(LoRALayerBase):
+    # bias handled in LoRALayerBase(calc_size, to)
+    # weight: torch.Tensor
+    # bias: Optional[torch.Tensor]
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.weight = values["diff"]
+        self.bias = values.get("diff_b", None)
+
+        self.rank = None  # unscaled
+        self.check_keys(values, {"diff", "diff_b"})
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        return self.weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        model_size += self.weight.nelement() * self.weight.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.weight = self.weight.to(device=device, dtype=dtype)
+
+
+class IA3Layer(LoRALayerBase):
+    # weight: torch.Tensor
+    # on_input: torch.Tensor
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.weight = values["weight"]
+        self.on_input = values["on_input"]
+
+        self.rank = None  # unscaled
+        self.check_keys(values, {"weight", "on_input"})
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Return `orig_weight` multiplied element-wise by the IA3 weight."""
+        # A falsy on_input reshapes the weight to a (-1, 1) column before the broadcasted multiply.
+        scale = self.weight if self.on_input else self.weight.reshape(-1, 1)
+        assert orig_weight is not None
+        return orig_weight * scale
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        model_size += self.weight.nelement() * self.weight.element_size()
+        model_size += self.on_input.nelement() * self.on_input.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
+        super().to(device=device, dtype=dtype)
+
+        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.on_input = self.on_input.to(device=device, dtype=dtype)
+
+
+class NormLayer(LoRALayerBase):
+    # bias handled in LoRALayerBase(calc_size, to)
+    # weight: torch.Tensor
+    # bias: Optional[torch.Tensor]
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.weight = values["w_norm"]
+        self.bias = values.get("b_norm", None)
+
+        self.rank = None  # unscaled
+        self.check_keys(values, {"w_norm", "b_norm"})
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        return self.weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        model_size += self.weight.nelement() * self.weight.element_size()
+        return model_size
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.weight = self.weight.to(device=device, dtype=dtype)
+
+
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer]
+
+
+class LoRAModelRaw(RawModel):  # (torch.nn.Module):
+    _name: str
+    layers: Dict[str, AnyLoRALayer]
+
+    def __init__(
+        self,
+        name: str,
+        layers: Dict[str, AnyLoRALayer],
+    ):
+        self._name = name
+        self.layers = layers
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        # TODO: try revert if exception?
+        for _key, layer in self.layers.items():
+            layer.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        """Return the sum of `calc_size()` over every layer in the model (0 when there are no layers)."""
+        layer_sizes = (layer.calc_size() for layer in self.layers.values())
+        total_size = sum(layer_sizes)
+        return total_size
+
+    @classmethod
+    def _convert_sdxl_keys_to_diffusers_format(cls, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """Convert the keys of an SDXL LoRA state_dict to diffusers format.
+
+        The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
+        diffusers format, then this function will have no effect.
+
+        This function is adapted from:
+        https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
+
+        Args:
+            state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
+
+        Raises:
+            ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
+
+        Returns:
+            Dict[str, Tensor]: The diffusers-format state_dict.
+        """
+        converted_count = 0  # The number of Stability AI keys converted to diffusers format.
+        not_converted_count = 0  # The number of keys that were not converted.
+
+        # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
+        # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
+        # `input_blocks_4_1_proj_in`.
+        stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
+        stability_unet_keys.sort()
+
+        new_state_dict = {}
+        for full_key, value in state_dict.items():
+            if full_key.startswith("lora_unet_"):
+                search_key = full_key.replace("lora_unet_", "")
+                # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
+                position = bisect.bisect_right(stability_unet_keys, search_key)
+                map_key = stability_unet_keys[position - 1]
+                # Now, check if the map_key *actually* matches the search_key.
+                if search_key.startswith(map_key):
+                    new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
+                    new_state_dict[new_key] = value
+                    converted_count += 1
+                else:
+                    new_state_dict[full_key] = value
+                    not_converted_count += 1
+            elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
+                # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
+                new_state_dict[full_key] = value
+                continue
+            else:
+                raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
+
+        if converted_count > 0 and not_converted_count > 0:
+            raise ValueError(
+                f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
+                f" not_converted={not_converted_count}"
+            )
+
+        return new_state_dict
+
+    @classmethod
+    def from_checkpoint(
+        cls,
+        file_path: Union[str, Path],
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        base_model: Optional[BaseModelType] = None,
+    ) -> Self:
+        """Load a LoRA/LyCORIS checkpoint file into a new model (device defaults to CPU, dtype to float32).
+
+        Raises:
+            Exception: If a layer's keys match none of the layer formats detected below.
+        """
+        device = device or torch.device("cpu")
+        dtype = dtype or torch.float32
+        file_path = Path(file_path)
+        model = cls(name=file_path.stem, layers={})
+
+        if file_path.suffix == ".safetensors":
+            sd = load_file(file_path.absolute().as_posix(), device="cpu")
+        else:
+            # NOTE(security): torch.load may unpickle arbitrary objects; only load trusted non-safetensors files.
+            sd = torch.load(file_path, map_location="cpu")
+
+        state_dict = cls._group_state(sd)
+
+        if base_model == BaseModelType.StableDiffusionXL:
+            state_dict = cls._convert_sdxl_keys_to_diffusers_format(state_dict)
+
+        for layer_key, values in state_dict.items():
+            # Detect layers according to LyCORIS detection logic(`weight_list_det`)
+            # https://github.com/KohakuBlueleaf/LyCORIS/tree/8ad8000efb79e2b879054da8c9356e6143591bad/lycoris/modules
+
+            # lora and locon
+            if "lora_up.weight" in values:
+                layer: AnyLoRALayer = LoRALayer(layer_key, values)
+
+            # loha
+            elif "hada_w1_a" in values:
+                layer = LoHALayer(layer_key, values)
+
+            # lokr
+            elif "lokr_w1" in values or "lokr_w1_a" in values:
+                layer = LoKRLayer(layer_key, values)
+
+            # diff
+            elif "diff" in values:
+                layer = FullLayer(layer_key, values)
+
+            # ia3
+            elif "on_input" in values:
+                layer = IA3Layer(layer_key, values)
+
+            # norms
+            elif "w_norm" in values:
+                layer = NormLayer(layer_key, values)
+
+            else:
+                logger.error(f"Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
+                raise Exception("Unknown lora format!")
+
+            # lower memory consumption by removing already parsed layer values
+            state_dict[layer_key].clear()
+
+            layer.to(device=device, dtype=dtype)
+            model.layers[layer_key] = layer
+
+        return model
+
+    @staticmethod
+    def _group_state(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
+        """Group flat `<stem>.<leaf>` keys into one dict per stem, splitting each key at its first "."."""
+        grouped: Dict[str, Dict[str, torch.Tensor]] = {}
+
+        for key, value in state_dict.items():
+            # maxsplit=1: the leaf keeps any further dots; a key with no "." fails the two-name unpacking.
+            stem, leaf = key.split(".", 1)
+            grouped.setdefault(stem, {})[leaf] = value
+
+        return grouped
+
+
+# code from
+# https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
+def make_sdxl_unet_conversion_map() -> List[Tuple[str, str]]:
+    """Create a list of (Stability AI, diffusers) key-prefix pairs for the SDXL UNet state_dict."""
+    unet_conversion_map_layer = []
+
+    for i in range(3):  # num_blocks is 3 in sdxl
+        # loop over downblocks/upblocks
+        for j in range(2):
+            # loop over resnets/attentions for downblocks
+            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
+            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
+            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
+
+            if i < 3:
+                # no attention layers in down_blocks.3 (guard is always true here: i comes from range(3))
+                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
+                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
+                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
+
+        for j in range(3):
+            # loop over resnets/attentions for upblocks
+            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
+            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
+            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
+
+            # The `if i > 0:` guard is commented out for SDXL; its rationale was:
+            # no attention layers in up_blocks.0
+            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
+            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
+            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
+
+        if i < 3:
+            # no downsample in down_blocks.3 (guard is always true here: i comes from range(3))
+            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
+            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
+            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
+
+            # no upsample in up_blocks.3
+            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
+            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
+            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
+
+    hf_mid_atn_prefix = "mid_block.attentions.0."
+    sd_mid_atn_prefix = "middle_block.1."
+    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
+
+    for j in range(2):
+        hf_mid_res_prefix = f"mid_block.resnets.{j}."
+        sd_mid_res_prefix = f"middle_block.{2*j}."
+        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
+
+    unet_conversion_map_resnet = [
+        # (stable-diffusion, HF Diffusers)
+        ("in_layers.0.", "norm1."),
+        ("in_layers.2.", "conv1."),
+        ("out_layers.0.", "norm2."),
+        ("out_layers.3.", "conv2."),
+        ("emb_layers.1.", "time_emb_proj."),
+        ("skip_connection.", "conv_shortcut."),
+    ]
+
+    unet_conversion_map = []
+    for sd, hf in unet_conversion_map_layer:
+        if "resnets" in hf:
+            for sd_res, hf_res in unet_conversion_map_resnet:
+                unet_conversion_map.append((sd + sd_res, hf + hf_res))
+        else:
+            unet_conversion_map.append((sd, hf))
+
+    for j in range(2):
+        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
+        sd_time_embed_prefix = f"time_embed.{j*2}."
+        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
+
+    for j in range(2):
+        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
+        sd_label_embed_prefix = f"label_emb.0.{j*2}."
+        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
+
+    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
+    unet_conversion_map.append(("out.0.", "conv_norm_out."))
+    unet_conversion_map.append(("out.2.", "conv_out."))
+
+    return unet_conversion_map
+
+
+SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
+    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
+}
--- a/invokeai/backend/lora/init.py
+++ b/invokeai/backend/lora/init.py
--- a/invokeai/backend/lora/conversions/init.py
+++ b/invokeai/backend/lora/conversions/init.py
--- a/invokeai/backend/lora/conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/lora/conversions/flux_diffusers_lora_conversion_utils.py
@@ -1,210 +0,0 @@
-from typing import Dict
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-
-
-def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Tensor]) -> bool:
-    """Checks if the provided state dict is likely in the Diffusers FLUX LoRA format.
-
-    This is intended to be a reasonably high-precision detector, but it is not guaranteed to have perfect precision. (A
-    perfect-precision detector would require checking all keys against a whitelist and verifying tensor shapes.)
-    """
-    # First, check that all keys end in "lora_A.weight" or "lora_B.weight" (i.e. are in PEFT format).
-    all_keys_in_peft_format = all(k.endswith(("lora_A.weight", "lora_B.weight")) for k in state_dict.keys())
-
-    # Next, check that this is likely a FLUX model by spot-checking a few keys.
-    expected_keys = [
-        "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight",
-        "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight",
-        "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight",
-        "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight",
-    ]
-    all_expected_keys_present = all(k in state_dict for k in expected_keys)
-
-    return all_keys_in_peft_format and all_expected_keys_present
-
-
-# TODO(ryand): What alpha should we use? 1.0? Rank of the LoRA?
-def lora_model_from_flux_diffusers_state_dict(state_dict: Dict[str, torch.Tensor], alpha: float = 1.0) -> LoRAModelRaw:  # pyright: ignore[reportRedeclaration] (state_dict is intentionally re-declared)
-    """Loads a state dict in the Diffusers FLUX LoRA format into a LoRAModelRaw object.
-
-    This function is based on:
-    https://github.com/huggingface/diffusers/blob/55ac421f7bb12fd00ccbef727be4dc2f3f920abb/scripts/convert_flux_to_diffusers.py
-    """
-    # Group keys by layer.
-    grouped_state_dict: dict[str, dict[str, torch.Tensor]] = _group_by_layer(state_dict)
-
-    # Remove the "transformer." prefix from all keys.
-    grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()}
-
-    # Constants for FLUX.1
-    num_double_layers = 19
-    num_single_layers = 38
-    # inner_dim = 3072
-    # mlp_ratio = 4.0
-
-    layers: dict[str, AnyLoRALayer] = {}
-
-    def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
-        if src_key in grouped_state_dict:
-            src_layer_dict = grouped_state_dict.pop(src_key)
-            layers[dst_key] = LoRALayer(
-                values={
-                    "lora_down.weight": src_layer_dict.pop("lora_A.weight"),
-                    "lora_up.weight": src_layer_dict.pop("lora_B.weight"),
-                    "alpha": torch.tensor(alpha),
-                },
-            )
-            assert len(src_layer_dict) == 0
-
-    def add_qkv_lora_layer_if_present(src_keys: list[str], dst_qkv_key: str) -> None:
-        """Handle the Q, K, V matrices for a transformer block. We need special handling because the diffusers format
-        stores them in separate matrices, whereas the BFL format used internally by InvokeAI concatenates them.
-        """
-        # We expect that either all src keys are present or none of them are. Verify this.
-        keys_present = [key in grouped_state_dict for key in src_keys]
-        assert all(keys_present) or not any(keys_present)
-
-        # If none of the keys are present, return early.
-        if not any(keys_present):
-            return
-
-        src_layer_dicts = [grouped_state_dict.pop(key) for key in src_keys]
-        sub_layers: list[LoRALayerBase] = []
-        for src_layer_dict in src_layer_dicts:
-            sub_layers.append(
-                LoRALayer(
-                    values={
-                        "lora_down.weight": src_layer_dict.pop("lora_A.weight"),
-                        "lora_up.weight": src_layer_dict.pop("lora_B.weight"),
-                        "alpha": torch.tensor(alpha),
-                    },
-                )
-            )
-            assert len(src_layer_dict) == 0
-        layers[dst_qkv_key] = ConcatenatedLoRALayer(lora_layers=sub_layers, concat_axis=0)
-
-    # time_text_embed.timestep_embedder -> time_in.
-    add_lora_layer_if_present("time_text_embed.timestep_embedder.linear_1", "time_in.in_layer")
-    add_lora_layer_if_present("time_text_embed.timestep_embedder.linear_2", "time_in.out_layer")
-
-    # time_text_embed.text_embedder -> vector_in.
-    add_lora_layer_if_present("time_text_embed.text_embedder.linear_1", "vector_in.in_layer")
-    add_lora_layer_if_present("time_text_embed.text_embedder.linear_2", "vector_in.out_layer")
-
-    # time_text_embed.guidance_embedder -> guidance_in.
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in")
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in")
-
-    # context_embedder -> txt_in.
-    add_lora_layer_if_present("context_embedder", "txt_in")
-
-    # x_embedder -> img_in.
-    add_lora_layer_if_present("x_embedder", "img_in")
-
-    # Double transformer blocks.
-    for i in range(num_double_layers):
-        # norms.
-        add_lora_layer_if_present(f"transformer_blocks.{i}.norm1.linear", f"double_blocks.{i}.img_mod.lin")
-        add_lora_layer_if_present(f"transformer_blocks.{i}.norm1_context.linear", f"double_blocks.{i}.txt_mod.lin")
-
-        # Q, K, V
-        add_qkv_lora_layer_if_present(
-            [
-                f"transformer_blocks.{i}.attn.to_q",
-                f"transformer_blocks.{i}.attn.to_k",
-                f"transformer_blocks.{i}.attn.to_v",
-            ],
-            f"double_blocks.{i}.img_attn.qkv",
-        )
-        add_qkv_lora_layer_if_present(
-            [
-                f"transformer_blocks.{i}.attn.add_q_proj",
-                f"transformer_blocks.{i}.attn.add_k_proj",
-                f"transformer_blocks.{i}.attn.add_v_proj",
-            ],
-            f"double_blocks.{i}.txt_attn.qkv",
-        )
-
-        # ff img_mlp
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.ff.net.0.proj",
-            f"double_blocks.{i}.img_mlp.0",
-        )
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.ff.net.2",
-            f"double_blocks.{i}.img_mlp.2",
-        )
-
-        # ff txt_mlp
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.ff_context.net.0.proj",
-            f"double_blocks.{i}.txt_mlp.0",
-        )
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.ff_context.net.2",
-            f"double_blocks.{i}.txt_mlp.2",
-        )
-
-        # output projections.
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.attn.to_out.0",
-            f"double_blocks.{i}.img_attn.proj",
-        )
-        add_lora_layer_if_present(
-            f"transformer_blocks.{i}.attn.to_add_out",
-            f"double_blocks.{i}.txt_attn.proj",
-        )
-
-    # Single transformer blocks.
-    for i in range(num_single_layers):
-        # norms
-        add_lora_layer_if_present(
-            f"single_transformer_blocks.{i}.norm.linear",
-            f"single_blocks.{i}.modulation.lin",
-        )
-
-        # Q, K, V, mlp
-        add_qkv_lora_layer_if_present(
-            [
-                f"single_transformer_blocks.{i}.attn.to_q",
-                f"single_transformer_blocks.{i}.attn.to_k",
-                f"single_transformer_blocks.{i}.attn.to_v",
-                f"single_transformer_blocks.{i}.proj_mlp",
-            ],
-            f"single_blocks.{i}.linear1",
-        )
-
-        # Output projections.
-        add_lora_layer_if_present(
-            f"single_transformer_blocks.{i}.proj_out",
-            f"single_blocks.{i}.linear2",
-        )
-
-    # Final layer.
-    add_lora_layer_if_present("proj_out", "final_layer.linear")
-
-    # Assert that all keys were processed.
-    assert len(grouped_state_dict) == 0
-
-    return LoRAModelRaw(layers=layers)
-
-
-def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
-    """Groups the keys in the state dict by layer."""
-    layer_dict: dict[str, dict[str, torch.Tensor]] = {}
-    for key in state_dict:
-        # Split the 'lora_A.weight' or 'lora_B.weight' suffix from the layer name.
-        parts = key.rsplit(".", maxsplit=2)
-        layer_name = parts[0]
-        key_name = ".".join(parts[1:])
-        if layer_name not in layer_dict:
-            layer_dict[layer_name] = {}
-        layer_dict[layer_name][key_name] = state_dict[key]
-    return layer_dict
--- a/invokeai/backend/lora/conversions/flux_kohya_lora_conversion_utils.py
+++ b/invokeai/backend/lora/conversions/flux_kohya_lora_conversion_utils.py
@@ -1,80 +0,0 @@
-import re
-from typing import Any, Dict, TypeVar
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.utils import any_lora_layer_from_state_dict
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-
-# A regex pattern that matches all of the keys in the Kohya FLUX LoRA format.
-# Example keys:
-#   lora_unet_double_blocks_0_img_attn_proj.alpha
-#   lora_unet_double_blocks_0_img_attn_proj.lora_down.weight
-#   lora_unet_double_blocks_0_img_attn_proj.lora_up.weight
-FLUX_KOHYA_KEY_REGEX = (
-    r"lora_unet_(\w+_blocks)_(\d+)_(img_attn|img_mlp|img_mod|txt_attn|txt_mlp|txt_mod|linear1|linear2|modulation)_?(.*)"
-)
-
-
-def is_state_dict_likely_in_flux_kohya_format(state_dict: Dict[str, Any]) -> bool:
-    """Checks if the provided state dict is likely in the Kohya FLUX LoRA format.
-
-    This is intended to be a high-precision detector, but it is not guaranteed to have perfect precision. (A
-    perfect-precision detector would require checking all keys against a whitelist and verifying tensor shapes.)
-    """
-    return all(re.match(FLUX_KOHYA_KEY_REGEX, k) for k in state_dict.keys())
-
-
-def lora_model_from_flux_kohya_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRAModelRaw:
-    # Group keys by layer.
-    grouped_state_dict: dict[str, dict[str, torch.Tensor]] = {}
-    for key, value in state_dict.items():
-        layer_name, param_name = key.split(".", 1)
-        if layer_name not in grouped_state_dict:
-            grouped_state_dict[layer_name] = {}
-        grouped_state_dict[layer_name][param_name] = value
-
-    # Convert the state dict to the InvokeAI format.
-    grouped_state_dict = convert_flux_kohya_state_dict_to_invoke_format(grouped_state_dict)
-
-    # Create LoRA layers.
-    layers: dict[str, AnyLoRALayer] = {}
-    for layer_key, layer_state_dict in grouped_state_dict.items():
-        layers[layer_key] = any_lora_layer_from_state_dict(layer_state_dict)
-
-    # Create and return the LoRAModelRaw.
-    return LoRAModelRaw(layers=layers)
-
-
-T = TypeVar("T")
-
-
-def convert_flux_kohya_state_dict_to_invoke_format(state_dict: Dict[str, T]) -> Dict[str, T]:
-    """Converts a state dict from the Kohya FLUX LoRA format to LoRA weight format used internally by InvokeAI.
-
-    Example key conversions:
-    "lora_unet_double_blocks_0_img_attn_proj" -> "double_blocks.0.img_attn.proj"
-    "lora_unet_double_blocks_0_img_attn_proj" -> "double_blocks.0.img_attn.proj"
-    "lora_unet_double_blocks_0_img_attn_proj" -> "double_blocks.0.img_attn.proj"
-    "lora_unet_double_blocks_0_img_attn_qkv" -> "double_blocks.0.img_attn.qkv"
-    "lora_unet_double_blocks_0_img_attn_qkv" -> "double_blocks.0.img.attn.qkv"
-    "lora_unet_double_blocks_0_img_attn_qkv" -> "double_blocks.0.img.attn.qkv"
-    """
-
-    def replace_func(match: re.Match[str]) -> str:
-        s = f"{match.group(1)}.{match.group(2)}.{match.group(3)}"
-        if match.group(4):
-            s += f".{match.group(4)}"
-        return s
-
-    converted_dict: dict[str, T] = {}
-    for k, v in state_dict.items():
-        match = re.match(FLUX_KOHYA_KEY_REGEX, k)
-        if match:
-            new_key = re.sub(FLUX_KOHYA_KEY_REGEX, replace_func, k)
-            converted_dict[new_key] = v
-        else:
-            raise ValueError(f"Key '{k}' does not match the expected pattern for FLUX LoRA weights.")
-
-    return converted_dict
--- a/invokeai/backend/lora/conversions/sd_lora_conversion_utils.py
+++ b/invokeai/backend/lora/conversions/sd_lora_conversion_utils.py
@@ -1,29 +0,0 @@
-from typing import Dict
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.utils import any_lora_layer_from_state_dict
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-
-
-def lora_model_from_sd_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRAModelRaw:
-    grouped_state_dict: dict[str, dict[str, torch.Tensor]] = _group_state(state_dict)
-
-    layers: dict[str, AnyLoRALayer] = {}
-    for layer_key, values in grouped_state_dict.items():
-        layers[layer_key] = any_lora_layer_from_state_dict(values)
-
-    return LoRAModelRaw(layers=layers)
-
-
-def _group_state(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
-    state_dict_groupped: Dict[str, Dict[str, torch.Tensor]] = {}
-
-    for key, value in state_dict.items():
-        stem, leaf = key.split(".", 1)
-        if stem not in state_dict_groupped:
-            state_dict_groupped[stem] = {}
-        state_dict_groupped[stem][leaf] = value
-
-    return state_dict_groupped
--- a/invokeai/backend/lora/conversions/sdxl_lora_conversion_utils.py
+++ b/invokeai/backend/lora/conversions/sdxl_lora_conversion_utils.py
@@ -1,154 +0,0 @@
-import bisect
-from typing import Dict, List, Tuple, TypeVar
-
-T = TypeVar("T")
-
-
-def convert_sdxl_keys_to_diffusers_format(state_dict: Dict[str, T]) -> dict[str, T]:
-    """Convert the keys of an SDXL LoRA state_dict to diffusers format.
-
-    The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
-    diffusers format, then this function will have no effect.
-
-    This function is adapted from:
-    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
-
-    Args:
-        state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
-
-    Raises:
-        ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
-
-    Returns:
-        Dict[str, Tensor]: The diffusers-format state_dict.
-    """
-    converted_count = 0  # The number of Stability AI keys converted to diffusers format.
-    not_converted_count = 0  # The number of keys that were not converted.
-
-    # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
-    # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
-    # `input_blocks_4_1_proj_in`.
-    stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
-    stability_unet_keys.sort()
-
-    new_state_dict: dict[str, T] = {}
-    for full_key, value in state_dict.items():
-        if full_key.startswith("lora_unet_"):
-            search_key = full_key.replace("lora_unet_", "")
-            # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
-            position = bisect.bisect_right(stability_unet_keys, search_key)
-            map_key = stability_unet_keys[position - 1]
-            # Now, check if the map_key *actually* matches the search_key.
-            if search_key.startswith(map_key):
-                new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
-                new_state_dict[new_key] = value
-                converted_count += 1
-            else:
-                new_state_dict[full_key] = value
-                not_converted_count += 1
-        elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
-            # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
-            new_state_dict[full_key] = value
-            continue
-        else:
-            raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
-
-    if converted_count > 0 and not_converted_count > 0:
-        raise ValueError(
-            f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
-            f" not_converted={not_converted_count}"
-        )
-
-    return new_state_dict
-
-
-# code from
-# https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
-def _make_sdxl_unet_conversion_map() -> List[Tuple[str, str]]:
-    """Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format."""
-    unet_conversion_map_layer: list[tuple[str, str]] = []
-
-    for i in range(3):  # num_blocks is 3 in sdxl
-        # loop over downblocks/upblocks
-        for j in range(2):
-            # loop over resnets/attentions for downblocks
-            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
-            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
-            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
-
-            if i < 3:
-                # no attention layers in down_blocks.3
-                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
-                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
-                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
-
-        for j in range(3):
-            # loop over resnets/attentions for upblocks
-            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
-            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
-            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
-
-            # if i > 0: commentout for sdxl
-            # no attention layers in up_blocks.0
-            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
-            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
-            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
-
-        if i < 3:
-            # no downsample in down_blocks.3
-            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
-            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
-            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
-
-            # no upsample in up_blocks.3
-            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
-            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
-            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
-
-    hf_mid_atn_prefix = "mid_block.attentions.0."
-    sd_mid_atn_prefix = "middle_block.1."
-    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
-
-    for j in range(2):
-        hf_mid_res_prefix = f"mid_block.resnets.{j}."
-        sd_mid_res_prefix = f"middle_block.{2*j}."
-        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
-
-    unet_conversion_map_resnet = [
-        # (stable-diffusion, HF Diffusers)
-        ("in_layers.0.", "norm1."),
-        ("in_layers.2.", "conv1."),
-        ("out_layers.0.", "norm2."),
-        ("out_layers.3.", "conv2."),
-        ("emb_layers.1.", "time_emb_proj."),
-        ("skip_connection.", "conv_shortcut."),
-    ]
-
-    unet_conversion_map: list[tuple[str, str]] = []
-    for sd, hf in unet_conversion_map_layer:
-        if "resnets" in hf:
-            for sd_res, hf_res in unet_conversion_map_resnet:
-                unet_conversion_map.append((sd + sd_res, hf + hf_res))
-        else:
-            unet_conversion_map.append((sd, hf))
-
-    for j in range(2):
-        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
-        sd_time_embed_prefix = f"time_embed.{j*2}."
-        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
-
-    for j in range(2):
-        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
-        sd_label_embed_prefix = f"label_emb.0.{j*2}."
-        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
-
-    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
-    unet_conversion_map.append(("out.0.", "conv_norm_out."))
-    unet_conversion_map.append(("out.2.", "conv_out."))
-
-    return unet_conversion_map
-
-
-SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
-    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in _make_sdxl_unet_conversion_map()
-}
--- a/invokeai/backend/lora/layers/init.py
+++ b/invokeai/backend/lora/layers/init.py
--- a/invokeai/backend/lora/layers/any_lora_layer.py
+++ b/invokeai/backend/lora/layers/any_lora_layer.py
@@ -1,11 +0,0 @@
-from typing import Union
-
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.full_layer import FullLayer
-from invokeai.backend.lora.layers.ia3_layer import IA3Layer
-from invokeai.backend.lora.layers.loha_layer import LoHALayer
-from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.layers.norm_layer import NormLayer
-
-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer, ConcatenatedLoRALayer]
--- a/invokeai/backend/lora/layers/concatenated_lora_layer.py
+++ b/invokeai/backend/lora/layers/concatenated_lora_layer.py
@@ -1,46 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-
-
-class ConcatenatedLoRALayer(LoRALayerBase):
-    """A LoRA layer that is composed of multiple LoRA layers concatenated along a specified axis.
-
-    This class was created to handle a special case with FLUX LoRA models. In the BFL FLUX model format, the attention
-    Q, K, V matrices are concatenated along the first dimension. In the diffusers LoRA format, the Q, K, V matrices are
-    stored as separate tensors. This class enables diffusers LoRA layers to be used in BFL FLUX models.
-    """
-
-    def __init__(self, lora_layers: List[LoRALayerBase], concat_axis: int = 0):
-        # Note: We pass values={} to the base class, because the values are handled by the individual LoRA layers.
-        super().__init__(values={})
-
-        self._lora_layers = lora_layers
-        self._concat_axis = concat_axis
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        # TODO(ryand): Currently, we pass orig_weight=None to the sub-layers. If we want to support sub-layers that
-        # require this value, we will need to implement chunking of the original weight tensor here.
-        layer_weights = [lora_layer.get_weight(None) for lora_layer in self._lora_layers]  # pyright: ignore[reportArgumentType]
-        return torch.cat(layer_weights, dim=self._concat_axis)
-
-    def get_bias(self, orig_bias: torch.Tensor) -> Optional[torch.Tensor]:
-        # TODO(ryand): Currently, we pass orig_bias=None to the sub-layers. If we want to support sub-layers that
-        # require this value, we will need to implement chunking of the original bias tensor here.
-        layer_biases = [lora_layer.get_bias(None) for lora_layer in self._lora_layers]  # pyright: ignore[reportArgumentType]
-        layer_bias_is_none = [layer_bias is None for layer_bias in layer_biases]
-        if any(layer_bias_is_none):
-            assert all(layer_bias_is_none)
-            return None
-
-        # Ignore the type error, because we have just verified that all layer biases are non-None.
-        return torch.cat(layer_biases, dim=self._concat_axis)
-
-    def calc_size(self) -> int:
-        return sum(lora_layer.calc_size() for lora_layer in self._lora_layers)
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        for lora_layer in self._lora_layers:
-            lora_layer.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/full_layer.py
+++ b/invokeai/backend/lora/layers/full_layer.py
@@ -1,35 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensor_size
-
-
-class FullLayer(LoRALayerBase):
-    # bias handled in LoRALayerBase(calc_size, to)
-    # weight: torch.Tensor
-    # bias: Optional[torch.Tensor]
-
-    def __init__(
-        self,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(values)
-
-        self.weight = values["diff"]
-        self.bias = values.get("diff_b", None)
-
-        self.rank = None  # unscaled
-        self.check_keys(values, {"diff", "diff_b"})
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        return self.weight
-
-    def calc_size(self) -> int:
-        return calc_tensor_size(self.weight) + super().calc_size()
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/ia3_layer.py
+++ b/invokeai/backend/lora/layers/ia3_layer.py
@@ -1,41 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size
-
-
-class IA3Layer(LoRALayerBase):
-    # weight: torch.Tensor
-    # on_input: torch.Tensor
-
-    def __init__(
-        self,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(values)
-
-        self.weight = values["weight"]
-        self.on_input = values["on_input"]
-
-        self.rank = None  # unscaled
-        self.check_keys(values, {"weight", "on_input"})
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        weight = self.weight
-        if not self.on_input:
-            weight = weight.reshape(-1, 1)
-        assert orig_weight is not None
-        return orig_weight * weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += calc_tensors_size([self.weight, self.on_input])
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
-        self.on_input = self.on_input.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/loha_layer.py
+++ b/invokeai/backend/lora/layers/loha_layer.py
@@ -1,67 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size
-
-
-class LoHALayer(LoRALayerBase):
-    # w1_a: torch.Tensor
-    # w1_b: torch.Tensor
-    # w2_a: torch.Tensor
-    # w2_b: torch.Tensor
-    # t1: Optional[torch.Tensor] = None
-    # t2: Optional[torch.Tensor] = None
-
-    def __init__(self, values: Dict[str, torch.Tensor]):
-        super().__init__(values)
-
-        self.w1_a = values["hada_w1_a"]
-        self.w1_b = values["hada_w1_b"]
-        self.w2_a = values["hada_w2_a"]
-        self.w2_b = values["hada_w2_b"]
-        self.t1 = values.get("hada_t1", None)
-        self.t2 = values.get("hada_t2", None)
-
-        self.rank = self.w1_b.shape[0]
-        self.check_keys(
-            values,
-            {
-                "hada_w1_a",
-                "hada_w1_b",
-                "hada_w2_a",
-                "hada_w2_b",
-                "hada_t1",
-                "hada_t2",
-            },
-        )
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        if self.t1 is None:
-            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
-
-        else:
-            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
-            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
-            weight = rebuild1 * rebuild2
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += calc_tensors_size([self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2])
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
-        if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype)
-
-        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
-        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/lokr_layer.py
+++ b/invokeai/backend/lora/layers/lokr_layer.py
@@ -1,112 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size
-
-
-class LoKRLayer(LoRALayerBase):
-    # w1: Optional[torch.Tensor] = None
-    # w1_a: Optional[torch.Tensor] = None
-    # w1_b: Optional[torch.Tensor] = None
-    # w2: Optional[torch.Tensor] = None
-    # w2_a: Optional[torch.Tensor] = None
-    # w2_b: Optional[torch.Tensor] = None
-    # t2: Optional[torch.Tensor] = None
-
-    def __init__(
-        self,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(values)
-
-        self.w1 = values.get("lokr_w1", None)
-        if self.w1 is None:
-            self.w1_a = values["lokr_w1_a"]
-            self.w1_b = values["lokr_w1_b"]
-        else:
-            self.w1_b = None
-            self.w1_a = None
-
-        self.w2 = values.get("lokr_w2", None)
-        if self.w2 is None:
-            self.w2_a = values["lokr_w2_a"]
-            self.w2_b = values["lokr_w2_b"]
-        else:
-            self.w2_a = None
-            self.w2_b = None
-
-        self.t2 = values.get("lokr_t2", None)
-
-        if self.w1_b is not None:
-            self.rank = self.w1_b.shape[0]
-        elif self.w2_b is not None:
-            self.rank = self.w2_b.shape[0]
-        else:
-            self.rank = None  # unscaled
-
-        self.check_keys(
-            values,
-            {
-                "lokr_w1",
-                "lokr_w1_a",
-                "lokr_w1_b",
-                "lokr_w2",
-                "lokr_w2_a",
-                "lokr_w2_b",
-                "lokr_t2",
-            },
-        )
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        w1: Optional[torch.Tensor] = self.w1
-        if w1 is None:
-            assert self.w1_a is not None
-            assert self.w1_b is not None
-            w1 = self.w1_a @ self.w1_b
-
-        w2 = self.w2
-        if w2 is None:
-            if self.t2 is None:
-                assert self.w2_a is not None
-                assert self.w2_b is not None
-                w2 = self.w2_a @ self.w2_b
-            else:
-                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
-
-        if len(w2.shape) == 4:
-            w1 = w1.unsqueeze(2).unsqueeze(2)
-        w2 = w2.contiguous()
-        assert w1 is not None
-        assert w2 is not None
-        weight = torch.kron(w1, w2)
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += calc_tensors_size([self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2])
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        if self.w1 is not None:
-            self.w1 = self.w1.to(device=device, dtype=dtype)
-        else:
-            assert self.w1_a is not None
-            assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
-
-        if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype)
-        else:
-            assert self.w2_a is not None
-            assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
-
-        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/lora_layer.py
+++ b/invokeai/backend/lora/layers/lora_layer.py
@@ -1,83 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size
-
-
-class LoRALayer(LoRALayerBase):
-    def __init__(
-        self,
-        up: torch.Tensor,
-        down: torch.Tensor,
-        mid: Optional[torch.Tensor],
-        alpha: float | None,
-        bias: torch.Tensor | None,
-    ):
-        super().__init__(alpha=alpha, bias=bias)
-
-        self.up = up
-        self.down = down
-        self.mid = mid
-
-    @classmethod
-    def from_state_dict_values(
-        cls,
-        values: Dict[str, torch.Tensor],
-    ):
-        alpha = cls._parse_alpha(values.get("alpha", None))
-        bias = cls._parse_bias(
-            values.get("bias_indices", None), values.get("bias_values", None), values.get("bias_size", None)
-        )
-
-        cls(
-            up=values["lora_up.weight"],
-            down=values["lora_down.weight"],
-            mid=values.get("lora_mid.weight", None),
-            alpha=alpha,
-            bias=bias,
-        )
-
-        cls.warn_on_unhandled_keys(
-            values,
-            {
-                # Default keys.
-                "alpha",
-                "bias_indices",
-                "bias_values",
-                "bias_size",
-                # Layer-specific keys.
-                "lora_up.weight",
-                "lora_down.weight",
-                "lora_mid.weight",
-            },
-        )
-
-    @property
-    def rank(self) -> int:
-        return self.down.shape[0]
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        if self.mid is not None:
-            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
-            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
-            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
-        else:
-            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += calc_tensors_size([self.up, self.mid, self.down])
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.up = self.up.to(device=device, dtype=dtype)
-        self.down = self.down.to(device=device, dtype=dtype)
-
-        if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/lora_layer_base.py
+++ b/invokeai/backend/lora/layers/lora_layer_base.py
@@ -1,65 +0,0 @@
-from typing import Dict, Optional, Set
-
-import torch
-
-import invokeai.backend.util.logging as logger
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size
-
-
-class LoRALayerBase:
-    """Base class for all LoRA-like patching layers."""
-
-    def __init__(self, alpha: float | None, bias: torch.Tensor | None):
-        self.alpha = alpha
-        self.bias = bias
-
-    @classmethod
-    def _parse_bias(
-        cls, bias_indices: torch.Tensor | None, bias_values: torch.Tensor | None, bias_size: torch.Tensor | None
-    ) -> torch.Tensor | None:
-        assert (bias_indices is None) == (bias_values is None) == (bias_size is None)
-
-        bias = None
-        if bias_indices is not None:
-            bias = torch.sparse_coo_tensor(bias_indices, bias_values, tuple(bias_size))
-        return bias
-
-    @classmethod
-    def _parse_alpha(
-        cls,
-        alpha: torch.Tensor | None,
-    ) -> float | None:
-        return alpha.item() if alpha is not None else None
-
-    @property
-    def rank(self) -> int | None:
-        raise NotImplementedError()
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        raise NotImplementedError()
-
-    def get_bias(self, orig_bias: torch.Tensor) -> Optional[torch.Tensor]:
-        return self.bias
-
-    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
-        params = {"weight": self.get_weight(orig_module.weight)}
-        bias = self.get_bias(orig_module.bias)
-        if bias is not None:
-            params["bias"] = bias
-        return params
-
-    def calc_size(self) -> int:
-        return calc_tensors_size([self.bias])
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype)
-
-    @classmethod
-    def warn_on_unhandled_keys(cls, values: Dict[str, torch.Tensor], handled_keys: Set[str]):
-        """Log a warning if values contains unhandled keys."""
-        unknown_keys = set(values.keys()) - handled_keys
-        if unknown_keys:
-            logger.warning(
-                f"Unexpected keys found in LoRA/LyCORIS layer, model might work incorrectly! Unexpected keys: {unknown_keys}"
-            )
--- a/invokeai/backend/lora/layers/norm_layer.py
+++ b/invokeai/backend/lora/layers/norm_layer.py
@@ -1,37 +0,0 @@
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
-from invokeai.backend.util.calc_tensor_size import calc_tensor_size
-
-
-class NormLayer(LoRALayerBase):
-    # bias handled in LoRALayerBase(calc_size, to)
-    # weight: torch.Tensor
-    # bias: Optional[torch.Tensor]
-
-    def __init__(
-        self,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(values)
-
-        self.weight = values["w_norm"]
-        self.bias = values.get("b_norm", None)
-
-        self.rank = None  # unscaled
-        self.check_keys(values, {"w_norm", "b_norm"})
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        return self.weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += calc_tensor_size(self.weight)
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/layers/utils.py
+++ b/invokeai/backend/lora/layers/utils.py
@@ -1,33 +0,0 @@
-from typing import Dict
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.full_layer import FullLayer
-from invokeai.backend.lora.layers.ia3_layer import IA3Layer
-from invokeai.backend.lora.layers.loha_layer import LoHALayer
-from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.layers.norm_layer import NormLayer
-
-
-def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> AnyLoRALayer:
-    # Detect layers according to LyCORIS detection logic(`weight_list_det`)
-    # https://github.com/KohakuBlueleaf/LyCORIS/tree/8ad8000efb79e2b879054da8c9356e6143591bad/lycoris/modules
-
-    if "lora_up.weight" in state_dict:
-        # LoRA a.k.a LoCon
-        return LoRALayer(state_dict)
-    elif "hada_w1_a" in state_dict:
-        return LoHALayer(state_dict)
-    elif "lokr_w1" in state_dict or "lokr_w1_a" in state_dict:
-        return LoKRLayer(state_dict)
-    elif "diff" in state_dict:
-        # Full a.k.a Diff
-        return FullLayer(state_dict)
-    elif "on_input" in state_dict:
-        return IA3Layer(state_dict)
-    elif "w_norm" in state_dict:
-        return NormLayer(state_dict)
-    else:
-        raise ValueError(f"Unsupported lora format: {state_dict.keys()}")
--- a/invokeai/backend/lora/lora_model_raw.py
+++ b/invokeai/backend/lora/lora_model_raw.py
@@ -1,22 +0,0 @@
-# Copyright (c) 2024 The InvokeAI Development team
-from typing import Dict, Optional
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.raw_model import RawModel
-
-
-class LoRAModelRaw(RawModel):  # (torch.nn.Module):
-    def __init__(self, layers: Dict[str, AnyLoRALayer]):
-        self.layers = layers
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype)
-
-    def calc_size(self) -> int:
-        model_size = 0
-        for _, layer in self.layers.items():
-            model_size += layer.calc_size()
-        return model_size
--- a/invokeai/backend/lora/lora_patcher.py
+++ b/invokeai/backend/lora/lora_patcher.py
@@ -1,264 +0,0 @@
-from contextlib import contextmanager
-from typing import Dict, Iterable, Optional, Tuple
-
-import torch
-
-from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.sidecar_layers.lora.lora_conv_sidecar_layer import (
-    LoRAConv1dSidecarLayer,
-    LoRAConv2dSidecarLayer,
-    LoRAConv3dSidecarLayer,
-)
-from invokeai.backend.lora.sidecar_layers.lora.lora_linear_sidecar_layer import LoRALinearSidecarLayer
-from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
-from invokeai.backend.util.devices import TorchDevice
-from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage
-
-
-class LoraPatcher:
-    @staticmethod
-    @torch.no_grad()
-    @contextmanager
-    def apply_lora_patches(
-        model: torch.nn.Module,
-        patches: Iterable[Tuple[LoRAModelRaw, float]],
-        prefix: str,
-        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
-    ):
-        """Apply one or more LoRA patches to a model within a context manager.
-
-        :param model: The model to patch.
-        :param loras: An iterator that returns tuples of LoRA patches and associated weights. An iterator is used so
-            that the LoRA patches do not need to be loaded into memory all at once.
-        :param prefix: The keys in the patches will be filtered to only include weights with this prefix.
-        :cached_weights: Read-only copy of the model's state dict in CPU, for efficient unpatching purposes.
-        """
-        original_weights = OriginalWeightsStorage(cached_weights)
-        try:
-            for patch, patch_weight in patches:
-                LoraPatcher.apply_lora_patch(
-                    model=model,
-                    prefix=prefix,
-                    patch=patch,
-                    patch_weight=patch_weight,
-                    original_weights=original_weights,
-                )
-                del patch
-
-            yield
-        finally:
-            for param_key, weight in original_weights.get_changed_weights():
-                model.get_parameter(param_key).copy_(weight)
-
-    @staticmethod
-    @torch.no_grad()
-    def apply_lora_patch(
-        model: torch.nn.Module,
-        prefix: str,
-        patch: LoRAModelRaw,
-        patch_weight: float,
-        original_weights: OriginalWeightsStorage,
-    ):
-        """
-        Apply a single LoRA patch to a model.
-        :param model: The model to patch.
-        :param patch: LoRA model to patch in.
-        :param patch_weight: LoRA patch weight.
-        :param prefix: A string prefix that precedes keys used in the LoRAs weight layers.
-        :param original_weights: Storage with original weights, filled by weights which lora patches, used for unpatching.
-        """
-
-        if patch_weight == 0:
-            return
-
-        # If the layer keys contain a dot, then they are not flattened, and can be directly used to access model
-        # submodules. If the layer keys do not contain a dot, then they are flattened, meaning that all '.' have been
-        # replaced with '_'. Non-flattened keys are preferred, because they allow submodules to be accessed directly
-        # without searching, but some legacy code still uses flattened keys.
-        layer_keys_are_flattened = "." not in next(iter(patch.layers.keys()))
-
-        prefix_len = len(prefix)
-
-        for layer_key, layer in patch.layers.items():
-            if not layer_key.startswith(prefix):
-                continue
-
-            module_key, module = LoraPatcher._get_submodule(
-                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
-            )
-
-            # All of the LoRA weight calculations will be done on the same device as the module weight.
-            # (Performance will be best if this is a CUDA device.)
-            device = module.weight.device
-            dtype = module.weight.dtype
-
-            layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
-
-            # We intentionally move to the target device first, then cast. Experimentally, this was found to
-            # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
-            # same thing in a single call to '.to(...)'.
-            layer.to(device=device)
-            layer.to(dtype=torch.float32)
-
-            # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
-            # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-            for param_name, lora_param_weight in layer.get_parameters(module).items():
-                param_key = module_key + "." + param_name
-                module_param = module.get_parameter(param_name)
-
-                # Save original weight
-                original_weights.save(param_key, module_param)
-
-                if module_param.shape != lora_param_weight.shape:
-                    lora_param_weight = lora_param_weight.reshape(module_param.shape)
-
-                lora_param_weight *= patch_weight * layer_scale
-                module_param += lora_param_weight.to(dtype=dtype)
-
-            layer.to(device=TorchDevice.CPU_DEVICE)
-
-    @staticmethod
-    @torch.no_grad()
-    @contextmanager
-    def apply_lora_sidecar_patches(
-        model: torch.nn.Module,
-        patches: Iterable[Tuple[LoRAModelRaw, float]],
-        prefix: str,
-    ):
-        original_modules: dict[str, torch.nn.Module] = {}
-        try:
-            for patch, patch_weight in patches:
-                LoraPatcher._apply_lora_sidecar_patch(
-                    model=model,
-                    prefix=prefix,
-                    patch=patch,
-                    patch_weight=patch_weight,
-                    original_modules=original_modules,
-                )
-
-            yield
-        finally:
-            # Restore original modules.
-            # Note: This logic assumes no nested modules in original_modules.
-            for module_key, orig_module in original_modules.items():
-                module_parent_key, module_name = module_key.rsplit(".", 1)
-                parent_module = model.get_submodule(module_parent_key)
-                LoraPatcher._set_submodule(parent_module, module_name, orig_module)
-
-    @staticmethod
-    def _apply_lora_sidecar_patch(
-        model: torch.nn.Module,
-        patch: LoRAModelRaw,
-        patch_weight: float,
-        prefix: str,
-        original_modules: dict[str, torch.nn.Module],
-    ):
-        if patch_weight == 0:
-            return
-
-        # If the layer keys contain a dot, then they are not flattened, and can be directly used to access model
-        # submodules. If the layer keys do not contain a dot, then they are flattened, meaning that all '.' have been
-        # replaced with '_'. Non-flattened keys are preferred, because they allow submodules to be accessed directly
-        # without searching, but some legacy code still uses flattened keys.
-        layer_keys_are_flattened = "." not in next(iter(patch.layers.keys()))
-
-        prefix_len = len(prefix)
-
-        for layer_key, layer in patch.layers.items():
-            if not layer_key.startswith(prefix):
-                continue
-
-            module_key, module = LoraPatcher._get_submodule(
-                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
-            )
-
-            # Initialize the LoRA sidecar layer.
-            lora_sidecar_layer = LoraPatcher._initialize_lora_sidecar_layer(module, layer, patch_weight)
-
-            # TODO(ryand): Should we move the LoRA sidecar layer to the same device/dtype as the orig module?
-
-            if module_key in original_modules:
-                # The module has already been patched with a LoRASidecarModule. Append to it.
-                assert isinstance(module, LoRASidecarModule)
-                module.add_lora_layer(lora_sidecar_layer)
-            else:
-                # The module has not yet been patched with a LoRASidecarModule. Create one.
-                lora_sidecar_module = LoRASidecarModule(module, [lora_sidecar_layer])
-                original_modules[module_key] = module
-                module_parent_key, module_name = module_key.rsplit(".", 1)
-                module_parent = model.get_submodule(module_parent_key)
-                LoraPatcher._set_submodule(module_parent, module_name, lora_sidecar_module)
-
-    @staticmethod
-    def _initialize_lora_sidecar_layer(orig_layer: torch.nn.Module, lora_layer: AnyLoRALayer, patch_weight: float):
-        if isinstance(orig_layer, torch.nn.Linear):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRALinearSidecarLayer.from_layers(orig_layer, lora_layer, patch_weight)
-            else:
-                raise ValueError(f"Unsupported Linear LoRA layer type: {type(lora_layer)}")
-        elif isinstance(orig_layer, torch.nn.Conv1d):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRAConv1dSidecarLayer.from_layers(orig_layer, lora_layer, patch_weight)
-            else:
-                raise ValueError(f"Unsupported Conv1D LoRA layer type: {type(lora_layer)}")
-        elif isinstance(orig_layer, torch.nn.Conv2d):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRAConv2dSidecarLayer.from_layers(orig_layer, lora_layer, patch_weight)
-            else:
-                raise ValueError(f"Unsupported Conv2D LoRA layer type: {type(lora_layer)}")
-        elif isinstance(orig_layer, torch.nn.Conv3d):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRAConv3dSidecarLayer.from_layers(orig_layer, lora_layer, patch_weight)
-            else:
-                raise ValueError(f"Unsupported Conv3D LoRA layer type: {type(lora_layer)}")
-        else:
-            raise ValueError(f"Unsupported layer type: {type(orig_layer)}")
-
-    @staticmethod
-    def _set_submodule(parent_module: torch.nn.Module, module_name: str, submodule: torch.nn.Module):
-        try:
-            submodule_index = int(module_name)
-            # If the module name is an integer, then we use the __setitem__ method to set the submodule.
-            parent_module[submodule_index] = submodule
-        except ValueError:
-            # If the module name is not an integer, then we use the setattr method to set the submodule.
-            setattr(parent_module, module_name, submodule)
-
-    @staticmethod
-    def _get_submodule(
-        model: torch.nn.Module, layer_key: str, layer_key_is_flattened: bool
-    ) -> tuple[str, torch.nn.Module]:
-        """Get the submodule corresponding to the given layer key.
-        :param model: The model to search.
-        :param layer_key: The layer key to search for.
-        :param layer_key_is_flattened: Whether the layer key is flattened. If flattened, then all '.' have been replaced
-            with '_'. Non-flattened keys are preferred, because they allow submodules to be accessed directly without
-            searching, but some legacy code still uses flattened keys.
-        :return: A tuple containing the module key and the submodule.
-        """
-        if not layer_key_is_flattened:
-            return layer_key, model.get_submodule(layer_key)
-
-        # Handle flattened keys.
-        assert "." not in layer_key
-
-        module = model
-        module_key = ""
-        key_parts = layer_key.split("_")
-
-        submodule_name = key_parts.pop(0)
-
-        while len(key_parts) > 0:
-            try:
-                module = module.get_submodule(submodule_name)
-                module_key += "." + submodule_name
-                submodule_name = key_parts.pop(0)
-            except Exception:
-                submodule_name += "_" + key_parts.pop(0)
-
-        module = module.get_submodule(submodule_name)
-        module_key = (module_key + "." + submodule_name).lstrip(".")
-
-        return module_key, module
--- a/invokeai/backend/lora/sidecar_layers/init.py
+++ b/invokeai/backend/lora/sidecar_layers/init.py
--- a/invokeai/backend/lora/sidecar_layers/lora/init.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/init.py
--- a/invokeai/backend/lora/sidecar_layers/lora/base_lora_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/base_lora_layer.py
--- a/invokeai/backend/lora/sidecar_layers/lora/lora_conv_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/lora_conv_sidecar_layer.py
@@ -1,135 +0,0 @@
-import typing
-
-import torch
-
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-
-
-class LoRAConvSidecarLayer(torch.nn.Module):
-    """An implementation of a conv LoRA layer based on the paper 'LoRA: Low-Rank Adaptation of Large Language Models'.
-    (https://arxiv.org/pdf/2106.09685.pdf)
-    """
-
-    @property
-    def conv_module(self) -> type[torch.nn.Conv1d | torch.nn.Conv2d | torch.nn.Conv3d]:
-        """The conv module to be set by child classes. One of torch.nn.Conv1d, torch.nn.Conv2d, torch.nn.Conv3d."""
-        raise NotImplementedError(
-            "LoRAConvLayer cannot be used directly. Use LoRAConv1dLayer, LoRAConv2dLayer, or LoRAConv3dLayer instead."
-        )
-
-    def __init__(
-        self,
-        in_channels: int,
-        out_channels: int,
-        include_mid: bool,
-        rank: int,
-        alpha: float,
-        weight: float,
-        kernel_size: typing.Union[int, tuple[int]] = 1,
-        stride: typing.Union[int, tuple[int]] = 1,
-        padding: typing.Union[str, int, tuple[int]] = 0,
-        device: torch.device | None = None,
-        dtype: torch.dtype | None = None,
-    ):
-        super().__init__()
-
-        if rank > min(in_channels, out_channels):
-            raise ValueError(f"LoRA rank {rank} must be less than or equal to {min(in_channels, out_channels)}")
-
-        self._down = self.conv_module(
-            in_channels,
-            rank,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            bias=False,
-            device=device,
-            dtype=dtype,
-        )
-        self._up = self.conv_module(rank, out_channels, kernel_size=1, stride=1, bias=False, device=device, dtype=dtype)
-        self._mid = None
-        if include_mid:
-            self._mid = self.conv_module(rank, rank, kernel_size=1, stride=1, bias=False, device=device, dtype=dtype)
-
-        # Register alpha as a buffer so that it is not trained, but still gets saved to the state_dict.
-        self.register_buffer("alpha", torch.tensor(alpha, device=device, dtype=dtype))
-
-        self._weight = weight
-        self._rank = rank
-
-    @classmethod
-    def from_layers(cls, orig_layer: torch.nn.Module, lora_layer: LoRALayer, weight: float):
-        # Initialize the LoRA layer.
-        with torch.device("meta"):
-            model = cls.from_orig_layer(
-                orig_layer,
-                include_mid=lora_layer.mid is not None,
-                rank=lora_layer.rank,
-                # TODO(ryand): Is this the right default in case of missing alpha?
-                alpha=lora_layer.alpha if lora_layer.alpha is not None else lora_layer.rank,
-                weight=weight,
-            )
-
-        # Inject weight into the LoRA layer.
-        model._up.weight.data = lora_layer.up
-        model._down.weight.data = lora_layer.down
-        if lora_layer.mid is not None:
-            assert model._mid is not None
-            model._mid.weight.data = lora_layer.mid
-
-        return model
-
-    @classmethod
-    def from_orig_layer(
-        cls,
-        layer: torch.nn.Module,
-        include_mid: bool,
-        rank: int,
-        alpha: float,
-        weight: float,
-        device: torch.device | None = None,
-        dtype: torch.dtype | None = None,
-    ):
-        if not isinstance(layer, cls.conv_module):
-            raise TypeError(f"'{__class__.__name__}' cannot be initialized from a layer of type '{type(layer)}'.")
-
-        return cls(
-            in_channels=layer.in_channels,
-            out_channels=layer.out_channels,
-            include_mid=include_mid,
-            weight=weight,
-            kernel_size=layer.kernel_size,
-            stride=layer.stride,
-            padding=layer.padding,
-            rank=rank,
-            alpha=alpha,
-            device=layer.weight.device if device is None else device,
-            dtype=layer.weight.dtype if dtype is None else dtype,
-        )
-
-    def forward(self, x: torch.Tensor):
-        x = self._down(x)
-        if self._mid is not None:
-            x = self._mid(x)
-        x = self._up(x)
-
-        x *= self._weight * self.alpha / self._rank
-        return x
-
-
-class LoRAConv1dSidecarLayer(LoRAConvSidecarLayer):
-    @property
-    def conv_module(self):
-        return torch.nn.Conv1d
-
-
-class LoRAConv2dSidecarLayer(LoRAConvSidecarLayer):
-    @property
-    def conv_module(self):
-        return torch.nn.Conv2d
-
-
-class LoRAConv3dSidecarLayer(LoRAConvSidecarLayer):
-    @property
-    def conv_module(self):
-        return torch.nn.Conv3d
--- a/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
@@ -1,95 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-
-
-class LoRALinearSidecarLayer(torch.nn.Module):
-    """An implementation of a linear LoRA layer based on the paper 'LoRA: Low-Rank Adaptation of Large Language Models'.
-    (https://arxiv.org/pdf/2106.09685.pdf)
-    """
-
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        include_mid: bool,
-        rank: int,
-        alpha: float,
-        weight: float,
-        device: torch.device | None = None,
-        dtype: torch.dtype | None = None,
-    ):
-        super().__init__()
-
-        if rank > min(in_features, out_features):
-            raise ValueError(f"LoRA rank {rank} must be less than or equal to {min(in_features, out_features)}")
-
-        self._down = torch.nn.Linear(in_features, rank, bias=False, device=device, dtype=dtype)
-        self._up = torch.nn.Linear(rank, out_features, bias=False, device=device, dtype=dtype)
-        self._mid = None
-        if include_mid:
-            self._mid = torch.nn.Linear(rank, rank, bias=False, device=device, dtype=dtype)
-
-        # Register alpha as a buffer so that it is not trained, but still gets saved to the state_dict.
-        self.register_buffer("alpha", torch.tensor(alpha, device=device, dtype=dtype))
-
-        self._weight = weight
-        self._rank = rank
-
-    @classmethod
-    def from_layers(cls, orig_layer: torch.nn.Module, lora_layer: LoRALayer, weight: float):
-        # Initialize the LoRA layer.
-        with torch.device("meta"):
-            model = cls.from_orig_layer(
-                orig_layer,
-                include_mid=lora_layer.mid is not None,
-                rank=lora_layer.rank,
-                # TODO(ryand): Is this the right default in case of missing alpha?
-                alpha=lora_layer.alpha if lora_layer.alpha is not None else lora_layer.rank,
-                weight=weight,
-            )
-
-        # TODO(ryand): Are there cases where we need to reshape the weight matrices to match the conv layers?
-
-        # Inject weight into the LoRA layer.
-        model._up.weight.data = lora_layer.up
-        model._down.weight.data = lora_layer.down
-        if lora_layer.mid is not None:
-            assert model._mid is not None
-            model._mid.weight.data = lora_layer.mid
-
-        return model
-
-    @classmethod
-    def from_orig_layer(
-        cls,
-        layer: torch.nn.Module,
-        include_mid: bool,
-        rank: int,
-        alpha: float,
-        weight: float,
-        device: torch.device | None = None,
-        dtype: torch.dtype | None = None,
-    ):
-        if not isinstance(layer, torch.nn.Linear):
-            raise TypeError(f"'{__class__.__name__}' cannot be initialized from a layer of type '{type(layer)}'.")
-
-        return cls(
-            in_features=layer.in_features,
-            out_features=layer.out_features,
-            include_mid=include_mid,
-            rank=rank,
-            alpha=alpha,
-            weight=weight,
-            device=layer.weight.device if device is None else device,
-            dtype=layer.weight.dtype if dtype is None else dtype,
-        )
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self._down(x)
-        if self._mid is not None:
-            x = self._mid(x)
-        x = self._up(x)
-
-        x *= self._weight * self.alpha / self._rank
-        return x
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
@@ -1,17 +0,0 @@
-import torch
-
-
-class LoRASidecarModule(torch.nn.Module):
-    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[torch.nn.Module]):
-        super().__init__()
-        self._orig_module = orig_module
-        self._lora_layers = lora_layers
-
-    def add_lora_layer(self, lora_layer: torch.nn.Module):
-        self._lora_layers.append(lora_layer)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self._orig_module(x)
-        for lora_layer in self._lora_layers:
-            x += lora_layer(x)
-        return x
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -52,7 +52,6 @@ class BaseModelType(str, Enum):
    StableDiffusion2 = "sd-2"
    StableDiffusionXL = "sdxl"
    StableDiffusionXLRefiner = "sdxl-refiner"
-    Flux = "flux"
    # Kandinsky2_1 = "kandinsky-2.1"


@@ -67,9 +66,7 @@ class ModelType(str, Enum):
    TextualInversion = "embedding"
    IPAdapter = "ip_adapter"
    CLIPVision = "clip_vision"
-    CLIPEmbed = "clip_embed"
    T2IAdapter = "t2i_adapter"
-    T5Encoder = "t5_encoder"
    SpandrelImageToImage = "spandrel_image_to_image"


@@ -77,7 +74,6 @@ class SubModelType(str, Enum):
    """Submodel type."""

    UNet = "unet"
-    Transformer = "transformer"
    TextEncoder = "text_encoder"
    TextEncoder2 = "text_encoder_2"
    Tokenizer = "tokenizer"
@@ -108,9 +104,6 @@ class ModelFormat(str, Enum):
    EmbeddingFile = "embedding_file"
    EmbeddingFolder = "embedding_folder"
    InvokeAI = "invokeai"
-    T5Encoder = "t5_encoder"
-    BnbQuantizedLlmInt8b = "bnb_quantized_int8b"
-    BnbQuantizednf4b = "bnb_quantized_nf4b"


 class SchedulerPredictionType(str, Enum):
@@ -193,9 +186,7 @@ class ModelConfigBase(BaseModel):
 class CheckpointConfigBase(ModelConfigBase):
    """Model config for checkpoint-style models."""

-    format: Literal[ModelFormat.Checkpoint, ModelFormat.BnbQuantizednf4b] = Field(
-        description="Format of the provided checkpoint model", default=ModelFormat.Checkpoint
-    )
+    format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint
    config_path: str = Field(description="path to the checkpoint model config file")
    converted_at: Optional[float] = Field(
        description="When this model was last converted to diffusers", default_factory=time.time
@@ -214,26 +205,6 @@ class LoRAConfigBase(ModelConfigBase):
    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)


-class T5EncoderConfigBase(ModelConfigBase):
-    type: Literal[ModelType.T5Encoder] = ModelType.T5Encoder
-
-
-class T5EncoderConfig(T5EncoderConfigBase):
-    format: Literal[ModelFormat.T5Encoder] = ModelFormat.T5Encoder
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.T5Encoder.value}")
-
-
-class T5EncoderBnbQuantizedLlmInt8bConfig(T5EncoderConfigBase):
-    format: Literal[ModelFormat.BnbQuantizedLlmInt8b] = ModelFormat.BnbQuantizedLlmInt8b
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.T5Encoder.value}.{ModelFormat.BnbQuantizedLlmInt8b.value}")
-
-
 class LoRALyCORISConfig(LoRAConfigBase):
    """Model config for LoRA/Lycoris models."""

@@ -258,6 +229,7 @@ class VAECheckpointConfig(CheckpointConfigBase):
    """Model config for standalone VAE models."""

    type: Literal[ModelType.VAE] = ModelType.VAE
+    format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint

    @staticmethod
    def get_tag() -> Tag:
@@ -296,6 +268,7 @@ class ControlNetCheckpointConfig(CheckpointConfigBase, ControlAdapterConfigBase)
    """Model config for ControlNet models (diffusers version)."""

    type: Literal[ModelType.ControlNet] = ModelType.ControlNet
+    format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint

    @staticmethod
    def get_tag() -> Tag:
@@ -344,21 +317,6 @@ class MainCheckpointConfig(CheckpointConfigBase, MainConfigBase):
        return Tag(f"{ModelType.Main.value}.{ModelFormat.Checkpoint.value}")


-class MainBnbQuantized4bCheckpointConfig(CheckpointConfigBase, MainConfigBase):
-    """Model config for main checkpoint models."""
-
-    prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon
-    upcast_attention: bool = False
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.format = ModelFormat.BnbQuantizednf4b
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.Main.value}.{ModelFormat.BnbQuantizednf4b.value}")
-
-
 class MainDiffusersConfig(DiffusersConfigBase, MainConfigBase):
    """Model config for main diffusers models."""

@@ -392,17 +350,6 @@ class IPAdapterCheckpointConfig(IPAdapterBaseConfig):
        return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.Checkpoint.value}")


-class CLIPEmbedDiffusersConfig(DiffusersConfigBase):
-    """Model config for Clip Embeddings."""
-
-    type: Literal[ModelType.CLIPEmbed] = ModelType.CLIPEmbed
-    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
-
-    @staticmethod
-    def get_tag() -> Tag:
-        return Tag(f"{ModelType.CLIPEmbed.value}.{ModelFormat.Diffusers.value}")
-
-
 class CLIPVisionDiffusersConfig(DiffusersConfigBase):
    """Model config for CLIPVision."""

@@ -461,15 +408,12 @@ AnyModelConfig = Annotated[
    Union[
        Annotated[MainDiffusersConfig, MainDiffusersConfig.get_tag()],
        Annotated[MainCheckpointConfig, MainCheckpointConfig.get_tag()],
-        Annotated[MainBnbQuantized4bCheckpointConfig, MainBnbQuantized4bCheckpointConfig.get_tag()],
        Annotated[VAEDiffusersConfig, VAEDiffusersConfig.get_tag()],
        Annotated[VAECheckpointConfig, VAECheckpointConfig.get_tag()],
        Annotated[ControlNetDiffusersConfig, ControlNetDiffusersConfig.get_tag()],
        Annotated[ControlNetCheckpointConfig, ControlNetCheckpointConfig.get_tag()],
        Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()],
        Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()],
-        Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()],
-        Annotated[T5EncoderBnbQuantizedLlmInt8bConfig, T5EncoderBnbQuantizedLlmInt8bConfig.get_tag()],
        Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()],
        Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()],
        Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()],
@@ -477,7 +421,6 @@ AnyModelConfig = Annotated[
        Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()],
        Annotated[SpandrelImageToImageConfig, SpandrelImageToImageConfig.get_tag()],
        Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()],
-        Annotated[CLIPEmbedDiffusersConfig, CLIPEmbedDiffusersConfig.get_tag()],
    ],
    Discriminator(get_model_discriminator_value),
 ]
--- a/invokeai/backend/model_manager/load/load_default.py
+++ b/invokeai/backend/model_manager/load/load_default.py
@@ -66,14 +66,12 @@ class ModelLoader(ModelLoaderBase):
        return (model_base / config.path).resolve()

    def _load_and_cache(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> ModelLockerBase:
-        stats_name = ":".join([config.base, config.type, config.name, (submodel_type or "")])
        try:
-            return self._ram_cache.get(config.key, submodel_type, stats_name=stats_name)
+            return self._ram_cache.get(config.key, submodel_type)
        except IndexError:
            pass

        config.path = str(self._get_model_path(config))
-        self._ram_cache.make_room(self.get_size_fs(config, Path(config.path), submodel_type))
        loaded_model = self._load_model(config, submodel_type)

        self._ram_cache.put(
@@ -85,7 +83,7 @@ class ModelLoader(ModelLoaderBase):
        return self._ram_cache.get(
            key=config.key,
            submodel_type=submodel_type,
-            stats_name=stats_name,
+            stats_name=":".join([config.base, config.type, config.name, (submodel_type or "")]),
        )

    def get_size_fs(
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
@@ -128,24 +128,7 @@ class ModelCacheBase(ABC, Generic[T]):
    @property
    @abstractmethod
    def max_cache_size(self) -> float:
-        """Return the maximum size the RAM cache can grow to."""
-        pass
-
-    @max_cache_size.setter
-    @abstractmethod
-    def max_cache_size(self, value: float) -> None:
-        """Set the cap on vram cache size."""
-
-    @property
-    @abstractmethod
-    def max_vram_cache_size(self) -> float:
-        """Return the maximum size the VRAM cache can grow to."""
-        pass
-
-    @max_vram_cache_size.setter
-    @abstractmethod
-    def max_vram_cache_size(self, value: float) -> float:
-        """Set the maximum size the VRAM cache can grow to."""
+        """Return the maximum size the RAM cache can grow to."""
        pass

    @abstractmethod
@@ -210,6 +193,15 @@ class ModelCacheBase(ABC, Generic[T]):
        """
        pass

+    @abstractmethod
+    def exists(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> bool:
+        """Return true if the model identified by key and submodel_type is in the cache."""
+        pass
+
    @abstractmethod
    def cache_size(self) -> int:
        """Get the total size of the models currently cached."""
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -1,6 +1,22 @@
 # Copyright (c) 2024 Lincoln D. Stein and the InvokeAI Development team
 # TODO: Add Stalker's proper name to copyright
-""" """
+"""
+Manage a RAM cache of diffusion/transformer models for fast switching.
+They are moved between GPU VRAM and CPU RAM as necessary. If the cache
+grows larger than a preset maximum, then the least recently used
+model will be cleared and (re)loaded from disk when next needed.
+
+The cache returns context manager generators designed to load the
+model into the GPU within the context, and unload outside the
+context. Use like this:
+
+   cache = ModelCache(max_cache_size=7.5)
+   with cache.get_model('runwayml/stable-diffusion-1-5') as SD1,
+          cache.get_model('stabilityai/stable-diffusion-2') as SD2:
+       do_something_in_GPU(SD1,SD2)
+
+
+"""

 import gc
 import math
@@ -24,74 +40,53 @@ from invokeai.backend.model_manager.load.model_util import calc_model_size_by_da
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.logging import InvokeAILogger

-# Size of a GB in bytes.
-GB = 2**30
+# Maximum size of the cache, in gigs
+# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
+DEFAULT_MAX_CACHE_SIZE = 6.0
+
+# Default cap, in GB, on VRAM in use before models are offloaded to make room
+DEFAULT_MAX_VRAM_CACHE_SIZE = 2.75
+
+# Size of a GB in bytes (2**30).
+GIG = 1073741824

 # Size of a MB in bytes.
 MB = 2**20


 class ModelCache(ModelCacheBase[AnyModel]):
-    """A cache for managing models in memory.
-
-    The cache is based on two levels of model storage:
-    - execution_device: The device where most models are executed (typically "cuda", "mps", or "cpu").
-    - storage_device: The device where models are offloaded when not in active use (typically "cpu").
-
-    The model cache is based on the following assumptions:
-    - storage_device_mem_size > execution_device_mem_size
-    - disk_to_storage_device_transfer_time >> storage_device_to_execution_device_transfer_time
-
-    A copy of all models in the cache is always kept on the storage_device. A subset of the models also have a copy on
-    the execution_device.
-
-    Models are moved between the storage_device and the execution_device as necessary. Cache size limits are enforced
-    on both the storage_device and the execution_device. The execution_device cache uses a smallest-first offload
-    policy. The storage_device cache uses a least-recently-used (LRU) offload policy.
-
-    Note: Neither of these offload policies has really been compared against alternatives. It's likely that different
-    policies would be better, although the optimal policies are likely heavily dependent on usage patterns and HW
-    configuration.
-
-    The cache returns context manager generators designed to load the model into the execution device (often GPU) within
-    the context, and unload outside the context.
-
-    Example usage:
-    ```
-    cache = ModelCache(max_cache_size=7.5, max_vram_cache_size=6.0)
-    with cache.get_model('runwayml/stable-diffusion-1-5') as SD1:
-        do_something_on_gpu(SD1)
-    ```
-    """
+    """Implementation of ModelCacheBase."""

    def __init__(
        self,
-        max_cache_size: float,
-        max_vram_cache_size: float,
+        max_cache_size: float = DEFAULT_MAX_CACHE_SIZE,
+        max_vram_cache_size: float = DEFAULT_MAX_VRAM_CACHE_SIZE,
        execution_device: torch.device = torch.device("cuda"),
        storage_device: torch.device = torch.device("cpu"),
        precision: torch.dtype = torch.float16,
+        sequential_offload: bool = False,
        lazy_offloading: bool = True,
+        sha_chunksize: int = 16777216,
        log_memory_usage: bool = False,
        logger: Optional[Logger] = None,
    ):
        """
        Initialize the model RAM cache.

-        :param max_cache_size: Maximum size of the storage_device cache in GBs.
-        :param max_vram_cache_size: Maximum size of the execution_device cache in GBs.
+        :param max_cache_size: Maximum size of the RAM cache [6.0 GB]
        :param execution_device: Torch device to load active model into [torch.device('cuda')]
        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
        :param precision: Precision for loaded models [torch.float16]
        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
+        :param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially
        :param log_memory_usage: If True, a memory snapshot will be captured before and after every model cache
            operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
            snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
            behaviour.
-        :param logger: InvokeAILogger to use (otherwise creates one)
        """
        # allow lazy offloading only when vram cache enabled
        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
+        self._precision: torch.dtype = precision
        self._max_cache_size: float = max_cache_size
        self._max_vram_cache_size: float = max_vram_cache_size
        self._execution_device: torch.device = execution_device
@@ -133,16 +128,6 @@ class ModelCache(ModelCacheBase[AnyModel]):
        """Set the cap on cache size."""
        self._max_cache_size = value

-    @property
-    def max_vram_cache_size(self) -> float:
-        """Return the cap on vram cache size."""
-        return self._max_vram_cache_size
-
-    @max_vram_cache_size.setter
-    def max_vram_cache_size(self, value: float) -> None:
-        """Set the cap on vram cache size."""
-        self._max_vram_cache_size = value
-
    @property
    def stats(self) -> Optional[CacheStats]:
        """Return collected CacheStats object."""
@@ -160,6 +145,15 @@ class ModelCache(ModelCacheBase[AnyModel]):
            total += cache_record.size
        return total

+    def exists(
+        self,
+        key: str,
+        submodel_type: Optional[SubModelType] = None,
+    ) -> bool:
+        """Return true if the model identified by key and submodel_type is in the cache."""
+        key = self._make_cache_key(key, submodel_type)
+        return key in self._cached_models
+
    def put(
        self,
        key: str,
@@ -209,7 +203,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        # more stats
        if self.stats:
            stats_name = stats_name or key
-            self.stats.cache_size = int(self._max_cache_size * GB)
+            self.stats.cache_size = int(self._max_cache_size * GIG)
            self.stats.high_watermark = max(self.stats.high_watermark, self.cache_size())
            self.stats.in_cache = len(self._cached_models)
            self.stats.loaded_model_sizes[stats_name] = max(
@@ -237,13 +231,10 @@ class ModelCache(ModelCacheBase[AnyModel]):
            return model_key

    def offload_unlocked_models(self, size_required: int) -> None:
-        """Offload models from the execution_device to make room for size_required.
-
-        :param size_required: The amount of space to clear in the execution_device cache, in bytes.
-        """
-        reserved = self._max_vram_cache_size * GB
+        """Offload models from VRAM, smallest first, until size_required bytes fit within the VRAM cap."""
+        reserved = self._max_vram_cache_size * GIG
        vram_in_use = torch.cuda.memory_allocated() + size_required
-        self.logger.debug(f"{(vram_in_use/GB):.2f}GB VRAM needed for models; max allowed={(reserved/GB):.2f}GB")
+        self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM needed for models; max allowed={(reserved/GIG):.2f}GB")
        for _, cache_entry in sorted(self._cached_models.items(), key=lambda x: x[1].size):
            if vram_in_use <= reserved:
                break
@@ -254,7 +245,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
                cache_entry.loaded = False
                vram_in_use = torch.cuda.memory_allocated() + size_required
                self.logger.debug(
-                    f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GB):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GB):.2f}GB"
+                    f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GIG):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GIG):.2f}GB"
                )

        TorchDevice.empty_cache()
@@ -312,7 +303,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        self.logger.debug(
            f"Moved model '{cache_entry.key}' from {source_device} to"
            f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
-            f"Estimated model size: {(cache_entry.size/GB):.3f} GB."
+            f"Estimated model size: {(cache_entry.size/GIG):.3f} GB."
            f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
        )

@@ -335,14 +326,14 @@ class ModelCache(ModelCacheBase[AnyModel]):
                    f"Moving model '{cache_entry.key}' from {source_device} to"
                    f" {target_device} caused an unexpected change in VRAM usage. The model's"
                    " estimated size may be incorrect. Estimated model size:"
-                    f" {(cache_entry.size/GB):.3f} GB.\n"
+                    f" {(cache_entry.size/GIG):.3f} GB.\n"
                    f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
                )

    def print_cuda_stats(self) -> None:
        """Log CUDA diagnostics."""
-        vram = "%4.2fG" % (torch.cuda.memory_allocated() / GB)
-        ram = "%4.2fG" % (self.cache_size() / GB)
+        vram = "%4.2fG" % (torch.cuda.memory_allocated() / GIG)
+        ram = "%4.2fG" % (self.cache_size() / GIG)

        in_ram_models = 0
        in_vram_models = 0
@@ -362,20 +353,17 @@ class ModelCache(ModelCacheBase[AnyModel]):
                )

    def make_room(self, size: int) -> None:
-        """Make enough room in the cache to accommodate a new model of indicated size.
-
-        Note: This function deletes all of the cache's internal references to a model in order to free it. If there are
-        external references to the model, there's nothing that the cache can do about it, and those models will not be
-        garbage-collected.
-        """
+        """Make enough room in the cache to accommodate a new model of indicated size."""
+        # calculate how much memory this model will require
+        # multiplier = 2 if self.precision==torch.float32 else 1
        bytes_needed = size
-        maximum_size = self.max_cache_size * GB  # stored in GB, convert to bytes
+        maximum_size = self.max_cache_size * GIG  # stored in GB, convert to bytes
        current_size = self.cache_size()

        if current_size + bytes_needed > maximum_size:
            self.logger.debug(
-                f"Max cache size exceeded: {(current_size/GB):.2f}/{self.max_cache_size:.2f} GB, need an additional"
-                f" {(bytes_needed/GB):.2f} GB"
+                f"Max cache size exceeded: {(current_size/GIG):.2f}/{self.max_cache_size:.2f} GB, need an additional"
+                f" {(bytes_needed/GIG):.2f} GB"
            )

        self.logger.debug(f"Before making_room: cached_models={len(self._cached_models)}")
@@ -392,7 +380,7 @@ class ModelCache(ModelCacheBase[AnyModel]):

            if not cache_entry.locked:
                self.logger.debug(
-                    f"Removing {model_key} from RAM cache to free at least {(size/GB):.2f} GB (-{(cache_entry.size/GB):.2f} GB)"
+                    f"Removing {model_key} from RAM cache to free at least {(size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
                )
                current_size -= cache_entry.size
                models_cleared += 1
--- a/invokeai/backend/model_manager/load/model_loaders/flux.py
+++ b/invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -1,246 +0,0 @@
-# Copyright (c) 2024, Brandon W. Rising and the InvokeAI Development Team
-"""Class for Flux model loading in InvokeAI."""
-
-from pathlib import Path
-from typing import Optional
-
-import accelerate
-import torch
-from safetensors.torch import load_file
-from transformers import AutoConfig, AutoModelForTextEncoding, CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer
-
-from invokeai.app.services.config.config_default import get_config
-from invokeai.backend.flux.model import Flux
-from invokeai.backend.flux.modules.autoencoder import AutoEncoder
-from invokeai.backend.flux.util import ae_params, params
-from invokeai.backend.model_manager import (
-    AnyModel,
-    AnyModelConfig,
-    BaseModelType,
-    ModelFormat,
-    ModelType,
-    SubModelType,
-)
-from invokeai.backend.model_manager.config import (
-    CheckpointConfigBase,
-    CLIPEmbedDiffusersConfig,
-    MainBnbQuantized4bCheckpointConfig,
-    MainCheckpointConfig,
-    T5EncoderBnbQuantizedLlmInt8bConfig,
-    T5EncoderConfig,
-    VAECheckpointConfig,
-)
-from invokeai.backend.model_manager.load.load_default import ModelLoader
-from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
-from invokeai.backend.model_manager.util.model_util import (
-    convert_bundle_to_flux_transformer_checkpoint,
-)
-from invokeai.backend.util.silence_warnings import SilenceWarnings
-
-try:
-    from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8
-    from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4
-
-    bnb_available = True
-except ImportError:
-    bnb_available = False
-
-app_config = get_config()
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.VAE, format=ModelFormat.Checkpoint)
-class FluxVAELoader(ModelLoader):
-    """Class to load VAE models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, VAECheckpointConfig):
-            raise ValueError("Only VAECheckpointConfig models are currently supported here.")
-        model_path = Path(config.path)
-
-        with SilenceWarnings():
-            model = AutoEncoder(ae_params[config.config_path])
-            sd = load_file(model_path)
-            model.load_state_dict(sd, assign=True)
-            model.to(dtype=self._torch_dtype)
-
-        return model
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPEmbed, format=ModelFormat.Diffusers)
-class ClipCheckpointModel(ModelLoader):
-    """Class to load main models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, CLIPEmbedDiffusersConfig):
-            raise ValueError("Only CLIPEmbedDiffusersConfig models are currently supported here.")
-
-        match submodel_type:
-            case SubModelType.Tokenizer:
-                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
-            case SubModelType.TextEncoder:
-                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
-
-        raise ValueError(
-            f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
-        )
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.BnbQuantizedLlmInt8b)
-class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
-    """Class to load main models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, T5EncoderBnbQuantizedLlmInt8bConfig):
-            raise ValueError("Only T5EncoderBnbQuantizedLlmInt8bConfig models are currently supported here.")
-        if not bnb_available:
-            raise ImportError(
-                "The bnb modules are not available. Please install bitsandbytes if available on your platform."
-            )
-        match submodel_type:
-            case SubModelType.Tokenizer2:
-                return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
-            case SubModelType.TextEncoder2:
-                te2_model_path = Path(config.path) / "text_encoder_2"
-                model_config = AutoConfig.from_pretrained(te2_model_path)
-                with accelerate.init_empty_weights():
-                    model = AutoModelForTextEncoding.from_config(model_config)
-                    model = quantize_model_llm_int8(model, modules_to_not_convert=set())
-
-                state_dict_path = te2_model_path / "bnb_llm_int8_model.safetensors"
-                state_dict = load_file(state_dict_path)
-                self._load_state_dict_into_t5(model, state_dict)
-
-                return model
-
-        raise ValueError(
-            f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
-        )
-
-    @classmethod
-    def _load_state_dict_into_t5(cls, model: T5EncoderModel, state_dict: dict[str, torch.Tensor]):
-        # There is a shared reference to a single weight tensor in the model.
-        # Both "encoder.embed_tokens.weight" and "shared.weight" refer to the same tensor, so only the latter should
-        # be present in the state_dict.
-        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True)
-        assert len(unexpected_keys) == 0
-        assert set(missing_keys) == {"encoder.embed_tokens.weight"}
-        # Assert that the layers we expect to be shared are actually shared.
-        assert model.encoder.embed_tokens.weight is model.shared.weight
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.T5Encoder, format=ModelFormat.T5Encoder)
-class T5EncoderCheckpointModel(ModelLoader):
-    """Class to load main models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, T5EncoderConfig):
-            raise ValueError("Only T5EncoderConfig models are currently supported here.")
-
-        match submodel_type:
-            case SubModelType.Tokenizer2:
-                return T5Tokenizer.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
-            case SubModelType.TextEncoder2:
-                return T5EncoderModel.from_pretrained(Path(config.path) / "text_encoder_2")
-
-        raise ValueError(
-            f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
-        )
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Checkpoint)
-class FluxCheckpointModel(ModelLoader):
-    """Class to load main models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, CheckpointConfigBase):
-            raise ValueError("Only CheckpointConfigBase models are currently supported here.")
-
-        match submodel_type:
-            case SubModelType.Transformer:
-                return self._load_from_singlefile(config)
-
-        raise ValueError(
-            f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
-        )
-
-    def _load_from_singlefile(
-        self,
-        config: AnyModelConfig,
-    ) -> AnyModel:
-        assert isinstance(config, MainCheckpointConfig)
-        model_path = Path(config.path)
-
-        with SilenceWarnings():
-            model = Flux(params[config.config_path])
-            sd = load_file(model_path)
-            if "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale" in sd:
-                sd = convert_bundle_to_flux_transformer_checkpoint(sd)
-            new_sd_size = sum([ten.nelement() * torch.bfloat16.itemsize for ten in sd.values()])
-            self._ram_cache.make_room(new_sd_size)
-            for k in sd.keys():
-                # We need to cast to bfloat16 due to it being the only currently supported dtype for inference
-                sd[k] = sd[k].to(torch.bfloat16)
-            model.load_state_dict(sd, assign=True)
-        return model
-
-
-@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.BnbQuantizednf4b)
-class FluxBnbQuantizednf4bCheckpointModel(ModelLoader):
-    """Class to load main models."""
-
-    def _load_model(
-        self,
-        config: AnyModelConfig,
-        submodel_type: Optional[SubModelType] = None,
-    ) -> AnyModel:
-        if not isinstance(config, CheckpointConfigBase):
-            raise ValueError("Only CheckpointConfigBase models are currently supported here.")
-
-        match submodel_type:
-            case SubModelType.Transformer:
-                return self._load_from_singlefile(config)
-
-        raise ValueError(
-            f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
-        )
-
-    def _load_from_singlefile(
-        self,
-        config: AnyModelConfig,
-    ) -> AnyModel:
-        assert isinstance(config, MainBnbQuantized4bCheckpointConfig)
-        if not bnb_available:
-            raise ImportError(
-                "The bnb modules are not available. Please install bitsandbytes if available on your platform."
-            )
-        model_path = Path(config.path)
-
-        with SilenceWarnings():
-            with accelerate.init_empty_weights():
-                model = Flux(params[config.config_path])
-                model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.bfloat16)
-            sd = load_file(model_path)
-            if "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale" in sd:
-                sd = convert_bundle_to_flux_transformer_checkpoint(sd)
-            model.load_state_dict(sd, assign=True)
-        return model
--- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
+++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
@@ -78,12 +78,7 @@ class GenericDiffusersLoader(ModelLoader):

    # TO DO: Add exception handling
    def _hf_definition_to_type(self, module: str, class_name: str) -> ModelMixin:  # fix with correct type
-        if module in [
-            "diffusers",
-            "transformers",
-            "invokeai.backend.quantization.fast_quantized_transformers_model",
-            "invokeai.backend.quantization.fast_quantized_diffusion_model",
-        ]:
+        if module in ["diffusers", "transformers"]:
            res_type = sys.modules[module]
        else:
            res_type = sys.modules["diffusers"].pipelines
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -5,18 +5,8 @@ from logging import Logger
 from pathlib import Path
 from typing import Optional

-import torch
-from safetensors.torch import load_file
-
 from invokeai.app.services.config import InvokeAIAppConfig
-from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils import (
-    lora_model_from_flux_diffusers_state_dict,
-)
-from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import (
-    lora_model_from_flux_kohya_state_dict,
-)
-from invokeai.backend.lora.conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict
-from invokeai.backend.lora.conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format
+from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
@@ -55,33 +45,14 @@ class LoRALoader(ModelLoader):
            raise ValueError("There are no submodels in a LoRA model.")
        model_path = Path(config.path)
        assert self._model_base is not None
-
-        # Load the state dict from the model file.
-        if model_path.suffix == ".safetensors":
-            state_dict = load_file(model_path.absolute().as_posix(), device="cpu")
-        else:
-            state_dict = torch.load(model_path, map_location="cpu")
-
-        # Apply state_dict key conversions, if necessary.
-        if self._model_base == BaseModelType.StableDiffusionXL:
-            state_dict = convert_sdxl_keys_to_diffusers_format(state_dict)
-            model = lora_model_from_sd_state_dict(state_dict=state_dict)
-        elif self._model_base == BaseModelType.Flux:
-            if config.format == ModelFormat.Diffusers:
-                model = lora_model_from_flux_diffusers_state_dict(state_dict=state_dict)
-            elif config.format == ModelFormat.LyCORIS:
-                model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict)
-            else:
-                raise ValueError(f"LoRA model is in unsupported FLUX format: {config.format}")
-        elif self._model_base in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
-            # Currently, we don't apply any conversions for SD1 and SD2 LoRA models.
-            model = lora_model_from_sd_state_dict(state_dict=state_dict)
-        else:
-            raise ValueError(f"Unsupported LoRA base model: {self._model_base}")
-
-        model.to(dtype=self._torch_dtype)
+        model = LoRAModelRaw.from_checkpoint(
+            file_path=model_path,
+            dtype=self._torch_dtype,
+            base_model=self._model_base,
+        )
        return model

+    # override
    def _get_model_path(self, config: AnyModelConfig) -> Path:
        # cheating a little - we remember this variable for using in the subsequent call to _load_model()
        self._model_base = config.base
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -36,18 +36,8 @@ VARIANT_TO_IN_CHANNEL_MAP = {
 }


-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Diffusers)
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Diffusers)
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Diffusers)
-@ModelLoaderRegistry.register(
-    base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Diffusers
-)
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion1, type=ModelType.Main, format=ModelFormat.Checkpoint)
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusion2, type=ModelType.Main, format=ModelFormat.Checkpoint)
-@ModelLoaderRegistry.register(base=BaseModelType.StableDiffusionXL, type=ModelType.Main, format=ModelFormat.Checkpoint)
-@ModelLoaderRegistry.register(
-    base=BaseModelType.StableDiffusionXLRefiner, type=ModelType.Main, format=ModelFormat.Checkpoint
-)
+@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.Main, format=ModelFormat.Diffusers)
+@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.Main, format=ModelFormat.Checkpoint)
 class StableDiffusionDiffusersModel(GenericDiffusersLoader):
    """Class to load main models."""

--- a/invokeai/backend/model_manager/load/model_util.py
+++ b/invokeai/backend/model_manager/load/model_util.py
@@ -9,18 +9,17 @@ from typing import Optional
 import torch
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
-from transformers import CLIPTokenizer, T5Tokenizer, T5TokenizerFast
+from transformers import CLIPTokenizer

 from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
 from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
+from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager.config import AnyModel
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
 from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw
-from invokeai.backend.util.calc_tensor_size import calc_tensor_size


 def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
@@ -51,17 +50,6 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
        ),
    ):
        return model.calc_size()
-    elif isinstance(
-        model,
-        (
-            T5TokenizerFast,
-            T5Tokenizer,
-        ),
-    ):
-        # HACK(ryand): len(model) just returns the vocabulary size, so this is blatantly wrong. It should be small
-        # relative to the text encoder that it's used with, so shouldn't matter too much, but we should fix this at some
-        # point.
-        return len(model)
    else:
        # TODO(ryand): Promote this from a log to an exception once we are confident that we are handling all of the
        # supported model types.
@@ -84,9 +72,10 @@ def _calc_pipeline_by_data(pipeline: DiffusionPipeline) -> int:

 def calc_module_size(model: torch.nn.Module) -> int:
    """Calculate the size (in bytes) of a torch.nn.Module."""
-    mem_params = sum([calc_tensor_size(param) for param in model.parameters()])
-    mem_bufs = sum([calc_tensor_size(buf) for buf in model.buffers()])
-    return mem_params + mem_bufs
+    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
+    mem_bufs = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])
+    mem: int = mem_params + mem_bufs  # in bytes
+    return mem


 def _calc_onnx_model_by_data(model: IAIOnnxRuntimeModel) -> int:
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -10,10 +10,6 @@ from picklescan.scanner import scan_file_path

 import invokeai.backend.util.logging as logger
 from invokeai.app.util.misc import uuid_string
-from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils import (
-    is_state_dict_likely_in_flux_diffusers_format,
-)
-from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import is_state_dict_likely_in_flux_kohya_format
 from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
@@ -99,7 +95,6 @@ class ModelProbe(object):
    }

    CLASS2TYPE = {
-        "FluxPipeline": ModelType.Main,
        "StableDiffusionPipeline": ModelType.Main,
        "StableDiffusionInpaintPipeline": ModelType.Main,
        "StableDiffusionXLPipeline": ModelType.Main,
@@ -111,9 +106,6 @@ class ModelProbe(object):
        "ControlNetModel": ModelType.ControlNet,
        "CLIPVisionModelWithProjection": ModelType.CLIPVision,
        "T2IAdapter": ModelType.T2IAdapter,
-        "CLIPModel": ModelType.CLIPEmbed,
-        "CLIPTextModel": ModelType.CLIPEmbed,
-        "T5EncoderModel": ModelType.T5Encoder,
    }

    @classmethod
@@ -169,7 +161,7 @@ class ModelProbe(object):
        fields["description"] = (
            fields.get("description") or f"{fields['base'].value} {model_type.value} model {fields['name']}"
        )
-        fields["format"] = ModelFormat(fields.get("format")) if "format" in fields else probe.get_format()
+        fields["format"] = fields.get("format") or probe.get_format()
        fields["hash"] = fields.get("hash") or ModelHash(algorithm=hash_algo).hash(model_path)

        fields["default_settings"] = fields.get("default_settings")
@@ -184,10 +176,10 @@ class ModelProbe(object):
            fields["repo_variant"] = fields.get("repo_variant") or probe.get_repo_variant()

        # additional fields needed for main and controlnet models
-        if fields["type"] in [ModelType.Main, ModelType.ControlNet, ModelType.VAE] and fields["format"] in [
-            ModelFormat.Checkpoint,
-            ModelFormat.BnbQuantizednf4b,
-        ]:
+        if (
+            fields["type"] in [ModelType.Main, ModelType.ControlNet, ModelType.VAE]
+            and fields["format"] is ModelFormat.Checkpoint
+        ):
            ckpt_config_path = cls._get_checkpoint_config_path(
                model_path,
                model_type=fields["type"],
@@ -230,27 +222,13 @@ class ModelProbe(object):
        ckpt = ckpt.get("state_dict", ckpt)

        for key in [str(k) for k in ckpt.keys()]:
-            if key.startswith(
-                (
-                    "cond_stage_model.",
-                    "first_stage_model.",
-                    "model.diffusion_model.",
-                    # FLUX models in the official BFL format contain keys with the "double_blocks." prefix.
-                    "double_blocks.",
-                    # Some FLUX checkpoint files contain transformer keys prefixed with "model.diffusion_model".
-                    # This prefix is typically used to distinguish between multiple models bundled in a single file.
-                    "model.diffusion_model.double_blocks.",
-                )
-            ):
-                # Keys starting with double_blocks are associated with Flux models
+            if key.startswith(("cond_stage_model.", "first_stage_model.", "model.diffusion_model.")):
                return ModelType.Main
            elif key.startswith(("encoder.conv_in", "decoder.conv_in")):
                return ModelType.VAE
            elif key.startswith(("lora_te_", "lora_unet_")):
                return ModelType.LoRA
-            # "lora_A.weight" and "lora_B.weight" are associated with models in PEFT format. We don't support all PEFT
-            # LoRA models, but as of the time of writing, we support Diffusers FLUX PEFT LoRA models.
-            elif key.endswith(("to_k_lora.up.weight", "to_q_lora.down.weight", "lora_A.weight", "lora_B.weight")):
+            elif key.endswith(("to_k_lora.up.weight", "to_q_lora.down.weight")):
                return ModelType.LoRA
            elif key.startswith(("controlnet", "control_model", "input_blocks")):
                return ModelType.ControlNet
@@ -302,16 +280,9 @@ class ModelProbe(object):
        if (folder_path / "image_encoder.txt").exists():
            return ModelType.IPAdapter

-        config_path = None
-        for p in [
-            folder_path / "model_index.json",  # pipeline
-            folder_path / "config.json",  # most diffusers
-            folder_path / "text_encoder_2" / "config.json",  # T5 text encoder
-            folder_path / "text_encoder" / "config.json",  # T5 CLIP
-        ]:
-            if p.exists():
-                config_path = p
-                break
+        i = folder_path / "model_index.json"
+        c = folder_path / "config.json"
+        config_path = i if i.exists() else c if c.exists() else None

        if config_path:
            with open(config_path, "r") as file:
@@ -350,30 +321,10 @@ class ModelProbe(object):
            return possible_conf.absolute()

        if model_type is ModelType.Main:
-            if base_type == BaseModelType.Flux:
-                # TODO: Decide between dev/schnell
-                checkpoint = ModelProbe._scan_and_load_checkpoint(model_path)
-                state_dict = checkpoint.get("state_dict") or checkpoint
-                if (
-                    "guidance_in.out_layer.weight" in state_dict
-                    or "model.diffusion_model.guidance_in.out_layer.weight" in state_dict
-                ):
-                    # For flux, this is a key in invokeai.backend.flux.util.params
-                    #   Due to model type and format being the descriminator for model configs this
-                    #   is used rather than attempting to support flux with separate model types and format
-                    #   If changed in the future, please fix me
-                    config_file = "flux-dev"
-                else:
-                    # For flux, this is a key in invokeai.backend.flux.util.params
-                    #   Due to model type and format being the discriminator for model configs this
-                    #   is used rather than attempting to support flux with separate model types and format
-                    #   If changed in the future, please fix me
-                    config_file = "flux-schnell"
-            else:
-                config_file = LEGACY_CONFIGS[base_type][variant_type]
-                if isinstance(config_file, dict):  # need another tier for sd-2.x models
-                    config_file = config_file[prediction_type]
-                config_file = f"stable-diffusion/{config_file}"
+            config_file = LEGACY_CONFIGS[base_type][variant_type]
+            if isinstance(config_file, dict):  # need another tier for sd-2.x models
+                config_file = config_file[prediction_type]
+            config_file = f"stable-diffusion/{config_file}"
        elif model_type is ModelType.ControlNet:
            config_file = (
                "controlnet/cldm_v15.yaml"
@@ -382,13 +333,7 @@ class ModelProbe(object):
            )
        elif model_type is ModelType.VAE:
            config_file = (
-                # For flux, this is a key in invokeai.backend.flux.util.ae_params
-                #   Due to model type and format being the descriminator for model configs this
-                #   is used rather than attempting to support flux with separate model types and format
-                #   If changed in the future, please fix me
-                "flux"
-                if base_type is BaseModelType.Flux
-                else "stable-diffusion/v1-inference.yaml"
+                "stable-diffusion/v1-inference.yaml"
                if base_type is BaseModelType.StableDiffusion1
                else "stable-diffusion/sd_xl_base.yaml"
                if base_type is BaseModelType.StableDiffusionXL
@@ -471,18 +416,11 @@ class CheckpointProbeBase(ProbeBase):
        self.checkpoint = ModelProbe._scan_and_load_checkpoint(model_path)

    def get_format(self) -> ModelFormat:
-        state_dict = self.checkpoint.get("state_dict") or self.checkpoint
-        if (
-            "double_blocks.0.img_attn.proj.weight.quant_state.bitsandbytes__nf4" in state_dict
-            or "model.diffusion_model.double_blocks.0.img_attn.proj.weight.quant_state.bitsandbytes__nf4" in state_dict
-        ):
-            return ModelFormat.BnbQuantizednf4b
        return ModelFormat("checkpoint")

    def get_variant_type(self) -> ModelVariantType:
        model_type = ModelProbe.get_model_type_from_checkpoint(self.model_path, self.checkpoint)
-        base_type = self.get_base_type()
-        if model_type != ModelType.Main or base_type == BaseModelType.Flux:
+        if model_type != ModelType.Main:
            return ModelVariantType.Normal
        state_dict = self.checkpoint.get("state_dict") or self.checkpoint
        in_channels = state_dict["model.diffusion_model.input_blocks.0.0.weight"].shape[1]
@@ -502,11 +440,6 @@ class PipelineCheckpointProbe(CheckpointProbeBase):
    def get_base_type(self) -> BaseModelType:
        checkpoint = self.checkpoint
        state_dict = self.checkpoint.get("state_dict") or checkpoint
-        if (
-            "double_blocks.0.img_attn.norm.key_norm.scale" in state_dict
-            or "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale" in state_dict
-        ):
-            return BaseModelType.Flux
        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
        if key_name in state_dict and state_dict[key_name].shape[-1] == 768:
            return BaseModelType.StableDiffusion1
@@ -549,7 +482,6 @@ class VaeCheckpointProbe(CheckpointProbeBase):
            (r"xl", BaseModelType.StableDiffusionXL),
            (r"sd2", BaseModelType.StableDiffusion2),
            (r"vae", BaseModelType.StableDiffusion1),
-            (r"FLUX.1-schnell_ae", BaseModelType.Flux),
        ]:
            if re.search(regexp, self.model_path.name, re.IGNORECASE):
                return basetype
@@ -560,21 +492,12 @@ class LoRACheckpointProbe(CheckpointProbeBase):
    """Class for LoRA checkpoints."""

    def get_format(self) -> ModelFormat:
-        if is_state_dict_likely_in_flux_diffusers_format(self.checkpoint):
-            # TODO(ryand): This is an unusual case. In other places throughout the codebase, we treat
-            # ModelFormat.Diffusers as meaning that the model is in a directory. In this case, the model is a single
-            # file, but the weight keys are in the diffusers format.
-            return ModelFormat.Diffusers
-        return ModelFormat.LyCORIS
+        return ModelFormat("lycoris")

    def get_base_type(self) -> BaseModelType:
-        if is_state_dict_likely_in_flux_kohya_format(self.checkpoint) or is_state_dict_likely_in_flux_diffusers_format(
-            self.checkpoint
-        ):
-            return BaseModelType.Flux
+        checkpoint = self.checkpoint
+        token_vector_length = lora_token_vector_length(checkpoint)

-        # If we've gotten here, we assume that the model is a Stable Diffusion model.
-        token_vector_length = lora_token_vector_length(self.checkpoint)
        if token_vector_length == 768:
            return BaseModelType.StableDiffusion1
        elif token_vector_length == 1024:
@@ -790,30 +713,6 @@ class TextualInversionFolderProbe(FolderProbeBase):
        return TextualInversionCheckpointProbe(path).get_base_type()


-class T5EncoderFolderProbe(FolderProbeBase):
-    def get_base_type(self) -> BaseModelType:
-        return BaseModelType.Any
-
-    def get_format(self) -> ModelFormat:
-        path = self.model_path / "text_encoder_2"
-        if (path / "model.safetensors.index.json").exists():
-            return ModelFormat.T5Encoder
-        files = list(path.glob("*.safetensors"))
-        if len(files) == 0:
-            raise InvalidModelConfigException(f"{self.model_path.as_posix()}: no .safetensors files found")
-
-        # shortcut: look for the quantization in the name
-        if any(x for x in files if "llm_int8" in x.as_posix()):
-            return ModelFormat.BnbQuantizedLlmInt8b
-
-        # more reliable path: probe contents for a 'SCB' key
-        ckpt = read_checkpoint_meta(files[0], scan=True)
-        if any("SCB" in x for x in ckpt.keys()):
-            return ModelFormat.BnbQuantizedLlmInt8b
-
-        raise InvalidModelConfigException(f"{self.model_path.as_posix()}: unknown model format")
-
-
 class ONNXFolderProbe(PipelineFolderProbe):
    def get_base_type(self) -> BaseModelType:
        # Due to the way the installer is set up, the configuration file for safetensors
@@ -906,11 +805,6 @@ class CLIPVisionFolderProbe(FolderProbeBase):
        return BaseModelType.Any


-class CLIPEmbedFolderProbe(FolderProbeBase):
-    def get_base_type(self) -> BaseModelType:
-        return BaseModelType.Any
-
-
 class SpandrelImageToImageFolderProbe(FolderProbeBase):
    def get_base_type(self) -> BaseModelType:
        raise NotImplementedError()
@@ -941,10 +835,8 @@ ModelProbe.register_probe("diffusers", ModelType.Main, PipelineFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.VAE, VaeFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.LoRA, LoRAFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.TextualInversion, TextualInversionFolderProbe)
-ModelProbe.register_probe("diffusers", ModelType.T5Encoder, T5EncoderFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.ControlNet, ControlNetFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.IPAdapter, IPAdapterFolderProbe)
-ModelProbe.register_probe("diffusers", ModelType.CLIPEmbed, CLIPEmbedFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.CLIPVision, CLIPVisionFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.T2IAdapter, T2IAdapterFolderProbe)
 ModelProbe.register_probe("diffusers", ModelType.SpandrelImageToImage, SpandrelImageToImageFolderProbe)
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@@ -2,7 +2,7 @@ from typing import Optional

 from pydantic import BaseModel

-from invokeai.backend.model_manager.config import BaseModelType, ModelFormat, ModelType
+from invokeai.backend.model_manager.config import BaseModelType, ModelType


 class StarterModelWithoutDependencies(BaseModel):
@@ -11,7 +11,6 @@ class StarterModelWithoutDependencies(BaseModel):
    name: str
    base: BaseModelType
    type: ModelType
-    format: Optional[ModelFormat] = None
    is_installed: bool = False


@@ -52,76 +51,10 @@ cyberrealistic_negative = StarterModel(
    type=ModelType.TextualInversion,
 )

-t5_base_encoder = StarterModel(
-    name="t5_base_encoder",
-    base=BaseModelType.Any,
-    source="InvokeAI/t5-v1_1-xxl::bfloat16",
-    description="T5-XXL text encoder (used in FLUX pipelines). ~8GB",
-    type=ModelType.T5Encoder,
-)
-
-t5_8b_quantized_encoder = StarterModel(
-    name="t5_bnb_int8_quantized_encoder",
-    base=BaseModelType.Any,
-    source="InvokeAI/t5-v1_1-xxl::bnb_llm_int8",
-    description="T5-XXL text encoder with bitsandbytes LLM.int8() quantization (used in FLUX pipelines). ~5GB",
-    type=ModelType.T5Encoder,
-    format=ModelFormat.BnbQuantizedLlmInt8b,
-)
-
-clip_l_encoder = StarterModel(
-    name="clip-vit-large-patch14",
-    base=BaseModelType.Any,
-    source="InvokeAI/clip-vit-large-patch14-text-encoder::bfloat16",
-    description="CLIP-L text encoder (used in FLUX pipelines). ~250MB",
-    type=ModelType.CLIPEmbed,
-)
-
-flux_vae = StarterModel(
-    name="FLUX.1-schnell_ae",
-    base=BaseModelType.Flux,
-    source="black-forest-labs/FLUX.1-schnell::ae.safetensors",
-    description="FLUX VAE compatible with both schnell and dev variants.",
-    type=ModelType.VAE,
-)
-
-
 # List of starter models, displayed on the frontend.
 # The order/sort of this list is not changed by the frontend - set it how you want it here.
 STARTER_MODELS: list[StarterModel] = [
    # region: Main
-    StarterModel(
-        name="FLUX Schnell (Quantized)",
-        base=BaseModelType.Flux,
-        source="InvokeAI/flux_schnell::transformer/bnb_nf4/flux1-schnell-bnb_nf4.safetensors",
-        description="FLUX schnell transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~12GB",
-        type=ModelType.Main,
-        dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder],
-    ),
-    StarterModel(
-        name="FLUX Dev (Quantized)",
-        base=BaseModelType.Flux,
-        source="InvokeAI/flux_dev::transformer/bnb_nf4/flux1-dev-bnb_nf4.safetensors",
-        description="FLUX dev transformer quantized to bitsandbytes NF4 format. Total size with dependencies: ~12GB",
-        type=ModelType.Main,
-        dependencies=[t5_8b_quantized_encoder, flux_vae, clip_l_encoder],
-    ),
-    StarterModel(
-        name="FLUX Schnell",
-        base=BaseModelType.Flux,
-        source="InvokeAI/flux_schnell::transformer/base/flux1-schnell.safetensors",
-        description="FLUX schnell transformer in bfloat16. Total size with dependencies: ~33GB",
-        type=ModelType.Main,
-        dependencies=[t5_base_encoder, flux_vae, clip_l_encoder],
-    ),
-    StarterModel(
-        name="FLUX Dev",
-        base=BaseModelType.Flux,
-        source="InvokeAI/flux_dev::transformer/base/flux1-dev.safetensors",
-        description="FLUX dev transformer in bfloat16. Total size with dependencies: ~33GB",
-        type=ModelType.Main,
-        dependencies=[t5_base_encoder, flux_vae, clip_l_encoder],
-    ),
    StarterModel(
        name="CyberRealistic v4.1",
        base=BaseModelType.StableDiffusion1,
@@ -192,7 +125,6 @@ STARTER_MODELS: list[StarterModel] = [
    # endregion
    # region VAE
    sdxl_fp16_vae_fix,
-    flux_vae,
    # endregion
    # region LoRA
    StarterModel(
@@ -518,11 +450,6 @@ STARTER_MODELS: list[StarterModel] = [
        type=ModelType.SpandrelImageToImage,
    ),
    # endregion
-    # region TextEncoders
-    t5_base_encoder,
-    t5_8b_quantized_encoder,
-    clip_l_encoder,
-    # endregion
 ]

 assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models"
--- a/invokeai/backend/model_manager/util/model_util.py
+++ b/invokeai/backend/model_manager/util/model_util.py
@@ -133,29 +133,3 @@ def lora_token_vector_length(checkpoint: Dict[str, torch.Tensor]) -> Optional[in
            break

    return lora_token_vector_length
-
-
-def convert_bundle_to_flux_transformer_checkpoint(
-    transformer_state_dict: dict[str, torch.Tensor],
-) -> dict[str, torch.Tensor]:
-    original_state_dict: dict[str, torch.Tensor] = {}
-    keys_to_remove: list[str] = []
-
-    for k, v in transformer_state_dict.items():
-        if not k.startswith("model.diffusion_model"):
-            keys_to_remove.append(k)  # This can be removed in the future if we only want to delete transformer keys
-            continue
-        if k.endswith("scale"):
-            # Scale math must be done at bfloat16 due to our current flux model
-            # support limitations at inference time
-            v = v.to(dtype=torch.bfloat16)
-        new_key = k.replace("model.diffusion_model.", "")
-        original_state_dict[new_key] = v
-        keys_to_remove.append(k)
-
-    # Remove processed keys from the original dictionary, leaving others in case
-    # other model state dicts need to be pulled
-    for k in keys_to_remove:
-        del transformer_state_dict[k]
-
-    return original_state_dict
--- a/invokeai/backend/model_manager/util/select_hf_files.py
+++ b/invokeai/backend/model_manager/util/select_hf_files.py
@@ -54,7 +54,6 @@ def filter_files(
                "lora_weights.safetensors",
                "weights.pb",
                "onnx_data",
-                "spiece.model",  # Added for `black-forest-labs/FLUX.1-schnell`.
            )
        ):
            paths.append(file)
@@ -63,13 +62,13 @@ def filter_files(
        # downloading random checkpoints that might also be in the repo. However there is no guarantee
        # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models
        # will adhere to this naming convention, so this is an area to be careful of.
-        elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
+        elif re.search(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
            paths.append(file)

    # limit search to subfolder if requested
    if subfolder:
        subfolder = root / subfolder
-        paths = [x for x in paths if Path(subfolder) in x.parents]
+        paths = [x for x in paths if x.parent == Path(subfolder)]

    # _filter_by_variant uniquifies the paths and returns a set
    return sorted(_filter_by_variant(paths, variant))
@@ -98,9 +97,7 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
            if variant == ModelRepoVariant.Flax:
                result.add(path)

-        # Note: '.model' was added to support:
-        # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model
-        elif path.suffix in [".json", ".txt", ".model"]:
+        elif path.suffix in [".json", ".txt"]:
            result.add(path)

        elif variant in [
@@ -143,23 +140,6 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
            continue

    for candidate_list in subfolder_weights.values():
-        # Check if at least one of the files has the explicit fp16 variant.
-        at_least_one_fp16 = False
-        for candidate in candidate_list:
-            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16":
-                at_least_one_fp16 = True
-                break
-
-        if not at_least_one_fp16:
-            # If none of the candidates in this candidate_list have the explicit fp16 variant label, then this
-            # candidate_list probably doesn't adhere to the variant naming convention that we expected. In this case,
-            # we'll simply keep all the candidates. An example of a model that hits this case is
-            # `black-forest-labs/FLUX.1-schnell` (as of commit 012d2fd).
-            for candidate in candidate_list:
-                result.add(candidate.path)
-
-        # The candidate_list seems to have the expected variant naming convention. We'll select the highest scoring
-        # candidate.
        highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score)
        if highest_score_candidate:
            result.add(highest_score_candidate.path)
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -5,18 +5,32 @@ from __future__ import annotations

 import pickle
 from contextlib import contextmanager
-from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Type, Union

 import numpy as np
 import torch
-from diffusers import UNet2DConditionModel
+from diffusers import OnnxRuntimeModel, UNet2DConditionModel
 from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

 from invokeai.app.shared.models import FreeUConfig
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
+from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.model_manager import AnyModel
 from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
+from invokeai.backend.stable_diffusion.extensions.lora import LoRAExt
 from invokeai.backend.textual_inversion import TextualInversionManager, TextualInversionModelRaw
+from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage
+
+"""
+loras = [
+    (lora_model1, 0.7),
+    (lora_model2, 0.4),
+]
+with LoRAHelper.apply_lora_unet(unet, loras):
+    # unet with applied loras
+# unmodified unet
+
+"""


 class ModelPatcher:
@@ -40,6 +54,95 @@ class ModelPatcher:
        finally:
            unet.set_attn_processor(unet_orig_processors)

+    @staticmethod
+    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
+        assert "." not in lora_key  # expects an "_"-separated key, not a dotted module path
+
+        if not lora_key.startswith(prefix):
+            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
+
+        module = model
+        module_key = ""
+        key_parts = lora_key[len(prefix) :].split("_")  # drop the prefix, then split into candidate name pieces
+
+        submodule_name = key_parts.pop(0)
+
+        while len(key_parts) > 0:  # descend one resolved submodule per successful lookup
+            try:
+                module = module.get_submodule(submodule_name)
+                module_key += "." + submodule_name
+                submodule_name = key_parts.pop(0)
+            except Exception:
+                submodule_name += "_" + key_parts.pop(0)  # lookup failed; retry with the next piece joined by "_"
+
+        module = module.get_submodule(submodule_name)  # final piece; this lookup is not guarded by the try above
+        module_key = (module_key + "." + submodule_name).lstrip(".")  # dotted path relative to `model`
+
+        return (module_key, module)
+
+    @classmethod
+    @contextmanager
+    def apply_lora_unet(
+        cls,
+        unet: UNet2DConditionModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
+    ) -> Generator[None, None, None]:
+        with cls.apply_lora(  # thin wrapper: forwards everything to apply_lora with a fixed prefix
+            unet,
+            loras=loras,
+            prefix="lora_unet_",
+            cached_weights=cached_weights,
+        ):
+            yield  # the caller's with-body runs here, inside the apply_lora context
+
+    @classmethod
+    @contextmanager
+    def apply_lora_text_encoder(
+        cls,
+        text_encoder: CLIPTextModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
+    ) -> Generator[None, None, None]:
+        with cls.apply_lora(text_encoder, loras=loras, prefix="lora_te_", cached_weights=cached_weights):
+            yield  # the caller's with-body runs here, inside the apply_lora context
+
+    @classmethod
+    @contextmanager
+    def apply_lora(
+        cls,
+        model: AnyModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+        prefix: str,
+        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
+    ) -> Generator[None, None, None]:
+        """
+        Apply one or more LoRAs to a model for the duration of the `with` block, then restore the changed weights.
+
+        :param model: The model to patch.
+        :param loras: An iterator that returns the LoRA to patch in and its patch weight.
+        :param prefix: A string prefix that precedes keys used in the LoRA's weight layers.
+        :param cached_weights: Read-only copy of the model's state dict in CPU, for unpatching purposes.
+        """
+        original_weights = OriginalWeightsStorage(cached_weights)
+        try:
+            for lora_model, lora_weight in loras:
+                LoRAExt.patch_model(
+                    model=model,
+                    prefix=prefix,
+                    lora=lora_model,
+                    lora_weight=lora_weight,
+                    original_weights=original_weights,
+                )
+                del lora_model
+
+            yield  # the caller's with-body runs here, after every LoRA in `loras` has been patched in
+
+        finally:
+            with torch.no_grad():
+                for param_key, weight in original_weights.get_changed_weights():
+                    model.get_parameter(param_key).copy_(weight)  # in-place copy into the existing parameter
+
    @classmethod
    @contextmanager
    def apply_ti(
@@ -179,6 +282,26 @@ class ModelPatcher:


 class ONNXModelPatcher:
+    @classmethod
+    @contextmanager
+    def apply_lora_unet(
+        cls,
+        unet: OnnxRuntimeModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+    ) -> Generator[None, None, None]:  # a @contextmanager generator, not a None-returning function
+        with cls.apply_lora(unet, loras, "lora_unet_"):
+            yield  # the caller's with-body runs here, inside the apply_lora context
+
+    @classmethod
+    @contextmanager
+    def apply_lora_text_encoder(
+        cls,
+        text_encoder: OnnxRuntimeModel,
+        loras: List[Tuple[LoRAModelRaw, float]],
+    ) -> Generator[None, None, None]:  # a @contextmanager generator, not a None-returning function
+        with cls.apply_lora(text_encoder, loras, "lora_te_"):
+            yield  # the caller's with-body runs here, inside the apply_lora context
+
    # based on
    # https://github.com/ssube/onnx-web/blob/ca2e436f0623e18b4cfe8a0363fcfcf10508acf7/api/onnx_web/convert/diffusion/lora.py#L323
    @classmethod
--- a/invokeai/backend/quantization/__init__.py
+++ b/invokeai/backend/quantization/__init__.py
--- a/invokeai/backend/quantization/bnb_llm_int8.py
+++ b/invokeai/backend/quantization/bnb_llm_int8.py
@@ -1,135 +0,0 @@
-import bitsandbytes as bnb
-import torch
-
-# This file contains utils for working with models that use bitsandbytes LLM.int8() quantization.
-# The utils in this file are partially inspired by:
-# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py
-
-
-# NOTE(ryand): All of the custom state_dict manipulation logic in this file is pretty hacky. This could be made much
-# cleaner by re-implementing bnb.nn.Linear8bitLt with proper use of buffers and less magic. But, for now, we try to
-# stick close to the bitsandbytes classes to make interoperability easier with other models that might use bitsandbytes.
-
-
-class InvokeInt8Params(bnb.nn.Int8Params):
-    """We override cuda() to avoid re-quantizing the weights in the following cases:
-    - We loaded quantized weights from a state_dict on the cpu, and then moved the model to the gpu.
-    - We are moving the model back-and-forth between the cpu and gpu.
-    """
-
-    def cuda(self, device):
-        if self.has_fp16_weights:
-            return super().cuda(device)
-        elif self.CB is not None and self.SCB is not None:
-            self.data = self.data.cuda()
-            self.CB = self.data
-            self.SCB = self.SCB.cuda()
-        else:
-            # we store the 8-bit rows-major weight
-            # we convert this weight to the turning/ampere weight during the first inference pass
-            B = self.data.contiguous().half().cuda(device)
-            CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
-            del CBt
-            del SCBt
-            self.data = CB
-            self.CB = CB
-            self.SCB = SCB
-
-        return self
-
-
-class InvokeLinear8bitLt(bnb.nn.Linear8bitLt):
-    def _load_from_state_dict(
-        self,
-        state_dict: dict[str, torch.Tensor],
-        prefix: str,
-        local_metadata,
-        strict,
-        missing_keys,
-        unexpected_keys,
-        error_msgs,
-    ):
-        weight = state_dict.pop(prefix + "weight")
-        bias = state_dict.pop(prefix + "bias", None)
-
-        # See `bnb.nn.Linear8bitLt._save_to_state_dict()` for the serialization logic of SCB and weight_format.
-        scb = state_dict.pop(prefix + "SCB", None)
-
-        # Currently, we only support weight_format=0.
-        weight_format = state_dict.pop(prefix + "weight_format", None)
-        assert weight_format == 0
-
-        # TODO(ryand): Technically, we should be using `strict`, `missing_keys`, `unexpected_keys`, and `error_msgs`
-        # rather than raising an exception to correctly implement this API.
-        assert len(state_dict) == 0
-
-        if scb is not None:
-            # We are loading a pre-quantized state dict.
-            self.weight = InvokeInt8Params(
-                data=weight,
-                requires_grad=self.weight.requires_grad,
-                has_fp16_weights=False,
-                # Note: After quantization, CB is the same as weight.
-                CB=weight,
-                SCB=scb,
-            )
-            self.bias = bias if bias is None else torch.nn.Parameter(bias)
-        else:
-            # We are loading a non-quantized state dict.
-
-            # We could simply call the `super()._load_from_state_dict()` method here, but then we wouldn't be able to
-            # load from a state_dict into a model on the "meta" device. Attempting to load into a model on the "meta"
-            # device requires setting `assign=True`, doing this with the default `super()._load_from_state_dict()`
-            # implementation causes `Params4Bit` to be replaced by a `torch.nn.Parameter`. By initializing a new
-            # `Params4bit` object, we work around this issue. It's a bit hacky, but it gets the job done.
-            self.weight = InvokeInt8Params(
-                data=weight,
-                requires_grad=self.weight.requires_grad,
-                has_fp16_weights=False,
-                CB=None,
-                SCB=None,
-            )
-            self.bias = bias if bias is None else torch.nn.Parameter(bias)
-
-        # Reset the state. The persisted fields are based on the initialization behaviour in
-        # `bnb.nn.Linear8bitLt.__init__()`.
-        new_state = bnb.MatmulLtState()
-        new_state.threshold = self.state.threshold
-        new_state.has_fp16_weights = False
-        new_state.use_pool = self.state.use_pool
-        self.state = new_state
-
-
-def _convert_linear_layers_to_llm_8bit(
-    module: torch.nn.Module, ignore_modules: set[str], outlier_threshold: float, prefix: str = ""
-) -> None:
-    """Convert all linear layers in the module to bnb.nn.Linear8bitLt layers."""
-    for name, child in module.named_children():
-        fullname = f"{prefix}.{name}" if prefix else name
-        if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules):
-            has_bias = child.bias is not None
-            replacement = InvokeLinear8bitLt(
-                child.in_features,
-                child.out_features,
-                bias=has_bias,
-                has_fp16_weights=False,
-                threshold=outlier_threshold,
-            )
-            replacement.weight.data = child.weight.data
-            if has_bias:
-                replacement.bias.data = child.bias.data
-            replacement.requires_grad_(False)
-            module.__setattr__(name, replacement)
-        else:
-            _convert_linear_layers_to_llm_8bit(
-                child, ignore_modules, outlier_threshold=outlier_threshold, prefix=fullname
-            )
-
-
-def quantize_model_llm_int8(model: torch.nn.Module, modules_to_not_convert: set[str], outlier_threshold: float = 6.0):
-    """Apply bitsandbytes LLM.8bit() quantization to the model."""
-    _convert_linear_layers_to_llm_8bit(
-        module=model, ignore_modules=modules_to_not_convert, outlier_threshold=outlier_threshold
-    )
-
-    return model
--- a/invokeai/backend/quantization/bnb_nf4.py
+++ b/invokeai/backend/quantization/bnb_nf4.py
@@ -1,156 +0,0 @@
-import bitsandbytes as bnb
-import torch
-
-# This file contains utils for working with models that use bitsandbytes NF4 quantization.
-# The utils in this file are partially inspired by:
-# https://github.com/Lightning-AI/pytorch-lightning/blob/1551a16b94f5234a4a78801098f64d0732ef5cb5/src/lightning/fabric/plugins/precision/bitsandbytes.py
-
-# NOTE(ryand): All of the custom state_dict manipulation logic in this file is pretty hacky. This could be made much
-# cleaner by re-implementing bnb.nn.LinearNF4 with proper use of buffers and less magic. But, for now, we try to stick
-# close to the bitsandbytes classes to make interoperability easier with other models that might use bitsandbytes.
-
-
-class InvokeLinearNF4(bnb.nn.LinearNF4):
-    """A class that extends `bnb.nn.LinearNF4` to add the following functionality:
-    - Ability to load Linear NF4 layers from a pre-quantized state_dict.
-    - Ability to load Linear NF4 layers from a state_dict when the model is on the "meta" device.
-    """
-
-    def _load_from_state_dict(
-        self,
-        state_dict: dict[str, torch.Tensor],
-        prefix: str,
-        local_metadata,
-        strict,
-        missing_keys,
-        unexpected_keys,
-        error_msgs,
-    ):
-        """This method is based on the logic in the bitsandbytes serialization unit tests for `Linear4bit`:
-        https://github.com/bitsandbytes-foundation/bitsandbytes/blob/6d714a5cce3db5bd7f577bc447becc7a92d5ccc7/tests/test_linear4bit.py#L52-L71
-        """
-        weight = state_dict.pop(prefix + "weight")
-        bias = state_dict.pop(prefix + "bias", None)
-        # We expect the remaining keys to be quant_state keys.
-        quant_state_sd = state_dict
-
-        # During serialization, the quant_state is stored as subkeys of "weight." (See
-        # `bnb.nn.LinearNF4._save_to_state_dict()`). We validate that they at least have the correct prefix.
-        # TODO(ryand): Technically, we should be using `strict`, `missing_keys`, `unexpected_keys`, and `error_msgs`
-        # rather than raising an exception to correctly implement this API.
-        assert all(k.startswith(prefix + "weight.") for k in quant_state_sd.keys())
-
-        if len(quant_state_sd) > 0:
-            # We are loading a pre-quantized state dict.
-            self.weight = bnb.nn.Params4bit.from_prequantized(
-                data=weight, quantized_stats=quant_state_sd, device=weight.device
-            )
-            self.bias = bias if bias is None else torch.nn.Parameter(bias, requires_grad=False)
-        else:
-            # We are loading a non-quantized state dict.
-
-            # We could simply call the `super()._load_from_state_dict()` method here, but then we wouldn't be able to
-            # load from a state_dict into a model on the "meta" device. Attempting to load into a model on the "meta"
-            # device requires setting `assign=True`, doing this with the default `super()._load_from_state_dict()`
-            # implementation causes `Params4Bit` to be replaced by a `torch.nn.Parameter`. By initializing a new
-            # `Params4bit` object, we work around this issue. It's a bit hacky, but it gets the job done.
-            self.weight = bnb.nn.Params4bit(
-                data=weight,
-                requires_grad=self.weight.requires_grad,
-                compress_statistics=self.weight.compress_statistics,
-                quant_type=self.weight.quant_type,
-                quant_storage=self.weight.quant_storage,
-                module=self,
-            )
-            self.bias = bias if bias is None else torch.nn.Parameter(bias)
-
-
-def _replace_param(
-    param: torch.nn.Parameter | bnb.nn.Params4bit,
-    data: torch.Tensor,
-) -> torch.nn.Parameter:
-    """A helper function to replace the data of a model parameter with new data in a way that allows replacing params on
-    the "meta" device.
-
-    Supports both `torch.nn.Parameter` and `bnb.nn.Params4bit` parameters.
-    """
-    if param.device.type == "meta":
-        # Doing `param.data = data` raises a RuntimeError if param.data was on the "meta" device, so we need to
-        # re-create the param instead of overwriting the data.
-        if isinstance(param, bnb.nn.Params4bit):
-            return bnb.nn.Params4bit(
-                data,
-                requires_grad=data.requires_grad,
-                quant_state=param.quant_state,
-                compress_statistics=param.compress_statistics,
-                quant_type=param.quant_type,
-            )
-        return torch.nn.Parameter(data, requires_grad=data.requires_grad)
-
-    param.data = data
-    return param
-
-
-def _convert_linear_layers_to_nf4(
-    module: torch.nn.Module,
-    ignore_modules: set[str],
-    compute_dtype: torch.dtype,
-    compress_statistics: bool = False,
-    prefix: str = "",
-) -> None:
-    """Convert all linear layers in the model to NF4 quantized linear layers.
-
-    Args:
-        module: All linear layers in this module will be converted.
-        ignore_modules: A set of module prefixes to ignore when converting linear layers.
-        compute_dtype: The dtype to use for computation in the quantized linear layers.
-        compress_statistics: Whether to enable nested quantization (aka double quantization) where the quantization
-           constants from the first quantization are quantized again.
-        prefix: The prefix of the current module in the model. Used to call this function recursively.
-    """
-    for name, child in module.named_children():
-        fullname = f"{prefix}.{name}" if prefix else name
-        if isinstance(child, torch.nn.Linear) and not any(fullname.startswith(s) for s in ignore_modules):
-            has_bias = child.bias is not None
-            replacement = InvokeLinearNF4(
-                child.in_features,
-                child.out_features,
-                bias=has_bias,
-                compute_dtype=compute_dtype,
-                compress_statistics=compress_statistics,
-            )
-            if has_bias:
-                replacement.bias = _replace_param(replacement.bias, child.bias.data)
-            replacement.weight = _replace_param(replacement.weight, child.weight.data)
-            replacement.requires_grad_(False)
-            module.__setattr__(name, replacement)
-        else:
-            _convert_linear_layers_to_nf4(child, ignore_modules, compute_dtype=compute_dtype, prefix=fullname)
-
-
-def quantize_model_nf4(model: torch.nn.Module, modules_to_not_convert: set[str], compute_dtype: torch.dtype):
-    """Apply bitsandbytes nf4 quantization to the model.
-
-    You likely want to call this function inside a `accelerate.init_empty_weights()` context.
-
-    Example usage:
-    ```
-    # Initialize the model from a config on the meta device.
-    with accelerate.init_empty_weights():
-        model = ModelClass.from_config(...)
-
-    # Add NF4 quantization linear layers to the model - still on the meta device.
-    with accelerate.init_empty_weights():
-        model = quantize_model_nf4(model, modules_to_not_convert=set(), compute_dtype=torch.float16)
-
-    # Load a state_dict into the model. (Could be either a prequantized or non-quantized state_dict.)
-    model.load_state_dict(state_dict, strict=True, assign=True)
-
-    # Move the model to the "cuda" device. If the model was non-quantized, this is where the weight quantization takes
-    # place.
-    model.to("cuda")
-    ```
-    """
-    _convert_linear_layers_to_nf4(module=model, ignore_modules=modules_to_not_convert, compute_dtype=compute_dtype)
-
-    return model
--- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py
+++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_llm_int8.py
@@ -1,79 +0,0 @@
-from pathlib import Path
-
-import accelerate
-from safetensors.torch import load_file, save_file
-
-from invokeai.backend.flux.model import Flux
-from invokeai.backend.flux.util import params
-from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8
-from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time
-
-
-def main():
-    """A script for quantizing a FLUX transformer model using the bitsandbytes LLM.int8() quantization method.
-
-    This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert,
-    etc.) are hardcoded and would need to be modified for other use cases.
-    """
-    # Load the FLUX transformer model onto the meta device.
-    model_path = Path(
-        "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors"
-    )
-
-    with log_time("Intialize FLUX transformer on meta device"):
-        # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config.
-        p = params["flux-schnell"]
-
-        # Initialize the model on the "meta" device.
-        with accelerate.init_empty_weights():
-            model = Flux(p)
-
-    # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate
-    # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize.
-    modules_to_not_convert: set[str] = set()
-
-    model_int8_path = model_path.parent / "bnb_llm_int8.safetensors"
-    if model_int8_path.exists():
-        # The quantized model already exists, load it and return it.
-        print(f"A pre-quantized model already exists at '{model_int8_path}'. Attempting to load it...")
-
-        # Replace the linear layers with LLM.int8() quantized linear layers (still on the meta device).
-        with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights():
-            model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert)
-
-        with log_time("Load state dict into model"):
-            sd = load_file(model_int8_path)
-            model.load_state_dict(sd, strict=True, assign=True)
-
-        with log_time("Move model to cuda"):
-            model = model.to("cuda")
-
-        print(f"Successfully loaded pre-quantized model from '{model_int8_path}'.")
-
-    else:
-        # The quantized model does not exist, quantize the model and save it.
-        print(f"No pre-quantized model found at '{model_int8_path}'. Quantizing the model...")
-
-        with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights():
-            model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert)
-
-        with log_time("Load state dict into model"):
-            state_dict = load_file(model_path)
-            # TODO(ryand): Cast the state_dict to the appropriate dtype?
-            model.load_state_dict(state_dict, strict=True, assign=True)
-
-        with log_time("Move model to cuda and quantize"):
-            model = model.to("cuda")
-
-        with log_time("Save quantized model"):
-            model_int8_path.parent.mkdir(parents=True, exist_ok=True)
-            save_file(model.state_dict(), model_int8_path)
-
-        print(f"Successfully quantized and saved model to '{model_int8_path}'.")
-
-    assert isinstance(model, Flux)
-    return model
-
-
-if __name__ == "__main__":
-    main()
--- a/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py
+++ b/invokeai/backend/quantization/scripts/load_flux_model_bnb_nf4.py
@@ -1,96 +0,0 @@
-import time
-from contextlib import contextmanager
-from pathlib import Path
-
-import accelerate
-import torch
-from safetensors.torch import load_file, save_file
-
-from invokeai.backend.flux.model import Flux
-from invokeai.backend.flux.util import params
-from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4
-
-
-@contextmanager
-def log_time(name: str):
-    """Helper context manager to log the time taken by a block of code."""
-    start = time.time()
-    try:
-        yield None
-    finally:
-        end = time.time()
-        print(f"'{name}' took {end - start:.4f} secs")
-
-
-def main():
-    """A script for quantizing a FLUX transformer model using the bitsandbytes NF4 quantization method.
-
-    This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert,
-    etc.) are hardcoded and would need to be modified for other use cases.
-    """
-    model_path = Path(
-        "/data/invokeai/models/.download_cache/https__huggingface.co_black-forest-labs_flux.1-schnell_resolve_main_flux1-schnell.safetensors/flux1-schnell.safetensors"
-    )
-
-    # inference_dtype = torch.bfloat16
-    with log_time("Intialize FLUX transformer on meta device"):
-        # TODO(ryand): Determine if this is a schnell model or a dev model and load the appropriate config.
-        p = params["flux-schnell"]
-
-        # Initialize the model on the "meta" device.
-        with accelerate.init_empty_weights():
-            model = Flux(p)
-
-    # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate
-    # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize.
-    modules_to_not_convert: set[str] = set()
-
-    model_nf4_path = model_path.parent / "bnb_nf4.safetensors"
-    if model_nf4_path.exists():
-        # The quantized model already exists, load it and return it.
-        print(f"A pre-quantized model already exists at '{model_nf4_path}'. Attempting to load it...")
-
-        # Replace the linear layers with NF4 quantized linear layers (still on the meta device).
-        with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights():
-            model = quantize_model_nf4(
-                model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16
-            )
-
-        with log_time("Load state dict into model"):
-            state_dict = load_file(model_nf4_path)
-            model.load_state_dict(state_dict, strict=True, assign=True)
-
-        with log_time("Move model to cuda"):
-            model = model.to("cuda")
-
-        print(f"Successfully loaded pre-quantized model from '{model_nf4_path}'.")
-
-    else:
-        # The quantized model does not exist, quantize the model and save it.
-        print(f"No pre-quantized model found at '{model_nf4_path}'. Quantizing the model...")
-
-        with log_time("Replace linear layers with NF4 layers"), accelerate.init_empty_weights():
-            model = quantize_model_nf4(
-                model, modules_to_not_convert=modules_to_not_convert, compute_dtype=torch.bfloat16
-            )
-
-        with log_time("Load state dict into model"):
-            state_dict = load_file(model_path)
-            # TODO(ryand): Cast the state_dict to the appropriate dtype?
-            model.load_state_dict(state_dict, strict=True, assign=True)
-
-        with log_time("Move model to cuda and quantize"):
-            model = model.to("cuda")
-
-        with log_time("Save quantized model"):
-            model_nf4_path.parent.mkdir(parents=True, exist_ok=True)
-            save_file(model.state_dict(), model_nf4_path)
-
-        print(f"Successfully quantized and saved model to '{model_nf4_path}'.")
-
-    assert isinstance(model, Flux)
-    return model
-
-
-if __name__ == "__main__":
-    main()
--- a/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py
+++ b/invokeai/backend/quantization/scripts/quantize_t5_xxl_bnb_llm_int8.py
@@ -1,92 +0,0 @@
-from pathlib import Path
-
-import accelerate
-from safetensors.torch import load_file, save_file
-from transformers import AutoConfig, AutoModelForTextEncoding, T5EncoderModel
-
-from invokeai.backend.quantization.bnb_llm_int8 import quantize_model_llm_int8
-from invokeai.backend.quantization.scripts.load_flux_model_bnb_nf4 import log_time
-
-
-def load_state_dict_into_t5(model: T5EncoderModel, state_dict: dict):
-    # There is a shared reference to a single weight tensor in the model.
-    # Both "encoder.embed_tokens.weight" and "shared.weight" refer to the same tensor, so only the latter should
-    # be present in the state_dict.
-    missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False, assign=True)
-    assert len(unexpected_keys) == 0
-    assert set(missing_keys) == {"encoder.embed_tokens.weight"}
-    # Assert that the layers we expect to be shared are actually shared.
-    assert model.encoder.embed_tokens.weight is model.shared.weight
-
-
-def main():
-    """A script for quantizing a T5 text encoder model using the bitsandbytes LLM.int8() quantization method.
-
-    This script is primarily intended for reference. The script params (e.g. the model_path, modules_to_not_convert,
-    etc.) are hardcoded and would need to be modified for other use cases.
-    """
-    model_path = Path("/data/misc/text_encoder_2")
-
-    with log_time("Intialize T5 on meta device"):
-        model_config = AutoConfig.from_pretrained(model_path)
-        with accelerate.init_empty_weights():
-            model = AutoModelForTextEncoding.from_config(model_config)
-
-    # TODO(ryand): We may want to add some modules to not quantize here (e.g. the proj_out layer). See the accelerate
-    # `get_keys_to_not_convert(...)` function for a heuristic to determine which modules to not quantize.
-    modules_to_not_convert: set[str] = set()
-
-    model_int8_path = model_path / "bnb_llm_int8.safetensors"
-    if model_int8_path.exists():
-        # The quantized model already exists, load it and return it.
-        print(f"A pre-quantized model already exists at '{model_int8_path}'. Attempting to load it...")
-
-        # Replace the linear layers with LLM.int8() quantized linear layers (still on the meta device).
-        with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights():
-            model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert)
-
-        with log_time("Load state dict into model"):
-            sd = load_file(model_int8_path)
-            load_state_dict_into_t5(model, sd)
-
-        with log_time("Move model to cuda"):
-            model = model.to("cuda")
-
-        print(f"Successfully loaded pre-quantized model from '{model_int8_path}'.")
-
-    else:
-        # The quantized model does not exist, quantize the model and save it.
-        print(f"No pre-quantized model found at '{model_int8_path}'. Quantizing the model...")
-
-        with log_time("Replace linear layers with LLM.int8() layers"), accelerate.init_empty_weights():
-            model = quantize_model_llm_int8(model, modules_to_not_convert=modules_to_not_convert)
-
-        with log_time("Load state dict into model"):
-            # Load sharded state dict.
-            files = list(model_path.glob("*.safetensors"))
-            state_dict = {}
-            for file in files:
-                sd = load_file(file)
-                state_dict.update(sd)
-            load_state_dict_into_t5(model, state_dict)
-
-        with log_time("Move model to cuda and quantize"):
-            model = model.to("cuda")
-
-        with log_time("Save quantized model"):
-            model_int8_path.parent.mkdir(parents=True, exist_ok=True)
-            state_dict = model.state_dict()
-            state_dict.pop("encoder.embed_tokens.weight")
-            save_file(state_dict, model_int8_path)
-            # This handling of shared weights could also be achieved with save_model(...), but then we'd lose control
-            # over which keys are kept. And, the corresponding load_model(...) function does not support assign=True.
-            # save_model(model, model_int8_path)
-
-        print(f"Successfully quantized and saved model to '{model_int8_path}'.")
-
-    assert isinstance(model, T5EncoderModel)
-    return model
-
-
-if __name__ == "__main__":
-    main()
--- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
+++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py
@@ -25,6 +25,11 @@ class BasicConditioningInfo:
        return self


+@dataclass
+class ConditioningFieldData:
+    conditionings: List[BasicConditioningInfo]
+
+
 @dataclass
 class SDXLConditioningInfo(BasicConditioningInfo):
    """SDXL text conditioning information produced by Compel."""
@@ -38,22 +43,6 @@ class SDXLConditioningInfo(BasicConditioningInfo):
        return super().to(device=device, dtype=dtype)


-@dataclass
-class FLUXConditioningInfo:
-    clip_embeds: torch.Tensor
-    t5_embeds: torch.Tensor
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self.clip_embeds = self.clip_embeds.to(device=device, dtype=dtype)
-        self.t5_embeds = self.t5_embeds.to(device=device, dtype=dtype)
-        return self
-
-
-@dataclass
-class ConditioningFieldData:
-    conditionings: List[BasicConditioningInfo] | List[SDXLConditioningInfo] | List[FLUXConditioningInfo]
-
-
 @dataclass
 class IPAdapterConditioningInfo:
    cond_image_prompt_embeds: torch.Tensor
--- a/invokeai/backend/stable_diffusion/extensions/lora.py
+++ b/invokeai/backend/stable_diffusion/extensions/lora.py
@@ -1,17 +1,18 @@
 from __future__ import annotations

 from contextlib import contextmanager
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Tuple

+import torch
 from diffusers import UNet2DConditionModel

-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.lora_patcher import LoraPatcher
 from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
+from invokeai.backend.util.devices import TorchDevice

 if TYPE_CHECKING:
    from invokeai.app.invocations.model import ModelIdentifierField
    from invokeai.app.services.shared.invocation_context import InvocationContext
+    from invokeai.backend.lora import LoRAModelRaw
    from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage


@@ -30,14 +31,107 @@ class LoRAExt(ExtensionBase):
    @contextmanager
    def patch_unet(self, unet: UNet2DConditionModel, original_weights: OriginalWeightsStorage):
+        """Load this extension's LoRA and patch it into `unet` (layer keys prefixed "lora_unet_"), then yield.
+
+        :param unet: The UNet whose parameters are patched.
+        :param original_weights: Storage that receives the pre-patch parameter values.
+        """
        lora_model = self._node_context.models.load(self._model_id).model
-        assert isinstance(lora_model, LoRAModelRaw)
-        LoraPatcher.apply_lora_patch(
+        self.patch_model(
            model=unet,
            prefix="lora_unet_",
-            patch=lora_model,
-            patch_weight=self._weight,
+            lora=lora_model,
+            lora_weight=self._weight,
            original_weights=original_weights,
        )
+        # Drop the local reference once the weights are applied -- presumably so the loaded model can be
+        # released while the context stays open (TODO confirm against the model cache).
        del lora_model

+        # NOTE(review): nothing is restored here after the yield; unpatching presumably happens through
+        # `original_weights` in the caller -- confirm.
        yield
+
+    @classmethod
+    @torch.no_grad()
+    def patch_model(
+        cls,
+        model: torch.nn.Module,
+        prefix: str,
+        lora: LoRAModelRaw,
+        lora_weight: float,
+        original_weights: OriginalWeightsStorage,
+    ):
+        """
+        Apply a single LoRA to a model by adding its scaled weights to the matching module parameters in place.
+        :param model: The model to patch.
+        :param prefix: A string prefix that precedes keys used in the LoRAs weight layers. Layers whose key does
+            not start with it are skipped.
+        :param lora: LoRA model to patch in.
+        :param lora_weight: LoRA patch weight. A weight of 0 is a no-op.
+        :param original_weights: Storage with original weights, filled by weights which lora patches, used for unpatching.
+        """
+
+        if lora_weight == 0:
+            return
+
+        # Loop-invariant sanity check, so it is evaluated once rather than once per LoRA layer.
+        assert isinstance(model, torch.nn.Module)
+
+        # assert lora.device.type == "cpu"
+        for layer_key, layer in lora.layers.items():
+            if not layer_key.startswith(prefix):
+                continue
+
+            # TODO(ryand): A non-negligible amount of time is currently spent resolving LoRA keys. This
+            # should be improved in the following ways:
+            # 1. The key mapping could be more-efficiently pre-computed. This would save time every time a
+            #    LoRA model is applied.
+            # 2. From an API perspective, there's no reason that the `ModelPatcher` should be aware of the
+            #    intricacies of Stable Diffusion key resolution. It should just expect the input LoRA
+            #    weights to have valid keys.
+            module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
+
+            # All of the LoRA weight calculations will be done on the same device as the module weight.
+            # (Performance will be best if this is a CUDA device.)
+            device = module.weight.device
+            dtype = module.weight.dtype
+
+            layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
+
+            try:
+                # We intentionally move to the target device first, then cast. Experimentally, this was
+                # found to be significantly faster for 16-bit CPU tensors being moved to a CUDA device than
+                # doing the same thing in a single call to '.to(...)'.
+                layer.to(device=device)
+                layer.to(dtype=torch.float32)
+
+                # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on
+                # CUDA devices here. Experimentally, it was found to be very slow on CPU. More investigation
+                # needed.
+                for param_name, lora_param_weight in layer.get_parameters(module).items():
+                    param_key = module_key + "." + param_name
+                    module_param = module.get_parameter(param_name)
+
+                    # Record the unpatched parameter before it is modified below.
+                    original_weights.save(param_key, module_param)
+
+                    if module_param.shape != lora_param_weight.shape:
+                        # TODO: debug on lycoris
+                        lora_param_weight = lora_param_weight.reshape(module_param.shape)
+
+                    # Scale out of place: `get_parameters()` may hand back a tensor owned by the layer (and
+                    # `reshape` may return a view of it), so an in-place `*=` could permanently rescale the
+                    # LoRA's own weights. TODO confirm ownership in the layer implementations.
+                    scaled_weight = lora_param_weight * (lora_weight * layer_scale)
+                    module_param += scaled_weight.to(dtype=dtype)
+            finally:
+                # Always hand the layer back to the CPU, even when patching this layer failed, so an error
+                # does not leave LoRA weights stranded on the compute device.
+                layer.to(device=TorchDevice.CPU_DEVICE)
+
+    @staticmethod
+    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
+        """
+        Map a flattened LoRA layer key onto the submodule of `model` that it refers to.
+
+        After `prefix` is stripped, the key is split on "_". Because submodule names may themselves contain
+        underscores, parts are greedily joined with "_" until the accumulated name resolves to a child of the
+        current module. For a model with a child `down_blocks` that has a child `0`, the key
+        "lora_unet_down_blocks_0" resolves to the path "down_blocks.0".
+
+        :param model: Root module to search.
+        :param lora_key: LoRA layer key; must start with `prefix` and must not contain ".".
+        :param prefix: Prefix to strip from `lora_key`.
+        :return: Tuple of (dotted module path relative to `model`, resolved module).
+        :raises ValueError: If `lora_key` does not start with `prefix`.
+        :raises AttributeError: If the remaining key parts cannot be resolved to a submodule.
+        """
+        assert "." not in lora_key
+
+        if not lora_key.startswith(prefix):
+            raise ValueError(f"lora_key with invalid prefix: {lora_key}, {prefix}")
+
+        module = model
+        module_key = ""
+        key_parts = lora_key[len(prefix) :].split("_")
+
+        submodule_name = key_parts.pop(0)
+
+        while len(key_parts) > 0:
+            try:
+                # Only the lookup itself may fail; `get_submodule` reports an unknown name with AttributeError.
+                child = module.get_submodule(submodule_name)
+            except AttributeError:
+                # Not a child (yet): the real name contains an underscore, so absorb the next part and retry.
+                submodule_name += "_" + key_parts.pop(0)
+                continue
+
+            module = child
+            module_key += "." + submodule_name
+            submodule_name = key_parts.pop(0)
+
+        # Whatever is left must resolve as-is; `get_submodule` raises AttributeError otherwise.
+        module = module.get_submodule(submodule_name)
+        module_key = (module_key + "." + submodule_name).lstrip(".")
+
+        return (module_key, module)
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -10,7 +10,6 @@ from transformers import CLIPTokenizer
 from typing_extensions import Self

 from invokeai.backend.raw_model import RawModel
-from invokeai.backend.util.calc_tensor_size import calc_tensors_size


 class TextualInversionModelRaw(RawModel):
@@ -75,7 +74,11 @@ class TextualInversionModelRaw(RawModel):

    def calc_size(self) -> int:
        """Get the size of this model in bytes."""
-        return calc_tensors_size([self.embedding, self.embedding_2])
+        # Bytes = element count * bytes per element; the second embedding is optional.
+        total_bytes = self.embedding.element_size() * self.embedding.nelement()
+        if self.embedding_2 is not None:
+            total_bytes += self.embedding_2.element_size() * self.embedding_2.nelement()
+        return total_bytes


 class TextualInversionManager(BaseTextualInversionManager):
--- a/invokeai/backend/util/init.py
+++ b/invokeai/backend/util/init.py
@@ -3,9 +3,10 @@ Initialization file for invokeai.backend.util
 """

 from invokeai.backend.util.logging import InvokeAILogger
-from invokeai.backend.util.util import Chdir, directory_size
+from invokeai.backend.util.util import GIG, Chdir, directory_size

 __all__ = [
+    "GIG",
    "directory_size",
    "Chdir",
    "InvokeAILogger",
--- a/invokeai/backend/util/calc_tensor_size.py
+++ b/invokeai/backend/util/calc_tensor_size.py
@@ -1,11 +0,0 @@
-import torch
-
-
-def calc_tensor_size(t: torch.Tensor) -> int:
-    """Calculate the size of a tensor in bytes."""
-    return t.nelement() * t.element_size()
-
-
-def calc_tensors_size(tensors: list[torch.Tensor | None]) -> int:
-    """Calculate the size of a list of tensors in bytes."""
-    return sum(calc_tensor_size(t) for t in tensors if t is not None)
--- a/invokeai/backend/util/util.py
+++ b/invokeai/backend/util/util.py
@@ -7,6 +7,9 @@ from pathlib import Path

 from PIL import Image

+# Number of bytes in one gibibyte (GiB), i.e. 2**30.
+GIG = 1073741824
+

 def slugify(value: str, allow_unicode: bool = False) -> str:
    """
--- a/invokeai/frontend/web/.eslintrc.js
+++ b/invokeai/frontend/web/.eslintrc.js
@@ -16,14 +16,6 @@ module.exports = {
    'no-promise-executor-return': 'error',
    // https://eslint.org/docs/latest/rules/require-await
    'require-await': 'error',
-    'no-restricted-properties': [
-      'error',
-      {
-        object: 'crypto',
-        property: 'randomUUID',
-        message: 'Use of crypto.randomUUID is not allowed as it is not available in all browsers.',
-      },
-    ],
  },
  overrides: [
    /**
--- a/invokeai/frontend/web/knip.ts
+++ b/invokeai/frontend/web/knip.ts
@@ -11,8 +11,6 @@ const config: KnipConfig = {
    'src/features/nodes/types/v2/**',
    // TODO(psyche): maybe we can clean up these utils after canvas v2 release
    'src/features/controlLayers/konva/util.ts',
-    // TODO(psyche): restore HRF functionality?
-    'src/features/hrf/**',
  ],
  ignoreBinaries: ['only-allow'],
  paths: {
--- a/invokeai/frontend/web/package.json
+++ b/invokeai/frontend/web/package.json
@@ -58,13 +58,12 @@
    "@dnd-kit/sortable": "^8.0.0",
    "@dnd-kit/utilities": "^3.2.2",
    "@fontsource-variable/inter": "^5.0.20",
-    "@invoke-ai/ui-library": "^0.0.33",
+    "@invoke-ai/ui-library": "^0.0.32",
    "@nanostores/react": "^0.7.3",
    "@reduxjs/toolkit": "2.2.3",
    "@roarr/browser-log-writer": "^1.3.0",
    "async-mutex": "^0.5.0",
    "chakra-react-select": "^4.9.1",
-    "cmdk": "^1.0.0",
    "compare-versions": "^6.1.1",
    "dateformat": "^5.0.3",
    "fracturedjsonjs": "^4.0.2",
@@ -92,7 +91,8 @@
    "react-i18next": "^14.1.3",
    "react-icons": "^5.2.1",
    "react-redux": "9.1.2",
-    "react-resizable-panels": "^2.1.2",
+    "react-resizable-panels": "^2.0.23",
+    "react-select": "5.8.0",
    "react-use": "^17.5.1",
    "react-virtuoso": "^4.9.0",
    "reactflow": "^11.11.4",
@@ -136,7 +136,6 @@
    "@vitest/coverage-v8": "^1.5.0",
    "@vitest/ui": "^1.5.0",
    "concurrently": "^8.2.2",
-    "csstype": "^3.1.3",
    "dpdm": "^3.14.0",
    "eslint": "^8.57.0",
    "eslint-plugin-i18next": "^6.0.9",
--- a/invokeai/frontend/web/pnpm-lock.yaml
+++ b/invokeai/frontend/web/pnpm-lock.yaml
@@ -24,8 +24,8 @@ dependencies:
    specifier: ^5.0.20
    version: 5.0.20
  '@invoke-ai/ui-library':
-    specifier: ^0.0.33
-    version: 0.0.33(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.0.20)(@types/react@18.3.3)(i18next@23.12.2)(react-dom@18.3.1)(react@18.3.1)
+    specifier: ^0.0.32
+    version: 0.0.32(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.0.20)(@types/react@18.3.3)(i18next@23.12.2)(react-dom@18.3.1)(react@18.3.1)
  '@nanostores/react':
    specifier: ^0.7.3
    version: 0.7.3(nanostores@0.11.2)(react@18.3.1)
@@ -41,9 +41,6 @@ dependencies:
  chakra-react-select:
    specifier: ^4.9.1
    version: 4.9.1(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/layout@2.3.1)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@emotion/react@11.13.3)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-  cmdk:
-    specifier: ^1.0.0
-    version: 1.0.0(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
  compare-versions:
    specifier: ^6.1.1
    version: 6.1.1
@@ -126,8 +123,11 @@ dependencies:
    specifier: 9.1.2
    version: 9.1.2(@types/react@18.3.3)(react@18.3.1)(redux@5.0.1)
  react-resizable-panels:
-    specifier: ^2.1.2
-    version: 2.1.2(react-dom@18.3.1)(react@18.3.1)
+    specifier: ^2.0.23
+    version: 2.0.23(react-dom@18.3.1)(react@18.3.1)
+  react-select:
+    specifier: 5.8.0
+    version: 5.8.0(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
  react-use:
    specifier: ^17.5.1
    version: 17.5.1(react-dom@18.3.1)(react@18.3.1)
@@ -238,9 +238,6 @@ devDependencies:
  concurrently:
    specifier: ^8.2.2
    version: 8.2.2
-  csstype:
-    specifier: ^3.1.3
-    version: 3.1.3
  dpdm:
    specifier: ^3.14.0
    version: 3.14.0
@@ -2056,7 +2053,7 @@ packages:
    dependencies:
      '@chakra-ui/dom-utils': 2.1.0
      react: 18.3.1
-      react-focus-lock: 2.13.2(@types/react@18.3.3)(react@18.3.1)
+      react-focus-lock: 2.12.1(@types/react@18.3.3)(react@18.3.1)
    transitivePeerDependencies:
      - '@types/react'
    dev: false
@@ -2253,7 +2250,7 @@ packages:
      framer-motion: 10.18.0(react-dom@18.3.1)(react@18.3.1)
      react: 18.3.1
      react-dom: 18.3.1(react@18.3.1)
-      react-remove-scroll: 2.6.0(@types/react@18.3.3)(react@18.3.1)
+      react-remove-scroll: 2.5.10(@types/react@18.3.3)(react@18.3.1)
    transitivePeerDependencies:
      - '@types/react'
    dev: false
@@ -3574,8 +3571,8 @@ packages:
      prettier: 3.3.3
    dev: true

-  /@invoke-ai/ui-library@0.0.33(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.0.20)(@types/react@18.3.3)(i18next@23.12.2)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-YLydTCOTUEgju4Ex6yXt/bvNBcO97y6zc1cGYjt7vtJMS8e6deA89cC5JejjbmVgntdnn49cDyeUxB8Z24gZew==}
+  /@invoke-ai/ui-library@0.0.32(@chakra-ui/form-control@2.2.0)(@chakra-ui/icon@3.2.0)(@chakra-ui/media-query@3.3.0)(@chakra-ui/menu@2.2.1)(@chakra-ui/spinner@2.1.0)(@chakra-ui/system@2.6.2)(@fontsource-variable/inter@5.0.20)(@types/react@18.3.3)(i18next@23.12.2)(react-dom@18.3.1)(react@18.3.1):
+    resolution: {integrity: sha512-JxAoblrDu/cZ4ha9KO4ry5OWvyLUE1Dj28i+ciMaDNUpC/cN+IyiTbUBoFoPaoN5JP8Zpd/MYCcmF2qsziHDzg==}
    peerDependencies:
      '@fontsource-variable/inter': ^5.0.16
      react: ^18.2.0
@@ -3787,288 +3784,6 @@ packages:
    resolution: {integrity: sha512-P1st0aksCrn9sGZhp8GMYwBnQsbvAWsZAX44oXNNvLHGqAOcoVxmjZiohstwQ7SqKnbR47akdNi+uleWD8+g6A==}
    dev: false

-  /@radix-ui/primitive@1.0.1:
-    resolution: {integrity: sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw==}
-    dependencies:
-      '@babel/runtime': 7.25.4
-    dev: false
-
-  /@radix-ui/react-compose-refs@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-fDSBgd44FKHa1FRMU59qBMPFcl2PZE+2nmqunj+BWFyYYjnhIDWL2ItDs3rrbJDQOtzt5nIebLCQc4QRfz6LJw==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-context@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-ebbrdFoYTcuZ0v4wG5tedGnp9tzcV8awzsxYph7gXUyvnNLuTIcCk1q17JEbnVhXAKG9oX3KtchwiMIAYp9NLg==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-dialog@1.0.5(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-GjWJX/AUpB703eEBanuBnIWdIXg6NvJFCXcNlSZk4xdszCdhrJgBoUd1cGk67vFO+WdA2pfI/plOpqz/5GUP6Q==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/primitive': 1.0.1
-      '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-context': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-dismissable-layer': 1.0.5(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-focus-guards': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-focus-scope': 1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-id': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-portal': 1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-presence': 1.0.1(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-slot': 1.0.2(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-use-controllable-state': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      aria-hidden: 1.2.4
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-      react-remove-scroll: 2.5.5(@types/react@18.3.3)(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-dismissable-layer@1.0.5(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-aJeDjQhywg9LBu2t/At58hCvr7pEm0o2Ke1x33B+MhjNmmZ17sy4KImo0KPLgsnc/zN7GPdce8Cnn0SWvwZO7g==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/primitive': 1.0.1
-      '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-use-escape-keydown': 1.0.3(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-focus-guards@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-Rect2dWbQ8waGzhMavsIbmSVCgYxkXLxxR3ZvCX79JOglzdEy4JXMb98lq4hPxUbLr77nP0UOGf4rcMU+s1pUA==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-focus-scope@1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-sL04Mgvf+FmyvZeYfNu1EPAaaxD+aw7cYeIB9L9Fvq8+urhltTRaEo5ysKOpHuKPclsZcSUMKlN05x4u+CINpA==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-id@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-tI7sT/kqYp8p96yGWY1OAnLHrqDgzHefRBKQ2YAkBS5ja7QLcZ9Z/uY7bEjPUatf8RomoXM8/1sMj1IJaE5UzQ==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-use-layout-effect': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-portal@1.0.4(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-Qki+C/EuGUVCQTOTD5vzJzJuMUlewbzuKyUy+/iHM2uwGiru9gZeBJtHAPKAEkB5KWGi9mP/CHKcY0wt1aW45Q==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-presence@1.0.1(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-UXLW4UAbIY5ZjcvzjfRFo5gxva8QirC9hF7wRE4U5gz+TP0DbRk+//qyuAQ1McDxBt1xNMBTaciFGvEmJvAZCg==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@radix-ui/react-use-layout-effect': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-primitive@1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-yi58uVyoAcK/Nq1inRY56ZSjKypBNKTa/1mcL8qdl6oJeEaDbOldlzrGn7P6Q3Id5d+SYNGc5AJgc4vGhjs5+g==}
-    peerDependencies:
-      '@types/react': '*'
-      '@types/react-dom': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-      react-dom: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-      '@types/react-dom':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-slot': 1.0.2(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      '@types/react-dom': 18.3.0
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    dev: false
-
-  /@radix-ui/react-slot@1.0.2(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-YeTpuq4deV+6DusvVUW4ivBgnkHwECUu0BiN43L5UCDFgdhsRUWAghhTF5MbvNTPzmiFOx90asDSUjWuCNapwg==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-compose-refs': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-use-callback-ref@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-use-controllable-state@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-Svl5GY5FQeN758fWKrjM6Qb7asvXeiZltlT4U2gVfl8Gx5UAv2sMR0LWo8yhsIZh2oQ0eFdZ59aoOOMV7b47VA==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-use-escape-keydown@1.0.3(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-vyL82j40hcFicA+M4Ex7hVkB9vHgSse1ZWomAqV2Je3RleKGO5iM8KMOEtfoSB0PnIelMd2lATjTGMYqN5ylTg==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@radix-ui/react-use-callback-ref': 1.0.1(@types/react@18.3.3)(react@18.3.1)
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
-  /@radix-ui/react-use-layout-effect@1.0.1(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==}
-    peerDependencies:
-      '@types/react': '*'
-      react: ^16.8 || ^17.0 || ^18.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@babel/runtime': 7.25.4
-      '@types/react': 18.3.3
-      react: 18.3.1
-    dev: false
-
  /@reactflow/background@11.3.14(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
    resolution: {integrity: sha512-Gewd7blEVT5Lh6jqrvOgd4G6Qk17eGKQfsDXgyRSqM+CTwDqRldG2LsWN4sNeno6sbqVIC2fZ+rAUBFA9ZEUDA==}
    peerDependencies:
@@ -5495,6 +5210,7 @@ packages:
    resolution: {integrity: sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==}
    dependencies:
      '@types/react': 18.3.3
+    dev: true

  /@types/react-transition-group@4.4.10:
    resolution: {integrity: sha512-hT/+s0VQs2ojCX823m60m5f0sL5idt9SO6Tj6Dg+rdphGPIeJbJ6CxvBYkgkGKrYeDjvIpKTR38UzmtHJOGW3Q==}
@@ -6552,21 +6268,6 @@ packages:
    requiresBuild: true
    dev: true

-  /cmdk@1.0.0(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-gDzVf0a09TvoJ5jnuPvygTB77+XdOSwEmJ88L6XPFPlv7T3RxbP9jgenfylrAMD0+Le1aO0nVjQUzl2g+vjz5Q==}
-    peerDependencies:
-      react: ^18.0.0
-      react-dom: ^18.0.0
-    dependencies:
-      '@radix-ui/react-dialog': 1.0.5(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      '@radix-ui/react-primitive': 1.0.3(@types/react-dom@18.3.0)(@types/react@18.3.3)(react-dom@18.3.1)(react@18.3.1)
-      react: 18.3.1
-      react-dom: 18.3.1(react@18.3.1)
-    transitivePeerDependencies:
-      - '@types/react'
-      - '@types/react-dom'
-    dev: false
-
  /color-convert@1.9.3:
    resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==}
    dependencies:
@@ -10011,8 +9712,8 @@ packages:
    resolution: {integrity: sha512-nsO+KSNgo1SbJqJEYRE9ERzo7YtYbou/OqjSQKxV7jcKox7+usiUVZOAC+XnDOABXggQTno0Y1CpVnuWEc1boQ==}
    dev: false

-  /react-focus-lock@2.13.2(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-T/7bsofxYqnod2xadvuwjGKHOoL5GH7/EIPI5UyEvaU/c2CcphvGI371opFtuY/SYdbMsNiuF4HsHQ50nA/TKQ==}
+  /react-focus-lock@2.12.1(@types/react@18.3.3)(react@18.3.1):
+    resolution: {integrity: sha512-lfp8Dve4yJagkHiFrC1bGtib3mF2ktqwPJw4/WGcgPW+pJ/AVQA5X2vI7xgp13FcxFEpYBBHpXai/N2DBNC0Jw==}
    peerDependencies:
      '@types/react': ^16.8.0 || ^17.0.0 || ^18.0.0
      react: ^16.8.0 || ^17.0.0 || ^18.0.0
@@ -10147,8 +9848,8 @@ packages:
      tslib: 2.7.0
    dev: false

-  /react-remove-scroll@2.5.5(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==}
+  /react-remove-scroll@2.5.10(@types/react@18.3.3)(react@18.3.1):
+    resolution: {integrity: sha512-m3zvBRANPBw3qxVVjEIPEQinkcwlFZ4qyomuWVpNJdv4c6MvHfXV0C3L9Jx5rr3HeBHKNRX+1jreB5QloDIJjA==}
    engines: {node: '>=10'}
    peerDependencies:
      '@types/react': ^16.8.0 || ^17.0.0 || ^18.0.0
@@ -10166,27 +9867,8 @@ packages:
      use-sidecar: 1.1.2(@types/react@18.3.3)(react@18.3.1)
    dev: false

-  /react-remove-scroll@2.6.0(@types/react@18.3.3)(react@18.3.1):
-    resolution: {integrity: sha512-I2U4JVEsQenxDAKaVa3VZ/JeJZe0/2DxPWL8Tj8yLKctQJQiZM52pn/GWFpSp8dftjM3pSAHVJZscAnC/y+ySQ==}
-    engines: {node: '>=10'}
-    peerDependencies:
-      '@types/react': ^16.8.0 || ^17.0.0 || ^18.0.0
-      react: ^16.8.0 || ^17.0.0 || ^18.0.0
-    peerDependenciesMeta:
-      '@types/react':
-        optional: true
-    dependencies:
-      '@types/react': 18.3.3
-      react: 18.3.1
-      react-remove-scroll-bar: 2.3.6(@types/react@18.3.3)(react@18.3.1)
-      react-style-singleton: 2.2.1(@types/react@18.3.3)(react@18.3.1)
-      tslib: 2.7.0
-      use-callback-ref: 1.3.2(@types/react@18.3.3)(react@18.3.1)
-      use-sidecar: 1.1.2(@types/react@18.3.3)(react@18.3.1)
-    dev: false
-
-  /react-resizable-panels@2.1.2(react-dom@18.3.1)(react@18.3.1):
-    resolution: {integrity: sha512-Ku2Bo7JvE8RpHhl4X1uhkdeT9auPBoxAOlGTqomDUUrBAX2mVGuHYZTcWvlnJSgx0QyHIxHECgGB5XVPUbUOkQ==}
+  /react-resizable-panels@2.0.23(react-dom@18.3.1)(react@18.3.1):
+    resolution: {integrity: sha512-8ZKTwTU11t/FYwiwhMdtZYYyFxic5U5ysRu2YwfkAgDbUJXFvnWSJqhnzkSlW+mnDoNAzDCrJhdOSXBPA76wug==}
    peerDependencies:
      react: ^16.14.0 || ^17.0.0 || ^18.0.0
      react-dom: ^16.14.0 || ^17.0.0 || ^18.0.0
--- a/invokeai/frontend/web/src/features/controlLayers/konva/patterns/transparent_bg.png
+++ b/invokeai/frontend/web/src/features/controlLayers/konva/patterns/transparent_bg.png
--- a/invokeai/frontend/web/public/locales/de.json
+++ b/invokeai/frontend/web/public/locales/de.json
@@ -127,14 +127,7 @@
        "bulkDownloadRequestedDesc": "Dein Download wird vorbereitet. Dies kann ein paar Momente dauern.",
        "bulkDownloadRequestFailed": "Problem beim Download vorbereiten",
        "bulkDownloadFailed": "Download fehlgeschlagen",
-        "alwaysShowImageSizeBadge": "Zeige immer Bilder Größe Abzeichen",
-        "selectForCompare": "Zum Vergleichen auswählen",
-        "compareImage": "Bilder vergleichen",
-        "exitSearch": "Suche beenden",
-        "newestFirst": "Neueste zuerst",
-        "oldestFirst": "Älteste zuerst",
-        "openInViewer": "Im Viewer öffnen",
-        "swapImages": "Bilder tauschen"
+        "alwaysShowImageSizeBadge": "Zeige immer Bilder Größe Abzeichen"
    },
    "hotkeys": {
        "keyboardShortcuts": "Tastenkürzel",
@@ -638,8 +631,7 @@
        "archived": "Archiviert",
        "noBoards": "Kein {boardType}} Ordner",
        "hideBoards": "Ordner verstecken",
-        "viewBoards": "Ordner ansehen",
-        "deletedPrivateBoardsCannotbeRestored": "Gelöschte Boards können nicht wiederhergestellt werden. Wenn Sie „Nur Board löschen“ wählen, werden die Bilder in einen privaten, nicht kategorisierten Status für den Ersteller des Bildes versetzt."
+        "viewBoards": "Ordner ansehen"
    },
    "controlnet": {
        "showAdvanced": "Zeige Erweitert",
@@ -789,9 +781,7 @@
        "batchFieldValues": "Stapelverarbeitungswerte",
        "batchQueued": "Stapelverarbeitung eingereiht",
        "graphQueued": "Graph eingereiht",
-        "graphFailedToQueue": "Fehler beim Einreihen des Graphen",
-        "generations_one": "Generation",
-        "generations_other": "Generationen"
+        "graphFailedToQueue": "Fehler beim Einreihen des Graphen"
    },
    "metadata": {
        "negativePrompt": "Negativ Beschreibung",
@@ -1156,10 +1146,5 @@
        "noMatchingTriggers": "Keine passenden Trigger",
        "addPromptTrigger": "Prompt-Trigger hinzufügen",
        "compatibleEmbeddings": "Kompatible Einbettungen"
-    },
-    "ui": {
-        "tabs": {
-            "queue": "Warteschlange"
-        }
    }
 }
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -93,7 +93,6 @@
        "copy": "Copy",
        "copyError": "$t(gallery.copy) Error",
        "on": "On",
-        "off": "Off",
        "or": "or",
        "checkpoint": "Checkpoint",
        "communityLabel": "Community",
@@ -135,7 +134,6 @@
        "nodes": "Workflows",
        "notInstalled": "Not $t(common.installed)",
        "openInNewTab": "Open in New Tab",
-        "openInViewer": "Open in Viewer",
        "orderBy": "Order By",
        "outpaint": "outpaint",
        "outputs": "Outputs",
@@ -166,10 +164,10 @@
        "alpha": "Alpha",
        "selected": "Selected",
        "tab": "Tab",
-        "view": "View",
-        "viewDesc": "Review images in a large gallery view",
-        "edit": "Edit",
-        "editDesc": "Edit on the Canvas",
+        "viewing": "Viewing",
+        "viewingDesc": "Review images in a large gallery view",
+        "editing": "Editing",
+        "editingDesc": "Edit on the Control Layers canvas",
        "comparing": "Comparing",
        "comparingDesc": "Comparing two images",
        "enabled": "Enabled",
@@ -330,13 +328,9 @@
        "completedIn": "Completed in",
        "batch": "Batch",
        "origin": "Origin",
-        "destination": "Destination",
-        "upscaling": "Upscaling",
-        "canvas": "Canvas",
-        "generation": "Generation",
-        "workflows": "Workflows",
-        "other": "Other",
-        "gallery": "Gallery",
+        "originCanvas": "Canvas",
+        "originWorkflows": "Workflows",
+        "originOther": "Other",
        "batchFieldValues": "Batch Field Values",
        "item": "Item",
        "session": "Session",
@@ -375,7 +369,6 @@
        "useCache": "Use Cache"
    },
    "gallery": {
-        "gallery": "Gallery",
        "alwaysShowImageSizeBadge": "Always Show Image Size Badge",
        "assets": "Assets",
        "autoAssignBoardOnClick": "Auto-Assign Board on Click",
@@ -388,11 +381,11 @@
        "deleteImage_one": "Delete Image",
        "deleteImage_other": "Delete {{count}} Images",
        "deleteImagePermanent": "Deleted images cannot be restored.",
-        "displayBoardSearch": "Board Search",
-        "displaySearch": "Image Search",
+        "displayBoardSearch": "Display Board Search",
+        "displaySearch": "Display Search",
        "download": "Download",
        "exitBoardSearch": "Exit Board Search",
-        "exitSearch": "Exit Image Search",
+        "exitSearch": "Exit Search",
        "featuresWillReset": "If you delete this image, those features will immediately be reset.",
        "galleryImageSize": "Image Size",
        "gallerySettings": "Gallery Settings",
@@ -438,8 +431,7 @@
        "compareHelp1": "Hold <Kbd>Alt</Kbd> while clicking a gallery image or using the arrow keys to change the compare image.",
        "compareHelp2": "Press <Kbd>M</Kbd> to cycle through comparison modes.",
        "compareHelp3": "Press <Kbd>C</Kbd> to swap the compared images.",
-        "compareHelp4": "Press <Kbd>Z</Kbd> or <Kbd>Esc</Kbd> to exit.",
-        "toggleMiniViewer": "Toggle Mini Viewer"
+        "compareHelp4": "Press <Kbd>Z</Kbd> or <Kbd>Esc</Kbd> to exit."
    },
    "hotkeys": {
        "searchHotkeys": "Search Hotkeys",
@@ -710,8 +702,6 @@
        "availableModels": "Available Models",
        "baseModel": "Base Model",
        "cancel": "Cancel",
-        "clipEmbed": "CLIP Embed",
-        "clipVision": "CLIP Vision",
        "config": "Config",
        "convert": "Convert",
        "convertingModelBegin": "Converting Model. Please wait.",
@@ -799,16 +789,13 @@
        "settings": "Settings",
        "simpleModelPlaceholder": "URL or path to a local file or diffusers folder",
        "source": "Source",
-        "spandrelImageToImage": "Image to Image (Spandrel)",
        "starterModels": "Starter Models",
-        "starterModelsInModelManager": "Starter Models can be found in Model Manager",
        "syncModels": "Sync Models",
        "textualInversions": "Textual Inversions",
        "triggerPhrases": "Trigger Phrases",
        "loraTriggerPhrases": "LoRA Trigger Phrases",
        "mainModelTriggerPhrases": "Main Model Trigger Phrases",
        "typePhraseHere": "Type phrase here",
-        "t5Encoder": "T5 Encoder",
        "upcastAttention": "Upcast Attention",
        "uploadImage": "Upload Image",
        "urlOrLocalPath": "URL or Local Path",
@@ -1018,8 +1005,6 @@
            "noModelForControlAdapter": "Control Adapter #{{number}} has no model selected.",
            "incompatibleBaseModelForControlAdapter": "Control Adapter #{{number}} model is incompatible with main model.",
            "noModelSelected": "No model selected",
-            "canvasManagerNotLoaded": "Canvas Manager not loaded",
-            "canvasBusy": "Canvas is busy",
            "noPrompts": "No prompts generated",
            "noNodesInGraph": "No nodes in graph",
            "systemDisconnected": "System disconnected",
@@ -1051,11 +1036,12 @@
        "scaledHeight": "Scaled H",
        "scaledWidth": "Scaled W",
        "scheduler": "Scheduler",
-        "seamlessXAxis": "Seamless X Axis",
-        "seamlessYAxis": "Seamless Y Axis",
+        "seamlessXAxis": "Seamless Tiling X Axis",
+        "seamlessYAxis": "Seamless Tiling Y Axis",
        "seed": "Seed",
        "imageActions": "Image Actions",
-        "sendToCanvas": "Send To Canvas",
+        "sendToImg2Img": "Send to Image to Image",
+        "sendToUnifiedCanvas": "Send To Unified Canvas",
        "sendToUpscale": "Send To Upscale",
        "showOptionsPanel": "Show Side Panel (O or T)",
        "shuffle": "Shuffle Seed",
@@ -1196,8 +1182,8 @@
        "problemSavingMaskDesc": "Unable to export mask",
        "prunedQueue": "Pruned Queue",
        "resetInitialImage": "Reset Initial Image",
-        "sentToCanvas": "Sent to Canvas",
-        "sentToUpscale": "Sent to Upscale",
+        "sentToImageToImage": "Sent To Image To Image",
+        "sentToUnifiedCanvas": "Sent to Unified Canvas",
        "serverError": "Server Error",
        "sessionRef": "Session: {{sessionId}}",
        "setAsCanvasInitialImage": "Set as canvas initial image",
@@ -1659,17 +1645,6 @@
        "storeNotInitialized": "Store is not initialized"
    },
    "controlLayers": {
-        "bookmark": "Bookmark for Quick Switch",
-        "fitBboxToLayers": "Fit Bbox To Layers",
-        "removeBookmark": "Remove Bookmark",
-        "saveCanvasToGallery": "Save Canvas To Gallery",
-        "saveBboxToGallery": "Save Bbox To Gallery",
-        "savedToGalleryOk": "Saved to Gallery",
-        "savedToGalleryError": "Error saving to gallery",
-        "mergeVisible": "Merge Visible",
-        "mergeVisibleOk": "Merged visible layers",
-        "mergeVisibleError": "Error merging visible layers",
-        "clearHistory": "Clear History",
        "generateMode": "Generate",
        "generateModeDesc": "Create individual images. Generated images are added directly to the gallery.",
        "composeMode": "Compose",
@@ -1677,10 +1652,10 @@
        "autoSave": "Auto-save to Gallery",
        "resetCanvas": "Reset Canvas",
        "resetAll": "Reset All",
+        "deleteAll": "Delete All",
        "clearCaches": "Clear Caches",
        "recalculateRects": "Recalculate Rects",
        "clipToBbox": "Clip Strokes to Bbox",
-        "compositeMaskedRegions": "Composite Masked Regions",
        "addLayer": "Add Layer",
        "duplicate": "Duplicate",
        "moveToFront": "Move to Front",
@@ -1699,49 +1674,35 @@
        "deletePrompt": "Delete Prompt",
        "resetRegion": "Reset Region",
        "debugLayers": "Debug Layers",
-        "showHUD": "Show HUD",
        "rectangle": "Rectangle",
-        "maskFill": "Mask Fill",
+        "maskPreviewColor": "Mask Preview Color",
        "addPositivePrompt": "Add $t(common.positivePrompt)",
        "addNegativePrompt": "Add $t(common.negativePrompt)",
        "addIPAdapter": "Add $t(common.ipAdapter)",
-        "addRasterLayer": "Add $t(controlLayers.rasterLayer)",
-        "addControlLayer": "Add $t(controlLayers.controlLayer)",
-        "addInpaintMask": "Add $t(controlLayers.inpaintMask)",
-        "addRegionalGuidance": "Add $t(controlLayers.regionalGuidance)",
        "regionalGuidanceLayer": "$t(controlLayers.regionalGuidance) $t(unifiedCanvas.layer)",
        "raster": "Raster",
-        "rasterLayer": "Raster Layer",
-        "controlLayer": "Control Layer",
-        "inpaintMask": "Inpaint Mask",
-        "regionalGuidance": "Regional Guidance",
-        "ipAdapter": "IP Adapter",
-        "sendingToCanvas": "Sending to Canvas",
-        "sendingToGallery": "Sending to Gallery",
-        "sendToGallery": "Send To Gallery",
-        "sendToGalleryDesc": "Generations will be sent to the gallery.",
-        "sendToCanvas": "Send To Canvas",
-        "sendToCanvasDesc": "Generations will be staged onto the canvas.",
-        "rasterLayer_withCount_one": "$t(controlLayers.rasterLayer)",
-        "controlLayer_withCount_one": "$t(controlLayers.controlLayer)",
-        "inpaintMask_withCount_one": "$t(controlLayers.inpaintMask)",
-        "regionalGuidance_withCount_one": "$t(controlLayers.regionalGuidance)",
-        "ipAdapter_withCount_one": "$t(controlLayers.ipAdapter)",
-        "rasterLayer_withCount_other": "Raster Layers",
-        "controlLayer_withCount_other": "Control Layers",
-        "inpaintMask_withCount_other": "Inpaint Masks",
-        "regionalGuidance_withCount_other": "Regional Guidance",
-        "ipAdapter_withCount_other": "IP Adapters",
+        "rasterLayer_one": "Raster Layer",
+        "controlLayer_one": "Control Layer",
+        "inpaintMask_one": "Inpaint Mask",
+        "regionalGuidance_one": "Regional Guidance",
+        "ipAdapter_one": "IP Adapter",
+        "rasterLayer_other": "Raster Layers",
+        "controlLayer_other": "Control Layers",
+        "inpaintMask_other": "Inpaint Masks",
+        "regionalGuidance_other": "Regional Guidance",
+        "ipAdapter_other": "IP Adapters",
        "opacity": "Opacity",
        "regionalGuidance_withCount_hidden": "Regional Guidance ({{count}} hidden)",
+        "controlAdapters_withCount_hidden": "Control Adapters ({{count}} hidden)",
        "controlLayers_withCount_hidden": "Control Layers ({{count}} hidden)",
        "rasterLayers_withCount_hidden": "Raster Layers ({{count}} hidden)",
-        "globalIPAdapters_withCount_hidden": "Global IP Adapters ({{count}} hidden)",
+        "ipAdapters_withCount_hidden": "IP Adapters ({{count}} hidden)",
        "inpaintMasks_withCount_hidden": "Inpaint Masks ({{count}} hidden)",
        "regionalGuidance_withCount_visible": "Regional Guidance ({{count}})",
+        "controlAdapters_withCount_visible": "Control Adapters ({{count}})",
        "controlLayers_withCount_visible": "Control Layers ({{count}})",
        "rasterLayers_withCount_visible": "Raster Layers ({{count}})",
-        "globalIPAdapters_withCount_visible": "Global IP Adapters ({{count}})",
+        "ipAdapters_withCount_visible": "IP Adapters ({{count}})",
        "inpaintMasks_withCount_visible": "Inpaint Masks ({{count}})",
        "globalControlAdapter": "Global $t(controlnet.controlAdapter_one)",
        "globalControlAdapterLayer": "Global $t(controlnet.controlAdapter_one) $t(unifiedCanvas.layer)",
@@ -1754,8 +1715,8 @@
        "clearProcessor": "Clear Processor",
        "resetProcessor": "Reset Processor to Defaults",
        "noLayersAdded": "No Layers Added",
-        "layer_one": "Layer",
-        "layer_other": "Layers",
+        "layers_one": "Layer",
+        "layers_other": "Layers",
        "objects_zero": "empty",
        "objects_one": "{{count}} object",
        "objects_other": "{{count}} objects",
@@ -1768,14 +1729,7 @@
        "showingType": "Showing {{type}}",
        "dynamicGrid": "Dynamic Grid",
        "logDebugInfo": "Log Debug Info",
-        "locked": "Locked",
-        "unlocked": "Unlocked",
-        "deleteSelected": "Delete Selected",
-        "deleteAll": "Delete All",
-        "flipHorizontal": "Flip Horizontal",
-        "flipVertical": "Flip Vertical",
        "fill": {
-            "fillColor": "Fill Color",
            "fillStyle": "Fill Style",
            "solid": "Solid",
            "grid": "Grid",
@@ -1791,54 +1745,16 @@
            "bbox": "Bbox",
            "move": "Move",
            "view": "View",
+            "transform": "Transform",
            "colorPicker": "Color Picker"
        },
        "filter": {
            "filter": "Filter",
            "filters": "Filters",
            "filterType": "Filter Type",
-            "autoProcess": "Auto Process",
-            "reset": "Reset",
-            "process": "Process",
-            "apply": "Apply",
-            "cancel": "Cancel",
-            "spandrel": {
-                "label": "Image-to-Image Model",
-                "description": "Run an image-to-image model on the selected layer.",
-                "paramModel": "Model",
-                "paramAutoScale": "Auto Scale",
-                "paramAutoScaleDesc": "The selected model will be run until the target scale is reached.",
-                "paramScale": "Target Scale"
-            }
-        },
-        "transform": {
-            "transform": "Transform",
-            "fitToBbox": "Fit to Bbox",
-            "reset": "Reset",
+            "preview": "Preview",
            "apply": "Apply",
            "cancel": "Cancel"
-        },
-        "settings": {
-            "snapToGrid": {
-                "label": "Snap to Grid",
-                "on": "On",
-                "off": "Off"
-            }
-        },
-        "HUD": {
-            "bbox": "Bbox",
-            "scaledBbox": "Scaled Bbox",
-            "autoSave": "Auto Save",
-            "entityStatus": {
-                "selectedEntity": "Selected Entity",
-                "selectedEntityIs": "Selected Entity is",
-                "isFiltering": "is filtering",
-                "isTransforming": "is transforming",
-                "isLocked": "is locked",
-                "isHidden": "is hidden",
-                "isDisabled": "is disabled",
-                "enabled": "Enabled"
-            }
        }
    },
    "upscaling": {
--- a/invokeai/frontend/web/public/locales/es.json
+++ b/invokeai/frontend/web/public/locales/es.json
@@ -86,15 +86,15 @@
        "loadMore": "Cargar más",
        "noImagesInGallery": "No hay imágenes para mostrar",
        "deleteImage_one": "Eliminar Imagen",
-        "deleteImage_many": "Eliminar {{count}} Imágenes",
-        "deleteImage_other": "Eliminar {{count}} Imágenes",
+        "deleteImage_many": "",
+        "deleteImage_other": "",
        "deleteImagePermanent": "Las imágenes eliminadas no se pueden restaurar.",
        "assets": "Activos",
        "autoAssignBoardOnClick": "Asignación automática de tableros al hacer clic"
    },
    "hotkeys": {
        "keyboardShortcuts": "Atajos de teclado",
-        "appHotkeys": "Atajos de aplicación",
+        "appHotkeys": "Atajos de aplicación",
        "generalHotkeys": "Atajos generales",
        "galleryHotkeys": "Atajos de galería",
        "unifiedCanvasHotkeys": "Atajos de lienzo unificado",
@@ -535,7 +535,7 @@
        "bottomMessage": "Al eliminar este panel y las imágenes que contiene, se restablecerán las funciones que los estén utilizando actualmente.",
        "deleteBoardAndImages": "Borrar el panel y las imágenes",
        "loading": "Cargando...",
-        "deletedBoardsCannotbeRestored": "Los paneles eliminados no se pueden restaurar. Al Seleccionar 'Borrar Solo el Panel' transferirá las imágenes a un estado sin categorizar.",
+        "deletedBoardsCannotbeRestored": "Los paneles eliminados no se pueden restaurar",
        "move": "Mover",
        "menuItemAutoAdd": "Agregar automáticamente a este panel",
        "searchBoard": "Buscando paneles…",
@@ -549,13 +549,7 @@
        "imagesWithCount_other": "{{count}} imágenes",
        "assetsWithCount_one": "{{count}} activo",
        "assetsWithCount_many": "{{count}} activos",
-        "assetsWithCount_other": "{{count}} activos",
-        "hideBoards": "Ocultar Paneles",
-        "addPrivateBoard": "Agregar un tablero privado",
-        "addSharedBoard": "Agregar Panel Compartido",
-        "boards": "Paneles",
-        "archiveBoard": "Archivar Panel",
-        "archived": "Archivado"
+        "assetsWithCount_other": "{{count}} activos"
    },
    "accordions": {
        "compositing": {
--- a/Show More
+++ b/Show More