Enable LoRAPatcher.apply_smart_lora_patches(...) throughout the stack.

(minor) Rename num_layers -> num_loras in unit tests.
Add test_apply_smart_lora_patches_to_partially_loaded_model(...).
2026-01-25 07:28:18 -05:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:50 +00:00 · 2024-12-12 22:41:46 +00:00
130 changed files with 2693 additions and 592055 deletions
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -2,29 +2,42 @@

 ## Builder stage

-FROM library/ubuntu:23.04 AS builder
+FROM library/ubuntu:24.04 AS builder

 ARG DEBIAN_FRONTEND=noninteractive
 RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt update && apt-get install -y \
-        git \
-        python3-venv \
-        python3-pip \
-        build-essential
+        build-essential \
+        git

-ENV INVOKEAI_SRC=/opt/invokeai
-ENV VIRTUAL_ENV=/opt/venv/invokeai
+# Install `uv` for package management
+COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/

+ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ENV INVOKEAI_SRC=/opt/invokeai
+ENV PYTHON_VERSION=3.11
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+
 ARG GPU_DRIVER=cuda
 ARG TARGETPLATFORM="linux/amd64"
 # unused but available
 ARG BUILDPLATFORM

-WORKDIR ${INVOKEAI_SRC}
+# Switch to the `ubuntu` user to work around dependency issues with uv-installed python
+RUN mkdir -p ${VIRTUAL_ENV} && \
+    mkdir -p ${INVOKEAI_SRC} && \
+    chmod -R a+w /opt
+USER ubuntu

+# Install python and create the venv
+RUN uv python install ${PYTHON_VERSION} && \
+    uv venv --relocatable --prompt "invoke" --python ${PYTHON_VERSION} ${VIRTUAL_ENV}
+
+WORKDIR ${INVOKEAI_SRC}
 COPY invokeai ./invokeai
 COPY pyproject.toml ./

@@ -32,25 +45,18 @@ COPY pyproject.toml ./
 # the local working copy can be bind-mounted into the image
 # at path defined by ${INVOKEAI_SRC}
 # NOTE: there are no pytorch builds for arm64 + cuda, only cpu
-# x86_64/CUDA is default
-RUN --mount=type=cache,target=/root/.cache/pip \
-    python3 -m venv ${VIRTUAL_ENV} &&\
+# x86_64/CUDA is the default
+RUN --mount=type=cache,target=/home/ubuntu/.cache/uv,uid=1000,gid=1000 \
    if [ "$TARGETPLATFORM" = "linux/arm64" ] || [ "$GPU_DRIVER" = "cpu" ]; then \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cpu"; \
    elif [ "$GPU_DRIVER" = "rocm" ]; then \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/rocm6.1"; \
    else \
        extra_index_url_arg="--extra-index-url https://download.pytorch.org/whl/cu124"; \
-    fi &&\
+    fi && \
+    uv pip install --python ${PYTHON_VERSION} $extra_index_url_arg -e "."

-    # xformers + triton fails to install on arm64
-    if [ "$GPU_DRIVER" = "cuda" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
-        pip install $extra_index_url_arg -e ".[xformers]"; \
-    else \
-        pip install $extra_index_url_arg -e "."; \
-    fi
-
-# #### Build the Web UI ------------------------------------
+#### Build the Web UI ------------------------------------

 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
@@ -66,7 +72,7 @@ RUN npx vite build

 #### Runtime stage ---------------------------------------

-FROM library/ubuntu:23.04 AS runtime
+FROM library/ubuntu:24.04 AS runtime

 ARG DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
@@ -83,17 +89,16 @@ RUN apt update && apt install -y --no-install-recommends \
        gosu \
        magic-wormhole \
        libglib2.0-0 \
-        libgl1-mesa-glx \
-        python3-venv \
-        python3-pip \
+        libgl1 \
+        libglx-mesa0 \
        build-essential \
        libopencv-dev \
        libstdc++-10-dev &&\
    apt-get clean && apt-get autoclean

-
 ENV INVOKEAI_SRC=/opt/invokeai
-ENV VIRTUAL_ENV=/opt/venv/invokeai
+ENV VIRTUAL_ENV=/opt/venv
+ENV PYTHON_VERSION=3.11
 ENV INVOKEAI_ROOT=/invokeai
 ENV INVOKEAI_HOST=0.0.0.0
 ENV INVOKEAI_PORT=9090
@@ -101,6 +106,14 @@ ENV PATH="$VIRTUAL_ENV/bin:$INVOKEAI_SRC:$PATH"
 ENV CONTAINER_UID=${CONTAINER_UID:-1000}
 ENV CONTAINER_GID=${CONTAINER_GID:-1000}

+# Install `uv` for package management
+# and install python for the ubuntu user (expected to exist on ubuntu >=24.x)
+# this is too tiny to optimize with multi-stage builds, but maybe we'll come back to it
+COPY --from=ghcr.io/astral-sh/uv:0.5.5 /uv /uvx /bin/
+USER ubuntu
+RUN uv python install ${PYTHON_VERSION}
+USER root
+
 # --link requires buldkit w/ dockerfile syntax 1.4
 COPY --link --from=builder ${INVOKEAI_SRC} ${INVOKEAI_SRC}
 COPY --link --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
@@ -115,7 +128,7 @@ WORKDIR ${INVOKEAI_SRC}

 # build patchmatch
 RUN cd /usr/lib/$(uname -p)-linux-gnu/pkgconfig/ && ln -sf opencv4.pc opencv.pc
-RUN python3 -c "from patchmatch import patch_match"
+RUN python -c "from patchmatch import patch_match"

 RUN mkdir -p ${INVOKEAI_ROOT} && chown -R ${CONTAINER_UID}:${CONTAINER_GID} ${INVOKEAI_ROOT}

--- a/docker/docker-entrypoint.sh
+++ b/docker/docker-entrypoint.sh
@@ -16,6 +16,9 @@ set -e -o pipefail

 USER_ID=${CONTAINER_UID:-1000}
 USER=ubuntu
+# if the user does not exist, create it. It is expected to be present on ubuntu >=24.x
+_=$(id ${USER} 2>&1) || useradd -u ${USER_ID} ${USER}
+# ensure the UID is correct
 usermod -u ${USER_ID} ${USER} 1>/dev/null

 ### Set the $PUBLIC_KEY env var to enable SSH access.
@@ -36,6 +39,8 @@ fi
 mkdir -p "${INVOKEAI_ROOT}"
 chown --recursive ${USER} "${INVOKEAI_ROOT}" || true
 cd "${INVOKEAI_ROOT}"
+export HF_HOME=${HF_HOME:-$INVOKEAI_ROOT/.cache/huggingface}
+export MPLCONFIGDIR=${MPLCONFIGDIR:-$INVOKEAI_ROOT/.matplotlib}

 # Run the CMD as the Container User (not root).
 exec gosu ${USER} "$@"
--- a/invokeai/app/invocations/init.py
+++ b/invokeai/app/invocations/init.py
@@ -15,6 +15,11 @@ custom_nodes_readme_path = str(custom_nodes_path / "README.md")
 shutil.copy(Path(__file__).parent / "custom_nodes/init.py", custom_nodes_init_path)
 shutil.copy(Path(__file__).parent / "custom_nodes/README.md", custom_nodes_readme_path)

+# set the same permissions as the destination directory, in case our source is read-only,
+# so that the files are user-writable
+for p in custom_nodes_path.glob("**/*"):
+    p.chmod(custom_nodes_path.stat().st_mode)
+
 # Import custom nodes, see https://docs.python.org/3/library/importlib.html#importing-programmatically
 spec = spec_from_file_location("custom_nodes", custom_nodes_init_path)
 if spec is None or spec.loader is None:
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -82,10 +82,11 @@ class CompelInvocation(BaseInvocation):
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                model=text_encoder,
                patches=_lora_loader(),
                prefix="lora_te_",
+                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
@@ -179,10 +180,11 @@ class SDXLPromptInvocationBase:
            # apply all patches while the model is on the target device
            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
            tokenizer_info as tokenizer,
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                text_encoder,
                patches=_lora_loader(),
                prefix=lora_prefix,
+                dtype=TorchDevice.choose_torch_dtype(),
                cached_weights=cached_weights,
            ),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -1003,10 +1003,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
            ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
            # Apply the LoRA after unet has been moved to its target device for faster patching.
-            LoRAPatcher.apply_lora_patches(
+            LoRAPatcher.apply_smart_lora_patches(
                model=unet,
                patches=_lora_loader(),
                prefix="lora_unet_",
+                dtype=unet.dtype,
                cached_weights=cached_weights,
            ),
        ):
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -56,6 +56,7 @@ class UIType(str, Enum, metaclass=MetaEnum):
    CLIPLEmbedModel = "CLIPLEmbedModelField"
    CLIPGEmbedModel = "CLIPGEmbedModelField"
    SpandrelImageToImageModel = "SpandrelImageToImageModelField"
+    StructuralLoRAModel = "StructuralLoRAModelField"
    # endregion

    # region Misc Field Types
@@ -143,6 +144,7 @@ class FieldDescriptions:
    controlnet_model = "ControlNet model to load"
    vae_model = "VAE model to load"
    lora_model = "LoRA model to load"
+    structural_lora_model = "Structural LoRA model to load"
    main_model = "Main model (UNet, VAE, CLIP) to load"
    flux_model = "Flux model (Transformer) to load"
    sd3_model = "SD3 model (MMDiTX) to load"
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -1,5 +1,5 @@
 from contextlib import ExitStack
-from typing import Callable, Iterator, Optional, Tuple
+from typing import Callable, Iterator, Optional, Tuple, Union

 import numpy as np
 import numpy.typing as npt
@@ -8,6 +8,8 @@ import torchvision.transforms as tv_transforms
 from torchvision.transforms.functional import resize as tv_resize
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+
 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
 from invokeai.app.invocations.fields import (
    DenoiseMaskField,
@@ -22,7 +24,7 @@ from invokeai.app.invocations.fields import (
 )
 from invokeai.app.invocations.flux_controlnet import FluxControlNetField
 from invokeai.app.invocations.ip_adapter import IPAdapterField
-from invokeai.app.invocations.model import TransformerField, VAEField
+from invokeai.app.invocations.model import TransformerField, VAEField, StructuralLoRAField, LoRAField
 from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
@@ -43,6 +45,8 @@ from invokeai.backend.flux.sampling_utils import (
    pack,
    unpack,
 )
+from invokeai.backend.flux.flux_tools_sampling_utils import prepare_control
+from invokeai.backend.flux.modules.conditioner import HFEncoder
 from invokeai.backend.flux.text_conditioning import FluxTextConditioning
 from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
@@ -284,6 +288,16 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                dtype=inference_dtype,
                device=x.device,
            )
+            img_cond = None
+            if struct_lora := self.transformer.structural_lora:
+                # TODO: only one structural LoRA is supported per transformer; decide how to handle several.
+                if not self.controlnet_vae:
+                    raise ValueError("controlnet_vae must be set when using a strutural lora")
+                ae_info = context.models.load(self.controlnet_vae.vae)
+                img = context.images.get_pil(struct_lora.img.image_name)
+                with ae_info as ae:
+                    assert isinstance(ae, AutoEncoder)
+                    img_cond = prepare_control(self.height, self.width, self.seed, ae, img)

            # Load the transformer model.
            (cached_weights, transformer) = exit_stack.enter_context(transformer_info.model_on_device())
@@ -296,10 +310,11 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
            if config.format in [ModelFormat.Checkpoint]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
+                        dtype=inference_dtype,
                        cached_weights=cached_weights,
                    )
                )
@@ -311,7 +326,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                # The model is quantized, so apply the LoRA weights as sidecar layers. This results in slower inference,
                # than directly patching the weights, but is agnostic to the quantization format.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_sidecar_patches(
+                    LoRAPatcher.apply_lora_wrapper_patches(
                        model=transformer,
                        patches=self._lora_iterator(context),
                        prefix=FLUX_LORA_TRANSFORMER_PREFIX,
@@ -345,6 +360,7 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
                controlnet_extensions=controlnet_extensions,
                pos_ip_adapter_extensions=pos_ip_adapter_extensions,
                neg_ip_adapter_extensions=neg_ip_adapter_extensions,
+                img_cond=img_cond
            )

        x = unpack(x.float(), self.height, self.width)
@@ -682,7 +698,10 @@ class FluxDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
        return pos_ip_adapter_extensions, neg_ip_adapter_extensions

    def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[LoRAModelRaw, float]]:
-        for lora in self.transformer.loras:
+        loras: list[Union[LoRAField, StructuralLoRAField]] = [*self.transformer.loras]
+        if self.transformer.structural_lora:
+            loras.append(self.transformer.structural_lora)
+        for lora in loras:
            lora_info = context.models.load(lora.lora)
            assert isinstance(lora_info.model, LoRAModelRaw)
            yield (lora_info.model, lora.weight)
--- a/invokeai/app/invocations/flux_model_loader.py
+++ b/invokeai/app/invocations/flux_model_loader.py
@@ -81,8 +81,8 @@ class FluxModelLoaderInvocation(BaseInvocation):
        assert isinstance(transformer_config, CheckpointConfigBase)

        return FluxModelLoaderOutput(
-            transformer=TransformerField(transformer=transformer, loras=[]),
-            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
+            transformer=TransformerField(transformer=transformer, loras=[], structural_loras=[]),
+            clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], structural_loras=[], skipped_layers=0),
            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
            vae=VAEField(vae=vae),
            max_seq_len=max_seq_lengths[transformer_config.config_path],
--- a/invokeai/app/invocations/flux_structural_lora_loader.py
+++ b/invokeai/app/invocations/flux_structural_lora_loader.py
@@ -0,0 +1,70 @@
+from typing import Optional, Literal
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType, ImageField
+from invokeai.app.invocations.model import VAEField, StructuralLoRAField, ModelIdentifierField, TransformerField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+
+
+@invocation_output("flux_structural_lora_loader_output")
+class FluxStructuralLoRALoaderOutput(BaseInvocationOutput):
+    """FLUX structural LoRA loader output: the updated transformer field, or None if none was connected."""
+
+    transformer: Optional[TransformerField] = OutputField(
+        default=None, description=FieldDescriptions.transformer, title="FLUX Transformer"
+    )
+
+
+@invocation(
+    "flux_structural_lora_loader",
+    title="Flux Structural LoRA",
+    tags=["lora", "model", "flux"],
+    category="model",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class FluxStructuralLoRALoaderInvocation(BaseInvocation):
+    """Attach a structural LoRA and its conditioning image to a FLUX transformer."""
+
+    lora: ModelIdentifierField = InputField(
+        description=FieldDescriptions.structural_lora_model, title="Structural LoRA", ui_type=UIType.StructuralLoRAModel
+    )
+    transformer: TransformerField | None = InputField(
+        default=None,
+        description=FieldDescriptions.transformer,
+        input=Input.Connection,
+        title="FLUX Transformer",
+    )
+    image: ImageField = InputField(
+        description="The image to encode.",
+    )
+    weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight)
+
+    def invoke(self, context: InvocationContext) -> FluxStructuralLoRALoaderOutput:
+        lora_key = self.lora.key
+
+        if not context.models.exists(lora_key):
+            raise ValueError(f"Unknown lora: {lora_key}!")
+
+        # Reject re-applying the structural LoRA that is already attached to this transformer.
+        if self.transformer and self.transformer.structural_lora and self.transformer.structural_lora.lora.key == lora_key:
+            raise ValueError(f'Structural LoRA "{lora_key}" already applied to transformer.')
+
+        output = FluxStructuralLoRALoaderOutput()
+
+        # Deep-copy the transformer field (the input is left untouched) and attach the structural LoRA to the copy.
+        if self.transformer is not None:
+            output.transformer = self.transformer.model_copy(deep=True)
+            output.transformer.structural_lora = StructuralLoRAField(
+                lora=self.lora,
+                img=self.image,
+                weight=self.weight,
+            )
+
+        return output
--- a/invokeai/app/invocations/flux_text_encoder.py
+++ b/invokeai/app/invocations/flux_text_encoder.py
@@ -22,6 +22,7 @@ from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, FLUXConditioningInfo
+from invokeai.backend.util.devices import TorchDevice


@invocation(
@@ -111,10 +112,11 @@ class FluxTextEncoderInvocation(BaseInvocation):
            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=clip_text_encoder,
                        patches=self._clip_lora_iterator(context),
                        prefix=FLUX_LORA_CLIP_PREFIX,
+                        dtype=TorchDevice.choose_torch_dtype(),
                        cached_weights=cached_weights,
                    )
                )
--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@@ -1,5 +1,5 @@
 import copy
-from typing import List, Optional
+from typing import List, Optional, Literal

 from pydantic import BaseModel, Field

@@ -10,7 +10,7 @@ from invokeai.app.invocations.baseinvocation import (
    invocation,
    invocation_output,
 )
-from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType, ImageField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
 from invokeai.backend.model_manager.config import (
@@ -65,11 +65,6 @@ class CLIPField(BaseModel):
    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


-class TransformerField(BaseModel):
-    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
-    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
-
-
 class T5EncoderField(BaseModel):
    tokenizer: ModelIdentifierField = Field(description="Info to load tokenizer submodel")
    text_encoder: ModelIdentifierField = Field(description="Info to load text_encoder submodel")
@@ -79,6 +74,13 @@ class VAEField(BaseModel):
    vae: ModelIdentifierField = Field(description="Info to load vae submodel")
    seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')

+class StructuralLoRAField(LoRAField):  # a LoRAField that also carries the structural-conditioning image
+    img: ImageField = Field(description="Image to use in structural conditioning")
+
+class TransformerField(BaseModel):  # transformer submodel plus its LoRAs and an optional single structural LoRA
+    transformer: ModelIdentifierField = Field(description="Info to load Transformer submodel")
+    loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")
+    structural_lora: Optional[StructuralLoRAField] = Field(description="Structural LoRAs to apply on model loading", default=None)

@invocation_output("unet_output")
 class UNetOutput(BaseInvocationOutput):
--- a/invokeai/app/invocations/sd3_text_encoder.py
+++ b/invokeai/app/invocations/sd3_text_encoder.py
@@ -21,6 +21,7 @@ from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.lora.lora_patcher import LoRAPatcher
 from invokeai.backend.model_manager.config import ModelFormat
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData, SD3ConditioningInfo
+from invokeai.backend.util.devices import TorchDevice

 # The SD3 T5 Max Sequence Length set based on the default in diffusers.
 SD3_T5_MAX_SEQ_LEN = 256
@@ -150,10 +151,11 @@ class Sd3TextEncoderInvocation(BaseInvocation):
            if clip_text_encoder_config.format in [ModelFormat.Diffusers]:
                # The model is non-quantized, so we can apply the LoRA weights directly into the model.
                exit_stack.enter_context(
-                    LoRAPatcher.apply_lora_patches(
+                    LoRAPatcher.apply_smart_lora_patches(
                        model=clip_text_encoder,
                        patches=self._clip_lora_iterator(context, clip_model),
                        prefix=FLUX_LORA_CLIP_PREFIX,
+                        dtype=TorchDevice.choose_torch_dtype(),
                        cached_weights=cached_weights,
                    )
                )
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -207,7 +207,9 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
        with (
            ExitStack() as exit_stack,
            unet_info as unet,
-            LoRAPatcher.apply_lora_patches(model=unet, patches=_lora_loader(), prefix="lora_unet_"),
+            LoRAPatcher.apply_smart_lora_patches(
+                model=unet, patches=_lora_loader(), prefix="lora_unet_", dtype=unet.dtype
+            ),
        ):
            assert isinstance(unet, UNet2DConditionModel)
            latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -4,6 +4,7 @@
 from __future__ import annotations

 import copy
+import filecmp
 import locale
 import os
 import re
@@ -525,9 +526,35 @@ def get_config() -> InvokeAIAppConfig:
    ]
    example_config.write_file(config.config_file_path.with_suffix(".example.yaml"), as_example=True)

-    # Copy all legacy configs - We know `__path__[0]` is correct here
+    # Copy all legacy configs only if needed
+    # We know `__path__[0]` is correct here
    configs_src = Path(model_configs.__path__[0])  # pyright: ignore [reportUnknownMemberType, reportUnknownArgumentType, reportAttributeAccessIssue]
-    shutil.copytree(configs_src, config.legacy_conf_path, dirs_exist_ok=True)
+    dest_path = config.legacy_conf_path
+
+    # Create destination (we don't need to check for existence)
+    dest_path.mkdir(parents=True, exist_ok=True)
+
+    # Compare the top level of both trees (the dircmp attributes below do not recurse into subdirectories)
+    comparison = filecmp.dircmp(configs_src, dest_path)
+    need_copy = any(
+        [
+            comparison.left_only,  # Files exist only in source
+            comparison.diff_files,  # Files that differ
+            comparison.common_funny,  # Files that couldn't be compared
+        ]
+    )
+
+    if need_copy:
+        # Get permissions from destination directory
+        dest_mode = dest_path.stat().st_mode
+
+        # Copy directory tree
+        shutil.copytree(configs_src, dest_path, dirs_exist_ok=True)
+
+        # Set permissions on copied files to match destination directory
+        dest_path.chmod(dest_mode)
+        for p in dest_path.glob("**/*"):
+            p.chmod(dest_mode)

    if config.config_file_path.exists():
        config_from_file = load_and_migrate_config(config.config_file_path)
--- a/invokeai/app/services/session_processor/session_processor_default.py
+++ b/invokeai/app/services/session_processor/session_processor_default.py
@@ -379,7 +379,7 @@ class DefaultSessionProcessor(SessionProcessorBase):

    async def _on_queue_item_status_changed(self, event: FastAPIEvent[QueueItemStatusChangedEvent]) -> None:
        # Make sure the cancel event is for the currently processing queue item
-        if self._queue_item and self._queue_item.item_id is not event[1].item_id:
+        if self._queue_item and self._queue_item.item_id != event[1].item_id:
            return
        if self._queue_item and event[1].status in ["completed", "failed", "canceled"]:
            # When the queue item is canceled via HTTP, the queue item status is set to `"canceled"` and this event is
--- a/invokeai/app/services/shared/sqlite_migrator/migrations/migration_11.py
+++ b/invokeai/app/services/shared/sqlite_migrator/migrations/migration_11.py
@@ -35,7 +35,7 @@ class Migration11Callback:

    def _remove_convert_cache(self) -> None:
        """Rename models/.cache to models/.convert_cache."""
-        self._logger.info("Removing .cache directory. Converted models will now be cached in .convert_cache.")
+        self._logger.info("Removing models/.cache directory. Converted models will now be cached in .convert_cache.")
        legacy_convert_path = self._app_config.root_path / "models" / ".cache"
        shutil.rmtree(legacy_convert_path, ignore_errors=True)

--- a/invokeai/backend/assets/model_base_conf_files/controlnet_sd15/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/controlnet_sd15/config.json
@@ -1,42 +0,0 @@
-{
-  "_class_name": "ControlNetModel",
-  "_diffusers_version": "0.16.0.dev0",
-  "_name_or_path": "controlnet_v1_1/control_v11p_sd15_canny",
-  "act_fn": "silu",
-  "attention_head_dim": 8,
-  "block_out_channels": [
-    320,
-    640,
-    1280,
-    1280
-  ],
-  "class_embed_type": null,
-  "conditioning_embedding_out_channels": [
-    16,
-    32,
-    96,
-    256
-  ],
-  "controlnet_conditioning_channel_order": "rgb",
-  "cross_attention_dim": 768,
-  "down_block_types": [
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_scale_factor": 1,
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "projection_class_embeddings_input_dim": null,
-  "resnet_time_scale_shift": "default",
-  "upcast_attention": false,
-  "use_linear_projection": false
-}
--- a/invokeai/backend/assets/model_base_conf_files/controlnet_sdxl/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/controlnet_sdxl/config.json
@@ -1,56 +0,0 @@
-{
-  "_class_name": "ControlNetModel",
-  "_diffusers_version": "0.19.3",
-  "act_fn": "silu",
-  "addition_embed_type": "text_time",
-  "addition_embed_type_num_heads": 64,
-  "addition_time_embed_dim": 256,
-  "attention_head_dim": [
-    5,
-    10,
-    20
-  ],
-  "block_out_channels": [
-    320,
-    640,
-    1280
-  ],
-  "class_embed_type": null,
-  "conditioning_channels": 3,
-  "conditioning_embedding_out_channels": [
-    16,
-    32,
-    96,
-    256
-  ],
-  "controlnet_conditioning_channel_order": "rgb",
-  "cross_attention_dim": 2048,
-  "down_block_types": [
-    "DownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "encoder_hid_dim": null,
-  "encoder_hid_dim_type": null,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "global_pool_conditions": false,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_scale_factor": 1,
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "projection_class_embeddings_input_dim": 2816,
-  "resnet_time_scale_shift": "default",
-  "transformer_layers_per_block": [
-    1,
-    2,
-    10
-  ],
-  "upcast_attention": null,
-  "use_linear_projection": true
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/feature_extractor/preprocessor_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/feature_extractor/preprocessor_config.json
@@ -1,20 +0,0 @@
-{
-  "crop_size": 224,
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_resize": true,
-  "feature_extractor_type": "CLIPFeatureExtractor",
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "size": 224
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/model_index.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/model_index.json
@@ -1,32 +0,0 @@
-{
-  "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.6.0",
-  "feature_extractor": [
-    "transformers",
-    "CLIPImageProcessor"
-  ],
-  "safety_checker": [
-    "stable_diffusion",
-    "StableDiffusionSafetyChecker"
-  ],
-  "scheduler": [
-    "diffusers",
-    "PNDMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/safety_checker/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/safety_checker/config.json
@@ -1,175 +0,0 @@
-{
-  "_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b",
-  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
-  "architectures": [
-    "StableDiffusionSafetyChecker"
-  ],
-  "initializer_factor": 1.0,
-  "logit_scale_init_value": 2.6592,
-  "model_type": "clip",
-  "projection_dim": 768,
-  "text_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "bos_token_id": 0,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "quick_gelu",
-    "hidden_size": 768,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 3072,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-05,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_position_embeddings": 77,
-    "min_length": 0,
-    "model_type": "clip_text_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_hidden_layers": 12,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": 1,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.22.0.dev0",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "vocab_size": 49408
-  },
-  "text_config_dict": {
-    "hidden_size": 768,
-    "intermediate_size": 3072,
-    "num_attention_heads": 12,
-    "num_hidden_layers": 12
-  },
-  "torch_dtype": "float32",
-  "transformers_version": null,
-  "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "quick_gelu",
-    "hidden_size": 1024,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "image_size": 224,
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 4096,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-05,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
-    "model_type": "clip_vision_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 16,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_channels": 3,
-    "num_hidden_layers": 24,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "patch_size": 14,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.22.0.dev0",
-    "typical_p": 1.0,
-    "use_bfloat16": false
-  },
-  "vision_config_dict": {
-    "hidden_size": 1024,
-    "intermediate_size": 4096,
-    "num_attention_heads": 16,
-    "num_hidden_layers": 24,
-    "patch_size": 14
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/scheduler/scheduler_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/scheduler/scheduler_config.json
@@ -1,13 +0,0 @@
-{
-  "_class_name": "PNDMScheduler",
-  "_diffusers_version": "0.6.0",
-  "beta_end": 0.012,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.00085,
-  "num_train_timesteps": 1000,
-  "set_alpha_to_one": false,
-  "skip_prk_steps": true,
-  "steps_offset": 1,
-  "trained_betas": null,
-  "clip_sample": false
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/text_encoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/text_encoder/config.json
@@ -1,25 +0,0 @@
-{
-  "_name_or_path": "openai/clip-vit-large-patch14",
-  "architectures": [
-    "CLIPTextModel"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "quick_gelu",
-  "hidden_size": 768,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "projection_dim": 768,
-  "torch_dtype": "float32",
-  "transformers_version": "4.22.0.dev0",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<|endoftext|>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/tokenizer_config.json
@@ -1,34 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "name_or_path": "openai/clip-vit-large-patch14",
-  "pad_token": "<|endoftext|>",
-  "special_tokens_map_file": "./special_tokens_map.json",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/tokenizer/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/unet/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/unet/config.json
@@ -1,36 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.6.0",
-  "act_fn": "silu",
-  "attention_head_dim": 8,
-  "block_out_channels": [
-    320,
-    640,
-    1280,
-    1280
-  ],
-  "center_input_sample": false,
-  "cross_attention_dim": 768,
-  "down_block_types": [
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_scale_factor": 1,
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "out_channels": 4,
-  "sample_size": 64,
-  "up_block_types": [
-    "UpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/vae/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-epsilon/vae/config.json
@@ -1,29 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.6.0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 512,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/feature_extractor/preprocessor_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/feature_extractor/preprocessor_config.json
@@ -1,28 +0,0 @@
-{
-  "crop_size": {
-    "height": 224,
-    "width": 224
-  },
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_rescale": true,
-  "do_resize": true,
-  "feature_extractor_type": "CLIPFeatureExtractor",
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_processor_type": "CLIPFeatureExtractor",
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "rescale_factor": 0.00392156862745098,
-  "size": {
-    "shortest_edge": 224
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/model_index.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/model_index.json
@@ -1,33 +0,0 @@
-{
-  "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.18.0.dev0",
-  "feature_extractor": [
-    "transformers",
-    "CLIPFeatureExtractor"
-  ],
-  "requires_safety_checker": true,
-  "safety_checker": [
-    "stable_diffusion",
-    "StableDiffusionSafetyChecker"
-  ],
-  "scheduler": [
-    "diffusers",
-    "DPMSolverMultistepScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/safety_checker/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/safety_checker/config.json
@@ -1,168 +0,0 @@
-{
-  "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
-  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
-  "architectures": [
-    "StableDiffusionSafetyChecker"
-  ],
-  "initializer_factor": 1.0,
-  "logit_scale_init_value": 2.6592,
-  "model_type": "clip",
-  "projection_dim": 768,
-  "text_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": 0,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "quick_gelu",
-    "hidden_size": 768,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 3072,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-05,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_position_embeddings": 77,
-    "min_length": 0,
-    "model_type": "clip_text_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_hidden_layers": 12,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": 1,
-    "prefix": null,
-    "problem_type": null,
-    "projection_dim": 512,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.30.2",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "vocab_size": 49408
-  },
-  "torch_dtype": "float16",
-  "transformers_version": null,
-  "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "quick_gelu",
-    "hidden_size": 1024,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "image_size": 224,
-    "initializer_factor": 1.0,
-    "initializer_range": 0.02,
-    "intermediate_size": 4096,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
-    "layer_norm_eps": 1e-05,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
-    "model_type": "clip_vision_model",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 16,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_channels": 3,
-    "num_hidden_layers": 24,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "patch_size": 14,
-    "prefix": null,
-    "problem_type": null,
-    "projection_dim": 512,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "transformers_version": "4.30.2",
-    "typical_p": 1.0,
-    "use_bfloat16": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/scheduler/scheduler_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/scheduler/scheduler_config.json
@@ -1,26 +0,0 @@
-{
-  "_class_name": "DPMSolverMultistepScheduler",
-  "_diffusers_version": "0.18.0.dev0",
-  "algorithm_type": "dpmsolver++",
-  "beta_end": 0.012,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.00085,
-  "clip_sample": false,
-  "clip_sample_range": 1.0,
-  "dynamic_thresholding_ratio": 0.995,
-  "lambda_min_clipped": -Infinity,
-  "lower_order_final": true,
-  "num_train_timesteps": 1000,
-  "prediction_type": "v_prediction",
-  "rescale_betas_zero_snr": false,
-  "sample_max_value": 1.0,
-  "set_alpha_to_one": false,
-  "solver_order": 2,
-  "solver_type": "midpoint",
-  "steps_offset": 1,
-  "thresholding": false,
-  "timestep_spacing": "leading",
-  "trained_betas": null,
-  "use_karras_sigmas": false,
-  "variance_type": null
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/text_encoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/text_encoder/config.json
@@ -1,25 +0,0 @@
-{
-  "_name_or_path": "openai/clip-vit-large-patch14",
-  "architectures": [
-    "CLIPTextModel"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "quick_gelu",
-  "hidden_size": 768,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "projection_dim": 768,
-  "torch_dtype": "float16",
-  "transformers_version": "4.30.2",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<|endoftext|>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/tokenizer_config.json
@@ -1,33 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": true,
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "pad_token": "<|endoftext|>",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/tokenizer/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/unet/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/unet/config.json
@@ -1,62 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.18.0.dev0",
-  "act_fn": "silu",
-  "addition_embed_type": null,
-  "addition_embed_type_num_heads": 64,
-  "attention_head_dim": 8,
-  "block_out_channels": [
-    320,
-    640,
-    1280,
-    1280
-  ],
-  "center_input_sample": false,
-  "class_embed_type": null,
-  "class_embeddings_concat": false,
-  "conv_in_kernel": 3,
-  "conv_out_kernel": 3,
-  "cross_attention_dim": 768,
-  "cross_attention_norm": null,
-  "down_block_types": [
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "dual_cross_attention": false,
-  "encoder_hid_dim": null,
-  "encoder_hid_dim_type": null,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_only_cross_attention": null,
-  "mid_block_scale_factor": 1,
-  "mid_block_type": "UNetMidBlock2DCrossAttn",
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 4,
-  "projection_class_embeddings_input_dim": null,
-  "resnet_out_scale_factor": 1.0,
-  "resnet_skip_time_act": false,
-  "resnet_time_scale_shift": "default",
-  "sample_size": 96,
-  "time_cond_proj_dim": null,
-  "time_embedding_act_fn": null,
-  "time_embedding_dim": null,
-  "time_embedding_type": "positional",
-  "timestep_post_act": null,
-  "up_block_types": [
-    "UpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D"
-  ],
-  "upcast_attention": null,
-  "use_linear_projection": false
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/vae/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-1.5-v_prediction/vae/config.json
@@ -1,30 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.18.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 768,
-  "scaling_factor": 0.18215,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/feature_extractor/preprocessor_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/feature_extractor/preprocessor_config.json
@@ -1,20 +0,0 @@
-{
-  "crop_size": 224,
-  "do_center_crop": true,
-  "do_convert_rgb": true,
-  "do_normalize": true,
-  "do_resize": true,
-  "feature_extractor_type": "CLIPFeatureExtractor",
-  "image_mean": [
-    0.48145466,
-    0.4578275,
-    0.40821073
-  ],
-  "image_std": [
-    0.26862954,
-    0.26130258,
-    0.27577711
-  ],
-  "resample": 3,
-  "size": 224
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/model_index.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/model_index.json
@@ -1,33 +0,0 @@
-{
-  "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.8.0",
-  "feature_extractor": [
-    "transformers",
-    "CLIPImageProcessor"
-  ],
-  "requires_safety_checker": false,
-  "safety_checker": [
-    null,
-    null
-  ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/scheduler/scheduler_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/scheduler/scheduler_config.json
@@ -1,14 +0,0 @@
-{
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.8.0",
-  "beta_end": 0.012,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.00085,
-  "clip_sample": false,
-  "num_train_timesteps": 1000,
-  "prediction_type": "v_prediction",
-  "set_alpha_to_one": false,
-  "skip_prk_steps": true,
-  "steps_offset": 1,
-  "trained_betas": null
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/text_encoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/text_encoder/config.json
@@ -1,25 +0,0 @@
-{
-  "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
-  "architectures": [
-    "CLIPTextModel"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_size": 1024,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 4096,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 16,
-  "num_hidden_layers": 23,
-  "pad_token_id": 1,
-  "projection_dim": 512,
-  "torch_dtype": "float32",
-  "transformers_version": "4.25.0.dev0",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "!",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/tokenizer_config.json
@@ -1,34 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "name_or_path": "hf-models/stable-diffusion-v2-768x768/tokenizer",
-  "pad_token": "<|endoftext|>",
-  "special_tokens_map_file": "./special_tokens_map.json",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/tokenizer/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/unet/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/unet/config.json
@@ -1,46 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.10.0.dev0",
-  "act_fn": "silu",
-  "attention_head_dim": [
-    5,
-    10,
-    20,
-    20
-  ],
-  "block_out_channels": [
-    320,
-    640,
-    1280,
-    1280
-  ],
-  "center_input_sample": false,
-  "cross_attention_dim": 1024,
-  "down_block_types": [
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "dual_cross_attention": false,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_scale_factor": 1,
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 4,
-  "sample_size": 96,
-  "up_block_types": [
-    "UpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D"
-  ],
-  "use_linear_projection": true,
-  "upcast_attention": true
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/vae/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-2.0-v_prediction/vae/config.json
@@ -1,30 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.8.0",
-  "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 768,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/model_index.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/model_index.json
@@ -1,34 +0,0 @@
-{
-  "_class_name": "StableDiffusionXLPipeline",
-  "_diffusers_version": "0.19.0.dev0",
-  "force_zeros_for_empty_prompt": true,
-  "add_watermarker": null,
-  "scheduler": [
-    "diffusers",
-    "EulerDiscreteScheduler"
-  ],
-  "text_encoder": [
-    "transformers",
-    "CLIPTextModel"
-  ],
-  "text_encoder_2": [
-    "transformers",
-    "CLIPTextModelWithProjection"
-  ],
-  "tokenizer": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "tokenizer_2": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/scheduler/scheduler_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/scheduler/scheduler_config.json
@@ -1,18 +0,0 @@
-{
-  "_class_name": "EulerDiscreteScheduler",
-  "_diffusers_version": "0.19.0.dev0",
-  "beta_end": 0.012,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.00085,
-  "clip_sample": false,
-  "interpolation_type": "linear",
-  "num_train_timesteps": 1000,
-  "prediction_type": "epsilon",
-  "sample_max_value": 1.0,
-  "set_alpha_to_one": false,
-  "skip_prk_steps": true,
-  "steps_offset": 1,
-  "timestep_spacing": "leading",
-  "trained_betas": null,
-  "use_karras_sigmas": false
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/text_encoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/text_encoder/config.json
@@ -1,24 +0,0 @@
-{
-  "architectures": [
-    "CLIPTextModel"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "quick_gelu",
-  "hidden_size": 768,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "projection_dim": 768,
-  "torch_dtype": "float16",
-  "transformers_version": "4.32.0.dev0",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/text_encoder_2/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/text_encoder_2/config.json
@@ -1,24 +0,0 @@
-{
-  "architectures": [
-    "CLIPTextModelWithProjection"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_size": 1280,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 5120,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 20,
-  "num_hidden_layers": 32,
-  "pad_token_id": 1,
-  "projection_dim": 1280,
-  "torch_dtype": "float16",
-  "transformers_version": "4.32.0.dev0",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<|endoftext|>",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/tokenizer_config.json
@@ -1,33 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": true,
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "pad_token": "<|endoftext|>",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "!",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/tokenizer_config.json
@@ -1,33 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": true,
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "pad_token": "!",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/tokenizer_2/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/unet/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/unet/config.json
@@ -1,69 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "addition_embed_type": "text_time",
-  "addition_embed_type_num_heads": 64,
-  "addition_time_embed_dim": 256,
-  "attention_head_dim": [
-    5,
-    10,
-    20
-  ],
-  "block_out_channels": [
-    320,
-    640,
-    1280
-  ],
-  "center_input_sample": false,
-  "class_embed_type": null,
-  "class_embeddings_concat": false,
-  "conv_in_kernel": 3,
-  "conv_out_kernel": 3,
-  "cross_attention_dim": 2048,
-  "cross_attention_norm": null,
-  "down_block_types": [
-    "DownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "dual_cross_attention": false,
-  "encoder_hid_dim": null,
-  "encoder_hid_dim_type": null,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_only_cross_attention": null,
-  "mid_block_scale_factor": 1,
-  "mid_block_type": "UNetMidBlock2DCrossAttn",
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 4,
-  "projection_class_embeddings_input_dim": 2816,
-  "resnet_out_scale_factor": 1.0,
-  "resnet_skip_time_act": false,
-  "resnet_time_scale_shift": "default",
-  "sample_size": 128,
-  "time_cond_proj_dim": null,
-  "time_embedding_act_fn": null,
-  "time_embedding_dim": null,
-  "time_embedding_type": "positional",
-  "timestep_post_act": null,
-  "transformer_layers_per_block": [
-    1,
-    2,
-    10
-  ],
-  "up_block_types": [
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "UpBlock2D"
-  ],
-  "upcast_attention": null,
-  "use_linear_projection": true
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae/config.json
@@ -1,32 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.20.0.dev0",
-  "_name_or_path": "../sdxl-vae/",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_1_0/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_1_0/config.json
@@ -1,31 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_decoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_decoder/config.json
@@ -1,31 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_encoder/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-base-1.0/vae_encoder/config.json
@@ -1,31 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/model_index.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/model_index.json
@@ -1,35 +0,0 @@
-{
-  "_class_name": "StableDiffusionXLImg2ImgPipeline",
-  "_diffusers_version": "0.19.0.dev0",
-  "force_zeros_for_empty_prompt": false,
-  "add_watermarker": null,
-  "requires_aesthetics_score": true,
-  "scheduler": [
-    "diffusers",
-    "EulerDiscreteScheduler"
-  ],
-  "text_encoder": [
-    null,
-    null
-  ],
-  "text_encoder_2": [
-    "transformers",
-    "CLIPTextModelWithProjection"
-  ],
-  "tokenizer": [
-    null,
-    null
-  ],
-  "tokenizer_2": [
-    "transformers",
-    "CLIPTokenizer"
-  ],
-  "unet": [
-    "diffusers",
-    "UNet2DConditionModel"
-  ],
-  "vae": [
-    "diffusers",
-    "AutoencoderKL"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/scheduler/scheduler_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/scheduler/scheduler_config.json
@@ -1,18 +0,0 @@
-{
-  "_class_name": "EulerDiscreteScheduler",
-  "_diffusers_version": "0.19.0.dev0",
-  "beta_end": 0.012,
-  "beta_schedule": "scaled_linear",
-  "beta_start": 0.00085,
-  "clip_sample": false,
-  "interpolation_type": "linear",
-  "num_train_timesteps": 1000,
-  "prediction_type": "epsilon",
-  "sample_max_value": 1.0,
-  "set_alpha_to_one": false,
-  "skip_prk_steps": true,
-  "steps_offset": 1,
-  "timestep_spacing": "leading",
-  "trained_betas": null,
-  "use_karras_sigmas": false
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/text_encoder_2/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/text_encoder_2/config.json
@@ -1,24 +0,0 @@
-{
-  "architectures": [
-    "CLIPTextModelWithProjection"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "dropout": 0.0,
-  "eos_token_id": 2,
-  "hidden_act": "gelu",
-  "hidden_size": 1280,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 5120,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 77,
-  "model_type": "clip_text_model",
-  "num_attention_heads": 20,
-  "num_hidden_layers": 32,
-  "pad_token_id": 1,
-  "projection_dim": 1280,
-  "torch_dtype": "float16",
-  "transformers_version": "4.32.0.dev0",
-  "vocab_size": 49408
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/merges.txt
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/merges.txt
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/special_tokens_map.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/special_tokens_map.json
@@ -1,24 +0,0 @@
-{
-  "bos_token": {
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "!",
-  "unk_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/tokenizer_config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/tokenizer_config.json
@@ -1,33 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<|startoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": true,
-  "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "model_max_length": 77,
-  "pad_token": "!",
-  "tokenizer_class": "CLIPTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/vocab.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/tokenizer_2/vocab.json
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/unet/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/unet/config.json
@@ -1,69 +0,0 @@
-{
-  "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "addition_embed_type": "text_time",
-  "addition_embed_type_num_heads": 64,
-  "addition_time_embed_dim": 256,
-  "attention_head_dim": [
-    6,
-    12,
-    24,
-    24
-  ],
-  "block_out_channels": [
-    384,
-    768,
-    1536,
-    1536
-  ],
-  "center_input_sample": false,
-  "class_embed_type": null,
-  "class_embeddings_concat": false,
-  "conv_in_kernel": 3,
-  "conv_out_kernel": 3,
-  "cross_attention_dim": 1280,
-  "cross_attention_norm": null,
-  "down_block_types": [
-    "DownBlock2D",
-    "CrossAttnDownBlock2D",
-    "CrossAttnDownBlock2D",
-    "DownBlock2D"
-  ],
-  "downsample_padding": 1,
-  "dual_cross_attention": false,
-  "encoder_hid_dim": null,
-  "encoder_hid_dim_type": null,
-  "flip_sin_to_cos": true,
-  "freq_shift": 0,
-  "in_channels": 4,
-  "layers_per_block": 2,
-  "mid_block_only_cross_attention": null,
-  "mid_block_scale_factor": 1,
-  "mid_block_type": "UNetMidBlock2DCrossAttn",
-  "norm_eps": 1e-05,
-  "norm_num_groups": 32,
-  "num_attention_heads": null,
-  "num_class_embeds": null,
-  "only_cross_attention": false,
-  "out_channels": 4,
-  "projection_class_embeddings_input_dim": 2560,
-  "resnet_out_scale_factor": 1.0,
-  "resnet_skip_time_act": false,
-  "resnet_time_scale_shift": "default",
-  "sample_size": 128,
-  "time_cond_proj_dim": null,
-  "time_embedding_act_fn": null,
-  "time_embedding_dim": null,
-  "time_embedding_type": "positional",
-  "timestep_post_act": null,
-  "transformer_layers_per_block": 4,
-  "up_block_types": [
-    "UpBlock2D",
-    "CrossAttnUpBlock2D",
-    "CrossAttnUpBlock2D",
-    "UpBlock2D"
-  ],
-  "upcast_attention": null,
-  "use_linear_projection": true
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/vae/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/vae/config.json
@@ -1,32 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.20.0.dev0",
-  "_name_or_path": "../sdxl-vae/",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/vae_1_0/config.json
+++ b/invokeai/backend/assets/model_base_conf_files/stable-diffusion-xl-refiner-1.0/vae_1_0/config.json
@@ -1,31 +0,0 @@
-{
-  "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.19.0.dev0",
-  "act_fn": "silu",
-  "block_out_channels": [
-    128,
-    256,
-    512,
-    512
-  ],
-  "down_block_types": [
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D",
-    "DownEncoderBlock2D"
-  ],
-  "force_upcast": true,
-  "in_channels": 3,
-  "latent_channels": 4,
-  "layers_per_block": 2,
-  "norm_num_groups": 32,
-  "out_channels": 3,
-  "sample_size": 1024,
-  "scaling_factor": 0.13025,
-  "up_block_types": [
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D",
-    "UpDecoderBlock2D"
-  ]
-}
--- a/invokeai/backend/flux/denoise.py
+++ b/invokeai/backend/flux/denoise.py
@@ -30,6 +30,8 @@ def denoise(
    controlnet_extensions: list[XLabsControlNetExtension | InstantXControlNetExtension],
    pos_ip_adapter_extensions: list[XLabsIPAdapterExtension],
    neg_ip_adapter_extensions: list[XLabsIPAdapterExtension],
+    # extra img tokens
+    img_cond: torch.Tensor | None = None,
 ):
    # step 0 is the initial state
    total_steps = len(timesteps) - 1
@@ -69,9 +71,9 @@ def denoise(
        # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
        # tensors. Calculating the sum materializes each tensor into its own instance.
        merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
+        pred_img = torch.cat((img, img_cond), dim=-1) if img_cond is not None else img
        pred = model(
-            img=img,
+            img=pred_img,
            img_ids=img_ids,
            txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
            txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
--- a/invokeai/backend/flux/flux_tools_sampling_utils.py
+++ b/invokeai/backend/flux/flux_tools_sampling_utils.py
@@ -0,0 +1,27 @@
+import torch
+import numpy as np
+from PIL import Image
+from einops import rearrange
+
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder
+
+def prepare_control(
+    height: int,
+    width: int,
+    seed: int,
+    ae: AutoEncoder,
+    cond_image: Image.Image,
+) -> torch.Tensor:
+    """Encode a conditioning image into packed latent tokens.
+
+    Args:
+        height: Pixel height the conditioning image is resized to.
+        width: Pixel width the conditioning image is resized to.
+        seed: Seed for the generator handed to `ae.encode(...)`.
+        ae: Autoencoder used for encoding. The device and dtype of its first parameter are used for the input.
+        cond_image: Conditioning image. It is converted to RGB before being encoded.
+
+    Returns:
+        The encoded image with every 2x2 spatial patch folded into the channel dimension, i.e. rearranged from
+        (b, c, h*2, w*2) to (b, h*w, c*2*2).
+    """
+    # Load the conditioning image and bring it to the target size.
+    img_cond = cond_image.convert("RGB")
+    img_cond = img_cond.resize((width, height), Image.Resampling.LANCZOS)
+    img_cond = np.array(img_cond)
+
+    # Map the uint8 pixel values from [0, 255] to [-1, 1] before encoding. This mirrors the reference FLUX
+    # `prepare_control` implementation, which normalizes the image the same way ahead of the autoencoder.
+    img_cond = torch.from_numpy(img_cond).float() / 127.5 - 1.0
+    img_cond = rearrange(img_cond, "h w c -> 1 c h w")
+
+    # Match the autoencoder's device and dtype (taken from its first parameter).
+    ae_param = next(iter(ae.parameters()))
+    img_cond = img_cond.to(device=ae_param.device, dtype=ae_param.dtype)
+
+    # Encode with a seeded generator.
+    generator = torch.Generator(device=ae_param.device).manual_seed(seed)
+    img_cond = ae.encode(img_cond, sample=True, generator=generator)
+
+    # Pack 2x2 latent patches into the channel dimension.
+    img_cond = rearrange(img_cond, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
+    return img_cond
--- a/invokeai/backend/flux/math.py
+++ b/invokeai/backend/flux/math.py
@@ -32,4 +32,4 @@ def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tenso
    xk_ = xk.view(*xk.shape[:-1], -1, 1, 2)
    xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1]
    xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1]
-    return xq_out.view(*xq.shape), xk_out.view(*xk.shape)
+    return xq_out.view(*xq.shape).type_as(xq), xk_out.view(*xk.shape).type_as(xk)
--- a/invokeai/backend/flux/model.py
+++ b/invokeai/backend/flux/model.py
@@ -4,6 +4,7 @@ from dataclasses import dataclass

 import torch
 from torch import Tensor, nn
+from typing import Optional

 from invokeai.backend.flux.custom_block_processor import (
    CustomDoubleStreamBlockProcessor,
@@ -35,6 +36,7 @@ class FluxParams:
    theta: int
    qkv_bias: bool
    guidance_embed: bool
+    out_channels: Optional[int] = None


 class Flux(nn.Module):
@@ -47,7 +49,7 @@ class Flux(nn.Module):

        self.params = params
        self.in_channels = params.in_channels
-        self.out_channels = self.in_channels
+        self.out_channels = params.out_channels or self.in_channels
        if params.hidden_size % params.num_heads != 0:
            raise ValueError(f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}")
        pe_dim = params.hidden_size // params.num_heads
--- a/invokeai/backend/flux/modules/image_embedders.py
+++ b/invokeai/backend/flux/modules/image_embedders.py
@@ -0,0 +1,50 @@
+import os
+import cv2
+import numpy as np
+import torch
+
+from einops import rearrange, repeat
+from PIL import Image
+from safetensors.torch import load_file as load_sft
+from torch import nn
+from transformers import AutoModelForDepthEstimation, AutoProcessor, SiglipImageProcessor, SiglipVisionModel
+
+class DepthImageEncoder:
+    """Turns an image tensor into a 3-channel depth map using a pretrained depth-estimation model."""
+
+    depth_model_name = "LiheYoung/depth-anything-large-hf"
+
+    def __init__(self, device):
+        # Load the depth model onto `device`, followed by its matching pre-processor.
+        self.device = device
+        model = AutoModelForDepthEstimation.from_pretrained(self.depth_model_name)
+        self.depth_model = model.to(device)
+        self.processor = AutoProcessor.from_pretrained(self.depth_model_name)
+
+    def __call__(self, img: torch.Tensor) -> torch.Tensor:
+        """Estimate depth for `img` and return it at the spatial size of the input.
+
+        The input is clamped to [-1, 1] and converted to bytes for the pre-processor. The model's predicted depth
+        is repeated over 3 channels, resized to the input's last two dimensions (bicubic, antialiased) and
+        finally rescaled with `x / 127.5 - 1.0`.
+        """
+        target_size = img.shape[-2:]
+
+        # [-1, 1] floats -> [0, 255] bytes.
+        clamped = torch.clamp(img, -1.0, 1.0)
+        pixels_u8 = ((clamped + 1.0) * 127.5).byte()
+        model_input = self.processor(pixels_u8, return_tensors="pt")["pixel_values"]
+
+        depth = self.depth_model(model_input.to(self.device)).predicted_depth
+        depth = repeat(depth, "b h w -> b 3 h w")
+        depth = torch.nn.functional.interpolate(depth, target_size, mode="bicubic", antialias=True)
+        return depth / 127.5 - 1.0
+
+class CannyImageEncoder:
+    """Turns a single image tensor into a 3-channel Canny edge map."""
+
+    def __init__(
+        self,
+        device,
+        min_t: int = 50,
+        max_t: int = 200,
+    ):
+        """
+        Args:
+            device: Device the resulting edge map is moved to.
+            min_t: First threshold passed to `cv2.Canny`.
+            max_t: Second threshold passed to `cv2.Canny`.
+        """
+        self.device = device
+        self.min_t = min_t
+        self.max_t = max_t
+
+    def __call__(self, img: torch.Tensor) -> torch.Tensor:
+        """Compute the edge map of `img`.
+
+        Args:
+            img: Image tensor with a leading batch dimension of exactly 1, laid out as (1, c, h, w). Values are
+                clamped to [-1, 1] before edge detection.
+
+        Returns:
+            A (1, 3, h, w) tensor on `self.device` holding the edge map rescaled with `x / 127.5 - 1.0`.
+
+        Raises:
+            AssertionError: If the batch dimension is not 1.
+        """
+        assert img.shape[0] == 1, "Only batch size 1 is supported"
+        img = rearrange(img[0], "c h w -> h w c")
+        img = torch.clamp(img, -1.0, 1.0)
+        # `Tensor.numpy()` only works for CPU tensors that do not require grad, so detach and move to the CPU
+        # first. Otherwise an input that lives on an accelerator would raise here.
+        img_np = ((img + 1.0) * 127.5).detach().cpu().numpy().astype(np.uint8)
+        # Apply Canny edge detection
+        canny = cv2.Canny(img_np, self.min_t, self.max_t)
+        # Convert back to torch tensor and reshape
+        canny = torch.from_numpy(canny).float() / 127.5 - 1.0
+        canny = rearrange(canny, "h w -> 1 1 h w")
+        canny = repeat(canny, "b 1 ... -> b 3 ...")
+        return canny.to(self.device)
--- a/invokeai/backend/lora/conversions/flux_control_lora_utils.py
+++ b/invokeai/backend/lora/conversions/flux_control_lora_utils.py
@@ -0,0 +1,65 @@
+import re
+import torch
+
+from typing import Any, Dict
+from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
+from invokeai.backend.lora.layers.utils import any_lora_layer_from_state_dict
+from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
+from invokeai.backend.lora.conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer
+
+
+# A regex pattern that matches all of the keys in the Flux Dev/Canny LoRA format.
+# Example keys:
+#   guidance_in.in_layer.lora_B.bias
+#   single_blocks.0.linear1.lora_A.weight
+#   double_blocks.0.img_attn.norm.key_norm.scale
+FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX = r"(final_layer|vector_in|txt_in|time_in|img_in|guidance_in|\w+_blocks)(\.(\d+))?\.(lora_(A|B)|(in|out)_layer|adaLN_modulation|img_attn|img_mlp|img_mod|txt_attn|txt_mlp|txt_mod|linear|linear1|linear2|modulation|norm)\.?(.*)"
+
+def is_state_dict_likely_flux_control(state_dict: Dict[str, Any]) -> bool:
+    """Checks if the provided state dict is likely in the FLUX Control LoRA format.
+
+    This is intended to be a high-precision detector, but it is not guaranteed to have perfect precision. (A
+    perfect-precision detector would require checking all keys against a whitelist and verifying tensor shapes.)
+
+    Every key must match FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX (anchored at the start of the key). Note that an
+    empty state dict trivially satisfies this and therefore yields True.
+    """
+    return all(re.match(FLUX_STRUCTURAL_TRANSFORMER_KEY_REGEX, k) for k in state_dict.keys())
+
+def lora_model_from_flux_control_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRAModelRaw:
+    """Build a LoRAModelRaw from a FLUX Control LoRA state dict.
+
+    Keys are grouped by layer name. A group holding `lora_A.weight`, `lora_B.weight` and `lora_B.bias` becomes a
+    LoRALayer; a group holding `scale` becomes a SetParameterLayer for the "scale" parameter.
+
+    Raises:
+        AssertionError: If a layer's parameters match neither of the two expected layouts.
+    """
+    # Bucket the tensors by the layer they belong to.
+    params_by_layer: dict[str, dict[str, torch.Tensor]] = {}
+    for full_key, tensor in state_dict.items():
+        parts = full_key.split(".")
+        # LoRA parameter names (e.g. "lora_A.weight") span the last two dotted components of the key; any other
+        # parameter name is just the last component.
+        is_lora_param = "lora_B" in full_key or "lora_A" in full_key
+        split_at = -2 if is_lora_param else -1
+        layer_name = ".".join(parts[:split_at])
+        param_name = ".".join(parts[split_at:])
+        params_by_layer.setdefault(layer_name, {})[param_name] = tensor
+
+    # Turn every bucket into a layer object, keyed by the prefixed layer name.
+    required_lora_params = frozenset(("lora_A.weight", "lora_B.bias", "lora_B.weight"))
+    layers: dict[str, AnyLoRALayer] = {}
+    for layer_name, layer_params in params_by_layer.items():
+        target_key = f"{FLUX_LORA_TRANSFORMER_PREFIX}{layer_name}"
+        if required_lora_params.issubset(layer_params):
+            layers[target_key] = LoRALayer(
+                layer_params["lora_B.weight"],
+                None,
+                layer_params["lora_A.weight"],
+                None,
+                layer_params["lora_B.bias"],
+            )
+            continue
+        if "scale" in layer_params:
+            layers[target_key] = SetParameterLayer("scale", layer_params["scale"])
+            continue
+        raise AssertionError(f"{layer_name} not expected")
+
+    return LoRAModelRaw(layers=layers)
+
--- a/invokeai/backend/lora/layers/any_lora_layer.py
+++ b/invokeai/backend/lora/layers/any_lora_layer.py
@@ -7,5 +7,6 @@ from invokeai.backend.lora.layers.loha_layer import LoHALayer
 from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
 from invokeai.backend.lora.layers.lora_layer import LoRALayer
 from invokeai.backend.lora.layers.norm_layer import NormLayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer

-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer, ConcatenatedLoRALayer]
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer, ConcatenatedLoRALayer, SetParameterLayer]
--- a/invokeai/backend/lora/layers/reshape_weight_layer.py
+++ b/invokeai/backend/lora/layers/reshape_weight_layer.py
@@ -0,0 +1,34 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
+from invokeai.backend.util.calc_tensor_size import calc_tensor_size
+
+
+class ReshapeWeightLayer(LoRALayerBase):
+    """Layer holding an optional weight, bias and manual scale.
+
+    `get_weight(...)` currently returns the original weight unchanged.
+    """
+
+    # TODO: Just everything in this class
+    def __init__(self, weight: Optional[torch.Tensor], bias: Optional[torch.Tensor], scale: Optional[torch.Tensor]):
+        super().__init__(alpha=None, bias=bias)
+        self.weight = torch.nn.Parameter(weight) if weight is not None else None
+        self.bias = torch.nn.Parameter(bias) if bias is not None else None
+        self.manual_scale = scale
+
+    def scale(self):
+        # Prefer the manually provided scale; defer to the base class when none was given.
+        return self.manual_scale.float() if self.manual_scale is not None else super().scale()
+
+    def rank(self) -> int | None:
+        return None
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        return orig_weight
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        super().to(device=device, dtype=dtype)
+        if self.weight is not None:
+            self.weight = self.weight.to(device=device, dtype=dtype)
+        if self.manual_scale is not None:
+            self.manual_scale = self.manual_scale.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        # `weight` and `manual_scale` are both optional, so only the tensors that are actually present are
+        # counted on top of whatever the base class reports.
+        size = super().calc_size()
+        for tensor in (self.weight, self.manual_scale):
+            if tensor is not None:
+                size += calc_tensor_size(tensor)
+        return size
--- a/invokeai/backend/lora/layers/set_parameter_layer.py
+++ b/invokeai/backend/lora/layers/set_parameter_layer.py
@@ -0,0 +1,29 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.layers.lora_layer_base import LoRALayerBase
+from invokeai.backend.util.calc_tensor_size import calc_tensor_size
+
+
+class SetParameterLayer(LoRALayerBase):
+    """A layer that expresses "set parameter `param_name` to `weight`" as a residual on the original value."""
+
+    def __init__(self, param_name: str, weight: torch.Tensor):
+        super().__init__(None, None)
+        self.param_name = param_name
+        self.weight = weight
+
+    def rank(self) -> int | None:
+        """No rank is reported for this layer type."""
+        return None
+
+    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
+        """Return the difference between the target weight and `orig_weight`."""
+        delta = self.weight - orig_weight
+        return delta
+
+    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
+        """Return a single-entry dict mapping `param_name` to the residual for that parameter of `orig_module`."""
+        current_value = orig_module.get_parameter(self.param_name)
+        return {self.param_name: self.get_weight(current_value)}
+
+    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
+        """Move the base-class tensors and the stored target weight to `device` / `dtype`."""
+        super().to(device=device, dtype=dtype)
+        moved_weight = self.weight.to(device=device, dtype=dtype)
+        self.weight = moved_weight
+
+    def calc_size(self) -> int:
+        """Return the base-class size plus the size of the stored target weight."""
+        base_size = super().calc_size()
+        return base_size + calc_tensor_size(self.weight)
--- a/invokeai/backend/lora/layers/utils.py
+++ b/invokeai/backend/lora/layers/utils.py
@@ -9,6 +9,7 @@ from invokeai.backend.lora.layers.loha_layer import LoHALayer
 from invokeai.backend.lora.layers.lokr_layer import LoKRLayer
 from invokeai.backend.lora.layers.lora_layer import LoRALayer
 from invokeai.backend.lora.layers.norm_layer import NormLayer
+from invokeai.backend.lora.layers.set_parameter_layer import SetParameterLayer


 def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> AnyLoRALayer:
--- a/invokeai/backend/lora/lora_layer_wrappers.py
+++ b/invokeai/backend/lora/lora_layer_wrappers.py
@@ -0,0 +1,133 @@
+import torch
+
+from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
+from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
+from invokeai.backend.lora.layers.lora_layer import LoRALayer
+
+
+class LoRASidecarWrapper(torch.nn.Module):
+    """Base wrapper that holds an original module together with LoRA layers and their per-layer weights."""
+
+    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[AnyLoRALayer], lora_weights: list[float]):
+        super().__init__()
+        self._orig_module = orig_module
+        self._lora_layers = lora_layers
+        self._lora_weights = lora_weights
+
+    @property
+    def orig_module(self) -> torch.nn.Module:
+        """The wrapped (unpatched) module."""
+        return self._orig_module
+
+    def add_lora_layer(self, lora_layer: AnyLoRALayer, lora_weight: float):
+        """Append a LoRA layer and its weight to this wrapper."""
+        self._lora_layers.append(lora_layer)
+        self._lora_weights.append(lora_weight)
+
+    @torch.no_grad()
+    def _get_lora_patched_parameters(
+        self, orig_params: dict[str, torch.Tensor], lora_layers: list[AnyLoRALayer], lora_weights: list[float]
+    ) -> dict[str, torch.Tensor]:
+        """Accumulate the scaled residual parameters contributed by the given LoRA layers.
+
+        Each layer's parameters are reshaped to the shape of the matching entry in `orig_params` when the shapes
+        differ, multiplied by `layer.scale() * weight`, and summed per parameter name.
+        """
+        accumulated: dict[str, torch.Tensor] = {}
+        for layer, weight in zip(lora_layers, lora_weights, strict=True):
+            for name, delta in layer.get_parameters(self._orig_module).items():
+                target_shape = orig_params[name].shape
+                if target_shape != delta.shape:
+                    delta = delta.reshape(target_shape)
+
+                scaled_delta = delta * (layer.scale() * weight)
+                if name in accumulated:
+                    accumulated[name] += scaled_delta
+                else:
+                    accumulated[name] = scaled_delta
+
+        return accumulated
+
+
+class LoRALinearWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for a Linear module: output = orig_module(input) + sum of LoRA residuals."""
+
+    def _lora_linear_forward(self, input: torch.Tensor, lora_layer: LoRALayer, lora_weight: float) -> torch.Tensor:
+        """An optimized implementation of the residual calculation for a Linear LoRALayer."""
+        x = torch.nn.functional.linear(input, lora_layer.down)
+        if lora_layer.mid is not None:
+            x = torch.nn.functional.linear(x, lora_layer.mid)
+        x = torch.nn.functional.linear(x, lora_layer.up, bias=lora_layer.bias)
+        x *= lora_weight * lora_layer.scale()
+        return x
+
+    def _concatenated_lora_forward(
+        self, input: torch.Tensor, concatenated_lora_layer: ConcatenatedLoRALayer, lora_weight: float
+    ) -> torch.Tensor:
+        """An optimized implementation of the residual calculation for a Linear ConcatenatedLoRALayer."""
+        x_chunks: list[torch.Tensor] = []
+        for lora_layer in concatenated_lora_layer.lora_layers:
+            x_chunk = torch.nn.functional.linear(input, lora_layer.down)
+            if lora_layer.mid is not None:
+                x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid)
+            x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias)
+            x_chunk *= lora_weight * lora_layer.scale()
+            x_chunks.append(x_chunk)
+
+        # TODO(ryand): Generalize to support concat_axis != 0.
+        assert concatenated_lora_layer.concat_axis == 0
+        x = torch.cat(x_chunks, dim=-1)
+        return x
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Run the wrapped module and add the residuals of all attached LoRA layers.
+
+        If no LoRA layers are attached, the wrapped module's output is returned unchanged.
+        """
+        # Split the LoRA layers into those that have optimized implementations and those that don't.
+        optimized_layer_types = (LoRALayer, ConcatenatedLoRALayer)
+        optimized_layers = [
+            (layer, weight)
+            for layer, weight in zip(self._lora_layers, self._lora_weights, strict=True)
+            if isinstance(layer, optimized_layer_types)
+        ]
+        non_optimized_layers = [
+            (layer, weight)
+            for layer, weight in zip(self._lora_layers, self._lora_weights, strict=True)
+            if not isinstance(layer, optimized_layer_types)
+        ]
+
+        # First, calculate the residual for LoRA layers for which there is an optimized implementation.
+        residual = None
+        for lora_layer, lora_weight in optimized_layers:
+            if isinstance(lora_layer, LoRALayer):
+                added_residual = self._lora_linear_forward(input, lora_layer, lora_weight)
+            elif isinstance(lora_layer, ConcatenatedLoRALayer):
+                added_residual = self._concatenated_lora_forward(input, lora_layer, lora_weight)
+            else:
+                raise ValueError(f"Unsupported LoRA layer type: {type(lora_layer)}")
+
+            if residual is None:
+                residual = added_residual
+            else:
+                residual += added_residual
+
+        # Next, calculate the residuals for the LoRA layers for which there is no optimized implementation.
+        if non_optimized_layers:
+            unoptimized_layers, unoptimized_weights = zip(*non_optimized_layers, strict=True)
+            params = self._get_lora_patched_parameters(
+                orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
+                lora_layers=unoptimized_layers,
+                lora_weights=unoptimized_weights,
+            )
+            added_residual = torch.nn.functional.linear(input, params["weight"], params.get("bias", None))
+            if residual is None:
+                residual = added_residual
+            else:
+                residual += added_residual
+
+        orig_output = self.orig_module(input)
+        # A wrapper can exist with no LoRA layers attached (it is constructed with empty lists); in that case
+        # there is no residual to add and `orig_output + None` would raise a TypeError.
+        if residual is None:
+            return orig_output
+        return orig_output + residual
+
+
+class LoRAConv1dWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for a Conv1d module: output = orig_module(input) + conv1d(input, LoRA residual weight)."""
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Run the wrapped conv and add the residual produced by the attached LoRA layers."""
+        params = self._get_lora_patched_parameters(
+            orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
+            lora_layers=self._lora_layers,
+            lora_weights=self._lora_weights,
+        )
+        orig_output = self.orig_module(input)
+        # The residual convolution must use the same stride/padding/dilation/groups as the wrapped module;
+        # otherwise its output shape will not match `orig_output` for any non-default convolution.
+        # NOTE(review): a non-'zeros' `padding_mode` on the wrapped module is not replicated here - confirm whether
+        # any patched module uses one.
+        residual = torch.nn.functional.conv1d(
+            input,
+            params["weight"],
+            params.get("bias", None),
+            stride=self._orig_module.stride,
+            padding=self._orig_module.padding,
+            dilation=self._orig_module.dilation,
+            groups=self._orig_module.groups,
+        )
+        return orig_output + residual
+
+
+class LoRAConv2dWrapper(LoRASidecarWrapper):
+    """Sidecar wrapper for a Conv2d module: output = orig_module(input) + conv2d(input, LoRA residual weight)."""
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Run the wrapped conv and add the residual produced by the attached LoRA layers."""
+        params = self._get_lora_patched_parameters(
+            orig_params={"weight": self._orig_module.weight, "bias": self._orig_module.bias},
+            lora_layers=self._lora_layers,
+            lora_weights=self._lora_weights,
+        )
+        orig_output = self.orig_module(input)
+        # The residual convolution must use the same stride/padding/dilation/groups as the wrapped module;
+        # otherwise its output shape will not match `orig_output` (e.g. for a 3x3 conv with padding=1).
+        # NOTE(review): a non-'zeros' `padding_mode` on the wrapped module is not replicated here - confirm whether
+        # any patched module uses one.
+        residual = torch.nn.functional.conv2d(
+            input,
+            params["weight"],
+            params.get("bias", None),
+            stride=self._orig_module.stride,
+            padding=self._orig_module.padding,
+            dilation=self._orig_module.dilation,
+            groups=self._orig_module.groups,
+        )
+        return orig_output + residual
--- a/invokeai/backend/lora/lora_patcher.py
+++ b/invokeai/backend/lora/lora_patcher.py
@@ -4,19 +4,126 @@ from typing import Dict, Iterable, Optional, Tuple
 import torch

 from invokeai.backend.lora.layers.any_lora_layer import AnyLoRALayer
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
-from invokeai.backend.lora.sidecar_layers.concatenated_lora.concatenated_lora_linear_sidecar_layer import (
-    ConcatenatedLoRALinearSidecarLayer,
+from invokeai.backend.lora.lora_layer_wrappers import (
+    LoRAConv1dWrapper,
+    LoRAConv2dWrapper,
+    LoRALinearWrapper,
+    LoRASidecarWrapper,
 )
-from invokeai.backend.lora.sidecar_layers.lora.lora_linear_sidecar_layer import LoRALinearSidecarLayer
-from invokeai.backend.lora.sidecar_layers.lora_sidecar_module import LoRASidecarModule
+from invokeai.backend.lora.lora_model_raw import LoRAModelRaw
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.original_weights_storage import OriginalWeightsStorage


 class LoRAPatcher:
+    @staticmethod
+    @torch.no_grad()
+    @contextmanager
+    def apply_smart_lora_patches(
+        model: torch.nn.Module,
+        patches: Iterable[Tuple[LoRAModelRaw, float]],
+        prefix: str,
+        dtype: torch.dtype,
+        cached_weights: Optional[Dict[str, torch.Tensor]] = None,
+    ):
+        """Apply 'smart' LoRA patching that chooses whether to use direct patching or a sidecar wrapper for each module.
+
+        All patches are applied on entry, and both kinds of patch are undone on exit (including when the body of
+        the `with` block raises).
+
+        Args:
+            model (torch.nn.Module): The model to patch.
+            patches (Iterable[Tuple[LoRAModelRaw, float]]): An iterator that yields LoRA patches and their
+                associated weights.
+            prefix (str): Only patch layers whose keys start with this prefix are applied.
+            dtype (torch.dtype): The dtype that LoRA layers are cast to when a sidecar wrapper is used.
+            cached_weights (Optional[Dict[str, torch.Tensor]]): Passed through to OriginalWeightsStorage.
+        """
+
+        # original_weights are stored for unpatching layers that are directly patched.
+        original_weights = OriginalWeightsStorage(cached_weights)
+        # original_modules are stored for unpatching layers that are wrapped in a LoRASidecarWrapper.
+        original_modules: dict[str, torch.nn.Module] = {}
+        try:
+            for patch, patch_weight in patches:
+                LoRAPatcher._apply_smart_lora_patch(
+                    model=model,
+                    prefix=prefix,
+                    patch=patch,
+                    patch_weight=patch_weight,
+                    original_weights=original_weights,
+                    original_modules=original_modules,
+                    dtype=dtype,
+                )
+
+            yield
+        finally:
+            # Restore directly patched layers.
+            # The parameter data is replaced (rather than copied into with `copy_()`), because direct patching may
+            # have swapped in a parameter with a different shape than the original, in which case an in-place
+            # copy would fail.
+            for param_key, weight in original_weights.get_changed_weights():
+                cur_param = model.get_parameter(param_key)
+                cur_param.data = weight.to(dtype=cur_param.dtype, device=cur_param.device, copy=True)
+
+            # Restore LoRASidecarWrapper modules.
+            # Note: This logic assumes no nested modules in original_modules.
+            for module_key, orig_module in original_modules.items():
+                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
+                parent_module = model.get_submodule(module_parent_key)
+                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)
+
+    @staticmethod
+    @torch.no_grad()
+    def _apply_smart_lora_patch(
+        model: torch.nn.Module,
+        prefix: str,
+        patch: LoRAModelRaw,
+        patch_weight: float,
+        original_weights: OriginalWeightsStorage,
+        original_modules: dict[str, torch.nn.Module],
+        dtype: torch.dtype,
+    ):
+        """Apply a single LoRA patch to a model using the 'smart' patching strategy that chooses whether to use direct
+        patching or a sidecar wrapper for each module.
+
+        A patch with zero weight or with no layers is a no-op.
+        """
+        if patch_weight == 0:
+            return
+
+        # An empty patch has nothing to apply. Bail out explicitly: calling next() on an empty iterator would raise
+        # StopIteration, which surfaces as a RuntimeError when this runs inside a generator-based context manager.
+        first_layer_key = next(iter(patch.layers.keys()), None)
+        if first_layer_key is None:
+            return
+
+        # If the layer keys contain a dot, then they are not flattened, and can be directly used to access model
+        # submodules. If the layer keys do not contain a dot, then they are flattened, meaning that all '.' have been
+        # replaced with '_'. Non-flattened keys are preferred, because they allow submodules to be accessed directly
+        # without searching, but some legacy code still uses flattened keys.
+        layer_keys_are_flattened = "." not in first_layer_key
+
+        prefix_len = len(prefix)
+
+        for layer_key, layer in patch.layers.items():
+            if not layer_key.startswith(prefix):
+                continue
+
+            module_key, module = LoRAPatcher._get_submodule(
+                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
+            )
+
+            # Decide whether to use direct patching or a sidecar wrapper.
+            # Direct patching is preferred, because it results in better runtime speed.
+            # Reasons to use sidecar patching:
+            # - The module is already wrapped in a LoRASidecarWrapper.
+            # - The module is quantized.
+            # - The module is on the CPU (and we don't want to store a second full copy of the original weights on the
+            #   CPU, since this would double the RAM usage)
+            # NOTE: For now, we don't check if the layer is quantized here. We assume that this is checked in the caller
+            # and that the caller will use the 'apply_lora_wrapper_patches' method if the layer is quantized.
+            # TODO(ryand): Handle the case where we are running without a GPU. Should we set a config flag that allows
+            # forcing full patching even on the CPU?
+            if isinstance(module, LoRASidecarWrapper) or LoRAPatcher._is_any_part_of_layer_on_cpu(module):
+                LoRAPatcher._apply_lora_layer_wrapper_patch(
+                    model=model,
+                    module_to_patch=module,
+                    module_to_patch_key=module_key,
+                    patch=layer,
+                    patch_weight=patch_weight,
+                    original_modules=original_modules,
+                    dtype=dtype,
+                )
+            else:
+                LoRAPatcher._apply_lora_layer_patch(
+                    module_to_patch=module,
+                    module_to_patch_key=module_key,
+                    patch=layer,
+                    patch_weight=patch_weight,
+                    original_weights=original_weights,
+                )
+
+    @staticmethod
+    def _is_any_part_of_layer_on_cpu(layer: torch.nn.Module) -> bool:
+        """Return True if at least one parameter of `layer` lives on a CPU device."""
+        for param in layer.parameters():
+            if param.device.type == "cpu":
+                return True
+        return False
+
    @staticmethod
    @torch.no_grad()
    @contextmanager
@@ -40,7 +147,7 @@ class LoRAPatcher:
        original_weights = OriginalWeightsStorage(cached_weights)
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher.apply_lora_patch(
+                LoRAPatcher._apply_lora_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -52,11 +159,12 @@ class LoRAPatcher:
            yield
        finally:
            for param_key, weight in original_weights.get_changed_weights():
-                model.get_parameter(param_key).copy_(weight)
+                cur_param = model.get_parameter(param_key)
+                cur_param.data = weight.to(dtype=cur_param.dtype, device=cur_param.device, copy=True)

    @staticmethod
    @torch.no_grad()
-    def apply_lora_patch(
+    def _apply_lora_patch(
        model: torch.nn.Module,
        prefix: str,
        patch: LoRAModelRaw,
@@ -91,48 +199,84 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            # All of the LoRA weight calculations will be done on the same device as the module weight.
-            # (Performance will be best if this is a CUDA device.)
-            device = module.weight.device
-            dtype = module.weight.dtype
+            LoRAPatcher._apply_lora_layer_patch(
+                module_to_patch=module,
+                module_to_patch_key=module_key,
+                patch=layer,
+                patch_weight=patch_weight,
+                original_weights=original_weights,
+            )

-            layer_scale = layer.scale()
+    @staticmethod
+    @torch.no_grad()
+    def _apply_lora_layer_patch(
+        module_to_patch: torch.nn.Module,
+        module_to_patch_key: str,
+        patch: AnyLoRALayer,
+        patch_weight: float,
+        original_weights: OriginalWeightsStorage,
+    ):
+        """Directly patch the parameters of a single module with a single LoRA layer.
+
+        The original value of every touched parameter is saved in `original_weights` (keyed by
+        `<module_to_patch_key>.<param_name>`) before it is modified.
+        """
+        # All of the LoRA weight calculations will be done on the same device as the module weight.
+        # (Performance will be best if this is a CUDA device.)
+        first_param = next(module_to_patch.parameters())
+        device = first_param.device
+        dtype = first_param.dtype

-            # We intentionally move to the target device first, then cast. Experimentally, this was found to
-            # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
-            # same thing in a single call to '.to(...)'.
-            layer.to(device=device)
-            layer.to(dtype=torch.float32)
+        layer_scale = patch.scale()

-            # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
-            # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-            for param_name, lora_param_weight in layer.get_parameters(module).items():
-                param_key = module_key + "." + param_name
-                module_param = module.get_parameter(param_name)
+        # We intentionally move to the target device first, then cast. Experimentally, this was found to
+        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
+        # same thing in a single call to '.to(...)'.
+        patch.to(device=device)
+        patch.to(dtype=torch.float32)

-                # Save original weight
-                original_weights.save(param_key, module_param)
+        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
+        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
+        for param_name, lora_param_weight in patch.get_parameters(module_to_patch).items():
+            param_key = module_to_patch_key + "." + param_name
+            module_param = module_to_patch.get_parameter(param_name)

-                if module_param.shape != lora_param_weight.shape:
+            # Save original weight
+            original_weights.save(param_key, module_param)
+
+            if module_param.shape != lora_param_weight.shape:
+                if module_param.nelement() == lora_param_weight.nelement():
                    lora_param_weight = lora_param_weight.reshape(module_param.shape)
+                else:
+                    # This condition was added to handle layers in FLUX control LoRAs.
+                    # TODO(ryand): Move the weight update into the LoRA layer so that the LoRAPatcher doesn't need
+                    # to worry about this?
+                    expanded_weight = torch.zeros_like(
+                        lora_param_weight, dtype=module_param.dtype, device=module_param.device
+                    )
+                    slices = tuple(slice(0, dim) for dim in module_param.shape)
+                    expanded_weight[slices] = module_param
+                    # Replace the parameter on the module being patched with the expanded (zero-padded) version.
+                    setattr(
+                        module_to_patch,
+                        param_name,
+                        torch.nn.Parameter(expanded_weight, requires_grad=module_param.requires_grad),
+                    )
+                    module_param = expanded_weight

-                lora_param_weight *= patch_weight * layer_scale
-                module_param += lora_param_weight.to(dtype=dtype)
+            lora_param_weight *= patch_weight * layer_scale
+            module_param += lora_param_weight.to(dtype=dtype)

-            layer.to(device=TorchDevice.CPU_DEVICE)
+        patch.to(device=TorchDevice.CPU_DEVICE)

    @staticmethod
    @torch.no_grad()
    @contextmanager
-    def apply_lora_sidecar_patches(
+    def apply_lora_wrapper_patches(
        model: torch.nn.Module,
        patches: Iterable[Tuple[LoRAModelRaw, float]],
        prefix: str,
        dtype: torch.dtype,
    ):
-        """Apply one or more LoRA sidecar patches to a model within a context manager. Sidecar patches incur some
-        overhead compared to normal LoRA patching, but they allow for LoRA layers to applied to base layers in any
-        quantization format.
+        """Apply one or more LoRA wrapper patches to a model within a context manager. Wrapper patches incur some
+        runtime overhead compared to normal LoRA patching, but they enable:
+        - LoRA layers to be applied to quantized models
+        - LoRA layers to be applied to CPU layers without needing to store a full copy of the original weights (i.e.
+          avoid doubling the memory requirements).

        Args:
            model (torch.nn.Module): The model to patch.
@@ -140,14 +284,11 @@ class LoRAPatcher:
                associated weights. An iterator is used so that the LoRA patches do not need to be loaded into memory
                all at once.
            prefix (str): The keys in the patches will be filtered to only include weights with this prefix.
-            dtype (torch.dtype): The compute dtype of the sidecar layers. This cannot easily be inferred from the model,
-                since the sidecar layers are typically applied on top of quantized layers whose weight dtype is
-                different from their compute dtype.
        """
        original_modules: dict[str, torch.nn.Module] = {}
        try:
            for patch, patch_weight in patches:
-                LoRAPatcher._apply_lora_sidecar_patch(
+                LoRAPatcher._apply_lora_wrapper_patch(
                    model=model,
                    prefix=prefix,
                    patch=patch,
@@ -165,7 +306,7 @@ class LoRAPatcher:
                LoRAPatcher._set_submodule(parent_module, module_name, orig_module)

    @staticmethod
-    def _apply_lora_sidecar_patch(
+    def _apply_lora_wrapper_patch(
        model: torch.nn.Module,
        patch: LoRAModelRaw,
        patch_weight: float,
@@ -173,7 +314,7 @@ class LoRAPatcher:
        original_modules: dict[str, torch.nn.Module],
        dtype: torch.dtype,
    ):
-        """Apply a single LoRA sidecar patch to a model."""
+        """Apply a single LoRA wrapper patch to a model."""

        if patch_weight == 0:
            return
@@ -194,28 +335,47 @@ class LoRAPatcher:
                model, layer_key[prefix_len:], layer_key_is_flattened=layer_keys_are_flattened
            )

-            # Initialize the LoRA sidecar layer.
-            lora_sidecar_layer = LoRAPatcher._initialize_lora_sidecar_layer(module, layer, patch_weight)
+            LoRAPatcher._apply_lora_layer_wrapper_patch(
+                model=model,
+                module_to_patch=module,
+                module_to_patch_key=module_key,
+                patch=layer,
+                patch_weight=patch_weight,
+                original_modules=original_modules,
+                dtype=dtype,
+            )

-            # Replace the original module with a LoRASidecarModule if it has not already been done.
-            if module_key in original_modules:
-                # The module has already been patched with a LoRASidecarModule. Append to it.
-                assert isinstance(module, LoRASidecarModule)
-                lora_sidecar_module = module
-            else:
-                # The module has not yet been patched with a LoRASidecarModule. Create one.
-                lora_sidecar_module = LoRASidecarModule(module, [])
-                original_modules[module_key] = module
-                module_parent_key, module_name = LoRAPatcher._split_parent_key(module_key)
-                module_parent = model.get_submodule(module_parent_key)
-                LoRAPatcher._set_submodule(module_parent, module_name, lora_sidecar_module)
+    @staticmethod
+    @torch.no_grad()
+    def _apply_lora_layer_wrapper_patch(
+        model: torch.nn.Module,
+        module_to_patch: torch.nn.Module,
+        module_to_patch_key: str,
+        patch: AnyLoRALayer,
+        patch_weight: float,
+        original_modules: dict[str, torch.nn.Module],
+        dtype: torch.dtype,
+    ):
+        """Apply a single LoRA layer to a single module using a LoRASidecarWrapper.
+
+        If `module_to_patch` is not yet wrapped, it is replaced in `model` by a new wrapper and the original module
+        is recorded in `original_modules` under `module_to_patch_key`. If it is already a wrapper, the layer is
+        appended to that wrapper.
+        """

-            # Move the LoRA sidecar layer to the same device/dtype as the orig module.
-            # TODO(ryand): Experiment with moving to the device first, then casting. This could be faster.
-            lora_sidecar_layer.to(device=lora_sidecar_module.orig_module.weight.device, dtype=dtype)
+        # Replace the original module with a LoRASidecarWrapper if it has not already been done.
+        if not isinstance(module_to_patch, LoRASidecarWrapper):
+            lora_wrapper_layer = LoRAPatcher._initialize_lora_wrapper_layer(module_to_patch)
+            original_modules[module_to_patch_key] = module_to_patch
+            module_parent_key, module_name = LoRAPatcher._split_parent_key(module_to_patch_key)
+            module_parent = model.get_submodule(module_parent_key)
+            LoRAPatcher._set_submodule(module_parent, module_name, lora_wrapper_layer)
+            orig_module = module_to_patch
+        else:
+            # An existing wrapper is expected to have been created earlier with the same `original_modules` dict.
+            assert module_to_patch_key in original_modules
+            lora_wrapper_layer = module_to_patch
+            orig_module = module_to_patch.orig_module

-            # Add the LoRA sidecar layer to the LoRASidecarModule.
-            lora_sidecar_module.add_lora_layer(lora_sidecar_layer)
+        # Move the LoRA layer to the same device as the orig module's weight, and cast it to `dtype`.
+        patch.to(device=orig_module.weight.device, dtype=dtype)
+
+        # Add the LoRA layer (and its weight) to the LoRASidecarWrapper.
+        lora_wrapper_layer.add_lora_layer(patch, patch_weight)

    @staticmethod
    def _split_parent_key(module_key: str) -> tuple[str, str]:
@@ -236,17 +396,13 @@ class LoRAPatcher:
            raise ValueError(f"Invalid module key: {module_key}")

    @staticmethod
-    def _initialize_lora_sidecar_layer(orig_layer: torch.nn.Module, lora_layer: AnyLoRALayer, patch_weight: float):
-        # TODO(ryand): Add support for more original layer types and LoRA layer types.
-        if isinstance(orig_layer, torch.nn.Linear) or (
-            isinstance(orig_layer, LoRASidecarModule) and isinstance(orig_layer.orig_module, torch.nn.Linear)
-        ):
-            if isinstance(lora_layer, LoRALayer):
-                return LoRALinearSidecarLayer(lora_layer=lora_layer, weight=patch_weight)
-            elif isinstance(lora_layer, ConcatenatedLoRALayer):
-                return ConcatenatedLoRALinearSidecarLayer(concatenated_lora_layer=lora_layer, weight=patch_weight)
-            else:
-                raise ValueError(f"Unsupported Linear LoRA layer type: {type(lora_layer)}")
+    def _initialize_lora_wrapper_layer(orig_layer: torch.nn.Module):
+        """Create an empty LoRA wrapper (no LoRA layers attached yet) matching the type of `orig_layer`.
+
+        Raises:
+            ValueError: If `orig_layer` is not a Linear, Conv1d or Conv2d module.
+        """
+        if isinstance(orig_layer, torch.nn.Linear):
+            return LoRALinearWrapper(orig_layer, [], [])
+        elif isinstance(orig_layer, torch.nn.Conv1d):
+            return LoRAConv1dWrapper(orig_layer, [], [])
+        elif isinstance(orig_layer, torch.nn.Conv2d):
+            return LoRAConv2dWrapper(orig_layer, [], [])
        else:
            raise ValueError(f"Unsupported layer type: {type(orig_layer)}")

--- a/invokeai/backend/lora/sidecar_layers/init.py
+++ b/invokeai/backend/lora/sidecar_layers/init.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/init.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/init.py
--- a/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/concatenated_lora/concatenated_lora_linear_sidecar_layer.py
@@ -1,34 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.concatenated_lora_layer import ConcatenatedLoRALayer
-
-
-class ConcatenatedLoRALinearSidecarLayer(torch.nn.Module):
-    def __init__(
-        self,
-        concatenated_lora_layer: ConcatenatedLoRALayer,
-        weight: float,
-    ):
-        super().__init__()
-
-        self._concatenated_lora_layer = concatenated_lora_layer
-        self._weight = weight
-
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        x_chunks: list[torch.Tensor] = []
-        for lora_layer in self._concatenated_lora_layer.lora_layers:
-            x_chunk = torch.nn.functional.linear(input, lora_layer.down)
-            if lora_layer.mid is not None:
-                x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.mid)
-            x_chunk = torch.nn.functional.linear(x_chunk, lora_layer.up, bias=lora_layer.bias)
-            x_chunk *= self._weight * lora_layer.scale()
-            x_chunks.append(x_chunk)
-
-        # TODO(ryand): Generalize to support concat_axis != 0.
-        assert self._concatenated_lora_layer.concat_axis == 0
-        x = torch.cat(x_chunks, dim=-1)
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._concatenated_lora_layer.to(device=device, dtype=dtype)
-        return self
--- a/invokeai/backend/lora/sidecar_layers/lora/init.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/init.py
--- a/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora/lora_linear_sidecar_layer.py
@@ -1,27 +0,0 @@
-import torch
-
-from invokeai.backend.lora.layers.lora_layer import LoRALayer
-
-
-class LoRALinearSidecarLayer(torch.nn.Module):
-    def __init__(
-        self,
-        lora_layer: LoRALayer,
-        weight: float,
-    ):
-        super().__init__()
-
-        self._lora_layer = lora_layer
-        self._weight = weight
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = torch.nn.functional.linear(x, self._lora_layer.down)
-        if self._lora_layer.mid is not None:
-            x = torch.nn.functional.linear(x, self._lora_layer.mid)
-        x = torch.nn.functional.linear(x, self._lora_layer.up, bias=self._lora_layer.bias)
-        x *= self._weight * self._lora_layer.scale()
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._lora_layer.to(device=device, dtype=dtype)
-        return self
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_layer.py
--- a/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
+++ b/invokeai/backend/lora/sidecar_layers/lora_sidecar_module.py
@@ -1,24 +0,0 @@
-import torch
-
-
-class LoRASidecarModule(torch.nn.Module):
-    """A LoRA sidecar module that wraps an original module and adds LoRA layers to it."""
-
-    def __init__(self, orig_module: torch.nn.Module, lora_layers: list[torch.nn.Module]):
-        super().__init__()
-        self.orig_module = orig_module
-        self._lora_layers = lora_layers
-
-    def add_lora_layer(self, lora_layer: torch.nn.Module):
-        self._lora_layers.append(lora_layer)
-
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        x = self.orig_module(input)
-        for lora_layer in self._lora_layers:
-            x += lora_layer(input)
-        return x
-
-    def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None):
-        self._orig_module.to(device=device, dtype=dtype)
-        for lora_layer in self._lora_layers:
-            lora_layer.to(device=device, dtype=dtype)
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -67,6 +67,7 @@ class ModelType(str, Enum):
    Main = "main"
    VAE = "vae"
    LoRA = "lora"
+    StructuralLoRa = "structural_lora"
    ControlNet = "controlnet"  # used by model_probe
    TextualInversion = "embedding"
    IPAdapter = "ip_adapter"
@@ -273,6 +274,18 @@ class LoRALyCORISConfig(LoRAConfigBase):
        return Tag(f"{ModelType.LoRA.value}.{ModelFormat.LyCORIS.value}")


+class StructuralLoRALyCORISConfig(ModelConfigBase):
+    """Model config for Structural LoRA/Lycoris models."""
+
+    type: Literal[ModelType.StructuralLoRa] = ModelType.StructuralLoRa
+    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
+    format: Literal[ModelFormat.LyCORIS] = ModelFormat.LyCORIS
+
+    @staticmethod
+    def get_tag() -> Tag:
+        return Tag(f"{ModelType.StructuralLoRa.value}.{ModelFormat.LyCORIS.value}")
+
+
 class LoRADiffusersConfig(LoRAConfigBase):
    """Model config for LoRA/Diffusers models."""

@@ -535,6 +548,7 @@ AnyModelConfig = Annotated[
        Annotated[ControlNetDiffusersConfig, ControlNetDiffusersConfig.get_tag()],
        Annotated[ControlNetCheckpointConfig, ControlNetCheckpointConfig.get_tag()],
        Annotated[LoRALyCORISConfig, LoRALyCORISConfig.get_tag()],
+        Annotated[StructuralLoRALyCORISConfig, StructuralLoRALyCORISConfig.get_tag()],
        Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()],
        Annotated[T5EncoderConfig, T5EncoderConfig.get_tag()],
        Annotated[T5EncoderBnbQuantizedLlmInt8bConfig, T5EncoderBnbQuantizedLlmInt8bConfig.get_tag()],
--- a/invokeai/backend/model_manager/load/model_loaders/controlnet.py
+++ b/invokeai/backend/model_manager/load/model_loaders/controlnet.py
@@ -1,12 +1,10 @@
 # Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team
 """Class for ControlNet model loading in InvokeAI."""

-from pathlib import Path
 from typing import Optional

 from diffusers import ControlNetModel

-import invokeai.backend.assets.model_base_conf_files as conf_file_cache
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
@@ -48,20 +46,9 @@ class ControlNetLoader(GenericDiffusersLoader):
        config: AnyModelConfig,
        submodel_type: Optional[SubModelType] = None,
    ) -> AnyModel:
-        config_dirs = {
-            BaseModelType.StableDiffusion1: "controlnet_sd15",
-            BaseModelType.StableDiffusionXL: "controlnet_sdxl",
-        }
-        try:
-            config_dir = config_dirs[config.base]
-        except KeyError:
-            raise Exception(f"No configuration template known for controlnet model with base={config.base}")
-
        if isinstance(config, ControlNetCheckpointConfig):
            return ControlNetModel.from_single_file(
                config.path,
-                config=Path(conf_file_cache.__path__[0], config_dir).as_posix(),
-                local_files_only=True,
                torch_dtype=self._torch_dtype,
            )
        else:
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -13,8 +13,13 @@ from invokeai.backend.lora.conversions.flux_diffusers_lora_conversion_utils impo
    lora_model_from_flux_diffusers_state_dict,
 )
 from invokeai.backend.lora.conversions.flux_kohya_lora_conversion_utils import (
+    is_state_dict_likely_in_flux_kohya_format,
    lora_model_from_flux_kohya_state_dict,
 )
+from invokeai.backend.lora.conversions.flux_control_lora_utils import (
+    is_state_dict_likely_flux_control,
+    lora_model_from_flux_control_state_dict,
+)
 from invokeai.backend.lora.conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict
 from invokeai.backend.lora.conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format
 from invokeai.backend.model_manager import (
@@ -32,6 +37,7 @@ from invokeai.backend.model_manager.load.model_loader_registry import ModelLoade

@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.LoRA, format=ModelFormat.Diffusers)
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.LoRA, format=ModelFormat.LyCORIS)
+@ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.StructuralLoRa, format=ModelFormat.LyCORIS)
 class LoRALoader(ModelLoader):
    """Class to load LoRA models."""

@@ -75,7 +81,13 @@ class LoRALoader(ModelLoader):
                # https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora_flux.py#L1194
                model = lora_model_from_flux_diffusers_state_dict(state_dict=state_dict, alpha=None)
            elif config.format == ModelFormat.LyCORIS:
-                model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict)
+                if is_state_dict_likely_in_flux_kohya_format(state_dict=state_dict):
+                    model = lora_model_from_flux_kohya_state_dict(state_dict=state_dict)
+                elif is_state_dict_likely_flux_control(state_dict=state_dict):
+                    model = lora_model_from_flux_control_state_dict(state_dict=state_dict)
+                else:
+                    # Neither key layout matched: fail loudly rather than fall through with `model` unassigned.
+                    raise ValueError("FLUX LyCORIS LoRA state dict is neither Kohya nor FLUX Control format.")
            else:
                raise ValueError(f"LoRA model is in unsupported FLUX format: {config.format}")
        elif self._model_base in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -11,7 +11,6 @@ from diffusers import (
    StableDiffusionXLPipeline,
 )

-import invokeai.backend.assets.model_base_conf_files as conf_file_cache
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
@@ -19,7 +18,6 @@ from invokeai.backend.model_manager import (
    ModelFormat,
    ModelType,
    ModelVariantType,
-    SchedulerPredictionType,
    SubModelType,
 )
 from invokeai.backend.model_manager.config import (
@@ -108,33 +106,11 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                ModelVariantType.Normal: StableDiffusionXLPipeline,
            },
        }
-        config_dirs = {
-            BaseModelType.StableDiffusion1: {
-                SchedulerPredictionType.Epsilon: "stable-diffusion-1.5-epsilon",
-                SchedulerPredictionType.VPrediction: "stable-diffusion-1.5-v_prediction",
-            },
-            BaseModelType.StableDiffusion2: {
-                SchedulerPredictionType.VPrediction: "stable-diffusion-2.0-v_prediction",
-            },
-            BaseModelType.StableDiffusionXL: {
-                SchedulerPredictionType.Epsilon: "stable-diffusion-xl-base-1.0",
-            },
-            BaseModelType.StableDiffusionXLRefiner: {
-                SchedulerPredictionType.Epsilon: "stable-diffusion-xl-refiner-1.0",
-            },
-        }
-
        assert isinstance(config, MainCheckpointConfig)
        try:
            load_class = load_classes[config.base][config.variant]
        except KeyError as e:
            raise Exception(f"No diffusers pipeline known for base={config.base}, variant={config.variant}") from e
-        try:
-            config_dir = config_dirs[config.base][config.prediction_type]
-        except KeyError as e:
-            raise Exception(
-                f"No configuration template known for base={config.base}, prediction_type={config.prediction_type}"
-            ) from e

        # Without SilenceWarnings we get log messages like this:
        # site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
@@ -144,17 +120,8 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
        # Some weights of the model checkpoint were not used when initializing CLIPTextModelWithProjection:
        # ['text_model.embeddings.position_ids']

-        original_config_file = self._app_config.legacy_conf_path / config.config_path
-
        with SilenceWarnings():
-            pipeline = load_class.from_single_file(
-                config.path,
-                config=Path(conf_file_cache.__path__[0], config_dir).as_posix(),
-                original_config=original_config_file,
-                torch_dtype=self._torch_dtype,
-                local_files_only=True,
-                kwargs={"load_safety_checker": False},
-            )
+            pipeline = load_class.from_single_file(config.path, torch_dtype=self._torch_dtype)

        if not submodel_type:
            return pipeline
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ryan Dick	3ed6e65a6e	Enable LoRAPatcher.apply_smart_lora_patches(...) throughout the stack.	2024-12-12 22:41:50 +00:00
Ryan Dick	52c9646f84	(minor) Rename num_layers -> num_loras in unit tests.	2024-12-12 22:41:50 +00:00
Ryan Dick	7662f0522b	Add test_apply_smart_lora_patches_to_partially_loaded_model(...).	2024-12-12 22:41:50 +00:00
Ryan Dick	e50fe69839	Add LoRAPatcher.smart_apply_lora_patches()	2024-12-12 22:41:50 +00:00
Ryan Dick	5a9f884620	Refactor LoRAPatcher slightly in preparation for a 'smart' patcher.	2024-12-12 22:41:46 +00:00
Ryan Dick	edc72d1739	Fix LoRAPatcher.apply_lora_wrapper_patches(...)	2024-12-12 22:33:07 +00:00
Ryan Dick	23f521dc7c	Finish consolidating LoRA sidecar wrapper implementations.	2024-12-12 22:33:07 +00:00
Ryan Dick	3d6b93efdd	Begin to consolidate the LoRA sidecar and LoRA layer wrapper implementations.	2024-12-12 22:33:07 +00:00
Ryan Dick	3f28d3afad	Fix bias handling in LoRAModuleWrapper and add unit test that checks that all LoRA patching methods produce the same outputs.	2024-12-12 22:33:07 +00:00
Ryan Dick	9353bfbdd6	Add LoRA wrapper patching to LoRAPatcher.	2024-12-12 22:33:07 +00:00
Ryan Dick	93f2bc6118	Add LoRA wrapper layer.	2024-12-12 22:33:07 +00:00
Ryan Dick	9019026d6d	Fixes to get FLUX Control LoRA working.	2024-12-12 00:19:39 +00:00
Brandon Rising	c195b326ec	Lots of updates centered around using the lora patcher rather than changing the modules in the transformer model	2024-12-11 14:14:50 -05:00
Brandon Rising	2f460d2a45	Support bnb quantized nf4 flux models, Use controlnet vae, only support 1 structural lora per transformer. various other refractors and bugfixes	2024-12-10 03:26:29 -05:00
Brandon Rising	4473cba512	Initial setup for flux tools control loras	2024-12-09 16:01:29 -05:00
Eugene Brodsky	4c94d41fa9	(chore) ruff format	2024-12-04 17:02:08 +00:00
Eugene Brodsky	4036244ee9	(app) clarify log message when migrating old .cache	2024-12-04 17:02:08 +00:00
Eugene Brodsky	d06232d9ba	(config) ensure legacy model configs and node template are writable by the user even if the source files are read-only	2024-12-04 17:02:08 +00:00
Eugene Brodsky	bacbdfb8fc	(docker) add comments in docker-entrypoint.sh and ensure variables are not null in bash expansion	2024-12-04 17:02:08 +00:00
Eugene Brodsky	59f42f4682	(pkg) reduce max supported python version as we have not yet tested 3.12 well enough	2024-12-04 17:02:08 +00:00
Eugene Brodsky	a636ac2899	(docker) use 'uv' to manage python installation and the invoke dependencies, since Ubuntu 24.04 comes with Python 3.12 which we do not yet support	2024-12-04 17:02:08 +00:00
Richard Lyons	bd478360d9	Upgrade docker build to ubuntu 24	2024-12-04 17:02:08 +00:00
Richard Lyons	ac0db07649	Fix docker deployment	2024-12-04 17:02:08 +00:00
psychedelicious	b7132ce9e7	fix(ui): capitalization for vietnamese language	2024-12-03 14:52:28 -08:00
psychedelicious	90f30e7748	chore: bump version to v5.4.3	2024-12-03 14:50:09 -08:00
Riccardo Giovanetti	6b86a66bc7	translationBot(ui): update translation (Italian) Currently translated at 99.3% (1633 of 1643 strings) Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/ Translation: InvokeAI/Web UI	2024-12-03 13:16:12 -08:00
Linos	aa97e626e9	translationBot(ui): update translation (Vietnamese) Currently translated at 100.0% (1643 of 1643 strings) translationBot(ui): update translation (Vietnamese) Currently translated at 99.8% (1641 of 1643 strings) Co-authored-by: Linos <linos.coding@gmail.com> Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/vi/ Translation: InvokeAI/Web UI	2024-12-03 13:13:26 -08:00
Ryan Dick	c90736093f	Revert FLUX performance improvement that fails on MacOS (#7423 ) ## Summary https://github.com/invoke-ai/InvokeAI/issues/7422 As reported in the above ticket, a recent FLUX performance improvement caused a regression on MacOS. This PR reverts the offending part of the change. ## Related Issues / Discussions - Closes #7422 - Original perf improvement: https://github.com/invoke-ai/InvokeAI/pull/7399 ## QA Instructions I don't have a Mac capable of running this test, so trusting the report in #7422 that this fixes the problem. ## Checklist - [x] _The PR has a short but descriptive title, suitable for a changelog_ - [x] _Tests added / updated (if applicable)_ - [x] _Documentation added / updated (if applicable)_ - [ ] _Updated `What's New` copy (if doing a release after this PR)_	2024-12-03 10:58:00 -05:00
Ryan Dick	0bff4ace1b	Revert performance improvement, because it caused flux inference to fail on Mac: https://github.com/invoke-ai/InvokeAI/issues/7422	2024-12-03 15:18:58 +00:00
psychedelicious	5eb382074e	tweak(ui): slightly clearer logic for skipping regional guidance	2024-12-02 23:46:21 -05:00
psychedelicious	46aa930526	fix(ui): skip disabled ref images	2024-12-02 23:46:21 -05:00
psychedelicious	3305bad0c2	fix(app): queue item id check before setting cancel flag should use `!=` instead of `is not` The `is` operator compares references, not values. Thanks to a wonderfully unintuitive quirk of python, `is` works on integers from `-5` to `256`, inclusive. Whenever integers in this range are used for a value, internally python returns a reference to a stable object in memory. When integers outside this range are used as a value, python creates a new object in memory for that integer. See `PyLong_FromLong` documentation here: https://docs.python.org/3/c-api/long.html Tying this back to our session processor, we were using `is` to compare the queue item ids for equality. Our queue item ids start at 0, and each queue item created increments this by one. So this comparison works only for the first 256 queue items on the machine. Starting with the 257th queue item, the comparison starts returning `False`, and cancelation gets weird. Easy fix - use `!=` instead of `is not`.	2024-12-02 23:22:58 -05:00