WIP - not working

Tidy types in LoraModelPatcher.
Split LoraModelPatcher out from ModelPatcher monolith.
2026-01-16 16:57:58 -05:00 · 2024-04-08 10:06:11 -04:00 · 2024-04-05 16:05:31 -04:00 · 2024-04-05 15:57:46 -04:00 · 2024-04-05 15:30:01 -04:00 · 2024-04-05 15:20:07 -04:00
42 changed files with 902 additions and 834 deletions
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -9,7 +9,8 @@ from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
 from invokeai.app.invocations.primitives import ConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.ti_utils import generate_ti_list
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
    BasicConditioningInfo,
@@ -80,7 +81,7 @@ class CompelInvocation(BaseInvocation):
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
+            LoraModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, self.clip.skipped_layers),
        ):
@@ -181,7 +182,7 @@ class SDXLPromptInvocationBase:
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
+            LoraModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, clip_field.skipped_layers),
        ):
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -48,7 +48,8 @@ from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher
 from invokeai.backend.model_manager import BaseModelType, LoadedModel
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
@@ -730,7 +731,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                set_seamless(unet_info.model, self.unet.seamless_axes),  # FIXME
                unet_info as unet,
                # Apply the LoRA after unet has been moved to its target device for faster patching.
-                ModelPatcher.apply_lora_unet(unet, _lora_loader()),
+                LoraModelPatcher.apply_lora_unet(unet, _lora_loader()),
            ):
                assert isinstance(unet, UNet2DConditionModel)
                latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/app/services/model_load/model_load_base.py
+++ b/invokeai/app/services/model_load/model_load_base.py
@@ -5,7 +5,8 @@ from abc import ABC, abstractmethod
 from typing import Optional
 from invokeai.app.services.shared.invocation_context import InvocationContextData
-from invokeai.backend.model_manager import AnyModel, AnyModelConfig, SubModelType
+from invokeai.backend.model_manager import AnyModelConfig, SubModelType
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load import LoadedModel
 from invokeai.backend.model_manager.load.convert_cache import ModelConvertCacheBase
 from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
--- a/invokeai/app/services/model_load/model_load_default.py
+++ b/invokeai/app/services/model_load/model_load_default.py
@@ -6,7 +6,8 @@ from typing import Optional, Type
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.app.services.invoker import Invoker
 from invokeai.app.services.shared.invocation_context import InvocationContextData
-from invokeai.backend.model_manager import AnyModel, AnyModelConfig, SubModelType
+from invokeai.backend.model_manager import AnyModelConfig, SubModelType
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load import (
    LoadedModel,
    ModelLoaderRegistry,
--- a/invokeai/app/services/model_manager/init.py
+++ b/invokeai/app/services/model_manager/init.py
@@ -1,6 +1,6 @@
 """Initialization file for model manager service."""
-from invokeai.backend.model_manager import AnyModel, AnyModelConfig, BaseModelType, ModelType, SubModelType
+from invokeai.backend.model_manager import AnyModelConfig, BaseModelType, ModelType, SubModelType
 from invokeai.backend.model_manager.load import LoadedModel
 from .model_manager_default import ModelManagerService, ModelManagerServiceBase
@@ -8,7 +8,6 @@ from .model_manager_default import ModelManagerService, ModelManagerServiceBase
 __all__ = [
    "ModelManagerServiceBase",
    "ModelManagerService",
    "AnyModel",
    "AnyModelConfig",
    "BaseModelType",
    "ModelType",
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -12,7 +12,6 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionWeights
 from ..raw_model import RawModel
 from .resampler import Resampler
@@ -102,7 +101,7 @@ class MLPProjModel(torch.nn.Module):
        return clip_extra_context_tokens
-class IPAdapter(RawModel):
+class IPAdapter(torch.nn.Module):
    """IP-Adapter: https://arxiv.org/pdf/2308.06721.pdf"""
    def __init__(
@@ -112,6 +111,7 @@ class IPAdapter(RawModel):
        dtype: torch.dtype = torch.float16,
        num_tokens: int = 4,
    ):
        super().__init__()
        self.device = device
        self.dtype = dtype
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -1,624 +0,0 @@
 # Copyright (c) 2024 The InvokeAI Development team
 """LoRA model support."""
 import bisect
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
 import torch
 from safetensors.torch import load_file
 from typing_extensions import Self
 from invokeai.backend.model_manager import BaseModelType
 from .raw_model import RawModel
 class LoRALayerBase:
    # rank: Optional[int]
    # alpha: Optional[float]
    # bias: Optional[torch.Tensor]
    # layer_key: str
    # @property
    # def scale(self):
    #    return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        if "alpha" in values:
            self.alpha = values["alpha"].item()
        else:
            self.alpha = None
        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
                values["bias_indices"],
                values["bias_values"],
                tuple(values["bias_size"]),
            )
        else:
            self.bias = None
        self.rank = None  # set in layer implementation
        self.layer_key = layer_key
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        raise NotImplementedError()
    def calc_size(self) -> int:
        model_size = 0
        for val in [self.bias]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        if self.bias is not None:
            self.bias = self.bias.to(device=device, dtype=dtype)
 # TODO: find and debug lora/locon with bias
 class LoRALayer(LoRALayerBase):
    # up: torch.Tensor
    # mid: Optional[torch.Tensor]
    # down: torch.Tensor
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.up = values["lora_up.weight"]
        self.down = values["lora_down.weight"]
        if "lora_mid.weight" in values:
            self.mid: Optional[torch.Tensor] = values["lora_mid.weight"]
        else:
            self.mid = None
        self.rank = self.down.shape[0]
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        if self.mid is not None:
            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
        else:
            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.up, self.mid, self.down]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.up = self.up.to(device=device, dtype=dtype)
        self.down = self.down.to(device=device, dtype=dtype)
        if self.mid is not None:
            self.mid = self.mid.to(device=device, dtype=dtype)
 class LoHALayer(LoRALayerBase):
    # w1_a: torch.Tensor
    # w1_b: torch.Tensor
    # w2_a: torch.Tensor
    # w2_b: torch.Tensor
    # t1: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None
    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
        super().__init__(layer_key, values)
        self.w1_a = values["hada_w1_a"]
        self.w1_b = values["hada_w1_b"]
        self.w2_a = values["hada_w2_a"]
        self.w2_b = values["hada_w2_b"]
        if "hada_t1" in values:
            self.t1: Optional[torch.Tensor] = values["hada_t1"]
        else:
            self.t1 = None
        if "hada_t2" in values:
            self.t2: Optional[torch.Tensor] = values["hada_t2"]
        else:
            self.t2 = None
        self.rank = self.w1_b.shape[0]
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        if self.t1 is None:
            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
        else:
            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
            weight = rebuild1 * rebuild2
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.t1 is not None:
            self.t1 = self.t1.to(device=device, dtype=dtype)
        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
 class LoKRLayer(LoRALayerBase):
    # w1: Optional[torch.Tensor] = None
    # w1_a: Optional[torch.Tensor] = None
    # w1_b: Optional[torch.Tensor] = None
    # w2: Optional[torch.Tensor] = None
    # w2_a: Optional[torch.Tensor] = None
    # w2_b: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        if "lokr_w1" in values:
            self.w1: Optional[torch.Tensor] = values["lokr_w1"]
            self.w1_a = None
            self.w1_b = None
        else:
            self.w1 = None
            self.w1_a = values["lokr_w1_a"]
            self.w1_b = values["lokr_w1_b"]
        if "lokr_w2" in values:
            self.w2: Optional[torch.Tensor] = values["lokr_w2"]
            self.w2_a = None
            self.w2_b = None
        else:
            self.w2 = None
            self.w2_a = values["lokr_w2_a"]
            self.w2_b = values["lokr_w2_b"]
        if "lokr_t2" in values:
            self.t2: Optional[torch.Tensor] = values["lokr_t2"]
        else:
            self.t2 = None
        if "lokr_w1_b" in values:
            self.rank = values["lokr_w1_b"].shape[0]
        elif "lokr_w2_b" in values:
            self.rank = values["lokr_w2_b"].shape[0]
        else:
            self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        w1: Optional[torch.Tensor] = self.w1
        if w1 is None:
            assert self.w1_a is not None
            assert self.w1_b is not None
            w1 = self.w1_a @ self.w1_b
        w2 = self.w2
        if w2 is None:
            if self.t2 is None:
                assert self.w2_a is not None
                assert self.w2_b is not None
                w2 = self.w2_a @ self.w2_b
            else:
                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
        if len(w2.shape) == 4:
            w1 = w1.unsqueeze(2).unsqueeze(2)
        w2 = w2.contiguous()
        assert w1 is not None
        assert w2 is not None
        weight = torch.kron(w1, w2)
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        if self.w1 is not None:
            self.w1 = self.w1.to(device=device, dtype=dtype)
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.w2 is not None:
            self.w2 = self.w2.to(device=device, dtype=dtype)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
 class FullLayer(LoRALayerBase):
    # weight: torch.Tensor
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.weight = values["diff"]
        if len(values.keys()) > 1:
            _keys = list(values.keys())
            _keys.remove("diff")
            raise NotImplementedError(f"Unexpected keys in lora diff layer: {_keys}")
        self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        return self.weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype)
 class IA3Layer(LoRALayerBase):
    # weight: torch.Tensor
    # on_input: torch.Tensor
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.weight = values["weight"]
        self.on_input = values["on_input"]
        self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        weight = self.weight
        if not self.on_input:
            weight = weight.reshape(-1, 1)
        assert orig_weight is not None
        return orig_weight * weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        model_size += self.on_input.nelement() * self.on_input.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ):
        super().to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype)
        self.on_input = self.on_input.to(device=device, dtype=dtype)
 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
 class LoRAModelRaw(RawModel):  # (torch.nn.Module):
    _name: str
    layers: Dict[str, AnyLoRALayer]
    def __init__(
        self,
        name: str,
        layers: Dict[str, AnyLoRALayer],
    ):
        self._name = name
        self.layers = layers
    @property
    def name(self) -> str:
        return self._name
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        # TODO: try revert if exception?
        for _key, layer in self.layers.items():
            layer.to(device=device, dtype=dtype)
    def calc_size(self) -> int:
        model_size = 0
        for _, layer in self.layers.items():
            model_size += layer.calc_size()
        return model_size
    @classmethod
    def _convert_sdxl_keys_to_diffusers_format(cls, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """Convert the keys of an SDXL LoRA state_dict to diffusers format.
        The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
        diffusers format, then this function will have no effect.
        This function is adapted from:
        https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
        Args:
            state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
        Raises:
            ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
        Returns:
            Dict[str, Tensor]: The diffusers-format state_dict.
        """
        converted_count = 0  # The number of Stability AI keys converted to diffusers format.
        not_converted_count = 0  # The number of keys that were not converted.
        # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
        # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
        # `input_blocks_4_1_proj_in`.
        stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
        stability_unet_keys.sort()
        new_state_dict = {}
        for full_key, value in state_dict.items():
            if full_key.startswith("lora_unet_"):
                search_key = full_key.replace("lora_unet_", "")
                # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
                position = bisect.bisect_right(stability_unet_keys, search_key)
                map_key = stability_unet_keys[position - 1]
                # Now, check if the map_key *actually* matches the search_key.
                if search_key.startswith(map_key):
                    new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
                    new_state_dict[new_key] = value
                    converted_count += 1
                else:
                    new_state_dict[full_key] = value
                    not_converted_count += 1
            elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
                # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
                new_state_dict[full_key] = value
                continue
            else:
                raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
        if converted_count > 0 and not_converted_count > 0:
            raise ValueError(
                f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
                f" not_converted={not_converted_count}"
            )
        return new_state_dict
    @classmethod
    def from_checkpoint(
        cls,
        file_path: Union[str, Path],
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        base_model: Optional[BaseModelType] = None,
    ) -> Self:
        device = device or torch.device("cpu")
        dtype = dtype or torch.float32
        if isinstance(file_path, str):
            file_path = Path(file_path)
        model = cls(
            name=file_path.stem,
            layers={},
        )
        if file_path.suffix == ".safetensors":
            sd = load_file(file_path.absolute().as_posix(), device="cpu")
        else:
            sd = torch.load(file_path, map_location="cpu")
        state_dict = cls._group_state(sd)
        if base_model == BaseModelType.StableDiffusionXL:
            state_dict = cls._convert_sdxl_keys_to_diffusers_format(state_dict)
        for layer_key, values in state_dict.items():
            # lora and locon
            if "lora_down.weight" in values:
                layer: AnyLoRALayer = LoRALayer(layer_key, values)
            # loha
            elif "hada_w1_b" in values:
                layer = LoHALayer(layer_key, values)
            # lokr
            elif "lokr_w1_b" in values or "lokr_w1" in values:
                layer = LoKRLayer(layer_key, values)
            # diff
            elif "diff" in values:
                layer = FullLayer(layer_key, values)
            # ia3
            elif "weight" in values and "on_input" in values:
                layer = IA3Layer(layer_key, values)
            else:
                print(f">> Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
                raise Exception("Unknown lora format!")
            # lower memory consumption by removing already parsed layer values
            state_dict[layer_key].clear()
            layer.to(device=device, dtype=dtype)
            model.layers[layer_key] = layer
        return model
    @staticmethod
    def _group_state(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
        state_dict_groupped: Dict[str, Dict[str, torch.Tensor]] = {}
        for key, value in state_dict.items():
            stem, leaf = key.split(".", 1)
            if stem not in state_dict_groupped:
                state_dict_groupped[stem] = {}
            state_dict_groupped[stem][leaf] = value
        return state_dict_groupped
 # code from
 # https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
 def make_sdxl_unet_conversion_map() -> List[Tuple[str, str]]:
    """Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format."""
    unet_conversion_map_layer = []
    for i in range(3):  # num_blocks is 3 in sdxl
        # loop over downblocks/upblocks
        for j in range(2):
            # loop over resnets/attentions for downblocks
            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
            if i < 3:
                # no attention layers in down_blocks.3
                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
        for j in range(3):
            # loop over resnets/attentions for upblocks
            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
            # if i > 0: commentout for sdxl
            # no attention layers in up_blocks.0
            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
        if i < 3:
            # no downsample in down_blocks.3
            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
            # no upsample in up_blocks.3
            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
    hf_mid_atn_prefix = "mid_block.attentions.0."
    sd_mid_atn_prefix = "middle_block.1."
    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
    for j in range(2):
        hf_mid_res_prefix = f"mid_block.resnets.{j}."
        sd_mid_res_prefix = f"middle_block.{2*j}."
        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
    unet_conversion_map_resnet = [
        # (stable-diffusion, HF Diffusers)
        ("in_layers.0.", "norm1."),
        ("in_layers.2.", "conv1."),
        ("out_layers.0.", "norm2."),
        ("out_layers.3.", "conv2."),
        ("emb_layers.1.", "time_emb_proj."),
        ("skip_connection.", "conv_shortcut."),
    ]
    unet_conversion_map = []
    for sd, hf in unet_conversion_map_layer:
        if "resnets" in hf:
            for sd_res, hf_res in unet_conversion_map_resnet:
                unet_conversion_map.append((sd + sd_res, hf + hf_res))
        else:
            unet_conversion_map.append((sd, hf))
    for j in range(2):
        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
        sd_time_embed_prefix = f"time_embed.{j*2}."
        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
    for j in range(2):
        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
        sd_label_embed_prefix = f"label_emb.0.{j*2}."
        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
    unet_conversion_map.append(("out.0.", "conv_norm_out."))
    unet_conversion_map.append(("out.2.", "conv_out."))
    return unet_conversion_map
 SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
 }
--- a/invokeai/backend/lora/init.py
+++ b/invokeai/backend/lora/init.py
--- a/invokeai/backend/lora/full_layer.py
+++ b/invokeai/backend/lora/full_layer.py
@@ -0,0 +1,42 @@
 from typing import Dict, Optional
 import torch
 from invokeai.backend.lora.lora_layer_base import LoRALayerBase
 class FullLayer(LoRALayerBase):
    # weight: torch.Tensor
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.weight = values["diff"]
        if len(values.keys()) > 1:
            _keys = list(values.keys())
            _keys.remove("diff")
            raise NotImplementedError(f"Unexpected keys in lora diff layer: {_keys}")
        self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        return self.weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/ia3_layer.py
+++ b/invokeai/backend/lora/ia3_layer.py
@@ -0,0 +1,45 @@
 from typing import Dict, Optional
 import torch
 from invokeai.backend.lora.lora_layer_base import LoRALayerBase
 class IA3Layer(LoRALayerBase):
    # weight: torch.Tensor
    # on_input: torch.Tensor
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.weight = values["weight"]
        self.on_input = values["on_input"]
        self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        weight = self.weight
        if not self.on_input:
            weight = weight.reshape(-1, 1)
        assert orig_weight is not None
        return orig_weight * weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        model_size += self.on_input.nelement() * self.on_input.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ):
        super().to(device=device, dtype=dtype)
        self.weight = self.weight.to(device=device, dtype=dtype)
        self.on_input = self.on_input.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/loha_layer.py
+++ b/invokeai/backend/lora/loha_layer.py
@@ -0,0 +1,69 @@
 from typing import Dict, Optional
 import torch
 from invokeai.backend.lora.lora_layer_base import LoRALayerBase
 class LoHALayer(LoRALayerBase):
    # w1_a: torch.Tensor
    # w1_b: torch.Tensor
    # w2_a: torch.Tensor
    # w2_b: torch.Tensor
    # t1: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None
    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
        super().__init__(layer_key, values)
        self.w1_a = values["hada_w1_a"]
        self.w1_b = values["hada_w1_b"]
        self.w2_a = values["hada_w2_a"]
        self.w2_b = values["hada_w2_b"]
        if "hada_t1" in values:
            self.t1: Optional[torch.Tensor] = values["hada_t1"]
        else:
            self.t1 = None
        if "hada_t2" in values:
            self.t2: Optional[torch.Tensor] = values["hada_t2"]
        else:
            self.t2 = None
        self.rank = self.w1_b.shape[0]
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        if self.t1 is None:
            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
        else:
            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
            weight = rebuild1 * rebuild2
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.t1 is not None:
            self.t1 = self.t1.to(device=device, dtype=dtype)
        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lokr_layer.py
+++ b/invokeai/backend/lora/lokr_layer.py
@@ -0,0 +1,110 @@
 from typing import Dict, Optional
 import torch
 from invokeai.backend.lora.lora_layer_base import LoRALayerBase
 class LoKRLayer(LoRALayerBase):
    # w1: Optional[torch.Tensor] = None
    # w1_a: Optional[torch.Tensor] = None
    # w1_b: Optional[torch.Tensor] = None
    # w2: Optional[torch.Tensor] = None
    # w2_a: Optional[torch.Tensor] = None
    # w2_b: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        if "lokr_w1" in values:
            self.w1: Optional[torch.Tensor] = values["lokr_w1"]
            self.w1_a = None
            self.w1_b = None
        else:
            self.w1 = None
            self.w1_a = values["lokr_w1_a"]
            self.w1_b = values["lokr_w1_b"]
        if "lokr_w2" in values:
            self.w2: Optional[torch.Tensor] = values["lokr_w2"]
            self.w2_a = None
            self.w2_b = None
        else:
            self.w2 = None
            self.w2_a = values["lokr_w2_a"]
            self.w2_b = values["lokr_w2_b"]
        if "lokr_t2" in values:
            self.t2: Optional[torch.Tensor] = values["lokr_t2"]
        else:
            self.t2 = None
        if "lokr_w1_b" in values:
            self.rank = values["lokr_w1_b"].shape[0]
        elif "lokr_w2_b" in values:
            self.rank = values["lokr_w2_b"].shape[0]
        else:
            self.rank = None  # unscaled
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        w1: Optional[torch.Tensor] = self.w1
        if w1 is None:
            assert self.w1_a is not None
            assert self.w1_b is not None
            w1 = self.w1_a @ self.w1_b
        w2 = self.w2
        if w2 is None:
            if self.t2 is None:
                assert self.w2_a is not None
                assert self.w2_b is not None
                w2 = self.w2_a @ self.w2_b
            else:
                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
        if len(w2.shape) == 4:
            w1 = w1.unsqueeze(2).unsqueeze(2)
        w2 = w2.contiguous()
        assert w1 is not None
        assert w2 is not None
        weight = torch.kron(w1, w2)
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        if self.w1 is not None:
            self.w1 = self.w1.to(device=device, dtype=dtype)
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.w2 is not None:
            self.w2 = self.w2.to(device=device, dtype=dtype)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_layer.py
+++ b/invokeai/backend/lora/lora_layer.py
@@ -0,0 +1,81 @@
 from typing import Optional
 import torch
 from invokeai.backend.lora.lora_layer_base import LoRALayerBase
 class LoRALayer(LoRALayerBase):
    def __init__(
        self,
        layer_key: str,
        values: dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)
        self.up = values["lora_up.weight"]
        self.down = values["lora_down.weight"]
        self.mid: Optional[torch.Tensor] = values.get("lora_mid.weight", None)
        self.dora_scale: Optional[torch.Tensor] = values.get("dora_scale", None)
        self.rank = self.down.shape[0]
    def _apply_dora(self, orig_weight: torch.Tensor, lora_weight: torch.Tensor) -> torch.Tensor:
        """Apply DoRA to the weight matrix.
        This function is based roughly on the reference implementation in PEFT, but handles scaling in a slightly
        different way:
        https://github.com/huggingface/peft/blob/26726bf1ddee6ca75ed4e1bfd292094526707a78/src/peft/tuners/lora/layer.py#L421-L433
        """
        # Merge the original weight with the LoRA weight.
        merged_weight = orig_weight + lora_weight
        # Calculate the vector-wise L2 norm of the weight matrix across each column vector.
        weight_norm: torch.Tensor = torch.linalg.norm(merged_weight, dim=1)
        dora_factor = self.dora_scale / weight_norm
        new_weight = dora_factor * merged_weight
        # TODO(ryand): This is wasteful. We already have the final weight, but we calculate the diff, because that is
        # what the `get_weight()` API is expected to return. If we do refactor this, we'll have to give some thought to
        # how lora weight scaling should be applied - having the full weight diff makes this easy.
        weight_diff = new_weight - orig_weight
        return weight_diff
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        if self.mid is not None:
            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
        else:
            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
        if self.dora_scale is not None:
            assert orig_weight is not None
            weight = self._apply_dora(orig_weight, weight)
        return weight
    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.up, self.mid, self.down]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        super().to(device=device, dtype=dtype)
        self.up = self.up.to(device=device, dtype=dtype)
        self.down = self.down.to(device=device, dtype=dtype)
        if self.mid is not None:
            self.mid = self.mid.to(device=device, dtype=dtype)
        if self.dora_scale is not None:
            self.dora_scale = self.dora_scale.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_layer_base.py
+++ b/invokeai/backend/lora/lora_layer_base.py
@@ -0,0 +1,55 @@
 from typing import Dict, Optional
 import torch
 class LoRALayerBase:
    # rank: Optional[int]
    # alpha: Optional[float]
    # bias: Optional[torch.Tensor]
    # layer_key: str
    # @property
    # def scale(self):
    #    return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        if "alpha" in values:
            self.alpha = values["alpha"].item()
        else:
            self.alpha = None
        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
                values["bias_indices"],
                values["bias_values"],
                tuple(values["bias_size"]),
            )
        else:
            self.bias = None
        self.rank = None  # set in layer implementation
        self.layer_key = layer_key
    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
        raise NotImplementedError()
    def calc_size(self) -> int:
        model_size = 0
        for val in [self.bias]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        if self.bias is not None:
            self.bias = self.bias.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_model.py
+++ b/invokeai/backend/lora/lora_model.py
@@ -0,0 +1,111 @@
 from pathlib import Path
 from typing import Optional, Union
 import torch
 from invokeai.backend.lora.full_layer import FullLayer
 from invokeai.backend.lora.ia3_layer import IA3Layer
 from invokeai.backend.lora.loha_layer import LoHALayer
 from invokeai.backend.lora.lokr_layer import LoKRLayer
 from invokeai.backend.lora.lora_layer import LoRALayer
 from invokeai.backend.lora.sdxl_state_dict_utils import convert_sdxl_keys_to_diffusers_format
 from invokeai.backend.model_manager import BaseModelType
 from invokeai.backend.util.serialization import load_state_dict
 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
 class LoRAModelRaw(torch.nn.Module):
    def __init__(
        self,
        name: str,
        layers: dict[str, AnyLoRALayer],
    ):
        super().__init__()
        self._name = name
        self.layers = layers
    @property
    def name(self) -> str:
        return self._name
    def to(
        self,
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> None:
        # TODO: try revert if exception?
        for _key, layer in self.layers.items():
            layer.to(device=device, dtype=dtype)
    def calc_size(self) -> int:
        model_size = 0
        for _, layer in self.layers.items():
            model_size += layer.calc_size()
        return model_size
    @classmethod
    def from_checkpoint(
        cls,
        file_path: Union[str, Path],
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
        base_model: Optional[BaseModelType] = None,
    ):
        device = device or torch.device("cpu")
        dtype = dtype or torch.float32
        file_path = Path(file_path)
        model_name = file_path.stem
        sd = load_state_dict(file_path, device=str(device))
        state_dict = cls._group_state(sd)
        if base_model == BaseModelType.StableDiffusionXL:
            state_dict = convert_sdxl_keys_to_diffusers_format(state_dict)
        layers: dict[str, AnyLoRALayer] = {}
        for layer_key, values in state_dict.items():
            # lora and locon
            if "lora_down.weight" in values:
                layer: AnyLoRALayer = LoRALayer(layer_key, values)
            # loha
            elif "hada_w1_b" in values:
                layer = LoHALayer(layer_key, values)
            # lokr
            elif "lokr_w1_b" in values or "lokr_w1" in values:
                layer = LoKRLayer(layer_key, values)
            # diff
            elif "diff" in values:
                layer = FullLayer(layer_key, values)
            # ia3
            elif "weight" in values and "on_input" in values:
                layer = IA3Layer(layer_key, values)
            else:
                raise ValueError(f"Unknown lora layer module in {model_name}: {layer_key}: {list(values.keys())}")
            # lower memory consumption by removing already parsed layer values
            state_dict[layer_key].clear()
            layer.to(device=device, dtype=dtype)
            layers[layer_key] = layer
        return cls(name=model_name, layers=layers)
    @staticmethod
    def _group_state(state_dict: dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
        state_dict_groupped: dict[str, dict[str, torch.Tensor]] = {}
        for key, value in state_dict.items():
            stem, leaf = key.split(".", 1)
            if stem not in state_dict_groupped:
                state_dict_groupped[stem] = {}
            state_dict_groupped[stem][leaf] = value
        return state_dict_groupped
--- a/invokeai/backend/lora/lora_model_patcher.py
+++ b/invokeai/backend/lora/lora_model_patcher.py
@@ -0,0 +1,137 @@
 from contextlib import contextmanager
 from typing import Iterator, Tuple
 import torch
 from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
 from transformers import CLIPTextModel
 from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.model_manager.any_model_type import AnyModel
 class LoraModelPatcher:
    @staticmethod
    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
        assert "." not in lora_key
        if not lora_key.startswith(prefix):
            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
        module = model
        module_key = ""
        key_parts = lora_key[len(prefix) :].split("_")
        submodule_name = key_parts.pop(0)
        while len(key_parts) > 0:
            try:
                module = module.get_submodule(submodule_name)
                module_key += "." + submodule_name
                submodule_name = key_parts.pop(0)
            except Exception:
                submodule_name += "_" + key_parts.pop(0)
        module = module.get_submodule(submodule_name)
        module_key = (module_key + "." + submodule_name).lstrip(".")
        return (module_key, module)
    @classmethod
    @contextmanager
    def apply_lora_unet(
        cls,
        unet: UNet2DConditionModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ):
        with cls.apply_lora(unet, loras, "lora_unet_"):
            yield
    @classmethod
    @contextmanager
    def apply_lora_text_encoder(
        cls,
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ):
        with cls.apply_lora(text_encoder, loras, "lora_te_"):
            yield
    @classmethod
    @contextmanager
    def apply_sdxl_lora_text_encoder(
        cls,
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ):
        with cls.apply_lora(text_encoder, loras, "lora_te1_"):
            yield
    @classmethod
    @contextmanager
    def apply_sdxl_lora_text_encoder2(
        cls,
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ):
        with cls.apply_lora(text_encoder, loras, "lora_te2_"):
            yield
    @classmethod
    @contextmanager
    def apply_lora(
        cls,
        model: AnyModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        prefix: str,
    ):
        original_weights = {}
        try:
            with torch.no_grad():
                for lora, lora_weight in loras:
                    # assert lora.device.type == "cpu"
                    for layer_key, layer in lora.layers.items():
                        if not layer_key.startswith(prefix):
                            continue
                        # TODO(ryand): A non-negligible amount of time is currently spent resolving LoRA keys. This
                        # should be improved in the following ways:
                        # 1. The key mapping could be more-efficiently pre-computed. This would save time every time a
                        #    LoRA model is applied.
                        # 2. From an API perspective, there's no reason that the `LoraModelPatcher` should be aware of
                        #    the intricacies of Stable Diffusion key resolution. It should just expect the input LoRA
                        #    weights to have valid keys.
                        assert isinstance(model, torch.nn.Module)
                        module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
                        # All of the LoRA weight calculations will be done on the same device as the module weight.
                        # (Performance will be best if this is a CUDA device.)
                        device = module.weight.device
                        dtype = module.weight.dtype
                        if module_key not in original_weights:
                            original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
                        layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                        # same thing in a single call to '.to(...)'.
                        layer.to(device=device)
                        layer.to(dtype=torch.float32)
                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
                        layer.to(device=torch.device("cpu"))
                        if module.weight.shape != layer_weight.shape:
                            layer_weight = layer_weight.reshape(module.weight.shape)
                        module.weight += layer_weight.to(dtype=dtype)
            yield  # wait for context manager exit
        finally:
            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
            with torch.no_grad():
                for module_key, weight in original_weights.items():
                    model.get_submodule(module_key).weight.copy_(weight)
--- a/invokeai/backend/lora/sdxl_state_dict_utils.py
+++ b/invokeai/backend/lora/sdxl_state_dict_utils.py
@@ -0,0 +1,157 @@
 import bisect
 from typing import TypeVar
 def make_sdxl_unet_conversion_map() -> list[tuple[str, str]]:
    """Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format.
    Ported from:
    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
    """
    unet_conversion_map_layer: list[tuple[str, str]] = []
    for i in range(3):  # num_blocks is 3 in sdxl
        # loop over downblocks/upblocks
        for j in range(2):
            # loop over resnets/attentions for downblocks
            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
            if i < 3:
                # no attention layers in down_blocks.3
                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
        for j in range(3):
            # loop over resnets/attentions for upblocks
            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
            # if i > 0: commentout for sdxl
            # no attention layers in up_blocks.0
            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
        if i < 3:
            # no downsample in down_blocks.3
            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
            # no upsample in up_blocks.3
            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
    hf_mid_atn_prefix = "mid_block.attentions.0."
    sd_mid_atn_prefix = "middle_block.1."
    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
    for j in range(2):
        hf_mid_res_prefix = f"mid_block.resnets.{j}."
        sd_mid_res_prefix = f"middle_block.{2*j}."
        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
    unet_conversion_map_resnet = [
        # (stable-diffusion, HF Diffusers)
        ("in_layers.0.", "norm1."),
        ("in_layers.2.", "conv1."),
        ("out_layers.0.", "norm2."),
        ("out_layers.3.", "conv2."),
        ("emb_layers.1.", "time_emb_proj."),
        ("skip_connection.", "conv_shortcut."),
    ]
    unet_conversion_map: list[tuple[str, str]] = []
    for sd, hf in unet_conversion_map_layer:
        if "resnets" in hf:
            for sd_res, hf_res in unet_conversion_map_resnet:
                unet_conversion_map.append((sd + sd_res, hf + hf_res))
        else:
            unet_conversion_map.append((sd, hf))
    for j in range(2):
        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
        sd_time_embed_prefix = f"time_embed.{j*2}."
        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
    for j in range(2):
        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
        sd_label_embed_prefix = f"label_emb.0.{j*2}."
        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
    unet_conversion_map.append(("out.0.", "conv_norm_out."))
    unet_conversion_map.append(("out.2.", "conv_out."))
    return unet_conversion_map
 SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
 }
 T = TypeVar("T")
 def convert_sdxl_keys_to_diffusers_format(state_dict: dict[str, T]) -> dict[str, T]:
    """Convert the keys of an SDXL LoRA state_dict to diffusers format.
    The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
    diffusers format, then this function will have no effect.
    This function is adapted from:
    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
    Args:
        state_dict (dict[str, Tensor]): The SDXL LoRA state_dict.
    Raises:
        ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
    Returns:
        dict[str, Tensor]: The diffusers-format state_dict.
    """
    converted_count = 0  # The number of Stability AI keys converted to diffusers format.
    not_converted_count = 0  # The number of keys that were not converted.
    # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
    # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
    # `input_blocks_4_1_proj_in`.
    stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
    stability_unet_keys.sort()
    new_state_dict: dict[str, T] = {}
    for full_key, value in state_dict.items():
        if full_key.startswith("lora_unet_"):
            search_key = full_key.replace("lora_unet_", "")
            # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
            position = bisect.bisect_right(stability_unet_keys, search_key)
            map_key = stability_unet_keys[position - 1]
            # Now, check if the map_key *actually* matches the search_key.
            if search_key.startswith(map_key):
                new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
                new_state_dict[new_key] = value
                converted_count += 1
            else:
                new_state_dict[full_key] = value
                not_converted_count += 1
        elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
            # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
            new_state_dict[full_key] = value
            continue
        else:
            raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
    if converted_count > 0 and not_converted_count > 0:
        raise ValueError(
            f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
            f" not_converted={not_converted_count}"
        )
    return new_state_dict
--- a/invokeai/backend/model_manager/init.py
+++ b/invokeai/backend/model_manager/init.py
@@ -1,7 +1,6 @@
 """Re-export frequently-used symbols from the Model Manager backend."""
 from .config import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    InvalidModelConfigException,
@@ -18,7 +17,6 @@ from .probe import ModelProbe
 from .search import ModelSearch
 __all__ = [
    "AnyModel",
    "AnyModelConfig",
    "BaseModelType",
    "ModelRepoVariant",
--- a/invokeai/backend/model_manager/any_model_type.py
+++ b/invokeai/backend/model_manager/any_model_type.py
@@ -0,0 +1,12 @@
 from typing import Union
 import torch
 from diffusers.models.modeling_utils import ModelMixin
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw
 # ModelMixin is the base class for all diffusers and transformers models
 AnyModel = Union[ModelMixin, torch.nn.Module, IPAdapter, LoRAModelRaw, TextualInversionModelRaw, IAIOnnxRuntimeModel]
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -24,20 +24,12 @@ import time
 from enum import Enum
 from typing import Literal, Optional, Type, TypeAlias, Union
 import torch
 from diffusers.models.modeling_utils import ModelMixin
 from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag, TypeAdapter
 from typing_extensions import Annotated, Any, Dict
 from invokeai.app.invocations.constants import SCHEDULER_NAME_VALUES
 from invokeai.app.util.misc import uuid_string
 from ..raw_model import RawModel
 # ModelMixin is the base class for all diffusers and transformers models
 # RawModel is the InvokeAI wrapper class for ip_adapters, loras, textual_inversion and onnx runtime
 AnyModel = Union[ModelMixin, RawModel, torch.nn.Module]
 class InvalidModelConfigException(Exception):
    """Exception for when config parser doesn't recognized this combination of model type and format."""
--- a/invokeai/backend/model_manager/convert_ckpt_to_diffusers.py
+++ b/invokeai/backend/model_manager/convert_ckpt_to_diffusers.py
@@ -15,7 +15,7 @@ from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
 )
 from omegaconf import DictConfig
-from . import AnyModel
+from invokeai.backend.model_manager.any_model_type import AnyModel
 def convert_ldm_vae_to_diffusers(
--- a/invokeai/backend/model_manager/load/load_base.py
+++ b/invokeai/backend/model_manager/load/load_base.py
@@ -10,8 +10,8 @@ from pathlib import Path
 from typing import Any, Optional
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import (
    AnyModel,
    AnyModelConfig,
    SubModelType,
 )
--- a/invokeai/backend/model_manager/load/load_default.py
+++ b/invokeai/backend/model_manager/load/load_default.py
@@ -7,11 +7,11 @@ from typing import Optional
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    InvalidModelConfigException,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import DiffusersConfigBase, ModelType
 from invokeai.backend.model_manager.load.convert_cache import ModelConvertCacheBase
 from invokeai.backend.model_manager.load.load_base import LoadedModel, ModelLoaderBase
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
@@ -14,7 +14,8 @@ from typing import Dict, Generic, Optional, TypeVar
 import torch
-from invokeai.backend.model_manager.config import AnyModel, SubModelType
+from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import SubModelType
 class ModelLockerBase(ABC):
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -28,7 +28,8 @@ from typing import Dict, List, Optional
 import torch
-from invokeai.backend.model_manager import AnyModel, SubModelType
+from invokeai.backend.model_manager import SubModelType
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
 from invokeai.backend.util.devices import choose_torch_device
 from invokeai.backend.util.logging import InvokeAILogger
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -4,7 +4,7 @@ Base class and implementation of a class that moves models in and out of VRAM.
 import torch
-from invokeai.backend.model_manager import AnyModel
+from invokeai.backend.model_manager.any_model_type import AnyModel
 from .model_cache_base import CacheRecord, ModelCacheBase, ModelLockerBase
--- a/invokeai/backend/model_manager/load/model_loaders/controlnet.py
+++ b/invokeai/backend/model_manager/load/model_loaders/controlnet.py
@@ -5,12 +5,12 @@ from pathlib import Path
 from typing import Optional
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    ModelFormat,
    ModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import CheckpointConfigBase
 from invokeai.backend.model_manager.convert_ckpt_to_diffusers import convert_controlnet_to_diffusers
--- a/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
+++ b/invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
@@ -9,7 +9,6 @@ from diffusers.configuration_utils import ConfigMixin
 from diffusers.models.modeling_utils import ModelMixin
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    InvalidModelConfigException,
@@ -17,6 +16,7 @@ from invokeai.backend.model_manager import (
    ModelType,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import DiffusersConfigBase
 from .. import ModelLoader, ModelLoaderRegistry
--- a/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py
+++ b/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py
@@ -7,9 +7,9 @@ from typing import Optional
 import torch
 from invokeai.backend.ip_adapter.ip_adapter import build_ip_adapter
-from invokeai.backend.model_manager import AnyModel, AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType
+from invokeai.backend.model_manager import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load import ModelLoader, ModelLoaderRegistry
 from invokeai.backend.raw_model import RawModel
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.IPAdapter, format=ModelFormat.InvokeAI)
@@ -25,7 +25,7 @@ class IPAdapterInvokeAILoader(ModelLoader):
        if submodel_type is not None:
            raise ValueError("There are no submodels in an IP-Adapter model.")
        model_path = Path(config.path)
-        model: RawModel = build_ip_adapter(
+        model = build_ip_adapter(
            ip_adapter_ckpt_path=model_path,
            device=torch.device("cpu"),
            dtype=self._torch_dtype,
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -6,15 +6,15 @@ from pathlib import Path
 from typing import Optional
 from invokeai.app.services.config import InvokeAIAppConfig
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    ModelFormat,
    ModelType,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load.convert_cache import ModelConvertCacheBase
 from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
--- a/invokeai/backend/model_manager/load/model_loaders/onnx.py
+++ b/invokeai/backend/model_manager/load/model_loaders/onnx.py
@@ -6,13 +6,13 @@ from pathlib import Path
 from typing import Optional
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    ModelFormat,
    ModelType,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from .. import ModelLoaderRegistry
 from .generic_diffusers import GenericDiffusersLoader
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -5,7 +5,6 @@ from pathlib import Path
 from typing import Optional
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    ModelFormat,
@@ -13,6 +12,7 @@ from invokeai.backend.model_manager import (
    SchedulerPredictionType,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import (
    CheckpointConfigBase,
    DiffusersConfigBase,
--- a/invokeai/backend/model_manager/load/model_loaders/textual_inversion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/textual_inversion.py
@@ -5,13 +5,13 @@ from pathlib import Path
 from typing import Optional
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
    BaseModelType,
    ModelFormat,
    ModelType,
    SubModelType,
 )
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw
 from .. import ModelLoader, ModelLoaderRegistry
--- a/invokeai/backend/model_manager/load/model_loaders/vae.py
+++ b/invokeai/backend/model_manager/load/model_loaders/vae.py
@@ -14,7 +14,8 @@ from invokeai.backend.model_manager import (
    ModelFormat,
    ModelType,
 )
-from invokeai.backend.model_manager.config import AnyModel, CheckpointConfigBase
+from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.config import CheckpointConfigBase
 from invokeai.backend.model_manager.convert_ckpt_to_diffusers import convert_ldm_vae_to_diffusers
 from .. import ModelLoaderRegistry
--- a/invokeai/backend/model_manager/load/model_util.py
+++ b/invokeai/backend/model_manager/load/model_util.py
@@ -8,7 +8,7 @@ from typing import Optional
 import torch
 from diffusers import DiffusionPipeline
-from invokeai.backend.model_manager.config import AnyModel
+from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
--- a/invokeai/backend/model_manager/load/optimizations.py
+++ b/invokeai/backend/model_manager/load/optimizations.py
@@ -17,7 +17,7 @@ def skip_torch_weight_init() -> Generator[None, None, None]:
    completely unnecessary if the intent is to load checkpoint weights from disk for the layer. This context manager
    monkey-patches common torch layers to skip the weight initialization step.
    """
-    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding]
+    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding, torch.nn.LayerNorm]
    saved_functions = [hasattr(m, "reset_parameters") and m.reset_parameters for m in torch_modules]
    try:
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -13,157 +13,14 @@ from diffusers import OnnxRuntimeModel, UNet2DConditionModel
 from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
 from invokeai.app.shared.models import FreeUConfig
-from invokeai.backend.model_manager import AnyModel
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
 from .lora import LoRAModelRaw
 from .textual_inversion import TextualInversionManager, TextualInversionModelRaw
 """
 loras = [
    (lora_model1, 0.7),
    (lora_model2, 0.4),
 ]
 with LoRAHelper.apply_lora_unet(unet, loras):
    # unet with applied loras
 # unmodified unet
 """
 # TODO: rename smth like ModelPatcher and add TI method?
 class ModelPatcher:
    @staticmethod
    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
        assert "." not in lora_key
        if not lora_key.startswith(prefix):
            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
        module = model
        module_key = ""
        key_parts = lora_key[len(prefix) :].split("_")
        submodule_name = key_parts.pop(0)
        while len(key_parts) > 0:
            try:
                module = module.get_submodule(submodule_name)
                module_key += "." + submodule_name
                submodule_name = key_parts.pop(0)
            except Exception:
                submodule_name += "_" + key_parts.pop(0)
        module = module.get_submodule(submodule_name)
        module_key = (module_key + "." + submodule_name).lstrip(".")
        return (module_key, module)
    @classmethod
    @contextmanager
    def apply_lora_unet(
        cls,
        unet: UNet2DConditionModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ) -> None:
        with cls.apply_lora(unet, loras, "lora_unet_"):
            yield
    @classmethod
    @contextmanager
    def apply_lora_text_encoder(
        cls,
        text_encoder: CLIPTextModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
    ) -> None:
        with cls.apply_lora(text_encoder, loras, "lora_te_"):
            yield
    @classmethod
    @contextmanager
    def apply_sdxl_lora_text_encoder(
        cls,
        text_encoder: CLIPTextModel,
        loras: List[Tuple[LoRAModelRaw, float]],
    ) -> None:
        with cls.apply_lora(text_encoder, loras, "lora_te1_"):
            yield
    @classmethod
    @contextmanager
    def apply_sdxl_lora_text_encoder2(
        cls,
        text_encoder: CLIPTextModel,
        loras: List[Tuple[LoRAModelRaw, float]],
    ) -> None:
        with cls.apply_lora(text_encoder, loras, "lora_te2_"):
            yield
    @classmethod
    @contextmanager
    def apply_lora(
        cls,
        model: AnyModel,
        loras: Iterator[Tuple[LoRAModelRaw, float]],
        prefix: str,
    ) -> None:
        original_weights = {}
        try:
            with torch.no_grad():
                for lora, lora_weight in loras:
                    # assert lora.device.type == "cpu"
                    for layer_key, layer in lora.layers.items():
                        if not layer_key.startswith(prefix):
                            continue
                        # TODO(ryand): A non-negligible amount of time is currently spent resolving LoRA keys. This
                        # should be improved in the following ways:
                        # 1. The key mapping could be more-efficiently pre-computed. This would save time every time a
                        #    LoRA model is applied.
                        # 2. From an API perspective, there's no reason that the `ModelPatcher` should be aware of the
                        #    intricacies of Stable Diffusion key resolution. It should just expect the input LoRA
                        #    weights to have valid keys.
                        assert isinstance(model, torch.nn.Module)
                        module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
                        # All of the LoRA weight calculations will be done on the same device as the module weight.
                        # (Performance will be best if this is a CUDA device.)
                        device = module.weight.device
                        dtype = module.weight.dtype
                        if module_key not in original_weights:
                            original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
                        layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                        # same thing in a single call to '.to(...)'.
                        layer.to(device=device)
                        layer.to(dtype=torch.float32)
                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
                        layer.to(device=torch.device("cpu"))
                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                        if module.weight.shape != layer_weight.shape:
                            # TODO: debug on lycoris
                            assert hasattr(layer_weight, "reshape")
                            layer_weight = layer_weight.reshape(module.weight.shape)
                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                        module.weight += layer_weight.to(dtype=dtype)
            yield  # wait for context manager exit
        finally:
            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
            with torch.no_grad():
                for module_key, weight in original_weights.items():
                    model.get_submodule(module_key).weight.copy_(weight)
    @classmethod
    @contextmanager
    def apply_ti(
--- a/invokeai/backend/onnx/onnx_runtime.py
+++ b/invokeai/backend/onnx/onnx_runtime.py
@@ -6,17 +6,16 @@ from typing import Any, List, Optional, Tuple, Union
 import numpy as np
 import onnx
 import torch
 from onnx import numpy_helper
 from onnxruntime import InferenceSession, SessionOptions, get_available_providers
 from ..raw_model import RawModel
 ONNX_WEIGHTS_NAME = "model.onnx"
 # NOTE FROM LS: This was copied from Stalker's original implementation.
 # I have not yet gone through and fixed all the type hints
-class IAIOnnxRuntimeModel(RawModel):
+class IAIOnnxRuntimeModel(torch.nn.Module):
    class _tensor_access:
        def __init__(self, model):  # type: ignore
            self.model = model
@@ -103,7 +102,7 @@ class IAIOnnxRuntimeModel(RawModel):
        self.proto = onnx.load(model_path, load_external_data=False)
        """
-
+        super().__init__()
        self.proto = onnx.load(model_path, load_external_data=True)
        # self.data = dict()
        # for tensor in self.proto.graph.initializer:
--- a/invokeai/backend/raw_model.py
+++ b/invokeai/backend/raw_model.py
@@ -1,15 +0,0 @@
 """Base class for 'Raw' models.
 The RawModel class is the base class of LoRAModelRaw and TextualInversionModelRaw,
 and is used for type checking of calls to the model patcher. Its main purpose
 is to avoid a circular import issues when lora.py tries to import BaseModelType
 from invokeai.backend.model_manager.config, and the latter tries to import LoRAModelRaw
 from lora.py.
 The term 'raw' was introduced to describe a wrapper around a torch.nn.Module
 that adds additional methods and attributes.
 """
 class RawModel:
    """Base class for 'Raw' model wrappers."""
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -9,10 +9,8 @@ from safetensors.torch import load_file
 from transformers import CLIPTokenizer
 from typing_extensions import Self
 from .raw_model import RawModel
-
+class TextualInversionModelRaw(torch.nn.Module):
 class TextualInversionModelRaw(RawModel):
    embedding: torch.Tensor  # [n, 768]|[n, 1280]
    embedding_2: Optional[torch.Tensor] = None  # [n, 768]|[n, 1280]   - for SDXL models
--- a/invokeai/backend/util/serialization.py
+++ b/invokeai/backend/util/serialization.py
@@ -0,0 +1,37 @@
 from pathlib import Path
 from typing import Any, Optional, Union
 import torch
 from safetensors.torch import load_file
 def state_dict_to(
    state_dict: dict[str, torch.Tensor], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None
 ) -> dict[str, torch.Tensor]:
    new_state_dict: dict[str, torch.Tensor] = {}
    for k, v in state_dict.items():
        new_state_dict[k] = v.to(device=device, dtype=dtype, non_blocking=True)
    return new_state_dict
 def load_state_dict(file_path: Union[str, Path], device: str = "cpu") -> Any:
    """Load a state_dict from a file that may be in either PyTorch or safetensors format. The file format is inferred
    from the file extension.
    """
    file_path = Path(file_path)
    if file_path.suffix == ".safetensors":
        state_dict = load_file(
            file_path,
            device=device,
        )
    else:
        # weights_only=True is used to address a security vulnerability that allows arbitrary code execution.
        # This option was first introduced in https://github.com/pytorch/pytorch/pull/86812.
        #
        # mmap=True is used to both reduce memory usage and speed up loading. This setting causes torch.load() to more
        # closely mirror the behaviour of safetensors.torch.load_file().  This option was first introduced in
        # https://github.com/pytorch/pytorch/pull/102549. The discussion on that PR provides helpful context.
        state_dict = torch.load(file_path, map_location=device, weights_only=True, mmap=True)
    return state_dict
--- a/tests/backend/model_manager/test_lora.py
+++ b/tests/backend/model_manager/test_lora.py
@@ -5,8 +5,9 @@
 import pytest
 import torch
-from invokeai.backend.lora import LoRALayer, LoRAModelRaw
+from invokeai.backend.lora.lora_layer import LoRALayer
-from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher
@pytest.mark.parametrize(
@@ -44,7 +45,7 @@ def test_apply_lora(device):
    orig_linear_weight = model["linear_layer_1"].weight.data.detach().clone()
    expected_patched_linear_weight = orig_linear_weight + (lora_dim * lora_weight)
-    with ModelPatcher.apply_lora(model, [(lora, lora_weight)], prefix=""):
+    with LoraModelPatcher.apply_lora(model, [(lora, lora_weight)], prefix=""):
        # After patching, all LoRA layer weights should have been moved back to the cpu.
        assert lora_layers["linear_layer_1"].up.device.type == "cpu"
        assert lora_layers["linear_layer_1"].down.device.type == "cpu"
@@ -86,7 +87,7 @@ def test_apply_lora_change_device():
    orig_linear_weight = model["linear_layer_1"].weight.data.detach().clone()
-    with ModelPatcher.apply_lora(model, [(lora, 0.5)], prefix=""):
+    with LoraModelPatcher.apply_lora(model, [(lora, 0.5)], prefix=""):
        # After patching, all LoRA layer weights should have been moved back to the cpu.
        assert lora_layers["linear_layer_1"].up.device.type == "cpu"
        assert lora_layers["linear_layer_1"].down.device.type == "cpu"
Author	SHA1	Message	Date
Ryan Dick	6bfb4927c7	WIP - not working	2024-04-08 10:06:11 -04:00
Ryan Dick	c15e9e23ca	Tidy types in LoraModelPatcher.	2024-04-05 16:05:31 -04:00
Ryan Dick	e1aa1ed6af	Split LoraModelPatcher out from ModelPatcher monolith.	2024-04-05 15:57:46 -04:00
Ryan Dick	4b68050c9b	Minor cleanup of LoRAModelRaw.	2024-04-05 15:30:01 -04:00
Ryan Dick	9e68a5c851	Use load_state_dict() util in LoRAModelRaw.	2024-04-05 15:20:07 -04:00
Ryan Dick	61a672cd81	Tidy types in sdxl_state_dict_utils.py.	2024-04-05 15:16:36 -04:00
Ryan Dick	c27a2e59da	Copy convert_sdxl_keys_to_diffusers_format() to sdxl_state_dict_utils.py.	2024-04-05 15:12:22 -04:00
Ryan Dick	4e3f42e388	Split lora.py monolith into separate files.	2024-04-05 15:04:14 -04:00
Ryan Dick	5d41157404	Add LayerNorm to list of modules optimized by skip_torch_weight_init()	2024-04-05 14:39:57 -04:00
Ryan Dick	8db4ba252a	Add util function for loading state_dicts from disk.	2024-04-05 14:39:57 -04:00
Ryan Dick	6d9fb207f0	Remove RawModel - it was just creating a weird layer of indirection in the AnyModel type without adding any value.	2024-04-04 18:07:13 -04:00