WIP - not working

Tidy types in LoraModelPatcher.
Split LoraModelPatcher out from ModelPatcher monolith.
2026-01-15 16:18:06 -05:00 · 2024-04-08 10:06:11 -04:00 · 2024-04-05 16:05:31 -04:00 · 2024-04-05 15:57:46 -04:00 · 2024-04-05 15:30:01 -04:00 · 2024-04-05 15:20:07 -04:00
19 changed files with 860 additions and 776 deletions
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -9,7 +9,8 @@ from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
 from invokeai.app.invocations.primitives import ConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.ti_utils import generate_ti_list
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
+from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
    BasicConditioningInfo,
@@ -80,7 +81,7 @@ class CompelInvocation(BaseInvocation):
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
+            LoraModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, self.clip.skipped_layers),
        ):
@@ -181,7 +182,7 @@ class SDXLPromptInvocationBase:
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
+            LoraModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, clip_field.skipped_layers),
        ):
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -48,7 +48,8 @@ from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
+from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher
 from invokeai.backend.model_manager import BaseModelType, LoadedModel
 from invokeai.backend.model_patcher import ModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
@@ -730,7 +731,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                set_seamless(unet_info.model, self.unet.seamless_axes),  # FIXME
                unet_info as unet,
                # Apply the LoRA after unet has been moved to its target device for faster patching.
-                ModelPatcher.apply_lora_unet(unet, _lora_loader()),
+                LoraModelPatcher.apply_lora_unet(unet, _lora_loader()),
            ):
                assert isinstance(unet, UNet2DConditionModel)
                latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -1,620 +0,0 @@
-# Copyright (c) 2024 The InvokeAI Development team
-"""LoRA model support."""
-
-import bisect
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
-
-import torch
-from safetensors.torch import load_file
-from typing_extensions import Self
-
-from invokeai.backend.model_manager import BaseModelType
-
-
-class LoRALayerBase:
-    # rank: Optional[int]
-    # alpha: Optional[float]
-    # bias: Optional[torch.Tensor]
-    # layer_key: str
-
-    # @property
-    # def scale(self):
-    #    return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        if "alpha" in values:
-            self.alpha = values["alpha"].item()
-        else:
-            self.alpha = None
-
-        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
-            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
-                values["bias_indices"],
-                values["bias_values"],
-                tuple(values["bias_size"]),
-            )
-
-        else:
-            self.bias = None
-
-        self.rank = None  # set in layer implementation
-        self.layer_key = layer_key
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        raise NotImplementedError()
-
-    def calc_size(self) -> int:
-        model_size = 0
-        for val in [self.bias]:
-            if val is not None:
-                model_size += val.nelement() * val.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype)
-
-
-# TODO: find and debug lora/locon with bias
-class LoRALayer(LoRALayerBase):
-    # up: torch.Tensor
-    # mid: Optional[torch.Tensor]
-    # down: torch.Tensor
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        self.up = values["lora_up.weight"]
-        self.down = values["lora_down.weight"]
-        if "lora_mid.weight" in values:
-            self.mid: Optional[torch.Tensor] = values["lora_mid.weight"]
-        else:
-            self.mid = None
-
-        self.rank = self.down.shape[0]
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        if self.mid is not None:
-            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
-            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
-            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
-        else:
-            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        for val in [self.up, self.mid, self.down]:
-            if val is not None:
-                model_size += val.nelement() * val.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.up = self.up.to(device=device, dtype=dtype)
-        self.down = self.down.to(device=device, dtype=dtype)
-
-        if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype)
-
-
-class LoHALayer(LoRALayerBase):
-    # w1_a: torch.Tensor
-    # w1_b: torch.Tensor
-    # w2_a: torch.Tensor
-    # w2_b: torch.Tensor
-    # t1: Optional[torch.Tensor] = None
-    # t2: Optional[torch.Tensor] = None
-
-    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
-        super().__init__(layer_key, values)
-
-        self.w1_a = values["hada_w1_a"]
-        self.w1_b = values["hada_w1_b"]
-        self.w2_a = values["hada_w2_a"]
-        self.w2_b = values["hada_w2_b"]
-
-        if "hada_t1" in values:
-            self.t1: Optional[torch.Tensor] = values["hada_t1"]
-        else:
-            self.t1 = None
-
-        if "hada_t2" in values:
-            self.t2: Optional[torch.Tensor] = values["hada_t2"]
-        else:
-            self.t2 = None
-
-        self.rank = self.w1_b.shape[0]
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        if self.t1 is None:
-            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
-
-        else:
-            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
-            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
-            weight = rebuild1 * rebuild2
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
-            if val is not None:
-                model_size += val.nelement() * val.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
-        if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype)
-
-        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
-        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
-
-
-class LoKRLayer(LoRALayerBase):
-    # w1: Optional[torch.Tensor] = None
-    # w1_a: Optional[torch.Tensor] = None
-    # w1_b: Optional[torch.Tensor] = None
-    # w2: Optional[torch.Tensor] = None
-    # w2_a: Optional[torch.Tensor] = None
-    # w2_b: Optional[torch.Tensor] = None
-    # t2: Optional[torch.Tensor] = None
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        if "lokr_w1" in values:
-            self.w1: Optional[torch.Tensor] = values["lokr_w1"]
-            self.w1_a = None
-            self.w1_b = None
-        else:
-            self.w1 = None
-            self.w1_a = values["lokr_w1_a"]
-            self.w1_b = values["lokr_w1_b"]
-
-        if "lokr_w2" in values:
-            self.w2: Optional[torch.Tensor] = values["lokr_w2"]
-            self.w2_a = None
-            self.w2_b = None
-        else:
-            self.w2 = None
-            self.w2_a = values["lokr_w2_a"]
-            self.w2_b = values["lokr_w2_b"]
-
-        if "lokr_t2" in values:
-            self.t2: Optional[torch.Tensor] = values["lokr_t2"]
-        else:
-            self.t2 = None
-
-        if "lokr_w1_b" in values:
-            self.rank = values["lokr_w1_b"].shape[0]
-        elif "lokr_w2_b" in values:
-            self.rank = values["lokr_w2_b"].shape[0]
-        else:
-            self.rank = None  # unscaled
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        w1: Optional[torch.Tensor] = self.w1
-        if w1 is None:
-            assert self.w1_a is not None
-            assert self.w1_b is not None
-            w1 = self.w1_a @ self.w1_b
-
-        w2 = self.w2
-        if w2 is None:
-            if self.t2 is None:
-                assert self.w2_a is not None
-                assert self.w2_b is not None
-                w2 = self.w2_a @ self.w2_b
-            else:
-                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
-
-        if len(w2.shape) == 4:
-            w1 = w1.unsqueeze(2).unsqueeze(2)
-        w2 = w2.contiguous()
-        assert w1 is not None
-        assert w2 is not None
-        weight = torch.kron(w1, w2)
-
-        return weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
-            if val is not None:
-                model_size += val.nelement() * val.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        super().to(device=device, dtype=dtype)
-
-        if self.w1 is not None:
-            self.w1 = self.w1.to(device=device, dtype=dtype)
-        else:
-            assert self.w1_a is not None
-            assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
-
-        if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype)
-        else:
-            assert self.w2_a is not None
-            assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
-
-        if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype)
-
-
-class FullLayer(LoRALayerBase):
-    # weight: torch.Tensor
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        self.weight = values["diff"]
-
-        if len(values.keys()) > 1:
-            _keys = list(values.keys())
-            _keys.remove("diff")
-            raise NotImplementedError(f"Unexpected keys in lora diff layer: {_keys}")
-
-        self.rank = None  # unscaled
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        return self.weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += self.weight.nelement() * self.weight.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
-
-
-class IA3Layer(LoRALayerBase):
-    # weight: torch.Tensor
-    # on_input: torch.Tensor
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        self.weight = values["weight"]
-        self.on_input = values["on_input"]
-
-        self.rank = None  # unscaled
-
-    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
-        weight = self.weight
-        if not self.on_input:
-            weight = weight.reshape(-1, 1)
-        assert orig_weight is not None
-        return orig_weight * weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += self.weight.nelement() * self.weight.element_size()
-        model_size += self.on_input.nelement() * self.on_input.element_size()
-        return model_size
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ):
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
-        self.on_input = self.on_input.to(device=device, dtype=dtype)
-
-
-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
-
-
-class LoRAModelRaw(torch.nn.Module):
-    def __init__(
-        self,
-        name: str,
-        layers: Dict[str, AnyLoRALayer],
-    ):
-        super().__init__()
-        self._name = name
-        self.layers = layers
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-    ) -> None:
-        # TODO: try revert if exception?
-        for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype)
-
-    def calc_size(self) -> int:
-        model_size = 0
-        for _, layer in self.layers.items():
-            model_size += layer.calc_size()
-        return model_size
-
-    @classmethod
-    def _convert_sdxl_keys_to_diffusers_format(cls, state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
-        """Convert the keys of an SDXL LoRA state_dict to diffusers format.
-
-        The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
-        diffusers format, then this function will have no effect.
-
-        This function is adapted from:
-        https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
-
-        Args:
-            state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
-
-        Raises:
-            ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
-
-        Returns:
-            Dict[str, Tensor]: The diffusers-format state_dict.
-        """
-        converted_count = 0  # The number of Stability AI keys converted to diffusers format.
-        not_converted_count = 0  # The number of keys that were not converted.
-
-        # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
-        # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
-        # `input_blocks_4_1_proj_in`.
-        stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
-        stability_unet_keys.sort()
-
-        new_state_dict = {}
-        for full_key, value in state_dict.items():
-            if full_key.startswith("lora_unet_"):
-                search_key = full_key.replace("lora_unet_", "")
-                # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
-                position = bisect.bisect_right(stability_unet_keys, search_key)
-                map_key = stability_unet_keys[position - 1]
-                # Now, check if the map_key *actually* matches the search_key.
-                if search_key.startswith(map_key):
-                    new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
-                    new_state_dict[new_key] = value
-                    converted_count += 1
-                else:
-                    new_state_dict[full_key] = value
-                    not_converted_count += 1
-            elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
-                # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
-                new_state_dict[full_key] = value
-                continue
-            else:
-                raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
-
-        if converted_count > 0 and not_converted_count > 0:
-            raise ValueError(
-                f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
-                f" not_converted={not_converted_count}"
-            )
-
-        return new_state_dict
-
-    @classmethod
-    def from_checkpoint(
-        cls,
-        file_path: Union[str, Path],
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        base_model: Optional[BaseModelType] = None,
-    ) -> Self:
-        device = device or torch.device("cpu")
-        dtype = dtype or torch.float32
-
-        if isinstance(file_path, str):
-            file_path = Path(file_path)
-
-        model = cls(
-            name=file_path.stem,
-            layers={},
-        )
-
-        if file_path.suffix == ".safetensors":
-            sd = load_file(file_path.absolute().as_posix(), device="cpu")
-        else:
-            sd = torch.load(file_path, map_location="cpu")
-
-        state_dict = cls._group_state(sd)
-
-        if base_model == BaseModelType.StableDiffusionXL:
-            state_dict = cls._convert_sdxl_keys_to_diffusers_format(state_dict)
-
-        for layer_key, values in state_dict.items():
-            # lora and locon
-            if "lora_down.weight" in values:
-                layer: AnyLoRALayer = LoRALayer(layer_key, values)
-
-            # loha
-            elif "hada_w1_b" in values:
-                layer = LoHALayer(layer_key, values)
-
-            # lokr
-            elif "lokr_w1_b" in values or "lokr_w1" in values:
-                layer = LoKRLayer(layer_key, values)
-
-            # diff
-            elif "diff" in values:
-                layer = FullLayer(layer_key, values)
-
-            # ia3
-            elif "weight" in values and "on_input" in values:
-                layer = IA3Layer(layer_key, values)
-
-            else:
-                print(f">> Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
-                raise Exception("Unknown lora format!")
-
-            # lower memory consumption by removing already parsed layer values
-            state_dict[layer_key].clear()
-
-            layer.to(device=device, dtype=dtype)
-            model.layers[layer_key] = layer
-
-        return model
-
-    @staticmethod
-    def _group_state(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
-        state_dict_groupped: Dict[str, Dict[str, torch.Tensor]] = {}
-
-        for key, value in state_dict.items():
-            stem, leaf = key.split(".", 1)
-            if stem not in state_dict_groupped:
-                state_dict_groupped[stem] = {}
-            state_dict_groupped[stem][leaf] = value
-
-        return state_dict_groupped
-
-
-# code from
-# https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
-def make_sdxl_unet_conversion_map() -> List[Tuple[str, str]]:
-    """Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format."""
-    unet_conversion_map_layer = []
-
-    for i in range(3):  # num_blocks is 3 in sdxl
-        # loop over downblocks/upblocks
-        for j in range(2):
-            # loop over resnets/attentions for downblocks
-            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
-            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
-            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
-
-            if i < 3:
-                # no attention layers in down_blocks.3
-                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
-                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
-                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
-
-        for j in range(3):
-            # loop over resnets/attentions for upblocks
-            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
-            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
-            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
-
-            # if i > 0: commentout for sdxl
-            # no attention layers in up_blocks.0
-            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
-            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
-            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
-
-        if i < 3:
-            # no downsample in down_blocks.3
-            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
-            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
-            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
-
-            # no upsample in up_blocks.3
-            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
-            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
-            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
-
-    hf_mid_atn_prefix = "mid_block.attentions.0."
-    sd_mid_atn_prefix = "middle_block.1."
-    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
-
-    for j in range(2):
-        hf_mid_res_prefix = f"mid_block.resnets.{j}."
-        sd_mid_res_prefix = f"middle_block.{2*j}."
-        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
-
-    unet_conversion_map_resnet = [
-        # (stable-diffusion, HF Diffusers)
-        ("in_layers.0.", "norm1."),
-        ("in_layers.2.", "conv1."),
-        ("out_layers.0.", "norm2."),
-        ("out_layers.3.", "conv2."),
-        ("emb_layers.1.", "time_emb_proj."),
-        ("skip_connection.", "conv_shortcut."),
-    ]
-
-    unet_conversion_map = []
-    for sd, hf in unet_conversion_map_layer:
-        if "resnets" in hf:
-            for sd_res, hf_res in unet_conversion_map_resnet:
-                unet_conversion_map.append((sd + sd_res, hf + hf_res))
-        else:
-            unet_conversion_map.append((sd, hf))
-
-    for j in range(2):
-        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
-        sd_time_embed_prefix = f"time_embed.{j*2}."
-        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
-
-    for j in range(2):
-        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
-        sd_label_embed_prefix = f"label_emb.0.{j*2}."
-        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
-
-    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
-    unet_conversion_map.append(("out.0.", "conv_norm_out."))
-    unet_conversion_map.append(("out.2.", "conv_out."))
-
-    return unet_conversion_map
-
-
-SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
-    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
-}
--- a/invokeai/backend/lora/init.py
+++ b/invokeai/backend/lora/init.py
--- a/invokeai/backend/lora/full_layer.py
+++ b/invokeai/backend/lora/full_layer.py
@@ -0,0 +1,42 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.lora_layer_base import LoRALayerBase
+
+
+class FullLayer(LoRALayerBase):
+    # weight: torch.Tensor
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.weight = values["diff"]
+
+        if len(values.keys()) > 1:
+            _keys = list(values.keys())
+            _keys.remove("diff")
+            raise NotImplementedError(f"Unexpected keys in lora diff layer: {_keys}")
+
+        self.rank = None  # unscaled
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        return self.weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        model_size += self.weight.nelement() * self.weight.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.weight = self.weight.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/ia3_layer.py
+++ b/invokeai/backend/lora/ia3_layer.py
@@ -0,0 +1,45 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.lora_layer_base import LoRALayerBase
+
+
+class IA3Layer(LoRALayerBase):
+    # weight: torch.Tensor
+    # on_input: torch.Tensor
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.weight = values["weight"]
+        self.on_input = values["on_input"]
+
+        self.rank = None  # unscaled
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        weight = self.weight
+        if not self.on_input:
+            weight = weight.reshape(-1, 1)
+        assert orig_weight is not None
+        return orig_weight * weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        model_size += self.weight.nelement() * self.weight.element_size()
+        model_size += self.on_input.nelement() * self.on_input.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        super().to(device=device, dtype=dtype)
+
+        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.on_input = self.on_input.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/loha_layer.py
+++ b/invokeai/backend/lora/loha_layer.py
@@ -0,0 +1,69 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.lora_layer_base import LoRALayerBase
+
+
+class LoHALayer(LoRALayerBase):
+    # w1_a: torch.Tensor
+    # w1_b: torch.Tensor
+    # w2_a: torch.Tensor
+    # w2_b: torch.Tensor
+    # t1: Optional[torch.Tensor] = None
+    # t2: Optional[torch.Tensor] = None
+
+    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
+        super().__init__(layer_key, values)
+
+        self.w1_a = values["hada_w1_a"]
+        self.w1_b = values["hada_w1_b"]
+        self.w2_a = values["hada_w2_a"]
+        self.w2_b = values["hada_w2_b"]
+
+        if "hada_t1" in values:
+            self.t1: Optional[torch.Tensor] = values["hada_t1"]
+        else:
+            self.t1 = None
+
+        if "hada_t2" in values:
+            self.t2: Optional[torch.Tensor] = values["hada_t2"]
+        else:
+            self.t2 = None
+
+        self.rank = self.w1_b.shape[0]
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        if self.t1 is None:
+            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
+
+        else:
+            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
+            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
+            weight = rebuild1 * rebuild2
+
+        return weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+        if self.t1 is not None:
+            self.t1 = self.t1.to(device=device, dtype=dtype)
+
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lokr_layer.py
+++ b/invokeai/backend/lora/lokr_layer.py
@@ -0,0 +1,110 @@
+from typing import Dict, Optional
+
+import torch
+
+from invokeai.backend.lora.lora_layer_base import LoRALayerBase
+
+
+class LoKRLayer(LoRALayerBase):
+    # w1: Optional[torch.Tensor] = None
+    # w1_a: Optional[torch.Tensor] = None
+    # w1_b: Optional[torch.Tensor] = None
+    # w2: Optional[torch.Tensor] = None
+    # w2_a: Optional[torch.Tensor] = None
+    # w2_b: Optional[torch.Tensor] = None
+    # t2: Optional[torch.Tensor] = None
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        if "lokr_w1" in values:
+            self.w1: Optional[torch.Tensor] = values["lokr_w1"]
+            self.w1_a = None
+            self.w1_b = None
+        else:
+            self.w1 = None
+            self.w1_a = values["lokr_w1_a"]
+            self.w1_b = values["lokr_w1_b"]
+
+        if "lokr_w2" in values:
+            self.w2: Optional[torch.Tensor] = values["lokr_w2"]
+            self.w2_a = None
+            self.w2_b = None
+        else:
+            self.w2 = None
+            self.w2_a = values["lokr_w2_a"]
+            self.w2_b = values["lokr_w2_b"]
+
+        if "lokr_t2" in values:
+            self.t2: Optional[torch.Tensor] = values["lokr_t2"]
+        else:
+            self.t2 = None
+
+        if "lokr_w1_b" in values:
+            self.rank = values["lokr_w1_b"].shape[0]
+        elif "lokr_w2_b" in values:
+            self.rank = values["lokr_w2_b"].shape[0]
+        else:
+            self.rank = None  # unscaled
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        w1: Optional[torch.Tensor] = self.w1
+        if w1 is None:
+            assert self.w1_a is not None
+            assert self.w1_b is not None
+            w1 = self.w1_a @ self.w1_b
+
+        w2 = self.w2
+        if w2 is None:
+            if self.t2 is None:
+                assert self.w2_a is not None
+                assert self.w2_b is not None
+                w2 = self.w2_a @ self.w2_b
+            else:
+                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
+
+        if len(w2.shape) == 4:
+            w1 = w1.unsqueeze(2).unsqueeze(2)
+        w2 = w2.contiguous()
+        assert w1 is not None
+        assert w2 is not None
+        weight = torch.kron(w1, w2)
+
+        return weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        super().to(device=device, dtype=dtype)
+
+        if self.w1 is not None:
+            self.w1 = self.w1.to(device=device, dtype=dtype)
+        else:
+            assert self.w1_a is not None
+            assert self.w1_b is not None
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+
+        if self.w2 is not None:
+            self.w2 = self.w2.to(device=device, dtype=dtype)
+        else:
+            assert self.w2_a is not None
+            assert self.w2_b is not None
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_layer.py
+++ b/invokeai/backend/lora/lora_layer.py
@@ -0,0 +1,81 @@
+from typing import Optional
+
+import torch
+
+from invokeai.backend.lora.lora_layer_base import LoRALayerBase
+
+
+class LoRALayer(LoRALayerBase):
+    def __init__(
+        self,
+        layer_key: str,
+        values: dict[str, torch.Tensor],
+    ):
+        super().__init__(layer_key, values)
+
+        self.up = values["lora_up.weight"]
+        self.down = values["lora_down.weight"]
+
+        self.mid: Optional[torch.Tensor] = values.get("lora_mid.weight", None)
+        self.dora_scale: Optional[torch.Tensor] = values.get("dora_scale", None)
+        self.rank = self.down.shape[0]
+
+    def _apply_dora(self, orig_weight: torch.Tensor, lora_weight: torch.Tensor) -> torch.Tensor:
+        """Apply DoRA to the weight matrix.
+
+        This function is based roughly on the reference implementation in PEFT, but handles scaling in a slightly
+        different way:
+        https://github.com/huggingface/peft/blob/26726bf1ddee6ca75ed4e1bfd292094526707a78/src/peft/tuners/lora/layer.py#L421-L433
+
+        """
+        # Merge the original weight with the LoRA weight.
+        merged_weight = orig_weight + lora_weight
+
+        # Calculate the vector-wise L2 norm of the weight matrix across each column vector.
+        weight_norm: torch.Tensor = torch.linalg.norm(merged_weight, dim=1)
+
+        dora_factor = self.dora_scale / weight_norm
+        new_weight = dora_factor * merged_weight
+
+        # TODO(ryand): This is wasteful. We already have the final weight, but we calculate the diff, because that is
+        # what the `get_weight()` API is expected to return. If we do refactor this, we'll have to give some thought to
+        # how lora weight scaling should be applied - having the full weight diff makes this easy.
+        weight_diff = new_weight - orig_weight
+        return weight_diff
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        if self.mid is not None:
+            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
+            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
+            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
+        else:
+            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
+
+        if self.dora_scale is not None:
+            assert orig_weight is not None
+            weight = self._apply_dora(orig_weight, weight)
+
+        return weight
+
+    def calc_size(self) -> int:
+        model_size = super().calc_size()
+        for val in [self.up, self.mid, self.down]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        super().to(device=device, dtype=dtype)
+
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)
+
+        if self.mid is not None:
+            self.mid = self.mid.to(device=device, dtype=dtype)
+
+        if self.dora_scale is not None:
+            self.dora_scale = self.dora_scale.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_layer_base.py
+++ b/invokeai/backend/lora/lora_layer_base.py
@@ -0,0 +1,55 @@
+from typing import Dict, Optional
+
+import torch
+
+
+class LoRALayerBase:
+    # rank: Optional[int]
+    # alpha: Optional[float]
+    # bias: Optional[torch.Tensor]
+    # layer_key: str
+
+    # @property
+    # def scale(self):
+    #    return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: Dict[str, torch.Tensor],
+    ):
+        if "alpha" in values:
+            self.alpha = values["alpha"].item()
+        else:
+            self.alpha = None
+
+        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
+            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
+                values["bias_indices"],
+                values["bias_values"],
+                tuple(values["bias_size"]),
+            )
+
+        else:
+            self.bias = None
+
+        self.rank = None  # set in layer implementation
+        self.layer_key = layer_key
+
+    def get_weight(self, orig_weight: Optional[torch.Tensor]) -> torch.Tensor:
+        raise NotImplementedError()
+
+    def calc_size(self) -> int:
+        model_size = 0
+        for val in [self.bias]:
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        if self.bias is not None:
+            self.bias = self.bias.to(device=device, dtype=dtype)
--- a/invokeai/backend/lora/lora_model.py
+++ b/invokeai/backend/lora/lora_model.py
@@ -0,0 +1,111 @@
+from pathlib import Path
+from typing import Optional, Union
+
+import torch
+
+from invokeai.backend.lora.full_layer import FullLayer
+from invokeai.backend.lora.ia3_layer import IA3Layer
+from invokeai.backend.lora.loha_layer import LoHALayer
+from invokeai.backend.lora.lokr_layer import LoKRLayer
+from invokeai.backend.lora.lora_layer import LoRALayer
+from invokeai.backend.lora.sdxl_state_dict_utils import convert_sdxl_keys_to_diffusers_format
+from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.util.serialization import load_state_dict
+
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
+
+
+class LoRAModelRaw(torch.nn.Module):
+    def __init__(
+        self,
+        name: str,
+        layers: dict[str, AnyLoRALayer],
+    ):
+        super().__init__()
+        self._name = name
+        self.layers = layers
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> None:
+        # TODO: try revert if exception?
+        for _key, layer in self.layers.items():
+            layer.to(device=device, dtype=dtype)
+
+    def calc_size(self) -> int:
+        model_size = 0
+        for _, layer in self.layers.items():
+            model_size += layer.calc_size()
+        return model_size
+
+    @classmethod
+    def from_checkpoint(
+        cls,
+        file_path: Union[str, Path],
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        base_model: Optional[BaseModelType] = None,
+    ):
+        device = device or torch.device("cpu")
+        dtype = dtype or torch.float32
+
+        file_path = Path(file_path)
+
+        model_name = file_path.stem
+
+        sd = load_state_dict(file_path, device=str(device))
+        state_dict = cls._group_state(sd)
+
+        if base_model == BaseModelType.StableDiffusionXL:
+            state_dict = convert_sdxl_keys_to_diffusers_format(state_dict)
+
+        layers: dict[str, AnyLoRALayer] = {}
+        for layer_key, values in state_dict.items():
+            # lora and locon
+            if "lora_down.weight" in values:
+                layer: AnyLoRALayer = LoRALayer(layer_key, values)
+
+            # loha
+            elif "hada_w1_b" in values:
+                layer = LoHALayer(layer_key, values)
+
+            # lokr
+            elif "lokr_w1_b" in values or "lokr_w1" in values:
+                layer = LoKRLayer(layer_key, values)
+
+            # diff
+            elif "diff" in values:
+                layer = FullLayer(layer_key, values)
+
+            # ia3
+            elif "weight" in values and "on_input" in values:
+                layer = IA3Layer(layer_key, values)
+
+            else:
+                raise ValueError(f"Unknown lora layer module in {model_name}: {layer_key}: {list(values.keys())}")
+
+            # lower memory consumption by removing already parsed layer values
+            state_dict[layer_key].clear()
+
+            layer.to(device=device, dtype=dtype)
+            layers[layer_key] = layer
+
+        return cls(name=model_name, layers=layers)
+
+    @staticmethod
+    def _group_state(state_dict: dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
+        state_dict_groupped: dict[str, dict[str, torch.Tensor]] = {}
+
+        for key, value in state_dict.items():
+            stem, leaf = key.split(".", 1)
+            if stem not in state_dict_groupped:
+                state_dict_groupped[stem] = {}
+            state_dict_groupped[stem][leaf] = value
+
+        return state_dict_groupped
--- a/invokeai/backend/lora/lora_model_patcher.py
+++ b/invokeai/backend/lora/lora_model_patcher.py
@@ -0,0 +1,137 @@
+from contextlib import contextmanager
+from typing import Iterator, Tuple
+
+import torch
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from transformers import CLIPTextModel
+
+from invokeai.backend.lora.lora_model import LoRAModelRaw
+from invokeai.backend.model_manager.any_model_type import AnyModel
+
+
+class LoraModelPatcher:
+    @staticmethod
+    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
+        assert "." not in lora_key
+
+        if not lora_key.startswith(prefix):
+            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
+
+        module = model
+        module_key = ""
+        key_parts = lora_key[len(prefix) :].split("_")
+
+        submodule_name = key_parts.pop(0)
+
+        while len(key_parts) > 0:
+            try:
+                module = module.get_submodule(submodule_name)
+                module_key += "." + submodule_name
+                submodule_name = key_parts.pop(0)
+            except Exception:
+                submodule_name += "_" + key_parts.pop(0)
+
+        module = module.get_submodule(submodule_name)
+        module_key = (module_key + "." + submodule_name).lstrip(".")
+
+        return (module_key, module)
+
+    @classmethod
+    @contextmanager
+    def apply_lora_unet(
+        cls,
+        unet: UNet2DConditionModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+    ):
+        with cls.apply_lora(unet, loras, "lora_unet_"):
+            yield
+
+    @classmethod
+    @contextmanager
+    def apply_lora_text_encoder(
+        cls,
+        text_encoder: CLIPTextModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+    ):
+        with cls.apply_lora(text_encoder, loras, "lora_te_"):
+            yield
+
+    @classmethod
+    @contextmanager
+    def apply_sdxl_lora_text_encoder(
+        cls,
+        text_encoder: CLIPTextModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+    ):
+        with cls.apply_lora(text_encoder, loras, "lora_te1_"):
+            yield
+
+    @classmethod
+    @contextmanager
+    def apply_sdxl_lora_text_encoder2(
+        cls,
+        text_encoder: CLIPTextModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+    ):
+        with cls.apply_lora(text_encoder, loras, "lora_te2_"):
+            yield
+
+    @classmethod
+    @contextmanager
+    def apply_lora(
+        cls,
+        model: AnyModel,
+        loras: Iterator[Tuple[LoRAModelRaw, float]],
+        prefix: str,
+    ):
+        original_weights = {}
+        try:
+            with torch.no_grad():
+                for lora, lora_weight in loras:
+                    # assert lora.device.type == "cpu"
+                    for layer_key, layer in lora.layers.items():
+                        if not layer_key.startswith(prefix):
+                            continue
+
+                        # TODO(ryand): A non-negligible amount of time is currently spent resolving LoRA keys. This
+                        # should be improved in the following ways:
+                        # 1. The key mapping could be more-efficiently pre-computed. This would save time every time a
+                        #    LoRA model is applied.
+                        # 2. From an API perspective, there's no reason that the `LoraModelPatcher` should be aware of
+                        #    the intricacies of Stable Diffusion key resolution. It should just expect the input LoRA
+                        #    weights to have valid keys.
+                        assert isinstance(model, torch.nn.Module)
+                        module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
+
+                        # All of the LoRA weight calculations will be done on the same device as the module weight.
+                        # (Performance will be best if this is a CUDA device.)
+                        device = module.weight.device
+                        dtype = module.weight.dtype
+
+                        if module_key not in original_weights:
+                            original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
+
+                        layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
+
+                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
+                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
+                        # same thing in a single call to '.to(...)'.
+                        layer.to(device=device)
+                        layer.to(dtype=torch.float32)
+                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
+                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
+                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
+                        layer.to(device=torch.device("cpu"))
+
+                        if module.weight.shape != layer_weight.shape:
+                            layer_weight = layer_weight.reshape(module.weight.shape)
+
+                        module.weight += layer_weight.to(dtype=dtype)
+
+            yield  # wait for context manager exit
+
+        finally:
+            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
+            with torch.no_grad():
+                for module_key, weight in original_weights.items():
+                    model.get_submodule(module_key).weight.copy_(weight)
--- a/invokeai/backend/lora/sdxl_state_dict_utils.py
+++ b/invokeai/backend/lora/sdxl_state_dict_utils.py
@@ -0,0 +1,157 @@
+import bisect
+from typing import TypeVar
+
+
+def make_sdxl_unet_conversion_map() -> list[tuple[str, str]]:
+    """Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format.
+
+    Ported from:
+    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
+    """
+    unet_conversion_map_layer: list[tuple[str, str]] = []
+
+    for i in range(3):  # num_blocks is 3 in sdxl
+        # loop over downblocks/upblocks
+        for j in range(2):
+            # loop over resnets/attentions for downblocks
+            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
+            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
+            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
+
+            if i < 3:
+                # no attention layers in down_blocks.3
+                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
+                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
+                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
+
+        for j in range(3):
+            # loop over resnets/attentions for upblocks
+            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
+            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
+            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
+
+            # if i > 0: commentout for sdxl
+            # no attention layers in up_blocks.0
+            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
+            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
+            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
+
+        if i < 3:
+            # no downsample in down_blocks.3
+            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
+            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
+            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
+
+            # no upsample in up_blocks.3
+            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
+            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
+            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
+
+    hf_mid_atn_prefix = "mid_block.attentions.0."
+    sd_mid_atn_prefix = "middle_block.1."
+    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
+
+    for j in range(2):
+        hf_mid_res_prefix = f"mid_block.resnets.{j}."
+        sd_mid_res_prefix = f"middle_block.{2*j}."
+        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
+
+    unet_conversion_map_resnet = [
+        # (stable-diffusion, HF Diffusers)
+        ("in_layers.0.", "norm1."),
+        ("in_layers.2.", "conv1."),
+        ("out_layers.0.", "norm2."),
+        ("out_layers.3.", "conv2."),
+        ("emb_layers.1.", "time_emb_proj."),
+        ("skip_connection.", "conv_shortcut."),
+    ]
+
+    unet_conversion_map: list[tuple[str, str]] = []
+    for sd, hf in unet_conversion_map_layer:
+        if "resnets" in hf:
+            for sd_res, hf_res in unet_conversion_map_resnet:
+                unet_conversion_map.append((sd + sd_res, hf + hf_res))
+        else:
+            unet_conversion_map.append((sd, hf))
+
+    for j in range(2):
+        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
+        sd_time_embed_prefix = f"time_embed.{j*2}."
+        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
+
+    for j in range(2):
+        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
+        sd_label_embed_prefix = f"label_emb.0.{j*2}."
+        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
+
+    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
+    unet_conversion_map.append(("out.0.", "conv_norm_out."))
+    unet_conversion_map.append(("out.2.", "conv_out."))
+
+    return unet_conversion_map
+
+
+SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
+    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
+}
+
+
+T = TypeVar("T")
+
+
+def convert_sdxl_keys_to_diffusers_format(state_dict: dict[str, T]) -> dict[str, T]:
+    """Convert the keys of an SDXL LoRA state_dict to diffusers format.
+
+    The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
+    diffusers format, then this function will have no effect.
+
+    This function is adapted from:
+    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
+
+    Args:
+        state_dict (dict[str, Tensor]): The SDXL LoRA state_dict.
+
+    Raises:
+        ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
+
+    Returns:
+        dict[str, Tensor]: The diffusers-format state_dict.
+    """
+    converted_count = 0  # The number of Stability AI keys converted to diffusers format.
+    not_converted_count = 0  # The number of keys that were not converted.
+
+    # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
+    # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
+    # `input_blocks_4_1_proj_in`.
+    stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
+    stability_unet_keys.sort()
+
+    new_state_dict: dict[str, T] = {}
+    for full_key, value in state_dict.items():
+        if full_key.startswith("lora_unet_"):
+            search_key = full_key.replace("lora_unet_", "")
+            # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
+            position = bisect.bisect_right(stability_unet_keys, search_key)
+            map_key = stability_unet_keys[position - 1]
+            # Now, check if the map_key *actually* matches the search_key.
+            if search_key.startswith(map_key):
+                new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
+                new_state_dict[new_key] = value
+                converted_count += 1
+            else:
+                new_state_dict[full_key] = value
+                not_converted_count += 1
+        elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
+            # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
+            new_state_dict[full_key] = value
+            continue
+        else:
+            raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
+
+    if converted_count > 0 and not_converted_count > 0:
+        raise ValueError(
+            f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
+            f" not_converted={not_converted_count}"
+        )
+
+    return new_state_dict
--- a/invokeai/backend/model_manager/any_model_type.py
+++ b/invokeai/backend/model_manager/any_model_type.py
@@ -4,7 +4,7 @@ import torch
 from diffusers.models.modeling_utils import ModelMixin

 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw

--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -6,7 +6,7 @@ from pathlib import Path
 from typing import Optional

 from invokeai.app.services.config import InvokeAIAppConfig
-from invokeai.backend.lora import LoRAModelRaw
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.model_manager import (
    AnyModelConfig,
    BaseModelType,
--- a/invokeai/backend/model_manager/load/optimizations.py
+++ b/invokeai/backend/model_manager/load/optimizations.py
@@ -17,7 +17,7 @@ def skip_torch_weight_init() -> Generator[None, None, None]:
    completely unnecessary if the intent is to load checkpoint weights from disk for the layer. This context manager
    monkey-patches common torch layers to skip the weight initialization step.
    """
-    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding]
+    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding, torch.nn.LayerNorm]
    saved_functions = [hasattr(m, "reset_parameters") and m.reset_parameters for m in torch_modules]

    try:
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -13,157 +13,14 @@ from diffusers import OnnxRuntimeModel, UNet2DConditionModel
 from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

 from invokeai.app.shared.models import FreeUConfig
-from invokeai.backend.model_manager.any_model_type import AnyModel
+from invokeai.backend.lora.lora_model import LoRAModelRaw
 from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel

-from .lora import LoRAModelRaw
 from .textual_inversion import TextualInversionManager, TextualInversionModelRaw

-"""
-loras = [
-    (lora_model1, 0.7),
-    (lora_model2, 0.4),
-]
-with LoRAHelper.apply_lora_unet(unet, loras):
-    # unet with applied loras
-# unmodified unet

-"""
-
-
-# TODO: rename smth like ModelPatcher and add TI method?
 class ModelPatcher:
-    @staticmethod
-    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
-        assert "." not in lora_key
-
-        if not lora_key.startswith(prefix):
-            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
-
-        module = model
-        module_key = ""
-        key_parts = lora_key[len(prefix) :].split("_")
-
-        submodule_name = key_parts.pop(0)
-
-        while len(key_parts) > 0:
-            try:
-                module = module.get_submodule(submodule_name)
-                module_key += "." + submodule_name
-                submodule_name = key_parts.pop(0)
-            except Exception:
-                submodule_name += "_" + key_parts.pop(0)
-
-        module = module.get_submodule(submodule_name)
-        module_key = (module_key + "." + submodule_name).lstrip(".")
-
-        return (module_key, module)
-
-    @classmethod
-    @contextmanager
-    def apply_lora_unet(
-        cls,
-        unet: UNet2DConditionModel,
-        loras: Iterator[Tuple[LoRAModelRaw, float]],
-    ) -> None:
-        with cls.apply_lora(unet, loras, "lora_unet_"):
-            yield
-
-    @classmethod
-    @contextmanager
-    def apply_lora_text_encoder(
-        cls,
-        text_encoder: CLIPTextModel,
-        loras: Iterator[Tuple[LoRAModelRaw, float]],
-    ) -> None:
-        with cls.apply_lora(text_encoder, loras, "lora_te_"):
-            yield
-
-    @classmethod
-    @contextmanager
-    def apply_sdxl_lora_text_encoder(
-        cls,
-        text_encoder: CLIPTextModel,
-        loras: List[Tuple[LoRAModelRaw, float]],
-    ) -> None:
-        with cls.apply_lora(text_encoder, loras, "lora_te1_"):
-            yield
-
-    @classmethod
-    @contextmanager
-    def apply_sdxl_lora_text_encoder2(
-        cls,
-        text_encoder: CLIPTextModel,
-        loras: List[Tuple[LoRAModelRaw, float]],
-    ) -> None:
-        with cls.apply_lora(text_encoder, loras, "lora_te2_"):
-            yield
-
-    @classmethod
-    @contextmanager
-    def apply_lora(
-        cls,
-        model: AnyModel,
-        loras: Iterator[Tuple[LoRAModelRaw, float]],
-        prefix: str,
-    ) -> None:
-        original_weights = {}
-        try:
-            with torch.no_grad():
-                for lora, lora_weight in loras:
-                    # assert lora.device.type == "cpu"
-                    for layer_key, layer in lora.layers.items():
-                        if not layer_key.startswith(prefix):
-                            continue
-
-                        # TODO(ryand): A non-negligible amount of time is currently spent resolving LoRA keys. This
-                        # should be improved in the following ways:
-                        # 1. The key mapping could be more-efficiently pre-computed. This would save time every time a
-                        #    LoRA model is applied.
-                        # 2. From an API perspective, there's no reason that the `ModelPatcher` should be aware of the
-                        #    intricacies of Stable Diffusion key resolution. It should just expect the input LoRA
-                        #    weights to have valid keys.
-                        assert isinstance(model, torch.nn.Module)
-                        module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
-
-                        # All of the LoRA weight calculations will be done on the same device as the module weight.
-                        # (Performance will be best if this is a CUDA device.)
-                        device = module.weight.device
-                        dtype = module.weight.dtype
-
-                        if module_key not in original_weights:
-                            original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
-
-                        layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
-
-                        # We intentionally move to the target device first, then cast. Experimentally, this was found to
-                        # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
-                        # same thing in a single call to '.to(...)'.
-                        layer.to(device=device)
-                        layer.to(dtype=torch.float32)
-                        # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
-                        # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
-                        layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
-                        layer.to(device=torch.device("cpu"))
-
-                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                        if module.weight.shape != layer_weight.shape:
-                            # TODO: debug on lycoris
-                            assert hasattr(layer_weight, "reshape")
-                            layer_weight = layer_weight.reshape(module.weight.shape)
-
-                        assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                        module.weight += layer_weight.to(dtype=dtype)
-
-            yield  # wait for context manager exit
-
-        finally:
-            assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
-            with torch.no_grad():
-                for module_key, weight in original_weights.items():
-                    model.get_submodule(module_key).weight.copy_(weight)
-
    @classmethod
    @contextmanager
    def apply_ti(
--- a/invokeai/backend/util/serialization.py
+++ b/invokeai/backend/util/serialization.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import torch
+from safetensors.torch import load_file
+
+
+def state_dict_to(
+    state_dict: dict[str, torch.Tensor], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None
+) -> dict[str, torch.Tensor]:
+    new_state_dict: dict[str, torch.Tensor] = {}
+    for k, v in state_dict.items():
+        new_state_dict[k] = v.to(device=device, dtype=dtype, non_blocking=True)
+    return new_state_dict
+
+
+def load_state_dict(file_path: Union[str, Path], device: str = "cpu") -> Any:
+    """Load a state_dict from a file that may be in either PyTorch or safetensors format. The file format is inferred
+    from the file extension.
+    """
+    file_path = Path(file_path)
+
+    if file_path.suffix == ".safetensors":
+        state_dict = load_file(
+            file_path,
+            device=device,
+        )
+    else:
+        # weights_only=True is used to address a security vulnerability that allows arbitrary code execution.
+        # This option was first introduced in https://github.com/pytorch/pytorch/pull/86812.
+        #
+        # mmap=True is used to both reduce memory usage and speed up loading. This setting causes torch.load() to more
+        # closely mirror the behaviour of safetensors.torch.load_file().  This option was first introduced in
+        # https://github.com/pytorch/pytorch/pull/102549. The discussion on that PR provides helpful context.
+        state_dict = torch.load(file_path, map_location=device, weights_only=True, mmap=True)
+
+    return state_dict
--- a/tests/backend/model_manager/test_lora.py
+++ b/tests/backend/model_manager/test_lora.py
@@ -5,8 +5,9 @@
 import pytest
 import torch

-from invokeai.backend.lora import LoRALayer, LoRAModelRaw
-from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.lora.lora_layer import LoRALayer
+from invokeai.backend.lora.lora_model import LoRAModelRaw
+from invokeai.backend.lora.lora_model_patcher import LoraModelPatcher


@pytest.mark.parametrize(
@@ -44,7 +45,7 @@ def test_apply_lora(device):
    orig_linear_weight = model["linear_layer_1"].weight.data.detach().clone()
    expected_patched_linear_weight = orig_linear_weight + (lora_dim * lora_weight)

-    with ModelPatcher.apply_lora(model, [(lora, lora_weight)], prefix=""):
+    with LoraModelPatcher.apply_lora(model, [(lora, lora_weight)], prefix=""):
        # After patching, all LoRA layer weights should have been moved back to the cpu.
        assert lora_layers["linear_layer_1"].up.device.type == "cpu"
        assert lora_layers["linear_layer_1"].down.device.type == "cpu"
@@ -86,7 +87,7 @@ def test_apply_lora_change_device():

    orig_linear_weight = model["linear_layer_1"].weight.data.detach().clone()

-    with ModelPatcher.apply_lora(model, [(lora, 0.5)], prefix=""):
+    with LoraModelPatcher.apply_lora(model, [(lora, 0.5)], prefix=""):
        # After patching, all LoRA layer weights should have been moved back to the cpu.
        assert lora_layers["linear_layer_1"].up.device.type == "cpu"
        assert lora_layers["linear_layer_1"].down.device.type == "cpu"
Author	SHA1	Message	Date
Ryan Dick	6bfb4927c7	WIP - not working	2024-04-08 10:06:11 -04:00
Ryan Dick	c15e9e23ca	Tidy types in LoraModelPatcher.	2024-04-05 16:05:31 -04:00
Ryan Dick	e1aa1ed6af	Split LoraModelPatcher out from ModelPatcher monolith.	2024-04-05 15:57:46 -04:00
Ryan Dick	4b68050c9b	Minor cleanup of LoRAModelRaw.	2024-04-05 15:30:01 -04:00
Ryan Dick	9e68a5c851	Use load_state_dict() util in LoRAModelRaw.	2024-04-05 15:20:07 -04:00
Ryan Dick	61a672cd81	Tidy types in sdxl_state_dict_utils.py.	2024-04-05 15:16:36 -04:00
Ryan Dick	c27a2e59da	Copy convert_sdxl_keys_to_diffusers_format() to sdxl_state_dict_utils.py.	2024-04-05 15:12:22 -04:00
Ryan Dick	4e3f42e388	Split lora.py monolith into separate files.	2024-04-05 15:04:14 -04:00
Ryan Dick	5d41157404	Add LayerNorm to list of modules optimized by skip_torch_weight_init()	2024-04-05 14:39:57 -04:00
Ryan Dick	8db4ba252a	Add util function for loading state_dicts from disk.	2024-04-05 14:39:57 -04:00