A very primitive working version of PEFT patching. It is very slow. LoRAs don't get unloaded yet, so it can only be run once. And the results are *slightly* different from the old implementation. I suspect this is because the LoRA weight is not being applied to the UNet, but there could be other issues as well.

WIP
Add LayerNorm to list of modules optimized by skip_torch_weight_init()
2026-01-15 07:28:06 -05:00 · 2024-04-05 12:02:05 -04:00 · 2024-04-04 22:40:42 -04:00 · 2024-04-04 18:07:13 -04:00 · 2024-04-04 18:07:13 -04:00 · 2024-04-04 18:07:13 -04:00
10 changed files with 576 additions and 26 deletions
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -9,8 +9,9 @@ from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField
 from invokeai.app.invocations.primitives import ConditioningOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.ti_utils import generate_ti_list
-from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.peft.peft_model import PeftModel
+from invokeai.backend.peft.peft_model_patcher import PeftModelPatcher
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
    BasicConditioningInfo,
    ConditioningFieldData,
@@ -61,15 +62,12 @@ class CompelInvocation(BaseInvocation):
        text_encoder_model = text_encoder_info.model
        assert isinstance(text_encoder_model, CLIPTextModel)

-        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+        def _lora_loader() -> Iterator[Tuple[PeftModel, float]]:
            for lora in self.clip.loras:
                lora_info = context.models.load(lora.lora)
-                assert isinstance(lora_info.model, LoRAModelRaw)
+                assert isinstance(lora_info.model, PeftModel)
                yield (lora_info.model, lora.weight)
                del lora_info
-            return
-
-        # loras = [(context.models.get(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras]

        ti_list = generate_ti_list(self.prompt, text_encoder_info.config.base, context)

@@ -80,7 +78,7 @@ class CompelInvocation(BaseInvocation):
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora_text_encoder(text_encoder, _lora_loader()),
+            PeftModelPatcher.apply_peft_model_to_text_encoder(text_encoder, _lora_loader(), "text_encoder"),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, self.clip.skipped_layers),
        ):
@@ -161,16 +159,13 @@ class SDXLPromptInvocationBase:
                c_pooled = None
            return c, c_pooled, None

-        def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+        def _lora_loader() -> Iterator[Tuple[PeftModel, float]]:
            for lora in clip_field.loras:
                lora_info = context.models.load(lora.lora)
                lora_model = lora_info.model
-                assert isinstance(lora_model, LoRAModelRaw)
+                assert isinstance(lora_model, PeftModel)
                yield (lora_model, lora.weight)
                del lora_info
-            return
-
-        # loras = [(context.models.get(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras]

        ti_list = generate_ti_list(prompt, text_encoder_info.config.base, context)

@@ -181,7 +176,7 @@ class SDXLPromptInvocationBase:
            ),
            text_encoder_info as text_encoder,
            # Apply the LoRA after text_encoder has been moved to its target device for faster patching.
-            ModelPatcher.apply_lora(text_encoder, _lora_loader(), lora_prefix),
+            PeftModelPatcher.apply_peft_model_to_text_encoder(text_encoder, _lora_loader(), lora_prefix),
            # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
            ModelPatcher.apply_clip_skip(text_encoder_model, clip_field.skipped_layers),
        ):
@@ -259,15 +254,15 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ConditioningOutput:
        c1, c1_pooled, ec1 = self.run_clip_compel(
-            context, self.clip, self.prompt, False, "lora_te1_", zero_on_empty=True
+            context, self.clip, self.prompt, False, "text_encoder", zero_on_empty=True
        )
        if self.style.strip() == "":
            c2, c2_pooled, ec2 = self.run_clip_compel(
-                context, self.clip2, self.prompt, True, "lora_te2_", zero_on_empty=True
+                context, self.clip2, self.prompt, True, "text_encoder_2", zero_on_empty=True
            )
        else:
            c2, c2_pooled, ec2 = self.run_clip_compel(
-                context, self.clip2, self.style, True, "lora_te2_", zero_on_empty=True
+                context, self.clip2, self.style, True, "text_encoder_2", zero_on_empty=True
            )

        original_size = (self.original_height, self.original_width)
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -48,9 +48,10 @@ from invokeai.app.invocations.t2i_adapter import T2IAdapterField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
-from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager import BaseModelType, LoadedModel
 from invokeai.backend.model_patcher import ModelPatcher
+from invokeai.backend.peft.peft_model import PeftModel
+from invokeai.backend.peft.peft_model_patcher import PeftModelPatcher
 from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData, IPAdapterConditioningInfo
 from invokeai.backend.util.silence_warnings import SilenceWarnings
@@ -714,13 +715,12 @@ class DenoiseLatentsInvocation(BaseInvocation):
            def step_callback(state: PipelineIntermediateState) -> None:
                context.util.sd_step_callback(state, unet_config.base)

-            def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
+            def _lora_loader() -> Iterator[Tuple[PeftModel, float]]:
                for lora in self.unet.loras:
                    lora_info = context.models.load(lora.lora)
-                    assert isinstance(lora_info.model, LoRAModelRaw)
+                    assert isinstance(lora_info.model, PeftModel)
                    yield (lora_info.model, lora.weight)
                    del lora_info
-                return

            unet_info = context.models.load(self.unet.unet)
            assert isinstance(unet_info.model, UNet2DConditionModel)
@@ -730,7 +730,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                set_seamless(unet_info.model, self.unet.seamless_axes),  # FIXME
                unet_info as unet,
                # Apply the LoRA after unet has been moved to its target device for faster patching.
-                ModelPatcher.apply_lora_unet(unet, _lora_loader()),
+                PeftModelPatcher.apply_peft_model_to_unet(unet, _lora_loader()),
            ):
                assert isinstance(unet, UNet2DConditionModel)
                latents = latents.to(device=unet.device, dtype=unet.dtype)
--- a/invokeai/backend/model_manager/any_model_type.py
+++ b/invokeai/backend/model_manager/any_model_type.py
@@ -4,9 +4,9 @@ import torch
 from diffusers.models.modeling_utils import ModelMixin

 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
-from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
+from invokeai.backend.peft.peft_model import PeftModel
 from invokeai.backend.textual_inversion import TextualInversionModelRaw

 # ModelMixin is the base class for all diffusers and transformers models
-AnyModel = Union[ModelMixin, torch.nn.Module, IPAdapter, LoRAModelRaw, TextualInversionModelRaw, IAIOnnxRuntimeModel]
+AnyModel = Union[ModelMixin, torch.nn.Module, IPAdapter, PeftModel, TextualInversionModelRaw, IAIOnnxRuntimeModel]
--- a/invokeai/backend/model_manager/load/model_loaders/lora.py
+++ b/invokeai/backend/model_manager/load/model_loaders/lora.py
@@ -6,7 +6,6 @@ from pathlib import Path
 from typing import Optional

 from invokeai.app.services.config import InvokeAIAppConfig
-from invokeai.backend.lora import LoRAModelRaw
 from invokeai.backend.model_manager import (
    AnyModelConfig,
    BaseModelType,
@@ -17,6 +16,7 @@ from invokeai.backend.model_manager import (
 from invokeai.backend.model_manager.any_model_type import AnyModel
 from invokeai.backend.model_manager.load.convert_cache import ModelConvertCacheBase
 from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
+from invokeai.backend.peft.peft_model import PeftModel

 from .. import ModelLoader, ModelLoaderRegistry

@@ -47,7 +47,7 @@ class LoRALoader(ModelLoader):
            raise ValueError("There are no submodels in a LoRA model.")
        model_path = Path(config.path)
        assert self._model_base is not None
-        model = LoRAModelRaw.from_checkpoint(
+        model = PeftModel.from_checkpoint(
            file_path=model_path,
            dtype=self._torch_dtype,
            base_model=self._model_base,
--- a/invokeai/backend/model_manager/load/optimizations.py
+++ b/invokeai/backend/model_manager/load/optimizations.py
@@ -17,7 +17,7 @@ def skip_torch_weight_init() -> Generator[None, None, None]:
    completely unnecessary if the intent is to load checkpoint weights from disk for the layer. This context manager
    monkey-patches common torch layers to skip the weight initialization step.
    """
-    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding]
+    torch_modules = [torch.nn.Linear, torch.nn.modules.conv._ConvNd, torch.nn.Embedding, torch.nn.LayerNorm]
    saved_functions = [hasattr(m, "reset_parameters") and m.reset_parameters for m in torch_modules]

    try:
--- a/invokeai/backend/peft/peft_format_utils.py
+++ b/invokeai/backend/peft/peft_format_utils.py
@@ -0,0 +1,85 @@
+import torch
+from diffusers.utils.state_dict_utils import convert_state_dict
+
+KOHYA_SS_TO_PEFT = {
+    "lora_down": "lora_A",
+    "lora_up": "lora_B",
+    # This is not a comprehensive dict. See `convert_state_dict_to_peft(...)` for more info on the conversion.
+}
+
+
+def convert_state_dict_kohya_to_peft(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:  # NOTE(review): unfinished WIP; judging by its name, meant to convert Kohya keys to PEFT keys
+    # TODO(ryand): Check that state_dict is in Kohya format.
+
+    peft_partial_state_dict = convert_state_dict(state_dict, KOHYA_SS_TO_PEFT)  # applies the lora_down -> lora_A / lora_up -> lora_B mapping
+
+    peft_state_dict: dict[str, torch.Tensor] = {}  # NOTE(review): never filled or returned in this revision
+    for key, weight in peft_partial_state_dict.items():  # NOTE(review): this loop has no body, so the file does not parse as written
+
+
+    for kohya_key, weight in kohya_ss_partial_state_dict.items():  # NOTE(review): kohya_ss_partial_state_dict, kohya_ss_state_dict and peft_adapter_name are never assigned in this function
+        if "text_encoder_2." in kohya_key:
+            kohya_key = kohya_key.replace("text_encoder_2.", "lora_te2.")
+        elif "text_encoder." in kohya_key:
+            kohya_key = kohya_key.replace("text_encoder.", "lora_te1.")
+        elif "unet" in kohya_key:
+            kohya_key = kohya_key.replace("unet", "lora_unet")
+        kohya_key = kohya_key.replace(".", "_", kohya_key.count(".") - 2)  # PEFT -> Kohya direction, same statements as in convert_state_dict_to_kohya
+        kohya_key = kohya_key.replace(peft_adapter_name, "")  # Kohya doesn't take names
+        kohya_ss_state_dict[kohya_key] = weight
+        if "lora_down" in kohya_key:
+            alpha_key = f'{kohya_key.split(".")[0]}.alpha'
+            kohya_ss_state_dict[alpha_key] = torch.tensor(len(weight))
+def convert_state_dict_to_kohya(state_dict, original_type=None, **kwargs):
+    r"""
+    Converts a `PEFT` state dict to `Kohya` format that can be used in AUTOMATIC1111, ComfyUI, SD.Next, InvokeAI, etc.
+    The method only supports the conversion from PEFT to Kohya for now.
+
+    Args:
+        state_dict (`dict[str, torch.Tensor]`):
+            The state dict to convert.
+        original_type (`StateDictType`, *optional*):
+            The original type of the state dict, if not provided, the method will try to infer it automatically.
+        kwargs (`dict`, *args*):
+            Additional arguments to pass to the method.
+
+            - **adapter_name**: For example, in case of PEFT, some keys will be pre-pended
+                with the adapter name, therefore needs a special handling. By default PEFT also takes care of that in
+                `get_peft_model_state_dict` method:
+                https://github.com/huggingface/peft/blob/ba0477f2985b1ba311b83459d29895c809404e99/src/peft/utils/save_and_load.py#L92
+                but we add it here in case we don't want to rely on that method.
+    """
+
+    peft_adapter_name = kwargs.pop("adapter_name", None)  # optional; becomes ".<name>" below, or "" when not given
+    if peft_adapter_name is not None:
+        peft_adapter_name = "." + peft_adapter_name
+    else:
+        peft_adapter_name = ""
+
+    if original_type is None:  # infer PEFT format when any key contains ".lora_A<adapter suffix>.weight"
+        if any(f".lora_A{peft_adapter_name}.weight" in k for k in state_dict.keys()):
+            original_type = StateDictType.PEFT  # NOTE(review): StateDictType is not defined or imported in this file
+
+    if original_type not in KOHYA_STATE_DICT_MAPPINGS.keys():  # NOTE(review): KOHYA_STATE_DICT_MAPPINGS is not defined or imported in this file
+        raise ValueError(f"Original type {original_type} is not supported")
+
+    # Use the convert_state_dict function with the appropriate mapping
+    kohya_ss_partial_state_dict = convert_state_dict(state_dict, KOHYA_STATE_DICT_MAPPINGS[StateDictType.PEFT])
+    kohya_ss_state_dict = {}
+
+    # Additional logic for replacing header, alpha parameters `.` with `_` in all keys
+    for kohya_key, weight in kohya_ss_partial_state_dict.items():
+        if "text_encoder_2." in kohya_key:  # checked before "text_encoder." so the more specific marker is matched first
+            kohya_key = kohya_key.replace("text_encoder_2.", "lora_te2.")
+        elif "text_encoder." in kohya_key:
+            kohya_key = kohya_key.replace("text_encoder.", "lora_te1.")
+        elif "unet" in kohya_key:
+            kohya_key = kohya_key.replace("unet", "lora_unet")
+        kohya_key = kohya_key.replace(".", "_", kohya_key.count(".") - 2)  # replaces the first (dot count - 2) dots, leaving the last two dots in place
+        kohya_key = kohya_key.replace(peft_adapter_name, "")  # Kohya doesn't take names
+        kohya_ss_state_dict[kohya_key] = weight
+        if "lora_down" in kohya_key:
+            alpha_key = f'{kohya_key.split(".")[0]}.alpha'  # "<text before the first dot>.alpha"
+            kohya_ss_state_dict[alpha_key] = torch.tensor(len(weight))  # alpha is set to the size of the lora_down weight's first dimension
+
+    return kohya_ss_state_dict
--- a/invokeai/backend/peft/peft_model.py
+++ b/invokeai/backend/peft/peft_model.py
@@ -0,0 +1,52 @@
+from pathlib import Path
+from typing import Optional, Union
+
+import torch
+from diffusers.loaders.lora_conversion_utils import _convert_kohya_lora_to_diffusers
+
+from invokeai.backend.model_manager.config import BaseModelType
+from invokeai.backend.peft.sdxl_format_utils import convert_sdxl_keys_to_diffusers_format
+from invokeai.backend.util.serialization import load_state_dict
+
+
+class PeftModel:
+    """A class for loading and managing parameter-efficient fine-tuning models."""
+
+    def __init__(  # plain container: stores the three arguments unchanged
+        self,
+        name: str,
+        state_dict: dict[str, torch.Tensor],
+        network_alphas: dict[str, torch.Tensor],
+    ):
+        self.name = name
+        self.state_dict = state_dict
+        self.network_alphas = network_alphas
+
+    def calc_size(self) -> int:  # total bytes of the tensors in state_dict; network_alphas is not counted
+        model_size = 0
+        for tensor in self.state_dict.values():
+            model_size += tensor.nelement() * tensor.element_size()  # element count * bytes per element
+        return model_size
+
+    @classmethod
+    def from_checkpoint(  # alternate constructor: load a LoRA checkpoint file and convert its state dict
+        cls,
+        file_path: Union[str, Path],
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+        base_model: Optional[BaseModelType] = None,
+    ):
+        device = device or torch.device("cpu")  # default to CPU when no device is given
+        dtype = dtype or torch.float32  # NOTE(review): dtype is resolved here but not used anywhere below in this method
+
+        file_path = Path(file_path)
+
+        state_dict = load_state_dict(file_path, device=str(device))
+        # lora_unet_up_blocks_1_attentions_2_transformer_blocks_1_ff_net_2.lora_down.weight
+        if base_model == BaseModelType.StableDiffusionXL:  # SDXL keys are first normalised to diffusers naming
+            state_dict = convert_sdxl_keys_to_diffusers_format(state_dict)
+
+        # TODO(ryand): We shouldn't be using an unexported function from diffusers here. Consider opening an upstream PR
+        # to move this function to state_dict_utils.py.
+        state_dict, network_alphas = _convert_kohya_lora_to_diffusers(state_dict)
+        return cls(name=file_path.stem, state_dict=state_dict, network_alphas=network_alphas)  # name is the file name without its suffix
--- a/invokeai/backend/peft/peft_model_patcher.py
+++ b/invokeai/backend/peft/peft_model_patcher.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import Iterator, Tuple
+
+import torch
+from diffusers.models.lora import text_encoder_attn_modules, text_encoder_mlp_modules
+from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
+from diffusers.utils.peft_utils import get_peft_kwargs, scale_lora_layers
+from diffusers.utils.state_dict_utils import convert_state_dict_to_peft, convert_unet_state_dict_to_peft
+from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
+
+from invokeai.backend.peft.peft_model import PeftModel
+
+UNET_NAME = "unet"
+
+
+class PeftModelPatcher:  # context managers that apply PeftModel LoRAs to a text encoder, a UNet, or (unfinished) a generic module
+    @classmethod
+    @contextmanager
+    @torch.no_grad()
+    def apply_peft_model_to_text_encoder(  # injects each LoRA's matching layers into text_encoder as a PEFT adapter, then yields
+        cls,
+        text_encoder: torch.nn.Module,
+        peft_models: Iterator[Tuple[PeftModel, float]],  # (model, weight) pairs, consumed lazily
+        prefix: str,  # state-dict key namespace to select, e.g. "text_encoder" or "text_encoder_2"
+    ):
+        original_weights = {}  # NOTE(review): never populated or read in this method
+
+        try:
+            for peft_model, peft_model_weight in peft_models:
+                keys = list(peft_model.state_dict.keys())
+
+                # Load the layers corresponding to text encoder and make necessary adjustments.
+                text_encoder_keys = [k for k in keys if k.startswith(prefix) and k.split(".")[0] == prefix]  # first dot-separated component must equal prefix exactly
+                text_encoder_lora_state_dict = {
+                    k.replace(f"{prefix}.", ""): v for k, v in peft_model.state_dict.items() if k in text_encoder_keys  # str.replace removes every "<prefix>." occurrence
+                }
+
+                if len(text_encoder_lora_state_dict) == 0:  # this LoRA has no keys under prefix; skip it
+                    continue
+
+                if peft_model.name in getattr(text_encoder, "peft_config", {}):  # adapter names must be unique per model
+                    raise ValueError(f"Adapter name {peft_model.name} already in use in the text encoder ({prefix}).")
+
+                rank = {}  # maps a lora_B weight key to shape[1] of that tensor
+                # TODO(ryand): Is this necessary?
+                # text_encoder_lora_state_dict = convert_state_dict_to_diffusers(text_encoder_lora_state_dict)
+
+                text_encoder_lora_state_dict = convert_state_dict_to_peft(text_encoder_lora_state_dict)
+
+                for name, _ in text_encoder_attn_modules(text_encoder):
+                    rank_key = f"{name}.out_proj.lora_B.weight"
+                    rank[rank_key] = text_encoder_lora_state_dict[rank_key].shape[1]  # NOTE(review): presumably raises KeyError if the LoRA lacks this key; confirm
+
+                patch_mlp = any(".mlp." in key for key in text_encoder_lora_state_dict.keys())
+                if patch_mlp:
+                    for name, _ in text_encoder_mlp_modules(text_encoder):
+                        rank_key_fc1 = f"{name}.fc1.lora_B.weight"
+                        rank_key_fc2 = f"{name}.fc2.lora_B.weight"
+
+                        rank[rank_key_fc1] = text_encoder_lora_state_dict[rank_key_fc1].shape[1]
+                        rank[rank_key_fc2] = text_encoder_lora_state_dict[rank_key_fc2].shape[1]
+
+                network_alphas = peft_model.network_alphas
+                if network_alphas is not None:  # filter and strip the alphas the same way as the weights above
+                    alpha_keys = [
+                        k for k in network_alphas.keys() if k.startswith(prefix) and k.split(".")[0] == prefix
+                    ]
+                    network_alphas = {
+                        k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys
+                    }
+
+                lora_config_kwargs = get_peft_kwargs(rank, network_alphas, text_encoder_lora_state_dict, is_unet=False)
+                lora_config_kwargs["inference_mode"] = True
+                lora_config = LoraConfig(**lora_config_kwargs)
+
+                new_text_encoder = inject_adapter_in_model(lora_config, text_encoder, peft_model.name)
+                incompatible_keys = set_peft_model_state_dict(
+                    new_text_encoder, text_encoder_lora_state_dict, peft_model.name
+                )
+                if incompatible_keys is not None:
+                    # check only for unexpected keys
+                    unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+                    if unexpected_keys:
+                        raise ValueError(f"Failed to inject unexpected PEFT keys: {unexpected_keys}")
+
+                # inject LoRA layers and load the state dict
+                # in transformers we automatically check whether the adapter name is already in use or not
+                # text_encoder.load_adapter(
+                #     adapter_name=adapter_name,
+                #     adapter_state_dict=text_encoder_lora_state_dict,
+                #     peft_config=lora_config,
+                # )
+
+                scale_lora_layers(text_encoder, weight=peft_model_weight)  # the only place the per-LoRA weight is used in this method
+                text_encoder.to(device=text_encoder.device, dtype=text_encoder.dtype)  # presumably aligns the injected adapter params with the encoder's device/dtype; confirm
+
+            yield  # the caller's with-body runs here
+        finally:
+            # TODO: nothing is undone on exit yet; this branch only executes `pass`.
+            pass
+            # for module_key, weight in original_weights.items():
+            #     model.get_submodule(module_key).weight.copy_(weight)
+
+    @classmethod
+    @contextmanager
+    @torch.no_grad()
+    def apply_peft_model_to_unet(  # UNet counterpart of the text-encoder method; selects keys that start with UNET_NAME
+        cls,
+        unet: UNet2DConditionModel,
+        peft_models: Iterator[Tuple[PeftModel, float]],
+    ):
+        try:
+            for peft_model, peft_model_weight in peft_models:  # NOTE(review): peft_model_weight is not used in this loop body
+                keys = list(peft_model.state_dict.keys())
+
+                unet_keys = [k for k in keys if k.startswith(UNET_NAME)]  # plain prefix match, unlike the exact first-component check used for text encoders
+                state_dict = {
+                    k.replace(f"{UNET_NAME}.", ""): v for k, v in peft_model.state_dict.items() if k in unet_keys
+                }
+
+                network_alphas = peft_model.network_alphas
+                if network_alphas is not None:
+                    alpha_keys = [k for k in network_alphas.keys() if k.startswith(UNET_NAME)]
+                    network_alphas = {
+                        k.replace(f"{UNET_NAME}.", ""): v for k, v in network_alphas.items() if k in alpha_keys
+                    }
+
+                if len(state_dict) == 0:  # this LoRA has no UNet keys; skip it
+                    continue
+
+                if peft_model.name in getattr(unet, "peft_config", {}):
+                    raise ValueError(f"Adapter name {peft_model.name} already in use in the Unet.")
+
+                state_dict = convert_unet_state_dict_to_peft(state_dict)
+
+                if network_alphas is not None:
+                    # The alphas state dict have the same structure as Unet, thus we convert it to peft format using
+                    # `convert_unet_state_dict_to_peft` method.
+                    network_alphas = convert_unet_state_dict_to_peft(network_alphas)
+
+                rank = {}  # maps each key containing "lora_B" to shape[1] of its tensor
+                for key, val in state_dict.items():
+                    if "lora_B" in key:
+                        rank[key] = val.shape[1]
+
+                lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True)
+                lora_config_kwargs["inference_mode"] = True
+                lora_config = LoraConfig(**lora_config_kwargs)
+
+                inject_adapter_in_model(lora_config, unet, adapter_name=peft_model.name)
+                incompatible_keys = set_peft_model_state_dict(unet, state_dict, peft_model.name)
+                if incompatible_keys is not None:
+                    # check only for unexpected keys
+                    unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+                    if unexpected_keys:
+                        raise ValueError(f"Failed to inject unexpected PEFT keys: {unexpected_keys}")
+
+                # TODO(ryand): What does this do?
+                unet.load_attn_procs(state_dict, network_alphas=network_alphas, low_cpu_mem_usage=True)
+
+                # TODO(ryand): Apply the lora weight. Where does diffusers do this? They don't seem to do it when they
+                # patch the UNet.
+            yield  # the caller's with-body runs here
+        finally:
+            # TODO: nothing is undone on exit yet; this branch only executes `pass`.
+            pass
+            # for module_key, weight in original_weights.items():
+            #     model.get_submodule(module_key).weight.copy_(weight)
+
+    @classmethod
+    @contextmanager
+    @torch.no_grad()
+    def apply_peft_patch(  # NOTE(review): unfinished; adds LoRA deltas to weights in place and restores the saved originals on exit
+        cls,
+        model: torch.nn.Module,
+        peft_models: Iterator[Tuple[PeftModel, float]],
+        prefix: str,
+    ):
+        original_weights = {}  # module_key -> CPU copy of the weight taken before the first patch
+
+        model_state_dict = model.state_dict()
+        try:
+            for peft_model, peft_model_weight in peft_models:
+                for layer_key, layer in peft_model.state_dict.items():
+                    if not layer_key.startswith(prefix):
+                        continue
+
+                    module_key = layer_key.replace(prefix + ".", "")
+                    # TODO(ryand): Make this work.
+
+                    module = model_state_dict[module_key]  # NOTE(review): an entry of model.state_dict(), yet accessed via .weight below
+
+                    # All of the LoRA weight calculations will be done on the same device as the module weight.
+                    # (Performance will be best if this is a CUDA device.)
+                    device = module.weight.device
+                    dtype = module.weight.dtype
+
+                    if module_key not in original_weights:  # save only once so stacked LoRAs restore to the true original
+                        # TODO(ryand): Set non_blocking = True?
+                        original_weights[module_key] = module.weight.detach().to(device="cpu", copy=True)
+
+                    layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0  # NOTE(review): .alpha/.rank/.get_weight assume a layer object, but layer is a PeftModel.state_dict value; confirm
+
+                    # We intentionally move to the target device first, then cast. Experimentally, this was found to
+                    # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
+                    # same thing in a single call to '.to(...)'.
+                    layer.to(device=device)
+                    layer.to(dtype=torch.float32)
+                    # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
+                    # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
+                    layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)  # NOTE(review): lora_weight is not defined in this method; the loop variable is peft_model_weight
+                    layer.to(device=torch.device("cpu"))
+
+                    assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
+                    if module.weight.shape != layer_weight.shape:
+                        # TODO: debug on lycoris
+                        assert hasattr(layer_weight, "reshape")
+                        layer_weight = layer_weight.reshape(module.weight.shape)
+
+                    assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
+                    module.weight += layer_weight.to(dtype=dtype)  # in-place add, cast back to the module's dtype
+            yield
+        finally:
+            for module_key, weight in original_weights.items():  # copy every saved weight back, including when an exception was raised
+                model.get_submodule(module_key).weight.copy_(weight)
--- a/invokeai/backend/peft/sdxl_format_utils.py
+++ b/invokeai/backend/peft/sdxl_format_utils.py
@@ -0,0 +1,154 @@
+import bisect
+
+import torch
+
+
+def convert_sdxl_keys_to_diffusers_format(state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+    """Convert the keys of an SDXL LoRA state_dict to diffusers format.
+
+    The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
+    diffusers format, then this function will have no effect.
+
+    This function is adapted from:
+    https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
+
+    Args:
+        state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
+
+    Raises:
+        ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
+
+    Returns:
+        Dict[str, Tensor]: The diffusers-format state_dict.
+    """
+    converted_count = 0  # The number of Stability AI keys converted to diffusers format.
+    not_converted_count = 0  # The number of keys that were not converted.
+
+    # Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
+    # For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
+    # `input_blocks_4_1_proj_in`.
+    stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)  # rebuilt and re-sorted on every call
+    stability_unet_keys.sort()
+
+    new_state_dict = {}  # the input dict is not modified; results are collected here
+    for full_key, value in state_dict.items():
+        if full_key.startswith("lora_unet_"):
+            search_key = full_key.replace("lora_unet_", "")
+            # Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
+            position = bisect.bisect_right(stability_unet_keys, search_key)
+            map_key = stability_unet_keys[position - 1]  # greatest key <= search_key; at position 0 this wraps to the last key, which the check below rejects
+            # Now, check if the map_key *actually* matches the search_key.
+            if search_key.startswith(map_key):
+                new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
+                new_state_dict[new_key] = value
+                converted_count += 1
+            else:
+                new_state_dict[full_key] = value  # no matching Stability AI prefix: keep the key unchanged
+                not_converted_count += 1
+        elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
+            # The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
+            new_state_dict[full_key] = value
+            continue
+        else:
+            raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
+
+    if converted_count > 0 and not_converted_count > 0:  # only a mix of converted and unconverted UNet keys is an error
+        raise ValueError(
+            f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
+            f" not_converted={not_converted_count}"
+        )
+
+    return new_state_dict
+
+
+# Code based on:
+# https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
+def make_sdxl_unet_conversion_map() -> list[tuple[str, str]]:
+    """Create a list of (Stability AI SDXL key prefix, diffusers SDXL key prefix) pairs for the UNet state_dict."""
+    unet_conversion_map_layer: list[tuple[str, str]] = []  # block-level (sd_prefix, hf_prefix) pairs, expanded further below
+
+    for i in range(3):  # num_blocks is 3 in sdxl
+        # loop over downblocks/upblocks
+        for j in range(2):
+            # loop over resnets/attentions for downblocks
+            hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
+            sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
+            unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
+
+            if i < 3:  # always true here, since i only takes the values 0..2
+                # no attention layers in down_blocks.3
+                hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
+                sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
+                unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
+
+        for j in range(3):
+            # loop over resnets/attentions for upblocks
+            hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
+            sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
+            unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
+
+            # if i > 0: commentout for sdxl
+            # no attention layers in up_blocks.0
+            hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
+            sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
+            unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
+
+        if i < 3:  # always true here, since i only takes the values 0..2
+            # no downsample in down_blocks.3
+            hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
+            sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
+            unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
+
+            # no upsample in up_blocks.3
+            hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
+            sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}."  # change for sdxl
+            unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
+
+    hf_mid_atn_prefix = "mid_block.attentions.0."
+    sd_mid_atn_prefix = "middle_block.1."
+    unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
+
+    for j in range(2):
+        hf_mid_res_prefix = f"mid_block.resnets.{j}."
+        sd_mid_res_prefix = f"middle_block.{2*j}."  # mid resnets sit at middle_block.0 and middle_block.2
+        unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
+
+    unet_conversion_map_resnet = [
+        # (stable-diffusion, HF Diffusers)
+        ("in_layers.0.", "norm1."),
+        ("in_layers.2.", "conv1."),
+        ("out_layers.0.", "norm2."),
+        ("out_layers.3.", "conv2."),
+        ("emb_layers.1.", "time_emb_proj."),
+        ("skip_connection.", "conv_shortcut."),
+    ]
+
+    unet_conversion_map: list[tuple[str, str]] = []  # the final list that is returned
+    for sd, hf in unet_conversion_map_layer:
+        if "resnets" in hf:  # resnet prefixes are expanded into one entry per sub-layer
+            for sd_res, hf_res in unet_conversion_map_resnet:
+                unet_conversion_map.append((sd + sd_res, hf + hf_res))
+        else:
+            unet_conversion_map.append((sd, hf))
+
+    for j in range(2):
+        hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
+        sd_time_embed_prefix = f"time_embed.{j*2}."
+        unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
+
+    for j in range(2):
+        hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
+        sd_label_embed_prefix = f"label_emb.0.{j*2}."
+        unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
+
+    unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
+    unet_conversion_map.append(("out.0.", "conv_norm_out."))
+    unet_conversion_map.append(("out.2.", "conv_out."))
+
+    return unet_conversion_map
+
+
+# A mapping of state_dict key prefixes from Stability AI SDXL format to diffusers SDXL format.
+SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {  # built once at import time
+    sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()  # trailing "." dropped, remaining "." turned into "_"
+}
--- a/invokeai/backend/util/serialization.py
+++ b/invokeai/backend/util/serialization.py
@@ -0,0 +1,37 @@
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import torch
+from safetensors.torch import load_file
+
+
+def state_dict_to(
+    state_dict: dict[str, torch.Tensor], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None
+) -> dict[str, torch.Tensor]:
+    """Build a new state_dict in which every tensor has been passed through Tensor.to(device=..., dtype=...).
+
+    The input dict is not mutated; a new dict with the same keys is created and returned.
+
+    Args:
+        state_dict: Mapping of key -> tensor to convert.
+        device: Target device, forwarded unchanged to Tensor.to(). Defaults to None.
+        dtype: Target dtype, forwarded unchanged to Tensor.to(). Defaults to None.
+
+    Returns:
+        A new dict that maps each original key to the result of the Tensor.to() call for its tensor.
+    """
+    new_state_dict: dict[str, torch.Tensor] = {}
+    for k, v in state_dict.items():
+        # NOTE(review): non_blocking=True lets the copy run asynchronously where the backend supports it. For copies
+        # from an accelerator to the CPU this can return before the data has arrived - confirm that callers
+        # synchronize before reading the returned tensors in that case.
+        new_state_dict[k] = v.to(device=device, dtype=dtype, non_blocking=True)
+    return new_state_dict
+
+
+def load_state_dict(file_path: Union[str, Path], device: str = "cpu") -> Any:
+    """Load a state_dict from a file that may be in either PyTorch or safetensors format. The file format is inferred
+    from the file extension.
+
+    A file whose suffix is exactly ".safetensors" (case-sensitive comparison) is read with
+    safetensors.torch.load_file(). Every other file is read with torch.load().
+
+    Args:
+        file_path: Path to the file to load. It is converted to a pathlib.Path before use.
+        device: Device to load onto. Passed as `device` to load_file() or as `map_location` to torch.load().
+            Defaults to "cpu".
+
+    Returns:
+        Whatever the underlying loader returns for the file.
+    """
+    file_path = Path(file_path)
+
+    if file_path.suffix == ".safetensors":
+        state_dict = load_file(
+            file_path,
+            device=device,
+        )
+    else:
+        # weights_only=True is used to address a security vulnerability that allows arbitrary code execution.
+        # This option was first introduced in https://github.com/pytorch/pytorch/pull/86812.
+        #
+        # mmap=True is used to both reduce memory usage and speed up loading. This setting causes torch.load() to more
+        # closely mirror the behaviour of safetensors.torch.load_file().  This option was first introduced in
+        # https://github.com/pytorch/pytorch/pull/102549. The discussion on that PR provides helpful context.
+        state_dict = torch.load(file_path, map_location=device, weights_only=True, mmap=True)
+
+    return state_dict
Author	SHA1	Message	Date
Ryan Dick	f9fda503a3	A very primitive working version of peft patching. It is very slow. LoRAs don't get unloaded yet, so can only be run once. And the results are slightly different than the old implementation. I suspect this is because the lora weight is not being applied to the UNet, but there could be other issues as well.	2024-04-05 12:02:05 -04:00
Ryan Dick	22c66cf55b	WIP	2024-04-04 22:40:42 -04:00
Ryan Dick	8260252aeb	Add LayerNorm to list of modules optimized by skip_torch_weight_init()	2024-04-04 18:07:13 -04:00
Ryan Dick	74fe74721a	Add util function for loading state_dicts from disk.	2024-04-04 18:07:13 -04:00
Ryan Dick	3e1af51737	Add skeleton of a PeftModel class.	2024-04-04 18:07:13 -04:00