Split PEFT layer implementations into separate files.
@@ -3,416 +3,23 @@
import bisect
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union

import torch
from safetensors.torch import load_file
from typing_extensions import Self

import invokeai.backend.util.logging as logger
from invokeai.backend.model_manager import BaseModelType
from invokeai.backend.peft.layers.any_lora_layer import AnyLoRALayer
from invokeai.backend.peft.layers.full_layer import FullLayer
from invokeai.backend.peft.layers.ia3_layer import IA3Layer
from invokeai.backend.peft.layers.loha_layer import LoHALayer
from invokeai.backend.peft.layers.lokr_layer import LoKRLayer
from invokeai.backend.peft.layers.lora_layer import LoRALayer
from invokeai.backend.peft.layers.norm_layer import NormLayer
from invokeai.backend.raw_model import RawModel


class LoRALayerBase:
    # rank: Optional[int]
    # alpha: Optional[float]
    # bias: Optional[torch.Tensor]
    # layer_key: str

    # @property
    # def scale(self):
    #     return self.alpha / self.rank if (self.alpha and self.rank) else 1.0

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        if "alpha" in values:
            self.alpha = values["alpha"].item()
        else:
            self.alpha = None

        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
                values["bias_indices"],
                values["bias_values"],
                tuple(values["bias_size"]),
            )
        else:
            self.bias = None

        self.rank = None  # set in layer implementation
        self.layer_key = layer_key

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError()

    def get_bias(self, orig_bias: torch.Tensor) -> Optional[torch.Tensor]:
        return self.bias

    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
        params = {"weight": self.get_weight(orig_module.weight)}
        bias = self.get_bias(orig_module.bias)
        if bias is not None:
            params["bias"] = bias
        return params

    def calc_size(self) -> int:
        model_size = 0
        for val in [self.bias]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        if self.bias is not None:
            self.bias = self.bias.to(device=device, dtype=dtype)

    def check_keys(self, values: Dict[str, torch.Tensor], known_keys: Set[str]):
        """Log a warning if values contains unhandled keys."""
        # {"alpha", "bias_indices", "bias_values", "bias_size"} are hard-coded, because they are handled by
        # `LoRALayerBase`. Sub-classes should provide the known_keys that they handled.
        all_known_keys = known_keys | {"alpha", "bias_indices", "bias_values", "bias_size"}
        unknown_keys = set(values.keys()) - all_known_keys
        if unknown_keys:
            logger.warning(
                f"Unexpected keys found in LoRA/LyCORIS layer, model might work incorrectly! Keys: {unknown_keys}"
            )


# TODO: find and debug lora/locon with bias
class LoRALayer(LoRALayerBase):
    # up: torch.Tensor
    # mid: Optional[torch.Tensor]
    # down: torch.Tensor

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.up = values["lora_up.weight"]
        self.down = values["lora_down.weight"]
        self.mid = values.get("lora_mid.weight", None)

        self.rank = self.down.shape[0]
        self.check_keys(
            values,
            {
                "lora_up.weight",
                "lora_down.weight",
                "lora_mid.weight",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        if self.mid is not None:
            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
        else:
            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.up, self.mid, self.down]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.up = self.up.to(device=device, dtype=dtype)
        self.down = self.down.to(device=device, dtype=dtype)

        if self.mid is not None:
            self.mid = self.mid.to(device=device, dtype=dtype)


class LoHALayer(LoRALayerBase):
    # w1_a: torch.Tensor
    # w1_b: torch.Tensor
    # w2_a: torch.Tensor
    # w2_b: torch.Tensor
    # t1: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None

    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
        super().__init__(layer_key, values)

        self.w1_a = values["hada_w1_a"]
        self.w1_b = values["hada_w1_b"]
        self.w2_a = values["hada_w2_a"]
        self.w2_b = values["hada_w2_b"]
        self.t1 = values.get("hada_t1", None)
        self.t2 = values.get("hada_t2", None)

        self.rank = self.w1_b.shape[0]
        self.check_keys(
            values,
            {
                "hada_w1_a",
                "hada_w1_b",
                "hada_w2_a",
                "hada_w2_b",
                "hada_t1",
                "hada_t2",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        if self.t1 is None:
            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
        else:
            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
            weight = rebuild1 * rebuild2

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.t1 is not None:
            self.t1 = self.t1.to(device=device, dtype=dtype)

        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)


class LoKRLayer(LoRALayerBase):
    # w1: Optional[torch.Tensor] = None
    # w1_a: Optional[torch.Tensor] = None
    # w1_b: Optional[torch.Tensor] = None
    # w2: Optional[torch.Tensor] = None
    # w2_a: Optional[torch.Tensor] = None
    # w2_b: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.w1 = values.get("lokr_w1", None)
        if self.w1 is None:
            self.w1_a = values["lokr_w1_a"]
            self.w1_b = values["lokr_w1_b"]
        else:
            self.w1_b = None
            self.w1_a = None

        self.w2 = values.get("lokr_w2", None)
        if self.w2 is None:
            self.w2_a = values["lokr_w2_a"]
            self.w2_b = values["lokr_w2_b"]
        else:
            self.w2_a = None
            self.w2_b = None

        self.t2 = values.get("lokr_t2", None)

        if self.w1_b is not None:
            self.rank = self.w1_b.shape[0]
        elif self.w2_b is not None:
            self.rank = self.w2_b.shape[0]
        else:
            self.rank = None  # unscaled

        self.check_keys(
            values,
            {
                "lokr_w1",
                "lokr_w1_a",
                "lokr_w1_b",
                "lokr_w2",
                "lokr_w2_a",
                "lokr_w2_b",
                "lokr_t2",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        w1: Optional[torch.Tensor] = self.w1
        if w1 is None:
            assert self.w1_a is not None
            assert self.w1_b is not None
            w1 = self.w1_a @ self.w1_b

        w2 = self.w2
        if w2 is None:
            if self.t2 is None:
                assert self.w2_a is not None
                assert self.w2_b is not None
                w2 = self.w2_a @ self.w2_b
            else:
                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)

        if len(w2.shape) == 4:
            w1 = w1.unsqueeze(2).unsqueeze(2)
        w2 = w2.contiguous()
        assert w1 is not None
        assert w2 is not None
        weight = torch.kron(w1, w2)

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        if self.w1 is not None:
            self.w1 = self.w1.to(device=device, dtype=dtype)
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype)

        if self.w2 is not None:
            self.w2 = self.w2.to(device=device, dtype=dtype)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype)

        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)


class FullLayer(LoRALayerBase):
    # bias handled in LoRALayerBase(calc_size, to)
    # weight: torch.Tensor
    # bias: Optional[torch.Tensor]

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["diff"]
        self.bias = values.get("diff_b", None)

        self.rank = None  # unscaled
        self.check_keys(values, {"diff", "diff_b"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        return self.weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)


class IA3Layer(LoRALayerBase):
    # weight: torch.Tensor
    # on_input: torch.Tensor

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["weight"]
        self.on_input = values["on_input"]

        self.rank = None  # unscaled
        self.check_keys(values, {"weight", "on_input"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        weight = self.weight
        if not self.on_input:
            weight = weight.reshape(-1, 1)
        assert orig_weight is not None
        return orig_weight * weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        model_size += self.on_input.nelement() * self.on_input.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
        self.on_input = self.on_input.to(device=device, dtype=dtype)


class NormLayer(LoRALayerBase):
    # bias handled in LoRALayerBase(calc_size, to)
    # weight: torch.Tensor
    # bias: Optional[torch.Tensor]

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["w_norm"]
        self.bias = values.get("b_norm", None)

        self.rank = None  # unscaled
        self.check_keys(values, {"w_norm", "b_norm"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        return self.weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)


AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer]


class LoRAModelRaw(RawModel):  # (torch.nn.Module):
    _name: str
    layers: Dict[str, AnyLoRALayer]
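All of the per-layer classes above move into their own modules under invokeai/backend/peft/layers/, with LoRALayerBase providing the shared get_parameters()/calc_size()/to() surface. For reference, a minimal sketch (not part of this commit) of how a patcher might merge one layer's parameters into a host module, using the alpha/rank formula from the commented-out scale property; the additive merge is an assumption for illustration, not InvokeAI's actual patching code:

import torch

def merge_layer(module: torch.nn.Module, layer: "LoRALayerBase", weight: float = 1.0) -> None:
    # Mirrors the commented-out `scale` property above.
    scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
    # get_parameters() returns {"weight": ...} plus "bias" when present.
    for name, delta in layer.get_parameters(module).items():
        param = getattr(module, name)
        # Assumes an additive LoRA-style delta; Full/IA3/Norm layers differ.
        param.data += delta.to(device=param.device, dtype=param.dtype) * (weight * scale)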
invokeai/backend/peft/__init__.py (new file, +0 lines)
invokeai/backend/peft/layers/__init__.py (new file, +0 lines)
invokeai/backend/peft/layers/any_lora_layer.py (new file, +10 lines)
@@ -0,0 +1,10 @@
from typing import Union

from invokeai.backend.peft.layers.full_layer import FullLayer
from invokeai.backend.peft.layers.ia3_layer import IA3Layer
from invokeai.backend.peft.layers.loha_layer import LoHALayer
from invokeai.backend.peft.layers.lokr_layer import LoKRLayer
from invokeai.backend.peft.layers.lora_layer import LoRALayer
from invokeai.backend.peft.layers.norm_layer import NormLayer

AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer]
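The union gives consumers a single type for the six layer variants, all of which share the LoRALayerBase interface. A small sketch (not in the diff, function name hypothetical) of summing memory usage across a layer dict, which works without isinstance checks precisely because every union member implements calc_size():

from typing import Dict

from invokeai.backend.peft.layers.any_lora_layer import AnyLoRALayer

def total_layer_bytes(layers: Dict[str, AnyLoRALayer]) -> int:
    # Every member of the union inherits calc_size() from LoRALayerBase.
    return sum(layer.calc_size() for layer in layers.values())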
invokeai/backend/peft/layers/full_layer.py (new file, +37 lines)
@@ -0,0 +1,37 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


class FullLayer(LoRALayerBase):
    # bias handled in LoRALayerBase(calc_size, to)
    # weight: torch.Tensor
    # bias: Optional[torch.Tensor]

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["diff"]
        self.bias = values.get("diff_b", None)

        self.rank = None  # unscaled
        self.check_keys(values, {"diff", "diff_b"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        return self.weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
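FullLayer carries a full-rank weight difference rather than a factorization: get_weight() returns the stored "diff" tensor unchanged. Illustrative construction (not in the diff; the key and shapes are made up, and it assumes InvokeAI at this revision is importable):

import torch

from invokeai.backend.peft.layers.full_layer import FullLayer

values = {"diff": torch.zeros(320, 320), "diff_b": torch.zeros(320)}
layer = FullLayer(layer_key="example_key", values=values)  # hypothetical key
assert layer.get_weight(torch.randn(320, 320)).shape == (320, 320)
assert layer.rank is None  # full-rank patches are unscaled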
invokeai/backend/peft/layers/ia3_layer.py (new file, +42 lines)
@@ -0,0 +1,42 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


class IA3Layer(LoRALayerBase):
    # weight: torch.Tensor
    # on_input: torch.Tensor

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["weight"]
        self.on_input = values["on_input"]

        self.rank = None  # unscaled
        self.check_keys(values, {"weight", "on_input"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        weight = self.weight
        if not self.on_input:
            weight = weight.reshape(-1, 1)
        assert orig_weight is not None
        return orig_weight * weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        model_size += self.on_input.nelement() * self.on_input.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
        self.on_input = self.on_input.to(device=device, dtype=dtype)
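In get_weight(), the learned IA3 vector rescales the original weight: when on_input is false it is reshaped to a column so it scales one output feature per row; otherwise it broadcasts across the input features. A shape-only sketch (not in the diff):

import torch

orig_weight = torch.ones(4, 8)   # (out_features, in_features)
scale = torch.full((4,), 2.0)    # one scale per output feature
rescaled = orig_weight * scale.reshape(-1, 1)  # same broadcast as get_weight()
assert torch.equal(rescaled, 2.0 * orig_weight)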
invokeai/backend/peft/layers/loha_layer.py (new file, +68 lines)
@@ -0,0 +1,68 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


class LoHALayer(LoRALayerBase):
    # w1_a: torch.Tensor
    # w1_b: torch.Tensor
    # w2_a: torch.Tensor
    # w2_b: torch.Tensor
    # t1: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None

    def __init__(self, layer_key: str, values: Dict[str, torch.Tensor]):
        super().__init__(layer_key, values)

        self.w1_a = values["hada_w1_a"]
        self.w1_b = values["hada_w1_b"]
        self.w2_a = values["hada_w2_a"]
        self.w2_b = values["hada_w2_b"]
        self.t1 = values.get("hada_t1", None)
        self.t2 = values.get("hada_t2", None)

        self.rank = self.w1_b.shape[0]
        self.check_keys(
            values,
            {
                "hada_w1_a",
                "hada_w1_b",
                "hada_w2_a",
                "hada_w2_b",
                "hada_t1",
                "hada_t2",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        if self.t1 is None:
            weight: torch.Tensor = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
        else:
            rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
            rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
            weight = rebuild1 * rebuild2

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
        if self.t1 is not None:
            self.t1 = self.t1.to(device=device, dtype=dtype)

        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
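The non-Tucker LoHA path reconstructs the weight as a Hadamard (elementwise) product of two independent low-rank products, which can express up to rank r**2 while storing only four small matrices; the hada_t1/hada_t2 branch handles the Tucker-decomposed convolution case via einsum. A shape sketch of the simple branch (not in the diff):

import torch

out_f, in_f, rank = 64, 32, 4
w1_a, w1_b = torch.randn(out_f, rank), torch.randn(rank, in_f)
w2_a, w2_b = torch.randn(out_f, rank), torch.randn(rank, in_f)
weight = (w1_a @ w1_b) * (w2_a @ w2_b)  # same expression as get_weight()
assert weight.shape == (out_f, in_f)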
invokeai/backend/peft/layers/lokr_layer.py (new file, +114 lines)
@@ -0,0 +1,114 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


class LoKRLayer(LoRALayerBase):
    # w1: Optional[torch.Tensor] = None
    # w1_a: Optional[torch.Tensor] = None
    # w1_b: Optional[torch.Tensor] = None
    # w2: Optional[torch.Tensor] = None
    # w2_a: Optional[torch.Tensor] = None
    # w2_b: Optional[torch.Tensor] = None
    # t2: Optional[torch.Tensor] = None

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.w1 = values.get("lokr_w1", None)
        if self.w1 is None:
            self.w1_a = values["lokr_w1_a"]
            self.w1_b = values["lokr_w1_b"]
        else:
            self.w1_b = None
            self.w1_a = None

        self.w2 = values.get("lokr_w2", None)
        if self.w2 is None:
            self.w2_a = values["lokr_w2_a"]
            self.w2_b = values["lokr_w2_b"]
        else:
            self.w2_a = None
            self.w2_b = None

        self.t2 = values.get("lokr_t2", None)

        if self.w1_b is not None:
            self.rank = self.w1_b.shape[0]
        elif self.w2_b is not None:
            self.rank = self.w2_b.shape[0]
        else:
            self.rank = None  # unscaled

        self.check_keys(
            values,
            {
                "lokr_w1",
                "lokr_w1_a",
                "lokr_w1_b",
                "lokr_w2",
                "lokr_w2_a",
                "lokr_w2_b",
                "lokr_t2",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        w1: Optional[torch.Tensor] = self.w1
        if w1 is None:
            assert self.w1_a is not None
            assert self.w1_b is not None
            w1 = self.w1_a @ self.w1_b

        w2 = self.w2
        if w2 is None:
            if self.t2 is None:
                assert self.w2_a is not None
                assert self.w2_b is not None
                w2 = self.w2_a @ self.w2_b
            else:
                w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)

        if len(w2.shape) == 4:
            w1 = w1.unsqueeze(2).unsqueeze(2)
        w2 = w2.contiguous()
        assert w1 is not None
        assert w2 is not None
        weight = torch.kron(w1, w2)

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        if self.w1 is not None:
            self.w1 = self.w1.to(device=device, dtype=dtype)
        else:
            assert self.w1_a is not None
            assert self.w1_b is not None
            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
            self.w1_b = self.w1_b.to(device=device, dtype=dtype)

        if self.w2 is not None:
            self.w2 = self.w2.to(device=device, dtype=dtype)
        else:
            assert self.w2_a is not None
            assert self.w2_b is not None
            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
            self.w2_b = self.w2_b.to(device=device, dtype=dtype)

        if self.t2 is not None:
            self.t2 = self.t2.to(device=device, dtype=dtype)
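LoKR reconstructs the weight as a Kronecker product of two blocks, each of which may itself be stored factored (w1_a @ w1_b, and optionally a Tucker einsum for w2). The shape arithmetic that makes this compact (not in the diff): torch.kron of a (p, q) block with an (r, s) block yields a (p*r, q*s) weight.

import torch

w1 = torch.randn(8, 4)
w2 = torch.randn(40, 80)
weight = torch.kron(w1, w2)  # (8*40, 4*80)
assert weight.shape == (320, 320)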
invokeai/backend/peft/layers/lora_layer.py (new file, +59 lines)
@@ -0,0 +1,59 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


# TODO: find and debug lora/locon with bias
class LoRALayer(LoRALayerBase):
    # up: torch.Tensor
    # mid: Optional[torch.Tensor]
    # down: torch.Tensor

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.up = values["lora_up.weight"]
        self.down = values["lora_down.weight"]
        self.mid = values.get("lora_mid.weight", None)

        self.rank = self.down.shape[0]
        self.check_keys(
            values,
            {
                "lora_up.weight",
                "lora_down.weight",
                "lora_mid.weight",
            },
        )

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        if self.mid is not None:
            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
        else:
            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)

        return weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        for val in [self.up, self.mid, self.down]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.up = self.up.to(device=device, dtype=dtype)
        self.down = self.down.to(device=device, dtype=dtype)

        if self.mid is not None:
            self.mid = self.mid.to(device=device, dtype=dtype)
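This is the classic LoRA reconstruction: the delta is the product of a rank-r up matrix and a rank-r down matrix (with an optional mid tensor for LoCon convolutions), so storage scales with r * (out_features + in_features) instead of out_features * in_features. A minimal sketch of the non-mid branch (not in the diff):

import torch

out_f, in_f, rank = 320, 768, 8
up = torch.randn(out_f, rank)   # corresponds to "lora_up.weight"
down = torch.randn(rank, in_f)  # corresponds to "lora_down.weight"
delta = up @ down               # same math as get_weight() when mid is None
assert delta.shape == (out_f, in_f)
assert down.shape[0] == rank    # rank is read off down, as in __init__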
invokeai/backend/peft/layers/lora_layer_base.py (new file, +74 lines)
@@ -0,0 +1,74 @@
from typing import Dict, Optional, Set

import torch

import invokeai.backend.util.logging as logger


class LoRALayerBase:
    # rank: Optional[int]
    # alpha: Optional[float]
    # bias: Optional[torch.Tensor]
    # layer_key: str

    # @property
    # def scale(self):
    #     return self.alpha / self.rank if (self.alpha and self.rank) else 1.0

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        if "alpha" in values:
            self.alpha = values["alpha"].item()
        else:
            self.alpha = None

        if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
            self.bias: Optional[torch.Tensor] = torch.sparse_coo_tensor(
                values["bias_indices"],
                values["bias_values"],
                tuple(values["bias_size"]),
            )
        else:
            self.bias = None

        self.rank = None  # set in layer implementation
        self.layer_key = layer_key

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError()

    def get_bias(self, orig_bias: torch.Tensor) -> Optional[torch.Tensor]:
        return self.bias

    def get_parameters(self, orig_module: torch.nn.Module) -> Dict[str, torch.Tensor]:
        params = {"weight": self.get_weight(orig_module.weight)}
        bias = self.get_bias(orig_module.bias)
        if bias is not None:
            params["bias"] = bias
        return params

    def calc_size(self) -> int:
        model_size = 0
        for val in [self.bias]:
            if val is not None:
                model_size += val.nelement() * val.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        if self.bias is not None:
            self.bias = self.bias.to(device=device, dtype=dtype)

    def check_keys(self, values: Dict[str, torch.Tensor], known_keys: Set[str]):
        """Log a warning if values contains unhandled keys."""
        # {"alpha", "bias_indices", "bias_values", "bias_size"} are hard-coded, because they are handled by
        # `LoRALayerBase`. Sub-classes should provide the known_keys that they handled.
        all_known_keys = known_keys | {"alpha", "bias_indices", "bias_values", "bias_size"}
        unknown_keys = set(values.keys()) - all_known_keys
        if unknown_keys:
            logger.warning(
                f"Unexpected keys found in LoRA/LyCORIS layer, model might work incorrectly! Keys: {unknown_keys}"
            )
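The base class also owns the sparse bias handling: bias_indices/bias_values/bias_size triples from the state dict are rebuilt into a COO tensor in __init__. A standalone sketch of that reconstruction (not in the diff, toy numbers):

import torch

indices = torch.tensor([[0, 2]])          # positions 0 and 2 of a length-4 bias
bias_values = torch.tensor([0.5, -0.25])
bias = torch.sparse_coo_tensor(indices, bias_values, (4,))
assert bias.to_dense().tolist() == [0.5, 0.0, -0.25, 0.0]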
invokeai/backend/peft/layers/norm_layer.py (new file, +37 lines)
@@ -0,0 +1,37 @@
from typing import Dict, Optional

import torch

from invokeai.backend.peft.layers.lora_layer_base import LoRALayerBase


class NormLayer(LoRALayerBase):
    # bias handled in LoRALayerBase(calc_size, to)
    # weight: torch.Tensor
    # bias: Optional[torch.Tensor]

    def __init__(
        self,
        layer_key: str,
        values: Dict[str, torch.Tensor],
    ):
        super().__init__(layer_key, values)

        self.weight = values["w_norm"]
        self.bias = values.get("b_norm", None)

        self.rank = None  # unscaled
        self.check_keys(values, {"w_norm", "b_norm"})

    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
        return self.weight

    def calc_size(self) -> int:
        model_size = super().calc_size()
        model_size += self.weight.nelement() * self.weight.element_size()
        return model_size

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        super().to(device=device, dtype=dtype)

        self.weight = self.weight.to(device=device, dtype=dtype)
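NormLayer stores w_norm/b_norm tensors for a normalization layer; get_weight() returns the stored tensor regardless of the original weight. Illustrative usage (not in the diff; the key and size are made up, and it assumes InvokeAI at this revision is importable):

import torch

from invokeai.backend.peft.layers.norm_layer import NormLayer

values = {"w_norm": torch.ones(320), "b_norm": torch.zeros(320)}
layer = NormLayer(layer_key="example_key", values=values)  # hypothetical key
assert torch.equal(layer.get_weight(torch.randn(320)), values["w_norm"])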