Compare commits

...

1 Commits

Author SHA1 Message Date
Kent Keirsey
bbae964646 Support PEFT Loras with Base_Model.model prefix (#8433)
* Support PEFT Loras with Base_Model.model prefix

* update tests

* ruff

* fix python complaints

* update keys

* format keys

* remove unneeded test
2025-08-18 09:16:14 -04:00
4 changed files with 6909 additions and 8 deletions

View File

@@ -18,16 +18,25 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te
# First, check that all keys end in "lora_A.weight" or "lora_B.weight" (i.e. are in PEFT format).
all_keys_in_peft_format = all(k.endswith(("lora_A.weight", "lora_B.weight")) for k in state_dict.keys())
# Next, check that this is likely a FLUX model by spot-checking a few keys.
expected_keys = [
# Check if keys use transformer prefix
transformer_prefix_keys = [
"transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight",
"transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight",
"transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight",
"transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight",
]
all_expected_keys_present = all(k in state_dict for k in expected_keys)
transformer_keys_present = all(k in state_dict for k in transformer_prefix_keys)
return all_keys_in_peft_format and all_expected_keys_present
# Check if keys use base_model.model prefix
base_model_prefix_keys = [
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_A.weight",
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_B.weight",
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_A.weight",
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_B.weight",
]
base_model_keys_present = all(k in state_dict for k in base_model_prefix_keys)
return all_keys_in_peft_format and (transformer_keys_present or base_model_keys_present)
def lora_model_from_flux_diffusers_state_dict(
@@ -49,8 +58,16 @@ def lora_layers_from_flux_diffusers_grouped_state_dict(
https://github.com/huggingface/diffusers/blob/55ac421f7bb12fd00ccbef727be4dc2f3f920abb/scripts/convert_flux_to_diffusers.py
"""
# Remove the "transformer." prefix from all keys.
grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()}
# Determine which prefix is used and remove it from all keys.
# Check if any key starts with "base_model.model." prefix
has_base_model_prefix = any(k.startswith("base_model.model.") for k in grouped_state_dict.keys())
if has_base_model_prefix:
# Remove the "base_model.model." prefix from all keys.
grouped_state_dict = {k.replace("base_model.model.", ""): v for k, v in grouped_state_dict.items()}
else:
# Remove the "transformer." prefix from all keys.
grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()}
# Constants for FLUX.1
num_double_layers = 19

View File

@@ -0,0 +1,766 @@
# A sample state dict in the Diffusers FLUX LoRA format with base_model.model prefix.
# These keys are based on the LoRA model in peft_adapter_model.safetensors
state_dict_keys = {
"base_model.model.proj_out.lora_A.weight": [4, 3072],
"base_model.model.proj_out.lora_B.weight": [64, 4],
"base_model.model.single_transformer_blocks.0.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.0.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.0.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.0.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.1.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.1.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.1.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.1.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.1.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.10.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.10.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.10.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.10.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.10.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.11.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.11.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.11.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.11.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.11.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.12.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.12.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.12.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.12.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.12.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.13.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.13.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.13.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.13.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.13.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.14.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.14.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.14.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.14.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.14.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.15.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.15.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.15.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.15.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.15.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.16.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.16.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.16.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.16.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.16.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.17.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.17.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.17.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.17.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.17.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.18.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.18.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.18.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.18.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.18.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.19.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.19.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.19.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.19.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.19.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.2.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.2.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.2.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.2.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.2.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.20.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.20.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.20.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.20.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.20.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.21.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.21.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.21.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.21.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.21.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.22.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.22.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.22.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.22.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.22.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.23.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.23.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.23.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.23.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.23.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.24.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.24.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.24.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.24.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.24.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.25.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.25.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.25.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.25.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.25.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.26.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.26.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.26.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.26.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.26.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.27.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.27.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.27.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.27.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.27.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.28.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.28.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.28.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.28.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.28.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.29.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.29.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.29.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.29.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.29.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.3.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.3.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.3.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.3.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.3.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.30.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.30.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.30.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.30.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.30.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.31.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.31.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.31.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.31.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.31.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.32.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.32.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.32.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.32.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.32.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.33.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.33.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.33.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.33.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.33.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.34.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.34.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.34.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.34.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.34.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.35.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.35.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.35.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.35.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.35.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.36.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.36.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.36.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.36.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.36.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.37.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.37.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.37.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.37.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.37.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.4.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.4.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.4.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.4.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.4.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.5.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.5.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.5.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.5.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.5.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.6.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.6.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.6.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.6.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.6.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.7.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.7.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.7.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.7.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.7.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.8.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.8.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.8.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.8.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.8.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.9.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.9.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.9.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.single_transformer_blocks.9.proj_mlp.lora_A.weight": [4, 3072],
"base_model.model.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 4],
"base_model.model.single_transformer_blocks.9.proj_out.lora_A.weight": [4, 15360],
"base_model.model.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.to_add_out.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.to_k.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.to_out.0.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.to_q.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.attn.to_v.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 4],
"base_model.model.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 4],
"base_model.model.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [4, 3072],
"base_model.model.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 4],
}

View File

@@ -9,6 +9,9 @@ from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_L
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
state_dict_keys as flux_onetrainer_state_dict_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_base_model_format import (
state_dict_keys as flux_diffusers_base_model_state_dict_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_format import (
state_dict_keys as flux_diffusers_state_dict_keys,
)
@@ -21,7 +24,14 @@ from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_for
from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict
@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys])
@pytest.mark.parametrize(
"sd_keys",
[
flux_diffusers_state_dict_keys,
flux_diffusers_no_proj_mlp_state_dict_keys,
flux_diffusers_base_model_state_dict_keys,
],
)
def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]):
"""Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format."""
# Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -41,7 +51,14 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str,
assert not is_state_dict_likely_in_flux_diffusers_format(state_dict)
@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys])
@pytest.mark.parametrize(
"sd_keys",
[
flux_diffusers_state_dict_keys,
flux_diffusers_no_proj_mlp_state_dict_keys,
flux_diffusers_base_model_state_dict_keys,
],
)
def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]):
"""Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format."""
# Construct a state dict that is in the Diffusers FLUX LoRA format.

File diff suppressed because it is too large Load Diff