mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2026-01-18 00:47:56 -05:00
Compare commits
1 Commits
controlnet
...
maryhipp/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bbae964646 |
@@ -18,16 +18,25 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te
|
||||
# First, check that all keys end in "lora_A.weight" or "lora_B.weight" (i.e. are in PEFT format).
|
||||
all_keys_in_peft_format = all(k.endswith(("lora_A.weight", "lora_B.weight")) for k in state_dict.keys())
|
||||
|
||||
# Next, check that this is likely a FLUX model by spot-checking a few keys.
|
||||
expected_keys = [
|
||||
# Check if keys use transformer prefix
|
||||
transformer_prefix_keys = [
|
||||
"transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight",
|
||||
"transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight",
|
||||
"transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight",
|
||||
"transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight",
|
||||
]
|
||||
all_expected_keys_present = all(k in state_dict for k in expected_keys)
|
||||
transformer_keys_present = all(k in state_dict for k in transformer_prefix_keys)
|
||||
|
||||
return all_keys_in_peft_format and all_expected_keys_present
|
||||
# Check if keys use base_model.model prefix
|
||||
base_model_prefix_keys = [
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_A.weight",
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_B.weight",
|
||||
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_A.weight",
|
||||
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_B.weight",
|
||||
]
|
||||
base_model_keys_present = all(k in state_dict for k in base_model_prefix_keys)
|
||||
|
||||
return all_keys_in_peft_format and (transformer_keys_present or base_model_keys_present)
|
||||
|
||||
|
||||
def lora_model_from_flux_diffusers_state_dict(
|
||||
@@ -49,8 +58,16 @@ def lora_layers_from_flux_diffusers_grouped_state_dict(
|
||||
https://github.com/huggingface/diffusers/blob/55ac421f7bb12fd00ccbef727be4dc2f3f920abb/scripts/convert_flux_to_diffusers.py
|
||||
"""
|
||||
|
||||
# Remove the "transformer." prefix from all keys.
|
||||
grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()}
|
||||
# Determine which prefix is used and remove it from all keys.
|
||||
# Check if any key starts with "base_model.model." prefix
|
||||
has_base_model_prefix = any(k.startswith("base_model.model.") for k in grouped_state_dict.keys())
|
||||
|
||||
if has_base_model_prefix:
|
||||
# Remove the "base_model.model." prefix from all keys.
|
||||
grouped_state_dict = {k.replace("base_model.model.", ""): v for k, v in grouped_state_dict.items()}
|
||||
else:
|
||||
# Remove the "transformer." prefix from all keys.
|
||||
grouped_state_dict = {k.replace("transformer.", ""): v for k, v in grouped_state_dict.items()}
|
||||
|
||||
# Constants for FLUX.1
|
||||
num_double_layers = 19
|
||||
|
||||
@@ -0,0 +1,766 @@
|
||||
# A sample state dict in the Diffusers FLUX LoRA format with base_model.model prefix.
|
||||
# These keys are based on the LoRA model in peft_adapter_model.safetensors
|
||||
state_dict_keys = {
|
||||
"base_model.model.proj_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.proj_out.lora_B.weight": [64, 4],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.0.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.0.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.1.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.1.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.10.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.10.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.11.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.11.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.12.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.12.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.13.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.13.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.14.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.14.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.15.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.15.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.16.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.16.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.17.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.17.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.18.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.18.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.19.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.19.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.2.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.2.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.20.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.20.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.21.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.21.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.22.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.22.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.23.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.23.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.24.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.24.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.25.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.25.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.26.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.26.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.27.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.27.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.28.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.28.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.29.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.29.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.3.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.3.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.30.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.30.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.31.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.31.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.32.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.32.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.33.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.33.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.34.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.34.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.35.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.35.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.36.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.36.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.37.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.37.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.4.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.4.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.5.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.5.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.6.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.6.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.7.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.7.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.8.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.8.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.single_transformer_blocks.9.proj_mlp.lora_A.weight": [4, 3072],
|
||||
"base_model.model.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 4],
|
||||
"base_model.model.single_transformer_blocks.9.proj_out.lora_A.weight": [4, 15360],
|
||||
"base_model.model.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.to_add_out.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.to_k.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.to_out.0.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.to_q.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.attn.to_v.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 4],
|
||||
"base_model.model.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 4],
|
||||
"base_model.model.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [4, 3072],
|
||||
"base_model.model.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 4],
|
||||
}
|
||||
@@ -9,6 +9,9 @@ from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_L
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
    state_dict_keys as flux_onetrainer_state_dict_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_base_model_format import (
    state_dict_keys as flux_diffusers_base_model_state_dict_keys,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_format import (
    state_dict_keys as flux_diffusers_state_dict_keys,
)
@@ -21,7 +24,14 @@ from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_for
from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict


@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys])
@pytest.mark.parametrize(
    "sd_keys",
    [
        flux_diffusers_state_dict_keys,
        flux_diffusers_no_proj_mlp_state_dict_keys,
        flux_diffusers_base_model_state_dict_keys,
    ],
)
def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]):
    """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format."""
    # Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -41,7 +51,14 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str,
    assert not is_state_dict_likely_in_flux_diffusers_format(state_dict)


@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys])
@pytest.mark.parametrize(
    "sd_keys",
    [
        flux_diffusers_state_dict_keys,
        flux_diffusers_no_proj_mlp_state_dict_keys,
        flux_diffusers_base_model_state_dict_keys,
    ],
)
def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]):
    """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format."""
    # Construct a state dict that is in the Diffusers FLUX LoRA format.
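Taken together, the two parametrized tests feed the new base_model-prefixed fixture through the same detection and conversion paths as the existing Diffusers fixtures. Below is a minimal sketch of the detection half of that flow; the conversion-utils module path and the idea of mapping each fixture key to a zero tensor of the listed shape are assumptions for illustration, not taken from this diff.

import torch

from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import (
    is_state_dict_likely_in_flux_diffusers_format,
)
from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_base_model_format import (
    state_dict_keys,
)

# Build a mock state dict from the fixture: one zero tensor per key, shaped
# according to the [rank, dim] / [dim, rank] lists above (assumed stand-in
# for what keys_to_mock_state_dict produces).
state_dict = {key: torch.zeros(shape) for key, shape in state_dict_keys.items()}

# With "base_model.model."-prefixed keys now recognized alongside the
# "transformer." prefix, detection is expected to return True.
assert is_state_dict_likely_in_flux_diffusers_format(state_dict)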