Handle tied embeddings for llama 3.2 1B (#13796)

Previously, for checkpoints with tied embeddings the output.weight tensor was never
loaded and kept its randomly initialized values, so a forward pass produced junk
output.
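
For context, a tied-embeddings checkpoint stores one shared matrix for both the token
embedding and the output projection: HuggingFace marks this with the
tie_word_embeddings config flag, and the exported checkpoint then contains
model.embed_tokens.weight but no lm_head.weight, which is why output.weight never
shows up in the converted state dict. A minimal sketch of that detection logic
(the helper name and config handling are illustrative, not part of this change):

import json

def has_tied_embeddings(config_path: str, hf_keys: set[str]) -> bool:
  # Illustrative helper: Llama 3.2 1B Instruct ships "tie_word_embeddings": true,
  # so the checkpoint omits lm_head.weight and reuses model.embed_tokens.weight.
  with open(config_path) as f:
    cfg = json.load(f)
  return cfg.get("tie_word_embeddings", False) and "lm_head.weight" not in hf_keys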

Signed-off-by: Daniel Xu <daniel@thinkingmachines.ai>

@@ -245,6 +245,11 @@ def convert_from_huggingface(weights:dict[str, Tensor], n_layers: int, n_heads:
       continue
     sd[keymap[k]] = v
   for k,v in experts.items(): sd[k] = Tensor.stack(*[v[i] for i in range(len(v))])
+  # Handle tied embeddings (e.g., Llama 3.2 1B Instruct where lm_head shares weights with embed_tokens)
+  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
+    sd["output.weight"] = sd["tok_embeddings.weight"]
   return sd
 
 def convert_from_gguf(weights:dict[str, Tensor], n_layers:int):
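
As a quick sanity check (not part of this commit), one can confirm the converted state
dict now aliases the embedding for the output head; the file path, import location, and
the layer/head counts below are assumptions for Llama 3.2 1B:

from tinygrad.nn.state import safe_load
from extra.models.llama import convert_from_huggingface  # module path assumed

weights = safe_load("model.safetensors")  # HF checkpoint with no lm_head.weight
# 16 layers / 32 heads / 8 KV heads match Llama 3.2 1B; keyword names beyond
# n_layers and n_heads are assumed from the truncated signature above.
sd = convert_from_huggingface(weights, n_layers=16, n_heads=32, n_kv_heads=8)
assert "output.weight" in sd                                # was missing before this fix
assert sd["output.weight"] is sd["tok_embeddings.weight"]   # shared Tensor, no copy made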