From 4edaaf19e5598c6e5325ffae263ae470ec2db6d3 Mon Sep 17 00:00:00 2001
From: Daniel Xu
Date: Mon, 22 Dec 2025 13:31:40 -0800
Subject: [PATCH] Handle tied embeddings for llama 3.2 1B (#13796)

Previously the output.weight layer would not be loaded, and would only
contain randomly initialized values. This led to junk when doing a
forward pass.

Signed-off-by: Daniel Xu
---
 extra/models/llama.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/extra/models/llama.py b/extra/models/llama.py
index e0ac6857ec..0448efd3c9 100644
--- a/extra/models/llama.py
+++ b/extra/models/llama.py
@@ -245,6 +245,11 @@ def convert_from_huggingface(weights:dict[str, Tensor], n_layers: int, n_heads:
       continue
     sd[keymap[k]] = v
   for k,v in experts.items(): sd[k] = Tensor.stack(*[v[i] for i in range(len(v))])
+
+  # Handle tied embeddings (e.g., Llama 3.2 1B Instruct where lm_head shares weights with embed_tokens)
+  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
+    sd["output.weight"] = sd["tok_embeddings.weight"]
+
   return sd
 
 def convert_from_gguf(weights:dict[str, Tensor], n_layers:int):
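
For context, a minimal sketch of how one might detect that a Hugging Face
checkpoint relies on tied embeddings before converting it. The helper names
below (uses_tied_embeddings, apply_tied_embeddings) are hypothetical and not
part of this patch or the tinygrad API; the second simply mirrors the fallback
added in the hunk above on a plain dict.

import json

def uses_tied_embeddings(config_path: str, weight_keys: set[str]) -> bool:
  # Hypothetical check: checkpoints like Llama 3.2 1B set "tie_word_embeddings": true
  # in config.json and ship no lm_head.weight tensor, so the output projection
  # must reuse the token-embedding matrix.
  with open(config_path) as f:
    cfg = json.load(f)
  return cfg.get("tie_word_embeddings", False) or "lm_head.weight" not in weight_keys

def apply_tied_embeddings(sd: dict) -> dict:
  # Same fallback as the patch, written against a plain state dict:
  # alias output.weight to tok_embeddings.weight when the head was not loaded.
  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
    sd["output.weight"] = sd["tok_embeddings.weight"]
  return sd

Note that the assignment aliases rather than copies, so output.weight and
tok_embeddings.weight refer to the same underlying tensor, matching how the
Hugging Face checkpoint ties lm_head to embed_tokens.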