From 4edaaf19e5598c6e5325ffae263ae470ec2db6d3 Mon Sep 17 00:00:00 2001
From: Daniel Xu
Date: Mon, 22 Dec 2025 13:31:40 -0800
Subject: [PATCH] Handle tied embeddings for llama 3.2 1B (#13796)

Previously the output.weight layer would not be loaded, and would only
contain randomly initialized values. This led to junk when doing a
forward pass.

Signed-off-by: Daniel Xu
---
 extra/models/llama.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/extra/models/llama.py b/extra/models/llama.py
index e0ac6857ec..0448efd3c9 100644
--- a/extra/models/llama.py
+++ b/extra/models/llama.py
@@ -245,6 +245,11 @@ def convert_from_huggingface(weights:dict[str, Tensor], n_layers: int, n_heads:
       continue
     sd[keymap[k]] = v
   for k,v in experts.items(): sd[k] = Tensor.stack(*[v[i] for i in range(len(v))])
+
+  # Handle tied embeddings (e.g., Llama 3.2 1B Instruct where lm_head shares weights with embed_tokens)
+  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
+    sd["output.weight"] = sd["tok_embeddings.weight"]
+
   return sd
 
 def convert_from_gguf(weights:dict[str, Tensor], n_layers:int):
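
For context, a minimal sketch of how one might detect that a Hugging Face
checkpoint relies on tied embeddings before converting it. The helper names
below (uses_tied_embeddings, apply_tied_embeddings) are hypothetical and not
part of this patch or the tinygrad API; the second simply mirrors the fallback
added in the hunk above on a plain dict.

import json

def uses_tied_embeddings(config_path: str, weight_keys: set[str]) -> bool:
  # Hypothetical check: checkpoints like Llama 3.2 1B set "tie_word_embeddings": true
  # in config.json and ship no lm_head.weight tensor, so the output projection
  # must reuse the token-embedding matrix.
  with open(config_path) as f:
    cfg = json.load(f)
  return cfg.get("tie_word_embeddings", False) or "lm_head.weight" not in weight_keys

def apply_tied_embeddings(sd: dict) -> dict:
  # Same fallback as the patch, written against a plain state dict:
  # alias output.weight to tok_embeddings.weight when the head was not loaded.
  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
    sd["output.weight"] = sd["tok_embeddings.weight"]
  return sd

Note that the assignment aliases rather than copies, so output.weight and
tok_embeddings.weight refer to the same underlying tensor, matching how the
Hugging Face checkpoint ties lm_head to embed_tokens.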