Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-06 21:53:53 -05:00)
Handle tied embeddings for llama 3.2 1B (#13796)
Previously the output.weight layer would not be loaded and would only contain randomly initialized values. This led to junk output when doing a forward pass.

Signed-off-by: Daniel Xu <daniel@thinkingmachines.ai>
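For context on the bug: with tied embeddings the final output projection (lm_head) reuses the token-embedding matrix, so checkpoints such as Llama 3.2 1B Instruct ship embed_tokens but no separate lm_head tensor, leaving tinygrad's output.weight at its random initialization. Below is a minimal, hypothetical sketch of what weight tying means; the toy sizes and the tied_forward helper are made up for illustration and are not the model's actual forward code.

from tinygrad import Tensor

# Toy sizes for illustration only, not the real Llama 3.2 1B dimensions.
vocab_size, dim = 8, 4
tok_embeddings = Tensor.randn(vocab_size, dim)

def tied_forward(token_ids: Tensor) -> Tensor:
  x = tok_embeddings[token_ids]      # embedding lookup on the way in: (seq, dim)
  hidden = x                         # stand-in for the transformer blocks
  return hidden @ tok_embeddings.T   # the SAME matrix projects back to logits: (seq, vocab)

print(tied_forward(Tensor([1, 2, 3])).shape)  # (3, 8)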
@@ -245,6 +245,11 @@ def convert_from_huggingface(weights:dict[str, Tensor], n_layers: int, n_heads:
       continue
     sd[keymap[k]] = v
   for k,v in experts.items(): sd[k] = Tensor.stack(*[v[i] for i in range(len(v))])
+
+  # Handle tied embeddings (e.g., Llama 3.2 1B Instruct where lm_head shares weights with embed_tokens)
+  if "output.weight" not in sd and "tok_embeddings.weight" in sd:
+    sd["output.weight"] = sd["tok_embeddings.weight"]
+
   return sd
 
 def convert_from_gguf(weights:dict[str, Tensor], n_layers:int):
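To illustrate the fallback added above in isolation, here is a hedged standalone sketch; the tiny state dict stands in for a real converted Llama 3.2 1B checkpoint, and only the two key names come from the patch itself.

from tinygrad import Tensor

# Stand-in for a converted checkpoint with tied embeddings: no "output.weight" entry.
sd = {"tok_embeddings.weight": Tensor.randn(8, 4)}

# The added fallback: reuse the embedding matrix as the output projection.
if "output.weight" not in sd and "tok_embeddings.weight" in sd:
  sd["output.weight"] = sd["tok_embeddings.weight"]

# Both keys now reference the same Tensor, so nothing is copied and the loaded
# values back both the embedding lookup and the final logits projection.
assert sd["output.weight"] is sd["tok_embeddings.weight"]

Because the assignment aliases rather than copies, loading the checkpoint once fills both roles of the shared matrix.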