push copy to disk (#12348)

This commit is contained in:
wozeparrot
2025-09-29 21:55:05 -07:00
committed by GitHub
parent 881709cd33
commit 2a0caa09c2
3 changed files with 19 additions and 4 deletions

View File

@@ -134,8 +134,7 @@ class GPT2:
transposed = ('attn.c_attn.weight', 'attn.c_proj.weight', 'mlp.c_fc.weight', 'mlp.c_proj.weight')
for k in weights:
if k.endswith(transposed):
- # TODO: it should not silently break without that .to(None)
- weights[k] = weights[k].to(None).T
+ weights[k] = weights[k].T
# lm head and wte are tied
weights['lm_head.weight'] = weights['wte.weight']