hotfix: fix dataset reading for new llm.c (skip first 0x400 bytes, read uint16 tokens)

2026-01-10 07:28:15 -05:00 · 2024-06-03 14:03:16 +02:00
parent 6e0c16dfb0
commit eecfdd2f6e
1 changed file with 3 additions and 2 deletions
--- a/examples/llm.c/train_gpt2.py
+++ b/examples/llm.c/train_gpt2.py
@@ -142,7 +142,8 @@ if __name__ == "__main__":
  assert os.path.isfile(tokens_bin)
  print(f"loading cached tokens in {tokens_bin}")
  with open(tokens_bin, "rb") as f:
-    tokens = np.frombuffer(f.read(), dtype=np.int32)
+    f.seek(0x400)
+    tokens = np.frombuffer(f.read(), dtype=np.uint16).astype(np.int32)
  tokens = Tensor(tokens)

  # lightweight dataloader
@@ -161,7 +162,7 @@ if __name__ == "__main__":
  # forward backward for a few iterations
  data_iter = iter(get_batch())
  x, y = next(data_iter) # we'll overfit this batch below
-  optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=1e-4)
+  optimizer = nn.optim.AdamW(nn.state.get_parameters(model), lr=1e-4, weight_decay=0)

  @TinyJit
  def step(x, y):