correct llm.py mem bw benchmark for moe (#14626)

only count the active experts' weights in the bytes-per-token estimate. verified on OLMoE
chenyu
2026-02-20 16:11:22 -05:00
committed by GitHub
parent aa905db7f7
commit 86e7804d60


@@ -340,6 +340,10 @@ if __name__ == "__main__":
   # do benchmark
   if args.benchmark:
     param_bytes = sum(x.nbytes() for x in nn.state.get_parameters(model))
+    for b in model.blk:
+      if hasattr(b, 'ffn_gate_exps'):
+        expert_bytes = b.ffn_gate_exps.weight.nbytes() + b.ffn_up_exps.weight.nbytes() + b.ffn_down_exps.weight.nbytes()
+        param_bytes -= int(expert_bytes * (1 - b.num_experts_per_tok / b.ffn_gate_exps.weight.shape[0]))
     gen = model.generate([0], 0)
     for _ in range(args.benchmark):
       GlobalCounters.reset()
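
Why this changes the reported GB/s: during token-by-token decode, only the routed experts' weights are read per token, so the bandwidth denominator for a MoE model should be the active bytes, not all parameter bytes. Below is a minimal standalone sketch of the same arithmetic; the helper name `effective_param_bytes` and the OLMoE-style numbers (64 experts, 8 active per token, fp16 sizes) are illustrative assumptions, not values taken from llm.py.

# Sketch of the corrected estimate. effective_param_bytes is a hypothetical
# helper; the expert counts and byte sizes below are assumed for illustration.
def effective_param_bytes(total_bytes: int, expert_bytes: int,
                          num_experts: int, num_experts_per_tok: int) -> int:
  # Subtract the fraction of the expert weights that is *not* read per token.
  inactive_frac = 1 - num_experts_per_tok / num_experts
  return total_bytes - int(expert_bytes * inactive_frac)

if __name__ == "__main__":
  # OLMoE-style routing: 64 experts, 8 active per token (assumed numbers).
  total  = int(7.0e9) * 2   # ~7B params in fp16
  expert = int(6.5e9) * 2   # assume ~6.5B of them live in the expert FFNs
  active = effective_param_bytes(total, expert, num_experts=64, num_experts_per_tok=8)
  # Achieved bandwidth is active bytes / seconds per token, not total bytes.
  print(f"bytes read per token: ~{active/1e9:.2f}e9 vs {total/1e9:.0f}e9 naive")

With 8 of 64 experts active, 87.5% of the expert bytes drop out of each token's reads, so under the assumed split above the old all-parameters estimate would overstate achieved bandwidth by roughly 5x.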