show llama bandwidth with timing (#3844)

2026-01-10 15:38:29 -05:00 · 2024-03-20 17:19:15 -04:00
parent 7ff47e45a1
commit 9d1d08fbb0
1 changed files with 1 additions and 1 deletions
--- a/examples/llama.py
+++ b/examples/llama.py
@@ -422,7 +422,7 @@ After you are done speaking, output [EOS]. You are not Chad.
      if args.timing or args.profile: print("")
      st = GlobalCounters.time_sum_s
      with Profiling(enabled=args.profile):
-        with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/sec"):
+        with Timing("total ", enabled=args.timing, on_exit=lambda x: f", {1e9/x:.2f} tok/s, {GlobalCounters.global_mem/x:.2f} GB/s, param {param_count*2/x:.2f} GB/s"):
          with Timing("enqueue in ", on_exit=(lambda et: (f", {(GlobalCounters.time_sum_s-st)*1e3:.2f} ms on GPU" if DEBUG>=2 else "")+
                      f", {GlobalCounters.global_ops*1e-9:.2f} GOPS, {GlobalCounters.global_mem*1e-9:.2f} GB"+
                      (f", {GlobalCounters.global_mem*1e-9/(GlobalCounters.time_sum_s-st):.2f} GB/s, param {param_count*1e-9*2/(GlobalCounters.time_sum_s-st):.2f} GB/s" if DEBUG>=2 else "")) if DEBUG else None, enabled=args.timing):