log_kernel

2026-01-24 06:18:01 -05:00 · 2023-02-10 21:51:53 -06:00
parent b9f02671d3
commit a4cb161bd4
3 changed files with 7 additions and 6 deletions
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -86,6 +86,11 @@ class GlobalCounters:
  cache : ClassVar[Optional[list]] = None
  @staticmethod
  def reset(): GlobalCounters.global_ops, GlobalCounters.global_mem, GlobalCounters.time_sum, GlobalCounters.kernel_count, GlobalCounters.cache = 0,0,0,0,None
+  @staticmethod
+  def log_kernel(op_estimate:int, mem_estimate:int):
+    GlobalCounters.kernel_count += 1
+    GlobalCounters.global_ops += op_estimate
+    GlobalCounters.global_mem += mem_estimate

 # assumes you are using ShapeTracker
 # used in GPUBuffer and LLVMBuffer
--- a/tinygrad/runtime/cuda.py
+++ b/tinygrad/runtime/cuda.py
@@ -26,6 +26,4 @@ class CLProgram:
    global_size = [x//y for x,y in zip(global_size, local_size)]
    if DEBUG >= 2: print("CUDA launch", global_size, local_size)
    self.prg(*args, block=tuple(local_size), grid=tuple(global_size))
-    GlobalCounters.kernel_count += 1
-    GlobalCounters.global_ops += self.op_estimate
-    GlobalCounters.global_mem += self.mem_estimate
+    GlobalCounters.log_kernel(self.op_estimate, self.mem_estimate)
--- a/tinygrad/runtime/opencl.py
+++ b/tinygrad/runtime/opencl.py
@@ -91,7 +91,5 @@ class CLProgram:
    if DEBUG >= 1:
      print(f"**CL** {GlobalCounters.kernel_count:6d} {self.name:28s} args {len(args[2:]):5d}  kernels {str(args[0]):18s} {str(args[1]):12s} OPs {self.op_estimate/1e6:7.1f}M/{GlobalCounters.global_ops/1e9:7.2f}G  mem {CL.mem_used/1e9:5.2f} GB " +
            (str() if DEBUG <= 1 or CL.CACHE is not None else f"tm {et/1e3:9.2f}us/{GlobalCounters.time_sum/1e6:9.2f}ms ({self.op_estimate/et:8.2f} GFLOPS)"))
-    GlobalCounters.kernel_count += 1
-    GlobalCounters.global_ops += self.op_estimate
-    GlobalCounters.global_mem += self.mem_estimate
+    GlobalCounters.log_kernel(self.op_estimate, self.mem_estimate)
    return e if CL.CACHE is None else None