log_kernel

This commit is contained in:
George Hotz
2023-02-10 21:51:53 -06:00
parent b9f02671d3
commit a4cb161bd4
3 changed files with 7 additions and 6 deletions

View File

@@ -86,6 +86,11 @@ class GlobalCounters:
cache : ClassVar[Optional[list]] = None
@staticmethod
def reset() -> None:
  """Return every global counter to its initial state.

  The op, memory, time and kernel counters are set to 0 and the
  cache is cleared back to None.
  """
  counters = GlobalCounters
  counters.global_ops = 0
  counters.global_mem = 0
  counters.time_sum = 0
  counters.kernel_count = 0
  counters.cache = None
@staticmethod
def log_kernel(op_estimate:int, mem_estimate:int) -> None:
  """Account for a single kernel in the global counters.

  Increments the kernel count by one and adds the given estimates to
  the running op and memory totals.

  Args:
    op_estimate: value added to GlobalCounters.global_ops.
    mem_estimate: value added to GlobalCounters.global_mem.
  """
  counters = GlobalCounters
  counters.kernel_count += 1
  counters.global_ops += op_estimate
  counters.global_mem += mem_estimate
# assumes you are using ShapeTracker
# used in GPUBuffer and LLVMBuffer

View File

@@ -26,6 +26,4 @@ class CLProgram:
global_size = [x//y for x,y in zip(global_size, local_size)]
if DEBUG >= 2: print("CUDA launch", global_size, local_size)
self.prg(*args, block=tuple(local_size), grid=tuple(global_size))
GlobalCounters.kernel_count += 1
GlobalCounters.global_ops += self.op_estimate
GlobalCounters.global_mem += self.mem_estimate
GlobalCounters.log_kernel(self.op_estimate, self.mem_estimate)

View File

@@ -91,7 +91,5 @@ class CLProgram:
if DEBUG >= 1:
print(f"**CL** {GlobalCounters.kernel_count:6d} {self.name:28s} args {len(args[2:]):5d} kernels {str(args[0]):18s} {str(args[1]):12s} OPs {self.op_estimate/1e6:7.1f}M/{GlobalCounters.global_ops/1e9:7.2f}G mem {CL.mem_used/1e9:5.2f} GB " +
(str() if DEBUG <= 1 or CL.CACHE is not None else f"tm {et/1e3:9.2f}us/{GlobalCounters.time_sum/1e6:9.2f}ms ({self.op_estimate/et:8.2f} GFLOPS)"))
GlobalCounters.kernel_count += 1
GlobalCounters.global_ops += self.op_estimate
GlobalCounters.global_mem += self.mem_estimate
GlobalCounters.log_kernel(self.op_estimate, self.mem_estimate)
return e if CL.CACHE is None else None