pmatmul example + GB/s bugfix [run_process_replay] (#5974)

* pmatmul example + bugfix

* improve pmatmul

* Update real_pmatmul.py
This commit is contained in:
George Hotz
2024-08-07 22:32:11 -07:00
committed by GitHub
parent c5baa3d66b
commit bc55c8a30e
2 changed files with 22 additions and 2 deletions

View File

@@ -0,0 +1,20 @@
import time
from tinygrad import Tensor, Device, TinyJit
from tinygrad.helpers import getenv
if __name__ == "__main__":
  # Benchmark a square matmul whose operands are sharded across several NV devices,
  # printing the achieved TFLOPS of each timed run.
  # Environment knobs: GPUS = number of NV devices to use (default 2),
  #                    N    = side length of the square matrices (default 8192).
  DEVS = [f"NV:{i}" for i in range(getenv("GPUS", 2))]
  N = getenv("N", 8192)
  # A is sharded along axis 0 and B along axis 1 over the same device list.
  A = Tensor.rand(N, N).shard(DEVS, 0).realize()
  B = Tensor.rand(N, N).shard(DEVS, 1).realize()
  print("***** MUL *****")
  jmatmul = TinyJit(Tensor.dot)
  for _ in range(10):
    # Synchronize every participating device (not just NV:0 / NV:1) so the timed
    # window is correct for any GPUS value and never touches an unused device.
    for dev in DEVS: Device[dev].synchronize()
    st = time.perf_counter()
    jmatmul(A, B)
    for dev in DEVS: Device[dev].synchronize()
    et = time.perf_counter()
    # An NxN by NxN matmul performs N^3 multiply-adds, i.e. 2*N^3 floating point ops.
    # NOTE(review): early iterations presumably include JIT capture overhead — confirm before quoting numbers.
    print(f"{(N*N*N*2*1e-12)/(et-st):.2f} TFLOPS")

View File

@@ -66,9 +66,9 @@ def get_kernel(renderer:Renderer, ast:LazyOp) -> Kernel:
# **************** Runners ****************
class Runner:
def __init__(self, display_name:str, dname:str, op_estimate:sint=0, mem_estimate:sint=0, lds_estimate:Optional[sint]=None):
  # Store the runner's display name, device name and its cost estimates.
  #   display_name: name stored on the runner as self.display_name
  #   dname:        device name, used as the key into Device by the `device` property
  #   op_estimate / mem_estimate: estimates stored as given (default 0)
  #   lds_estimate: when omitted (None) it falls back to mem_estimate instead of 0
  self.first_run, self.display_name, self.dname = True, display_name, dname
  self.op_estimate, self.mem_estimate = op_estimate, mem_estimate
  self.lds_estimate = mem_estimate if lds_estimate is None else lds_estimate
@property
def device(self):
  # Look up the Device entry registered under this runner's device name.
  device_name = self.dname
  return Device[device_name]
def exec(self, rawbufs:List[Buffer], var_vals:Optional[Dict[Variable, int]]=None) -> Optional[float]: