mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
pmatmul example + GB/s bugfix [run_process_replay] (#5974)
* pmatmul example + bugfix * improve pmatmul * Update real_pmatmul.py
This commit is contained in:
20
extra/gemm/real_pmatmul.py
Normal file
20
extra/gemm/real_pmatmul.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import time
from tinygrad import Tensor, Device, TinyJit
from tinygrad.helpers import getenv

if __name__ == "__main__":
  # Multi-GPU matmul benchmark: A is sharded row-wise (axis 0) and B column-wise
  # (axis 1) across all devices, so each GPU computes a slice of A @ B.
  DEVS = [f"NV:{i}" for i in range(getenv("GPUS", 2))]
  N = getenv("N", 8192)
  A = Tensor.rand(N, N).shard(DEVS, 0).realize()
  B = Tensor.rand(N, N).shard(DEVS, 1).realize()
  print("***** MUL *****")
  # JIT the matmul so iterations after warmup replay captured kernels.
  jmatmul = TinyJit(Tensor.dot)
  for i in range(10):
    # Synchronize every shard device, not just NV:0/NV:1 — the hardcoded pair
    # breaks for GPUS != 2 (KeyError with GPUS=1, mistimed runs with GPUS>2).
    for dev in DEVS: Device[dev].synchronize()
    st = time.perf_counter()
    jmatmul(A, B)
    # Wait for all devices to finish before stopping the clock.
    for dev in DEVS: Device[dev].synchronize()
    et = time.perf_counter()
    # 2*N^3 FLOPs for an N x N matmul, scaled to TFLOPS.
    print(f"{(N*N*N*2*1e-12)/(et-st):.2f} TFLOPS")
|
||||
@@ -66,9 +66,9 @@ def get_kernel(renderer:Renderer, ast:LazyOp) -> Kernel:
|
||||
# **************** Runners ****************
|
||||
|
||||
class Runner:
|
||||
def __init__(self, display_name:str, dname:str, op_estimate:sint=0, mem_estimate:sint=0, lds_estimate:sint=0):
|
||||
def __init__(self, display_name:str, dname:str, op_estimate:sint=0, mem_estimate:sint=0, lds_estimate:Optional[sint]=None):
|
||||
self.first_run, self.display_name, self.dname, self.op_estimate, self.mem_estimate, self.lds_estimate = \
|
||||
True, display_name, dname, op_estimate, mem_estimate, lds_estimate
|
||||
True, display_name, dname, op_estimate, mem_estimate, mem_estimate if lds_estimate is None else lds_estimate
|
||||
@property
|
||||
def device(self): return Device[self.dname]
|
||||
def exec(self, rawbufs:List[Buffer], var_vals:Optional[Dict[Variable, int]]=None) -> Optional[float]:
|
||||
|
||||
Reference in New Issue
Block a user