mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
benchmark single kernel launch (#8921)
* benchmark kernel launch
* don't realize unneeded
* faster
* faster metal
* fix mypy
* without sync
* no div 0
* lru cache that
* no sync in the profile
This commit is contained in:
38
test/external/external_benchmark_kernel_launch.py
vendored
Normal file
38
test/external/external_benchmark_kernel_launch.py
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
import time
|
||||
from tinygrad import Tensor, TinyJit, Device, Context
|
||||
from tinygrad.helpers import Profiling, Timing, GlobalCounters
|
||||
|
||||
# python3 test/test_speed_v_torch.py TestSpeed.test_add_a
|
||||
|
||||
@TinyJit
def plus(a:Tensor, b:Tensor):
  """Return ``a + b``.

  This is the single operation whose launch cost the benchmark script in
  this file measures. The function is wrapped with ``TinyJit``, so callers
  invoke the wrapper object rather than the plain Python function.
  """
  return a+b
|
||||
|
||||
if __name__ == "__main__":
  # NOTE(review): this block was recovered from a flattened diff view in which
  # all indentation was lost. The nesting below is reconstructed from the
  # statements themselves; placements that could not be determined with
  # certainty are flagged individually.

  # Two one-element inputs, realized up front so the timed calls below
  # operate on already-realized tensors.
  a = Tensor([1]).realize()
  b = Tensor([1]).realize()

  # Phase 1: five calls timed with the Timing helper; each timed region
  # includes a device synchronize.
  for i in range(5):
    with Timing(prefix=f"{i}:"):
      c = plus(a,b)
      Device[c.device].synchronize()
  # Sanity check on the result (1 + 1).
  # NOTE(review): placed after the loop; it may originally have run on
  # every iteration -- confirm.
  assert c.item() == 2

  # Phase 2: time only the call to plus(), without waiting for the device.
  for i in range(5):
    st = time.perf_counter()
    c = plus(a,b)
    et = time.perf_counter() - st
    print(f"nosync {i}: {et*1e6:.2f} us")
    # Synchronize outside the timed region.
    # NOTE(review): placed inside the loop so every untimed-sync launch
    # starts after the previous one has completed; it may originally have
    # been a single synchronize after the loop -- confirm.
    Device[c.device].synchronize()

  # Phase 3: time the call plus the device synchronize.
  for i in range(5):
    st = time.perf_counter()
    c = plus(a,b)
    Device[c.device].synchronize()
    et = time.perf_counter() - st
    print(f"precise {i}: {et*1e6:.2f} us")

  # The ratio printed below divides by GlobalCounters.time_sum_s, so it must
  # start from zero here.
  # NOTE(review): presumably time_sum_s is only accumulated at DEBUG>=2 --
  # confirm against tinygrad.
  assert GlobalCounters.time_sum_s == 0

  # Phase 4: one call under DEBUG=2, comparing the kernel time reported in
  # GlobalCounters with the wall-clock time of the whole call + synchronize.
  with Context(DEBUG=2):
    st = time.perf_counter()
    c = plus(a,b)
    Device[c.device].synchronize()
    et = time.perf_counter() - st
  # The 1e-12 term keeps the ratio from dividing by zero when no kernel time
  # was recorded.
  # NOTE(review): print placed after the Context block -- confirm.
  print(f"kernel {GlobalCounters.time_sum_s*1e3:.2f} ms / full {et*1e3:.2f} ms -- {et/(GlobalCounters.time_sum_s+1e-12):.2f} x")

  # Phase 5: profile a single call; deliberately no synchronize inside the
  # profiled region.
  with Profiling():
    c = plus(a,b)
|
||||
@@ -202,8 +202,12 @@ class TestSpeed(unittest.TestCase):
|
||||
def f(a, b): return (a*b).sum()
|
||||
helper_test_generic_square('mul_sum', 4096, f, f)
|
||||
|
||||
def test_add(self):
|
||||
for N in [1, 1024, 4096]:
|
||||
def test_add_a(self):
|
||||
def f(a, b): return a + b
|
||||
helper_test_generic_square('add', 1, f, f)
|
||||
|
||||
def test_add_big(self):
|
||||
for N in [1024, 4096]:
|
||||
def f(a, b): return a + b
|
||||
helper_test_generic_square('add', N, f, f)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user