mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-07 22:23:55 -05:00)
improve microbenchmarks (#13492)
* improve microbenchmarks
* bugfix + ubench
* lil
* no src in const method
@@ -1,51 +1,99 @@
 import unittest, time
+from tinygrad import dtypes, Tensor, UOp, getenv
 from tinygrad.helpers import Profiling
-from tinygrad.uop.ops import UOp
-from tinygrad.dtype import dtypes
 
-# it's about 1 ms per 1k UOps on M3
-N = 10000
+PYPROFILE = getenv("PYPROFILE")
 
 class TestBench(unittest.TestCase):
+  @staticmethod
+  def setUpClass():
+    # no fixed cost
+    Tensor.empty(10,10)
+    Tensor.randn(10,10)
+
+class TestMicrobenchmarks(unittest.TestCase):
+  def start_time(self): self.st = time.perf_counter()
   def setUp(self):
-    self.st = time.perf_counter()
+    # it's about 1 ms per 1k UOps on M3
+    if PYPROFILE:
+      self.prof = Profiling()
+      self.prof.__enter__()
+    else:
+      self.prof = None
+    self.N = 10000
+    self.start_time()
   def tearDown(self):
     et = (time.perf_counter() - self.st)
-    print(f"{self._testMethodName} {et*1e3:.2f} ms")
+    if self.prof is not None: self.prof.__exit__()
+    print(f"{self._testMethodName:30s} {et*1e6/self.N:.2f} us")
 
   def test_uop_instant_creation(self):
-    for i in range(N): UOp.const(dtypes.int, 100+i)
+    for i in range(self.N): UOp.const(dtypes.int, 100+i)
 
   def test_uop_list_creation(self):
-    [UOp.const(dtypes.int, 100+i) for i in range(N)]
+    [UOp.const(dtypes.int, 100+i) for i in range(self.N)]
 
   def test_uop_add_2n(self):
     a = UOp.const(dtypes.int, 2)
-    for _ in range(N): a = a + a
+    for _ in range(self.N): a = a + a
 
   def test_uop_toposort(self):
     a = UOp.const(dtypes.int, 0)
-    for i in range(N): a = a + UOp.const(dtypes.int, 100+i)
-    self.setUp()
-    self.assertEqual(len(a.toposort()), 2*N+1)
+    for i in range(self.N): a = a + UOp.const(dtypes.int, 100+i)
+    self.start_time()
+    self.assertEqual(len(a.toposort()), 2*self.N+1)
 
   def test_uop_toposort_2n(self):
     a = UOp.const(dtypes.int, 0)
-    for i in range(N): a = a + a
-    self.setUp()
-    self.assertEqual(len(a.toposort()), N+1)
+    for _ in range(self.N): a = a + a
+    self.start_time()
+    self.assertEqual(len(a.toposort()), self.N+1)
 
   def test_uop_simplify(self):
     a = UOp.const(dtypes.int, 2)
-    for _ in range(N): (a+a).simplify()
+    for _ in range(self.N): (a+a).simplify()
 
-class TestMicroprofile(unittest.TestCase):
   def test_uop_simplify_complex(self):
+    self.N //= 10 # this test is slow
     x = UOp.variable("x", 0, 10)
     y = UOp.variable("y", 0, 10)
     expr = (x*2)+5+(x*4)+(y*2)+y
-    with Profiling():
-      for _ in range(1000): expr.simplify()
+    for _ in range(self.N): expr.simplify()
+
+  def test_uop_simplify_div(self):
+    self.N //= 10 # this test is slow
+    x = UOp.variable("x", 0, 10)
+    y = UOp.variable("y", 0, 10)
+    z = UOp.variable("z", 0, 10)
+    expr = (x*4+y*8)//(z*2)
+    for _ in range(self.N): expr.simplify()
+
+  def test_uop_chain_free(self):
+    a = UOp.const(dtypes.int, 2)
+    for _ in range(self.N): a = a + a
+    self.start_time()
+    del a
+
+  def test_tensor_zeros(self):
+    self.N //= 10 # this test is slow
+    for _ in range(self.N): Tensor.zeros(10, 10)
+
+  def test_tensor_add(self):
+    self.N //= 10 # this test is slow
+    a = Tensor.zeros(10, 10)
+    b = Tensor.zeros(10, 10)
+    for _ in range(self.N): a+b
+
+  def test_tensor_empty(self):
+    self.N //= 10 # this test is slow
+    for _ in range(self.N): Tensor.empty(10, 10)
+
+  def test_tensor_rand(self):
+    self.N //= 100 # this test is very slow
+    for _ in range(self.N): Tensor.rand(10, 10)
+
+  def test_tensor_randn(self):
+    self.N //= 100 # this test is very slow
+    for _ in range(self.N): Tensor.randn(10, 10)
 
 if __name__ == '__main__':
   unittest.main()
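A note on the rewritten harness for readers skimming the hunk above: setUp arms a perf_counter (and, when PYPROFILE is set, enters a Profiling context), and tearDown now prints microseconds per operation instead of total milliseconds. Tests that must build a graph before the measured phase (toposort, chain_free) call self.start_time() again so construction cost is excluded from the reported figure. The same pattern, reduced to a standalone sketch with no tinygrad dependency (class and test names here are hypothetical):

import time, unittest

class PerOpBenchmark(unittest.TestCase):
  def setUp(self):
    self.N = 10000
    self.start_time()
  # re-arm the clock; call again inside a test to exclude setup work
  def start_time(self): self.st = time.perf_counter()
  def tearDown(self):
    et = time.perf_counter() - self.st
    # report per-operation cost in microseconds, like the diff's tearDown
    print(f"{self._testMethodName:30s} {et*1e6/self.N:.2f} us")

  def test_list_append(self):
    acc = []
    for i in range(self.N): acc.append(i)

if __name__ == "__main__":
  unittest.main()

To profile a single benchmark, set the new environment gate, e.g. PYPROFILE=1 python3 test/unit/test_ubench.py TestMicrobenchmarks.test_uop_simplify (the file path is an assumption; the diff header does not show it).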
@@ -108,7 +108,7 @@ class dtypes:
   def is_float(x: DType) -> bool: return x.scalar() in dtypes.floats or isinstance(x, ImageDType)
   @staticmethod # static methods on top, or bool in the type info will refer to dtypes.bool
   @functools.cache
-  def is_int(x: DType) -> bool: return x.scalar() in dtypes.ints + (dtypes.index,)
+  def is_int(x: DType) -> bool: return x.scalar() in dtypes.index_like
   @staticmethod
   @functools.cache
   def is_unsigned(x: DType) -> bool: return x.scalar() in dtypes.uints
@@ -185,6 +185,7 @@ class dtypes:
   uints = (uint8, uint16, uint32, uint64)
   sints = (int8, int16, int32, int64)
   ints = uints + sints
+  index_like = ints + (index,)
   all = floats + ints + (bool, index) # noqa: A003
 
   if (env_default_float := getenv("DEFAULT_FLOAT", "")):
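The two dtype hunks above are one refactor: the tuple ints + (index,) that is_int previously tested against inline now has a name, index_like. Assuming the reconstruction above is faithful, the relationship can be checked from a tinygrad checkout (this snippet is an illustration, not part of the diff):

from tinygrad import dtypes

# index_like is a named alias for the tuple is_int used to spell out inline
assert dtypes.index_like == dtypes.ints + (dtypes.index,)
assert dtypes.is_int(dtypes.int32) and dtypes.is_int(dtypes.index)
assert not dtypes.is_int(dtypes.float32)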
@@ -429,12 +429,14 @@ class UOp(OpMixin, metaclass=UOpMetaClass):
     if op in {Ops.CMPLT, Ops.CMPNE, Ops.CMPEQ}: out_dtype = dtypes.bool.vec(out_dtype.count) if out_dtype.count > 1 else dtypes.bool
     return UOp(op, out_dtype, (self,)+src, **kwargs)
   @staticmethod
-  def const(dtype:DType, b:ConstLike, device:str|tuple[str, ...]|None=None, shape:tuple[sint, ...]|None=None, src=None, unique:bool|int=False):
+  def const(dtype:DType, b:ConstLike, device:str|tuple[str, ...]|None=None, shape:tuple[sint, ...]|None=None, unique:bool|int=False):
     if isinstance(b, UOp): return b.unbind()[0] if b.op is Ops.BIND else b
-    if isinstance(b, tuple) and all_same(b): b = b[0] # doesn't have to be a VCONST if they are all the same
+    if isinstance(b, tuple) and all_same(b):
+      assert len(b) > 0, "can't create const from empty tuple"
+      b = b[0] # doesn't have to be a VCONST if they are all the same
     # NOTE: float('nan') != float('nan'), so we canonicalize here
     if isinstance(b, float) and math.isnan(b): b = math.nan
-    ret = UOp(Ops.VCONST if isinstance(b, tuple) else Ops.CONST, dtype, arg=dtypes.as_const(b, dtype), src=() if src is None else (src,))
+    ret = UOp(Ops.VCONST if isinstance(b, tuple) else Ops.CONST, dtype, arg=dtypes.as_const(b, dtype))
     if device is not None:
       if unique or not isinstance(unique, bool): ret = ret.replace(src=(UOp(Ops.DEVICE, arg=device), UOp.unique(None if unique is True else unique)))
       else: ret = ret.replace(src=(UOp(Ops.DEVICE, arg=device),))
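Two behavioral points in the const hunk deserve a gloss. First, the src parameter is gone (the "no src in const method" bullet in the commit message): a CONST no longer takes an arbitrary source at creation; DEVICE and unique sources are attached afterwards via replace, as the unchanged tail of the hunk shows. Second, the new assert rejects an empty tuple, which pairs with the pattern-matcher guard in the final hunk below. The NaN comment, meanwhile, is about Python equality semantics: float('nan') != float('nan'), so consts built from distinct NaN objects would never compare or cache as equal, while math.nan is one shared object. A pure-Python demonstration of why canonicalizing helps any cache keyed on the const's arg:

import math

# two fresh NaNs are neither equal nor identical, so a dict entry keyed on
# one can never be found with another
cache = {float('nan'): "hit"}
try:
  cache[float('nan')]
  raise AssertionError("unreachable")
except KeyError:
  pass

# dict lookups short-circuit on identity, and math.nan is a single object,
# so canonicalized NaN consts always find themselves
cache = {math.nan: "hit"}
assert cache[math.nan] == "hit"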
@@ -572,7 +574,7 @@ class UOp(OpMixin, metaclass=UOpMetaClass):
     else: usrcs.append(UOp(Ops.VECTORIZE, dtypes.index.vec(len(arg)), tuple(UOp.const(dtypes.index, x) if isinstance(x, int) else x for x in arg)))
     if len(usrcs) == 0: ret = UOp(op, self.dtype, (self,), arg)
     else: ret = UOp(op, self.dtype, (self,)+UOp.sink(*usrcs).simplify().src)
-    # for all movement ops, we check shape property
+    # for all movement ops, we check shape property to validity check the movement op
     if ret.shape == self.shape and same_shape_noop: return self
     return ret
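The comment edit in this hunk clarifies intent rather than changing behavior: evaluating ret.shape does double duty, validating the movement op's arguments and enabling the return-self shortcut when same_shape_noop holds and the shape is unchanged. In Tensor terms (a sketch assuming the ordinary tinygrad API, not code from this diff):

from tinygrad import Tensor

t = Tensor.empty(4, 4)
assert t.reshape(4, 4).shape == (4, 4)  # same-shape movement op: the no-op path
assert t.reshape(2, 8).shape == (2, 8)  # evaluating .shape also validates the op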
@@ -255,7 +255,8 @@ symbolic = symbolic_simple+commutative+PatternMatcher([
   # after with 1 src is just src[0]
   (UPat(Ops.AFTER, src=(UPat.var("s"),)), lambda s: s),
   # VECTORIZE/CONST
-  (UPat(Ops.VECTORIZE, src=UPat(Ops.CONST), name="vec"), lambda vec: UOp.const(vec.dtype, tuple(x.arg for x in vec.src))),
+  (UPat(Ops.VECTORIZE, src=UPat(Ops.CONST), name="vec"),
+   lambda vec: UOp.const(vec.dtype, tuple(x.arg for x in vec.src)) if len(vec.src) > 0 else None),
 ])+div_and_mod_symbolic+gep_pushing
 
 # ******** we take a small aside to "simplify_valid" to rewrite valids ********
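This last hunk is the defensive counterpart to the assert added in UOp.const: a VECTORIZE with zero sources would previously have been folded into UOp.const(dtype, ()), which now raises. In tinygrad's PatternMatcher a rewrite lambda that returns None means "no rewrite", so the guarded version simply declines the degenerate case. The rule's logic, written out as a plain function for readability (a sketch, not the matcher machinery):

from tinygrad import UOp

def fold_vectorize_of_consts(vec: UOp):
  # decline the rewrite (return None) rather than build a const from an
  # empty tuple, which would trip the new assert in UOp.const
  if len(vec.src) == 0: return None
  return UOp.const(vec.dtype, tuple(x.arg for x in vec.src))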