import unittest, math
from tinygrad import Tensor, Device, dtypes
from tinygrad.ops import Ops
from tinygrad.engine.schedule import create_schedule
from tinygrad.helpers import CI
import numpy as np
from tinygrad.device import is_dtype_supported

def _check_ast_count(desired_count:int, t:Tensor):
  # NOTE: this has a side effect because everything can be scheduled only once
  schedule = create_schedule(t.lazydata.lbs)
  asts = [s for s in schedule if s.ast.op is Ops.SINK]
  assert len(asts) == desired_count, f"{len(asts)} != {desired_count}"
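
# Usage sketch: desired_count == 0 asserts the expression const-folds away
# entirely (no compute kernel is scheduled), desired_count == n asserts exactly
# n kernels; non-kernel schedule items like copies are filtered out by the SINK
# check above. Since scheduling consumes the lazy graph, check each tensor once:
#   _check_ast_count(0, Tensor.ones(4) + 0)               # folds to a const, no kernel
#   _check_ast_count(1, Tensor.randn(4).realize().neg())  # realized input: one kernel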

class TestUnaryOpsConstFolding(unittest.TestCase):
  def test_all_consts_ops(self):
    _check_ast_count(0, Tensor.ones(4).exp())
    _check_ast_count(0, Tensor.ones(4).sqrt())
    _check_ast_count(0, Tensor.ones(4) + Tensor.ones(4))
    _check_ast_count(0, Tensor.ones(4) / Tensor.ones(4))

  def test_cast(self):
    _check_ast_count(0, Tensor.ones(4).cast(dtypes.int16))
    _check_ast_count(0, Tensor.full(4, fill_value=-1).cast(dtypes.uint16))

  @unittest.expectedFailure  # no two-level fold at lazybuffer
  def test_neg_folding(self):
    _check_ast_count(0, Tensor([1, 2, 3]).mul(-1).neg())
    _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
    _check_ast_count(0, Tensor([1, 2, 3]).neg().neg())
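    # each chain above is a double negation, x -> -(-x) = x, but folding it
    # requires looking through two lazybuffer ops at once, hence expectedFailure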

  def test_neg_realized_no_fold(self):
    x = Tensor.randn(32, 32)
    x = x.clip(0, 1).realize()
    _check_ast_count(1, x.neg())

class TestBinaryOpsConstFolding(unittest.TestCase):
  def test_add_literal_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) + 0)
  def test_add_tensor_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) + Tensor.zeros(4))
  def test_literal_zero_add(self):
    _check_ast_count(0, 0 + Tensor([1.0, 2, 3, 4]))
  def test_tensor_zero_add(self):
    _check_ast_count(0, Tensor.zeros(4) + Tensor([1.0, 2, 3, 4]))

  def test_sub_literal_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) - 0)
  def test_sub_tensor_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) - Tensor.zeros(4))

  def test_mul_literal_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) * 0)
  def test_mul_tensor_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) * Tensor.zeros(4))
  def test_literal_zero_mul(self):
    _check_ast_count(0, 0 * Tensor([1.0, 2, 3, 4]))
  def test_tensor_zero_mul(self):
    _check_ast_count(0, Tensor.zeros(4) * Tensor([1.0, 2, 3, 4]))

  def test_mul_literal_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) * 1)
  def test_mul_tensor_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) * Tensor.ones(4))
  def test_literal_one_mul(self):
    _check_ast_count(0, 1 * Tensor([1.0, 2, 3, 4]))
  def test_tensor_one_mul(self):
    _check_ast_count(0, Tensor.ones(4) * Tensor([1.0, 2, 3, 4]))

  def test_bool_tensor_mul_bool(self):
    _check_ast_count(0, Tensor([True, False]) * True)
    _check_ast_count(0, Tensor([True, False]) * False)
  def test_bool_mul_bool_tensor(self):
    _check_ast_count(0, True * Tensor([True, False]))
    _check_ast_count(0, False * Tensor([True, False]))

  def test_div_literal_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) / 1)
  def test_div_tensor_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) / Tensor.ones(4))

  def test_idiv_literal_one(self):
    _check_ast_count(0, Tensor([1, 2, 3, 4]) // 1)
  def test_idiv_tensor_one(self):
    _check_ast_count(0, Tensor([1, 2, 3, 4]) // Tensor.ones(4, dtype=dtypes.int32))

  def test_pow_literal_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** 0)
  def test_pow_tensor_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.zeros(4))

  def test_pow_literal_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** 1)
  def test_pow_tensor_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.ones(4))
  def test_literal_one_pow(self):
    _check_ast_count(0, 1 ** Tensor([1.0, 2, 3, 4]))
  def test_tensor_one_pow(self):
    _check_ast_count(0, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))

# folds advanced indexing into basic indexing
class TestIndexingConstFolding(unittest.TestCase):
  def test_scalar_index(self):
    t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
    _check_ast_count(0, t[:,:,Tensor(1),:])
    # NOTE: this is no longer supported because the 1+2 isn't folded early
    #_check_ast_count(0, t[:,:,Tensor(1)+2,:])
    _check_ast_count(0, t[:,:,Tensor(1),Tensor(0)])
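    # a const scalar index presumably lowers to a pure view/shrink of the
    # realized buffer, which is why no compute kernel is scheduled above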

  @unittest.expectedFailure
  def test_const_tensor_index(self):
    # TODO: implement const tensor folded indexing
    t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
    _check_ast_count(0, t[:,:,Tensor.ones(2,1),:])
    _check_ast_count(0, t[:,:,Tensor.ones(1,2)+2,:])
    _check_ast_count(0, t[:,:,Tensor.ones(1,1),Tensor.zeros(2,1,2)])

class TestMovedConstFolding(unittest.TestCase):
  def test_add_shrunk_zero(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) + Tensor.zeros(6).shrink(((1, 5),)))

  def test_add_padded_zero(self):
    # TODO: it's 1 now, this might be possible to fold
    _check_ast_count(1, Tensor([1.0, 2, 3, 4]) + Tensor.zeros(2).pad(((1, 1),)))
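    # the padded zeros tensor is 0 everywhere, so in principle this is x + 0;
    # the masked view created by pad is likely what blocks the fold today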

  def test_mul_shrunk_one(self):
    _check_ast_count(0, Tensor([1.0, 2, 3, 4]) * Tensor.ones(6).shrink(((1, 5),)))

  def test_mul_padded_one(self):
    _check_ast_count(1, Tensor([1.0, 2, 3, 4]) * Tensor.ones(2).pad(((1, 1),)))

  def test_cast_padded(self):
    # NOTE: this is folded due to CAST_BEFORE_VIEW
    if is_dtype_supported(dtypes.int16):
      _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
    if is_dtype_supported(dtypes.uint16):
      _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
      np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
    # not folded
    if is_dtype_supported(dtypes.int64):
      _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64))
      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64).numpy(), [0, 1, 1, 1, 1, 0])
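    # CAST_BEFORE_VIEW presumably moves the cast in front of the pad when the
    # target itemsize doesn't grow (float32 -> int16/uint16), so the cast folds
    # on the const and the pad stays a view; float32 -> int64 grows the buffer,
    # so it remains a real kernel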

class TestReduceOpsConstFolding(unittest.TestCase):
  def test_const_sum(self):
    _check_ast_count(0, Tensor.ones(4, 5, 6).sum())
    np.testing.assert_equal(Tensor.ones(4, 5, 6).sum().numpy(), 4 * 5 * 6)
    _check_ast_count(0, Tensor.ones(4, 5, 6).sum(axis=0))
    np.testing.assert_equal(Tensor.ones(4, 5, 6).sum(axis=0).numpy(), np.full((5, 6), 4))
    _check_ast_count(0, Tensor(4).sum())
    np.testing.assert_equal(Tensor(4).sum().numpy(), 4)

  def test_padded_const_sum(self):
    _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).sum())
    np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).sum().numpy(), 4)

    # NOTE: cannot just count the non-padded area because some Ops f do not have f(0) = 0
    _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).exp().sum())
    np.testing.assert_allclose(Tensor.ones(4).pad(((1, 1),)).exp().sum().numpy(), 4 * math.e + 2)
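    # the 4 ones give 4*exp(1) and the two pad zeros give exp(0) = 1 each, hence 4*e + 2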

  def test_bool_zero_max(self):
    _check_ast_count(0, Tensor.full((1, 2), True).shrink(((0, 1), (0, 0))).max((1, 0)))
    np.testing.assert_equal(Tensor.full((1, 2), True).shrink(((0, 1), (0, 0))).max((1, 0)).numpy(), False)

  def test_zero_size_ops(self):
    for reduceop in [lambda x:x.prod(), lambda x:x.sum()]: # lambda x:x.max() NOTE: numpy gives "reduction operation maximum which has no identity"
      _check_ast_count(0, reduceop(Tensor.empty(1, 0)))
      np.testing.assert_equal(reduceop(Tensor.empty(shape:=(1, 0))).numpy(), reduceop(np.empty(shape)))

  def test_zero_size_ops_view(self):
    for reduceop in [lambda x:x.prod(), lambda x:x.sum()]:
      _check_ast_count(0, reduceop(Tensor.empty(1, 0, 4).permute((1, 2, 0)).contiguous()))
      np.testing.assert_equal(reduceop(Tensor.empty(shape:=(1, 0))).numpy(), reduceop(np.empty(shape)))

  def test_zero_size_ops_realized(self):
    for reduceop in [lambda x:x.prod(), lambda x:x.sum()]:
      _check_ast_count(0, reduceop((Tensor.randn(0, 1)+1).realize()))
      np.testing.assert_equal(reduceop((Tensor.randn(shape:=(0, 1))+1).realize()).numpy(), reduceop(np.empty(shape)))

  def test_zero_size_realize_folded(self):
    # a non-contiguous folded output doesn't realize
    _check_ast_count(0, Tensor.empty(1, 0).sum())
    # a contiguous folded const can still schedule
    a = Tensor.empty(1, 0).sum().contiguous()
    _check_ast_count(2, a+2)
    self.assertIsNotNone(a.lazydata.base.realized)
    np.testing.assert_equal((Tensor.empty(1, 0).sum().contiguous()+2).numpy(), 2)
    # otherwise we just fuse it
    _check_ast_count(1, (Tensor.empty(1, 0).sum()+2).contiguous())
    np.testing.assert_equal((Tensor.empty(1, 0).sum()+2).numpy(), 2)
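    # reducing a zero-size axis folds to the reduce identity (0 for sum, 1 for
    # prod), which is why the +2 paths above evaluate to exactly 2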

  def test_const_prod(self):
    _check_ast_count(0, Tensor.full((2, 3), fill_value=2).prod())
    np.testing.assert_equal(Tensor.full((2, 3), fill_value=2).prod().numpy(), 2**(2*3))
    _check_ast_count(0, Tensor.full((4, 5, 6), fill_value=2).prod(axis=0))
    np.testing.assert_equal(Tensor.full((4, 5, 6), fill_value=2).prod(axis=0).numpy(), np.full((5, 6), 2**4))
    _check_ast_count(0, Tensor(4).prod())
    np.testing.assert_equal(Tensor(4).prod().numpy(), 4)

  def test_const_max(self):
    _check_ast_count(0, Tensor.ones(4, 5, 6).max())
    np.testing.assert_equal(Tensor.ones(4, 5, 6).max().numpy(), 1)
    _check_ast_count(0, Tensor(4).max())
    np.testing.assert_equal(Tensor(4).max().numpy(), 4)

  def test_sum_output_dtype(self):
    # sum output dtype can be different from the input dtype
    for dt in dtypes.fields().values():
      if is_dtype_supported(dt):
        t = Tensor.ones(16, dtype=dt).reshape(4, 4)
        assert t.sum().dtype == t.contiguous().sum().dtype
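        # e.g. small int and bool inputs typically accumulate in a wider int
        # dtype; the assert only checks the folded and scheduled paths agree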

@unittest.skipIf(CI and Device.DEFAULT in {"GPU", "CUDA", "METAL"}, "no GPU CI")
class TestMultiConstFolding(unittest.TestCase):
  def test_multi_const_folding_literal(self):
    ds = tuple(f"{Device.DEFAULT}:{i}" for i in range(4))
    t = Tensor.arange(16).float().realize().to(ds)

    # the non-const-folded case creates one ast on each shard
    _check_ast_count(4, t + 1)
    _check_ast_count(4, 1 + t)
    _check_ast_count(4, t * 2)
    _check_ast_count(4, 2 * t)

    # const folded
    _check_ast_count(0, t + 0)
    _check_ast_count(0, 0 + t)
    _check_ast_count(0, t * 0)
    _check_ast_count(0, 0 * t)
    _check_ast_count(0, t * 1)
    _check_ast_count(0, 1 * t)
    np.testing.assert_equal((t + 0).numpy(), np.arange(16))
    np.testing.assert_equal((t * 0).numpy(), [0] * 16)
    np.testing.assert_equal((t * 1).numpy(), np.arange(16))

    _check_ast_count(0, t ** 0)
    _check_ast_count(0, t ** 1)
    _check_ast_count(0, 1 ** t)

  def test_multi_const_folding_tensor(self):
    ds = tuple(f"{Device.DEFAULT}:{i}" for i in range(4))
    t = Tensor.arange(16).float().realize().to(ds)
    zero = Tensor.zeros(16).realize().to(ds)
    one = Tensor.ones(16).realize().to(ds)

    # const folded
    _check_ast_count(0, t + zero)
    _check_ast_count(0, zero + t)
    _check_ast_count(0, t * zero)
    _check_ast_count(0, zero * t)
    _check_ast_count(0, t * one)
    _check_ast_count(0, one * t)
    np.testing.assert_equal((t + zero).numpy(), np.arange(16))
    np.testing.assert_equal((t * zero).numpy(), [0] * 16)
    np.testing.assert_equal((t * one).numpy(), np.arange(16))

  @unittest.expectedFailure
  def test_multi_todo_pow(self):
    ds = tuple(f"{Device.DEFAULT}:{i}" for i in range(4))
    t = Tensor.arange(16).float().realize().to(ds)
    zero = Tensor.zeros(16).realize().to(ds)
    one = Tensor.ones(16).realize().to(ds)

    # TODO: fix pow folding
    _check_ast_count(0, t ** zero)
    _check_ast_count(0, t ** one)
    _check_ast_count(0, one ** t)

class TestTautologicalCompare(unittest.TestCase):
  # without const folding, these would have triggered -Wtautological-compare in clang
  def test_lt_false(self):
    # bool < False is always false
    np.testing.assert_equal((Tensor([True, False]) < False).numpy(), [False, False])

  def test_true_lt(self):
    # True < bool is always false
    np.testing.assert_equal((True < Tensor([True, False])).numpy(), [False, False])

  def test_truth_table(self):
    np.testing.assert_equal((Tensor(False) < Tensor(False)).numpy(), False)
    np.testing.assert_equal((Tensor(False) < Tensor(True)).numpy(), True)
    np.testing.assert_equal((Tensor(True) < Tensor(False)).numpy(), False)
    np.testing.assert_equal((Tensor(True) < Tensor(True)).numpy(), False)
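    # i.e. a < b on bools is (not a) and b, so only False < True is True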

  def test_a_eq_a(self):
    # self-equality is always true for int or bool
    a = Tensor([1, 2, 3])
    np.testing.assert_equal((a == a).numpy(), [True, True, True])

    # but not for nan
    a = Tensor([math.nan, 1.0, 2.0])
    np.testing.assert_equal((a == a).numpy(), [False, True, True])

  def test_a_ne_a(self):
    # self-inequality is always false for int or bool
    a = Tensor([1, 2, 3])
    np.testing.assert_equal((a != a).numpy(), [False, False, False])

    # but not for nan
    a = Tensor([math.nan, 1.0, 2.0])
    np.testing.assert_equal((a != a).numpy(), [True, False, False])
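    # IEEE 754 defines nan != nan, so a == a can't fold to all-True for floats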

if __name__ == '__main__':
  unittest.main()