diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3ace5c52f9..9d97541df2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -584,7 +584,7 @@ jobs: key: metal deps: testing - name: some unit tests - run: METAL=1 RANGEIFY=1 python -m pytest -n=auto test/unit/test_winograd.py --durations=20 + run: METAL=1 RANGEIFY=1 python -m pytest -n=auto test/unit/test_winograd.py test/unit/test_linalg.py --durations=20 - name: Test METAL=1 RANGEIFY=1 run: METAL=1 RANGEIFY=1 python -m pytest -n=auto test/test_ops.py --durations=20 - name: Run process replay tests diff --git a/test/test_pickle.py b/test/test_pickle.py index d5c62cd77f..ddb01670fd 100644 --- a/test/test_pickle.py +++ b/test/test_pickle.py @@ -2,7 +2,7 @@ import unittest, pickle, types import numpy as np from tinygrad import Tensor, TinyJit, Variable, dtypes from tinygrad.helpers import GlobalCounters, ContextVar, Context -from tinygrad.uop.ops import PatternMatcher, UPat, UOp, Ops +from tinygrad.uop.ops import PatternMatcher, UPat, UOp class TestPickle(unittest.TestCase): def test_pickle_code_object(self): @@ -45,10 +45,9 @@ class TestPickle(unittest.TestCase): t_values = t.numpy() del t # free buffers print("** post pickle") - init = GlobalCounters.kernel_count t2:Tensor = pickle.loads(st) + assert t2.uop.is_realized np.testing.assert_equal(t_values, t2.numpy()) - self.assertEqual(GlobalCounters.kernel_count-init, 0) def test_pickle_realized_tensor_alt2(self): print("** init") @@ -70,14 +69,14 @@ class TestPickle(unittest.TestCase): def test_pickle_buffer_uop(self): t = Tensor.arange(4).realize() a = t.uop - assert a.op is Ops.BUFFER - self.assertIsNotNone(buffer:=a.realized) + assert a.is_realized + self.assertIsNotNone(buffer:=a.base.realized) s = pickle.dumps(a) # free buffers del a del buffer a2:UOp = pickle.loads(s) - self.assertListEqual(a2.realized.as_buffer().cast("I").tolist(), [0, 1, 2, 3]) + 
self.assertListEqual(a2.base.realized.as_buffer().cast("I").tolist(), [0, 1, 2, 3]) def test_pickle_unrealized_tensor(self): t = Tensor.ones(10, 10) diff --git a/tinygrad/schedule/rangeify.py b/tinygrad/schedule/rangeify.py index dae1bfc20a..b0405f96de 100644 --- a/tinygrad/schedule/rangeify.py +++ b/tinygrad/schedule/rangeify.py @@ -312,7 +312,8 @@ def might_end_axis(idx:UOp): if all(x.op not in {Ops.REDUCE_AXIS} for x in idx.toposort()): return None to_end_axis = [] for i,a in enumerate(idx.src[1:]): - if any(x.arg > idx.arg for x in a.toposort() if x.op is Ops.RANGE): + # in RANGEIFY=1, always realize + if not (RANGEIFY > 1) or any(x.arg > idx.arg for x in a.toposort() if x.op is Ops.RANGE): to_end_axis.append(i) if to_end_axis: return idx.replace(src=(idx.src[0].realize(arg=tuple(to_end_axis)),)+idx.src[1:], arg=None) return idx.replace(arg=None) diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 7269c849f8..615dea1839 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -4100,7 +4100,7 @@ class Tensor(MathTrait): R = self.clone() Q = Tensor.eye(m, dtype=self.dtype).reshape((1,) * len(b_shape) + (m, m)).expand(b_shape + (m, m)).contiguous() for i in range(min(m, n)): - x = R[..., i:m, i] + x = R[..., i:m, i].contiguous() # TODO: without contiguous this can silently be wrong, should at least assert s = -x[..., 0].sign() u1 = x[..., 0] - s * x.square().sum(-1).sqrt() w = x.unsqueeze(-1) / u1.reshape(b_shape + (1, 1))