diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e1a6001f34..7e335c2712 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -542,7 +542,7 @@ jobs: CPU=1 RANGEIFY=1 python3 test/test_multitensor.py TestMultiTensor.test_matmul_shard_1_1 TestMultiTensor.test_simple_add_W TestMultiTensor.test_simple_reduce \ TestMultiTensor.test_elementwise_dtype TestMultiTensor.test_shard_no_recompile TestHandleData.test_copied_to_device TestMultiRamUsage CPU=1 RANGEIFY=1 python3 -m pytest test/test_multitensor.py::TestMultiAssign -k 'not (multi_assign_piece_noncontig or multi_assign_var_offset)' - CPU=1 RANGEIFY=1 python3 -m pytest -n=auto test/test_multitensor.py::TestMultiTensor test/unit/test_allreduce.py -k 'not const_folding' + CPU=1 RANGEIFY=1 python3 -m pytest -n=auto test/test_multitensor.py::TestMultiTensor test/test_multitensor.py::TestBatchNorm test/unit/test_allreduce.py -k 'not const_folding' - name: Test CPU=1 RANGEIFY=2 run: CPU=1 CPU_LLVM=0 RANGEIFY=2 python3 -m pytest -n auto test/test_tiny.py test/test_rangeify.py test/test_ops.py --durations 20 # slow (and still wrong on beautiful_mnist) diff --git a/tinygrad/schedule/rangeify.py b/tinygrad/schedule/rangeify.py index 4581981850..8df51d966e 100644 --- a/tinygrad/schedule/rangeify.py +++ b/tinygrad/schedule/rangeify.py @@ -400,6 +400,9 @@ pm_cleanups = double_reshape+pm_mops+PatternMatcher([ (UPat(Ops.COPY, src=(UPat.cvar("x"), UPat()), name="copy"), lambda copy,x: copy.const_like(x.arg)), (UPat(Ops.COPY, src=(UPat(GroupOp.All-{Ops.CONTIGUOUS, Ops.COPY}).f(Ops.BUFFERIZE, allow_any_len=True, name="b") .f(Ops.INDEX, allow_any_len=True, name="x"), UPat()), name="copy"), pre_bufferize), + # mstack on CONST is CONST: an INDEX over an MSTACK whose first source `s` has a CONST base is rewritten to a fresh CONST with that base's dtype and arg; NOTE(review): only `s` is inspected, confirm the remaining MSTACK sources always agree + (UPat(Ops.MSTACK, src=(UPat.var("s"),), allow_any_len=True).f(Ops.INDEX, allow_any_len=True), + lambda s: UOp.const(c.dtype, c.arg) if (c:=s.base).op is Ops.CONST else None), ]) def late_buffer_view(t:UOp, b:UOp):