diff --git a/test/test_jit.py b/test/test_jit.py
index a7d4536cfd..bba8ebd54a 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -838,7 +838,7 @@ class TestJitRandom(unittest.TestCase):
     tst = {0:[], 1:[]}
     for r in [0,1]:
       Tensor.manual_seed(1337)
-      with Context(RANGEIFY=r):
+      with Context(JIT=r):
         _ = Tensor.randint(4, high=3)
         # this second one makes the behavior different
         _ = Tensor.randint(4, high=3)
diff --git a/test/test_multitensor.py b/test/test_multitensor.py
index 253cedec19..fdf07f3ca7 100644
--- a/test/test_multitensor.py
+++ b/test/test_multitensor.py
@@ -2,7 +2,7 @@ import unittest, functools, random
 from tinygrad import Tensor, Device, nn, GlobalCounters, TinyJit, dtypes, Variable
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.helpers import CI, getenv, prod, Context, RANGEIFY
+from tinygrad.helpers import CI, getenv, prod, Context
 from tinygrad.nn.state import get_parameters, get_state_dict
 from tinygrad.engine.realize import lower_schedule, BufferCopy, CompiledRunner, run_schedule
 import numpy as np
@@ -390,7 +390,7 @@ class TestMultiTensor(unittest.TestCase):

   # NOTE: this is failing on LLVM CI, no idea why. Works locally.
   @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
-  @unittest.skipIf(RANGEIFY, "TODO: pm_rangeify hangs")
+  @unittest.skip("TODO: pm_rangeify hangs")
   def test_data_parallel_resnet(self):
     from extra.models.resnet import ResNet18
@@ -427,7 +427,7 @@ class TestMultiTensor(unittest.TestCase):
       np.testing.assert_allclose(grad, shard_grad, atol=1e-5, rtol=1e-5)

   @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
-  @unittest.skipIf(RANGEIFY, "TODO: pm_rangeify hangs")
+  @unittest.skip("TODO: pm_rangeify hangs")
   def test_data_parallel_resnet_train_step(self):
     from extra.models.resnet import ResNet18
     fake_image = Tensor.rand((2, 3, 224//16, 224//16))
@@ -435,7 +435,7 @@ class TestMultiTensor(unittest.TestCase):
     m = ResNet18()
     self._test_model_train_step(m, fake_image, labels)

-  @unittest.skipIf(RANGEIFY, "TODO: pm_rangeify hangs")
+  @unittest.skip("TODO: pm_rangeify hangs")
   def test_data_parallel_simple_train_step(self):
     class Model:
       def __init__(self): self.conv1 = nn.Linear(128,128)
@@ -800,7 +800,7 @@ class TestMultiTensor(unittest.TestCase):
     t = Tensor.rand(16, 16).shard(devices_2, axis=0)
     np.testing.assert_allclose(t.numpy(), t.clone().numpy())

-  @unittest.skipIf(RANGEIFY, "RANGEIFY doesn't support multi const folding")
+  @unittest.skip("RANGEIFY doesn't support multi const folding")
   def test_multi_const_folding(self):
     with Context(TRACK_MATCH_STATS=0):
       a = Tensor.arange(3).realize()
diff --git a/test/test_rangeify.py b/test/test_rangeify.py
index 4f22a1dcc8..144375c6f4 100644
--- a/test/test_rangeify.py
+++ b/test/test_rangeify.py
@@ -70,6 +70,7 @@ class TestRangeify(unittest.TestCase):
     ret = A.sum(axis=2).contiguous(arg=(1,)).sum(axis=1)
     ret.realize()

+  @unittest.skip("RANGEIFY=0 does nothing")
   def test_double_gemm_real(self):
     def go():
       with Context(DEBUG=0):
@@ -199,6 +200,7 @@ class TestRangeify(unittest.TestCase):
     out = blk._feed_forward(x)
     out.realize()

+  @unittest.skip("RANGEIFY=0 does nothing")
   def test_flash_attention(self):
     BS, HEADS, SEQLEN, EMB = 4, 2, 16, 8
diff --git a/test/test_schedule.py b/test/test_schedule.py
index 68fe09b0f7..4817272317 100644
--- a/test/test_schedule.py
+++ b/test/test_schedule.py
@@ -344,7 +344,7 @@ class TestSchedule(unittest.TestCase):
     out1 = r1 + y
     schedule = check_schedule([out0, out1], 2)
     reduceops = [x for si in schedule for x in si.ast.toposort() if x.op in {Ops.REDUCE_AXIS, Ops.REDUCE}]
-    assert len(reduceops) in [2,3] # why is RANGEIFY different?
+    self.assertEqual(len(reduceops), 2) # why is RANGEIFY different?

   def test_div_collapse_buffer(self):
     a = Tensor.full((4,), 4.0).contiguous().realize()
diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index fd13c8b166..a7a0357403 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -141,7 +141,7 @@ DONT_REALIZE_EXPAND, DONT_GROUP_REDUCES = ContextVar("DONT_REALIZE_EXPAND", 0),
 QUANTIZE, VALIDATE_WITH_CPU, DISABLE_FAST_IDIV = ContextVar("QUANTIZE", 0), ContextVar("VALIDATE_WITH_CPU", 0), ContextVar("DISABLE_FAST_IDIV", 0)
 CORRECT_DIVMOD_FOLDING, FUSE_OPTIM = ContextVar("CORRECT_DIVMOD_FOLDING", 0), ContextVar("FUSE_OPTIM", 0)
 ALLOW_DEVICE_USAGE, MAX_BUFFER_SIZE = ContextVar("ALLOW_DEVICE_USAGE", 1), ContextVar("MAX_BUFFER_SIZE", 0)
-RANGEIFY, FUSE_ATTENTION = ContextVar("RANGEIFY", 1), ContextVar("FUSE_ATTENTION", 0)
+FUSE_ATTENTION = ContextVar("FUSE_ATTENTION", 0)
 EMULATE = ContextVar("EMULATE", "")
 CPU_COUNT = ContextVar("CPU_COUNT", max(1, (os.cpu_count() or 1) // (4 if ARCH_X86 else 2))) # take 1/2 of the cores, accounting HT
 CPU_LLVM, AMD_LLVM = ContextVar("CPU_LLVM", 0), ContextVar("AMD_LLVM", 1)