mark slow tests as slow instead of as CI (#13736)

* mark slow tests as slow instead of as CI

* CI shouldn't have different behavior

* more skips / CI

* slow
George Hotz
2025-12-17 10:29:57 -04:00
committed by GitHub
parent 9015a22523
commit 3dbde178c1
26 changed files with 80 additions and 264 deletions
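
Every hunk below follows the same pattern: a CI-conditional skip (or a CI-dependent size/threshold) is replaced by the new slow marker from test/helpers.py, so CI and local runs behave the same by default. A minimal before/after sketch, assuming tinygrad's unittest-based test layout (TestBig and test_big_model are hypothetical names, not part of this diff):

# before: skip behavior depends on the CI environment
import unittest
from tinygrad.helpers import CI

class TestBig(unittest.TestCase):            # hypothetical test class
  @unittest.skipIf(CI, "too slow for CI")
  def test_big_model(self): ...

# after: one opt-in marker, same behavior locally and in CI
from test.helpers import slow

class TestBigMarked(unittest.TestCase):      # hypothetical test class
  @slow                                      # skipped unless RUN_SLOW=1 is set
  def test_big_model(self): ...

Because slow is a plain unittest decorator, it composes with other skipIf/skipUnless decorators and can also be applied to a whole TestCase class, as several of the files below do.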

View File

@@ -1,6 +1,7 @@
 import unittest, ctypes, struct, os, random, numpy as np
 from tinygrad import Device, Tensor, dtypes
-from tinygrad.helpers import getenv, CI, mv_address, DEBUG
+from tinygrad.helpers import getenv, mv_address, DEBUG
+from test.helpers import slow
 from tinygrad.device import Buffer, BufferSpec
 from tinygrad.runtime.support.hcq import HCQCompiled, HCQBuffer
 from tinygrad.runtime.autogen import libc
@@ -220,7 +221,7 @@ class TestHCQ(unittest.TestCase):
     mv_buf1 = buf1.as_buffer().cast('Q')
     assert libc.memcmp(mv_address(mv_buf1), buf2._buf.va_addr, sz) == 0
-  @unittest.skipIf(CI, "skip in CI")
+  @slow
   def test_copy_64bit(self):
     if TestHCQ.d0.hw_copy_queue_t is None: self.skipTest("device does not support copy queue")

View File

@@ -2,12 +2,11 @@ import unittest
 from tinygrad import Device
 from tinygrad.device import Buffer
 from tinygrad.dtype import dtypes
-from tinygrad.helpers import CI
 from tinygrad.runtime.ops_cl import CLDevice, CLAllocator, CLCompiler, CLProgram
 @unittest.skipUnless(Device.DEFAULT == "CL", "Runs only on OpenCL")
 class TestCLError(unittest.TestCase):
-  @unittest.skipIf(CI, "dangerous for CI, it allocates tons of memory")
+  @unittest.skip("allocates tons of memory")
   def test_oom(self):
     with self.assertRaises(RuntimeError) as err:
       allocator = CLAllocator(CLDevice())

View File

@@ -261,7 +261,7 @@ class TestHCQ(unittest.TestCase):
     et = _time_queue(q, TestHCQ.d0)
     gb_s = (SZ/1e9)/et
     print(f"same device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
-    assert (0.3 if CI else 10) <= gb_s <= 1000
+    assert 0.3 <= gb_s <= 1000
   def test_cross_device_copy_bandwidth(self):
     SZ = 2_000_000_000
@@ -273,7 +273,7 @@ class TestHCQ(unittest.TestCase):
     et = _time_queue(q, TestHCQ.d0)
     gb_s = (SZ/1e9)/et
     print(f"cross device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
-    assert (0.3 if CI else 2) <= gb_s <= 50
+    assert 0.3 <= gb_s <= 50
   def test_interleave_compute_and_copy(self):
     q = TestHCQ.compute_queue()

View File

@@ -1,12 +1,12 @@
 import unittest
-from tinygrad.helpers import CI
+from test.helpers import slow
 from examples.mamba import Mamba, generate
 from transformers import AutoTokenizer
 PROMPT = 'Why is gravity '
 TOKENIZER = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
-@unittest.skipIf(CI, "model is slow for CI")
+@slow
 class TestMamba(unittest.TestCase):
   def test_mamba_130M(self):
     OUT_130M = '''Why is gravity \nnot a good idea?\n\nA:'''

View File

@@ -8,11 +8,11 @@ from hypothesis.extra import numpy as stn
 import numpy as np
 import torch
 from tinygrad import Tensor
-from tinygrad.helpers import CI, getenv
+from tinygrad.helpers import getenv
 settings.register_profile(__file__, settings.default,
-  max_examples=100 if CI else 250, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
+  max_examples=100, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 # torch wraparound for large numbers

View File

@@ -1,4 +1,4 @@
-import time, struct, functools
+import os, time, struct, functools, unittest
 from typing import Any, Callable
 import numpy as np
 from tinygrad import Tensor, dtypes, Device
@@ -9,6 +9,9 @@ from tinygrad.dtype import DType
 from tinygrad.nn.state import get_parameters
 from tinygrad.helpers import T, CI
 from tinygrad.codegen import full_rewrite
+# decorator to skip slow tests by default, run with RUN_SLOW=1 to include them
+slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run")
 from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler
 def derandomize_model(model):
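
A short usage sketch of the new marker from test/helpers.py; the module, class, and test names below are illustrative, not part of this diff:

# hypothetical test module using the slow marker
import unittest
from test.helpers import slow

class TestHeavyModel(unittest.TestCase):
  @slow                                   # unittest.skipUnless(os.getenv("RUN_SLOW"), ...) under the hood
  def test_full_inference(self):
    self.assertTrue(True)                 # placeholder body

if __name__ == "__main__":
  unittest.main()

# default run skips the test, RUN_SLOW=1 includes it:
#   python3 -m pytest test_heavy_model.py
#   RUN_SLOW=1 python3 -m pytest test_heavy_model.py

Note that any non-empty value of RUN_SLOW (even "0") enables the slow tests, since os.getenv returns the raw string and skipUnless only checks its truthiness.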

View File

@@ -4,7 +4,8 @@ import numpy as np
 from PIL import Image
 from tinygrad import Tensor
-from tinygrad.helpers import getenv, CI
+from tinygrad.helpers import getenv
+from test.helpers import slow
 from extra.models.efficientnet import EfficientNet
 from extra.models.vit import ViT
 from extra.models.resnet import ResNet50
@@ -56,12 +57,12 @@ class TestEfficientNet(unittest.TestCase):
   def tearDownClass(cls):
     del cls.model
-  @unittest.skipIf(CI, "covered by test_chicken_car")
+  @slow
   def test_chicken(self):
     labels = _infer(self.model, chicken_img)
     self.assertEqual(_LABELS[labels[0]], "hen")
-  @unittest.skipIf(CI, "covered by test_chicken_car")
+  @slow
   def test_car(self):
     labels = _infer(self.model, car_img)
     self.assertEqual(_LABELS[labels[0]], "sports car, sport car")

View File

@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 import unittest
 import numpy as np
-from tinygrad import Tensor, Device
+from tinygrad import Tensor
-from tinygrad.helpers import CI
+from test.helpers import slow
 from tinygrad.nn.state import get_parameters
 from tinygrad.nn import optim, BatchNorm2d
 from extra.training import train, evaluate
@@ -49,7 +49,7 @@ class TinyConvNet:
     x = x.reshape(shape=[x.shape[0], -1])
     return x.dot(self.l1)
-@unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+@slow
 class TestMNIST(unittest.TestCase):
   def test_sgd_onestep(self):
     np.random.seed(1337)

View File

@@ -5,11 +5,12 @@ from tinygrad.nn import optim
 from tinygrad.nn.state import get_parameters
 from tinygrad.engine.jit import TinyJit
 from tinygrad import Tensor, Device, GlobalCounters, dtypes, Variable
-from tinygrad.helpers import CI, Context
+from tinygrad.helpers import Context
+from test.helpers import slow
 from extra.lr_scheduler import OneCycleLR
 from test.helpers import derandomize_model
-from examples.gpt2 import Transformer as GPT2Transformer, MODEL_PARAMS as GPT2_MODEL_PARAMS
+from examples.gpt2 import Transformer as GPT2Transformer
 from examples.hlb_cifar10 import SpeedyResNet, hyp
 from examples.llama import Transformer as LLaMaTransformer
 from examples.stable_diffusion import UNetModel, unet_params
@@ -20,7 +21,7 @@ global_mem_used = 0
 def helper_test(nm, gen, model, max_memory_allowed, max_kernels_allowed, all_jitted=False):
   with Context(JIT=2):
     tms = []
-    for _ in range(2 if CI else 4):
+    for _ in range(2):
       early_gen = [x.realize() if isinstance(x, Tensor) else x for x in gen()]
       GlobalCounters.reset()
       Device[Device.DEFAULT].synchronize()
@@ -52,7 +53,7 @@ class TestRealWorld(unittest.TestCase):
   def tearDown(self):
     dtypes.default_float = self.old_float
-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow, covered by METAL")
+  @slow
   @unittest.skipUnless(is_dtype_supported(dtypes.float16), "need dtypes.float16")
   def test_stable_diffusion(self):
     params = unet_params
@@ -92,14 +93,14 @@ class TestRealWorld(unittest.TestCase):
     dtypes.default_float = dtypes.float16
     args_tiny = {"dim": 1024, "n_heads": 8, "n_layers": 8, "norm_eps": 1e-5, "vocab_size": 1000}
-    model = GPT2Transformer(**(args_tiny if CI else GPT2_MODEL_PARAMS["gpt2-medium"]))
+    model = GPT2Transformer(**args_tiny)
     derandomize_model(model)
     @TinyJit
     def test(t, v):
       with Context(JIT=0): return model(t, v).realize()
-    helper_test("test_gpt2", lambda: (Tensor([[1,]]),Variable("pos", 1, 100).bind(1)), test, 0.23 if CI else 0.9, 160 if CI else 468, all_jitted=True)
+    helper_test("test_gpt2", lambda: (Tensor([[1,]]),Variable("pos", 1, 100).bind(1)), test, 0.23, 160, all_jitted=True)
-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+  @slow
   def test_train_mnist(self):
     from examples.beautiful_mnist import Model
     with Tensor.train():
@@ -117,7 +118,7 @@ class TestRealWorld(unittest.TestCase):
       helper_test("train_mnist", lambda: (Tensor.randn(BS, 1, 28, 28),), train, 0.017, 103)
-  @unittest.skipIf(CI and Device.DEFAULT in {"CPU", "CL"}, "slow")
+  @slow
   def test_forward_cifar(self):
     BS = 32
     # with training batchnorm still though
@@ -127,7 +128,7 @@ class TestRealWorld(unittest.TestCase):
       def run(X): return model(X)
       helper_test("forward_cifar", lambda: (Tensor.randn(BS, 3, 32, 32),), run, 0.033, 27)
-  @unittest.skipIf(CI and Device.DEFAULT in {"CPU", "CL"}, "slow")
+  @slow
   def test_train_cifar(self):
     with Tensor.train():
       model = SpeedyResNet(Tensor.ones((12,3,2,2)))
@@ -157,7 +158,7 @@ class TestRealWorld(unittest.TestCase):
         final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=4)
       assert not np.isnan(lr_scheduler.min_lr), "lr too small or initial_div_facotr too big for half"
-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+  @slow
   def test_bert(self):
     with Tensor.train():
       args_tiny = {"attention_probs_dropout_prob": 0.0, "hidden_dropout_prob": 0.0, "vocab_size": 30522, "type_vocab_size": 2,

View File

@@ -3,7 +3,8 @@ import numpy as np
 from tinygrad import Device
 from tinygrad.nn.state import get_parameters
 from tinygrad.nn import optim
-from tinygrad.helpers import getenv, CI
+from tinygrad.helpers import getenv
+from test.helpers import slow
 from extra.training import train
 from extra.models.convnext import ConvNeXt
 from extra.models.efficientnet import EfficientNet
@@ -38,7 +39,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()
-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_efficientnet(self):
     model = EfficientNet(0)
     X = np.zeros((BS,3,224,224), dtype=np.float32)
@@ -46,7 +47,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()
-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_vit(self):
     model = ViT()
     X = np.zeros((BS,3,224,224), dtype=np.float32)
@@ -54,7 +55,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()
-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_transformer(self):
     # this should be small GPT-2, but the param count is wrong
     # (real ff_dim is 768*4)
@@ -64,7 +65,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()
-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_resnet(self):
     X = np.zeros((BS, 3, 224, 224), dtype=np.float32)
     Y = np.zeros((BS), dtype=np.int32)

View File

@@ -2,7 +2,8 @@ import unittest
 import pathlib
 from examples.whisper import init_whisper, load_file_waveform, transcribe_file, transcribe_waveform
 import examples.mlperf.metrics as metrics
-from tinygrad.helpers import CI, fetch, CPU_LLVM
+from tinygrad.helpers import fetch
+from test.helpers import slow
 from tinygrad import Device, dtypes
 from tinygrad.device import is_dtype_supported
@@ -75,11 +76,11 @@ class TestWhisper(unittest.TestCase):
   def test_transcribe_file1(self):
     self.assertEqual(transcribe_file(self.model, self.enc, TEST_FILE_1), TRANSCRIPTION_1)
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too many tests for CI")
+  @slow
   def test_transcribe_file2(self):
     self.assertEqual(transcribe_file(self.model, self.enc, TEST_FILE_2), TRANSCRIPTION_2)
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too many tests for CI")
+  @slow
   def test_transcribe_batch12(self):
     waveforms = [load_file_waveform(TEST_FILE_1), load_file_waveform(TEST_FILE_2)]
     transcriptions = transcribe_waveform(self.model, self.enc, waveforms)
@@ -95,14 +96,14 @@ class TestWhisper(unittest.TestCase):
     self.assertEqual(TRANSCRIPTION_1, transcriptions[1])
   @unittest.skip("file 3 url is broken")
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too long for CI")
+  @slow
   def test_transcribe_long(self):
     waveform = [load_file_waveform(fetch(TEST_FILE_3_URL))]
     transcription = transcribe_waveform(self.model, self.enc, waveform)
     self.assertWER(transcription, TRANSCRIPTION_3, 0.085)
   @unittest.skip("file 3 url is broken")
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too long for CI")
+  @slow
   def test_transcribe_long_no_batch(self):
     waveforms = [load_file_waveform(fetch(TEST_FILE_3_URL)), load_file_waveform(TEST_FILE_1)]

View File

@@ -7,7 +7,8 @@ from tinygrad.tensor import _to_np_dtype
 from tinygrad.uop.ops import Ops
 from tinygrad.dtype import DType
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import AMX, CI, AMD_LLVM, CPU_LLVM
+from tinygrad.helpers import AMX, AMD_LLVM, CPU_LLVM
+from test.helpers import slow
 from tinygrad.engine.realize import CompiledRunner, get_program
 from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
@@ -119,7 +120,7 @@ class TestTensorCores(unittest.TestCase):
       helper_tc_ensure_uops_and_opts_count(tc.dims[0], tc.dims[1], tc.dims[2]//8, tc.dtype_in, tc.dtype_out, tc_opt=2, ensure_triggered=False)
   @unittest.skipIf(Device.DEFAULT == "PYTHON", "not generated on EMULATED device")
-  @unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
+  @slow
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores")
   def test_tensor_cores_multi_reduce(self):
     for tc in Device[Device.DEFAULT].renderer.tensor_cores:

View File

@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device, Variable
-from tinygrad.helpers import CI, Context, getenv
+from tinygrad.helpers import Context, getenv
 from tinygrad.engine.realize import run_schedule
 from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
 from tinygrad.uop.ops import Ops
@@ -143,7 +143,7 @@ class TestIndexing(unittest.TestCase):
   def test_llama_embedding(self, noopt=1, op_limit=65536):
     # llama3 is 128256
-    vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
+    vocab_size, embed_size = (10, 3)
     emb = nn.Embedding(vocab_size, embed_size)
     emb_w = emb.weight.numpy()
     x = Tensor([1,2,3,4])
@@ -161,7 +161,7 @@ class TestIndexing(unittest.TestCase):
     # TODO: reshape to match torch, should we do this in nn?
     np.testing.assert_allclose(z.numpy().reshape(4, embed_size), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
   # at least the arange is being fused
-  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000 if CI else 5_898_240_000)
+  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000)
 if __name__ == "__main__":
   unittest.main()

View File

@@ -3,7 +3,7 @@ import functools, unittest, ctypes
 from tinygrad.device import Device, Buffer
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.helpers import Context, CI, dedup, from_mv
+from tinygrad.helpers import Context, dedup, from_mv
 from tinygrad.dtype import dtypes
 from tinygrad.engine.jit import MultiGraphRunner
 from tinygrad.engine.realize import ExecItem, BufferXfer, get_runner, CompiledRunner
@@ -12,8 +12,8 @@ from test.helpers import needs_second_gpu
 np.random.seed(1337)
 Tensor.manual_seed(1337)
-BUF_SIZE = 4096 if CI else 4096 * 128
+BUF_SIZE = 4096
-RUN_CNT = 4 if CI else 32
+RUN_CNT = 4
 cached_prgs = {}
 def helper_exec_op(device, outbuf, inbufs):

View File

@@ -2,12 +2,12 @@ import unittest, functools, random
 from tinygrad import Tensor, Device, nn, GlobalCounters, TinyJit, dtypes, Variable
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.helpers import CI, getenv, prod, Context
+from tinygrad.helpers import getenv, prod, Context
 from tinygrad.nn.state import get_parameters, get_state_dict
 from tinygrad.engine.realize import lower_schedule, BufferCopy, CompiledRunner, run_schedule
 import numpy as np
 from hypothesis import given, strategies as strat, settings
-from test.helpers import REAL_DEV, not_support_multi_device, needs_second_gpu
+from test.helpers import not_support_multi_device, needs_second_gpu, slow
 settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 settings.load_profile("my_profile")
@@ -420,7 +420,7 @@ class TestMultiTensor(unittest.TestCase):
     np.testing.assert_allclose(y.numpy(), y_shard.numpy(), atol=1e-6, rtol=1e-6)
   # NOTE: this is failing on LLVM CI, no idea why. Works locally.
-  @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
+  @slow
   def test_data_parallel_resnet(self):
     from extra.models.resnet import ResNet18
@@ -456,7 +456,7 @@ class TestMultiTensor(unittest.TestCase):
       # sometimes there is zeros in these grads... why?
       np.testing.assert_allclose(grad, shard_grad, atol=1e-5, rtol=1e-5)
-  @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
+  @slow
   @unittest.skip("TODO: pm_rangeify hangs")
   def test_data_parallel_resnet_train_step(self):
     from extra.models.resnet import ResNet18

View File

@@ -4,14 +4,14 @@ import numpy as np
 import torch
 from tinygrad import Tensor, Device, TinyJit, dtypes
 from tinygrad.uop.ops import Ops
-from tinygrad.helpers import GlobalCounters, CI, Context
+from tinygrad.helpers import GlobalCounters, Context
 from tinygrad.nn import Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, Embedding
 from tinygrad.nn import BatchNorm, LayerNorm, LayerNorm2d, GroupNorm, InstanceNorm, RMSNorm, LSTMCell
 from tinygrad.nn.state import load_state_dict
 from tinygrad.engine.realize import run_schedule
-from test.helpers import not_support_multi_device, needs_second_gpu
+from test.helpers import not_support_multi_device, needs_second_gpu, slow
-@unittest.skipIf(CI and Device.DEFAULT in {"CUDA", "NV"}, "slow")
+@slow
 class TestNN(unittest.TestCase):
   def test_batchnorm2d(self, training=False, threed=False, track_running_stats=True):
     with Tensor.train(training):

View File

@@ -11,8 +11,7 @@ if getenv("TINY_BACKEND"):
   import tinygrad.nn.torch # noqa: F401 # pylint: disable=unused-import
   torch.set_default_device("tiny")
-if CI:
-  warnings.filterwarnings("ignore", message="Non-empty compiler output encountered")
+warnings.filterwarnings("ignore", message="Non-empty compiler output encountered")
 FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
 PRINT_TENSORS = getenv("PRINT_TENSORS", 0)

View File

@@ -3,9 +3,8 @@ import torch
 import unittest
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.nn.optim import Adam, SGD, AdamW, Muon, LAMB
-from tinygrad.helpers import CI
 from tinygrad.device import is_dtype_supported
-from test.helpers import needs_second_gpu
+from test.helpers import needs_second_gpu, slow
 np.random.seed(1337)
 x_init = np.random.randn(1,4).astype(np.float32)
@@ -42,7 +41,7 @@ def step(tensor, optim, steps=1, teeny=False, **kwargs):
     optim.step()
   return net.x.detach().numpy(), net.W.detach().numpy()
-@unittest.skipIf(CI and Device.DEFAULT in {"CUDA", "NV"}, "slow")
+@slow
 class TestOptim(unittest.TestCase):
   def setUp(self):
     self.old_training = Tensor.training

View File

@@ -1,6 +1,6 @@
 import unittest
 from tinygrad import Tensor, nn, Device
-from tinygrad.helpers import Context, GlobalCounters, CI, getenv, PCONTIG, DEBUG
+from tinygrad.helpers import Context, GlobalCounters, getenv, PCONTIG, DEBUG
 from tinygrad.uop.ops import graph_rewrite, PatternMatcher, UPat, Ops
 from tinygrad.codegen.opt import OptOps, Opt
 from tinygrad.renderer.ptx import PTXRenderer
@@ -153,199 +153,6 @@ class TestPcontig(unittest.TestCase):
     opts += (Opt(OptOps.UPCAST, 4, 4),)
     self.test_flash_attention(opts)
-# *** non CI rangeify tests below this line ***
-N = 256
-@unittest.skipIf(CI, "useless in CI, doesn't test anything")
-class TestRangeifyOpt(unittest.TestCase):
-  def test_randperm(self):
-    Tensor.randperm(10000).realize()
-  def test_one_getitem(self):
-    X = Tensor.empty(10000)
-    sel = Tensor.arange(1000).contiguous().realize()
-    Xsel = X[sel]
-    Tensor.realize(Xsel)
-  def test_two_getitem(self):
-    # this is splitting on the child even when it really shouldn't
-    X = Tensor.empty(10000)
-    Y = Tensor.empty(10000)
-    sel = Tensor.arange(1000).contiguous().realize()
-    Xsel, Ysel = X[sel], Y[sel]
-    Tensor.realize(Xsel, Ysel)
-  def test_resnetconv(self):
-    conv1 = nn.Conv2d(3, 8, kernel_size=7, stride=2, bias=False, padding=3)
-    conv1.weight.replace(conv1.weight.empty_like())
-    x = Tensor.empty(1, 3, 56, 56)
-    x = conv1(x).pad([1,1,1,1])+1
-    x.realize()
-  # CPU=1 NOOPT=1 DEBUG=4 RANGEIFY=1 python3 test/test_rangeify.py TestRangeifyOpt.test_matmul_reshaped
-  def test_matmul_reshaped(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    (A@B).reshape(N*N).contiguous().realize()
-  def test_reduce_reshapes(self):
-    A = Tensor.empty(8,8,8,8).permute(1,0,3,2).flatten()
-    A.sum().realize()
-@unittest.skipIf(CI, "useless in CI, doesn't test anything")
-class TestRangeify(unittest.TestCase):
-  def test_groupnorm(self):
-    # ranges 1 and 3 are merging
-    x = nn.GroupNorm(32, 128)
-    x(Tensor.empty(1, 128, 64, 64)).realize()
-  def test_expand_children(self):
-    A = Tensor.empty(N, N).sum(axis=1)
-    ba = A.expand(N, N)
-    ((ba+1).sum(axis=1) + (ba+2).sum(axis=0)).realize()
-  def test_partial_contig(self):
-    A = Tensor.empty(64, 64, 64)
-    ret = A.sum(axis=2).contiguous(arg=(1,)).sum(axis=1)
-    ret.realize()
-  @unittest.skip("RANGEIFY=0 does nothing")
-  def test_double_gemm_real(self):
-    def go():
-      with Context(DEBUG=0):
-        Tensor.manual_seed(1337)
-        A,B,C = [Tensor.randn(N, N) for _ in range(3)]
-        Tensor.realize(A, B, C)
-      GlobalCounters.reset()
-      return (A@B@C).realize()
-    rng = go()
-    with Context(RANGEIFY=0, DEBUG=2):
-      ref = go()
-    mse = ((rng-ref)**2).sum().item()
-    print(f"mse: {mse}")
-    self.assertLessEqual(mse, 1e-2)
-  def test_double_gemm(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (A@B@C).realize()
-  def test_double_gemm_exp(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).exp()@C).exp()).realize()
-  def test_double_gemm_exp_child(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    # A@B is used with exp, and also on the sum. this is two kernels now, is this right?
-    ret = A@B
-    ((ret.exp()@C)+ret).realize()
-  def test_double_gemm_relu(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).relu()@C).relu()).realize()
-  def test_double_gemm_relu_half_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).relu().contiguous(arg=(1,))@C).relu()).realize()
-  def test_double_gemm_half_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    ((A@B).contiguous(arg=(1,))@C).realize()
-  def test_double_gemm_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    ((A@B).contiguous()@C).realize()
-  def test_many_gemm(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    D = Tensor.empty(N, N)
-    E = Tensor.empty(N, N)
-    F = Tensor.empty(N, N)
-    (A@B@C@D@E@F).realize()
-  def test_conv2d(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    x.conv2d(w1).realize()
-  def test_conv2d_elu(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    x.conv2d(w1).elu().realize()
-  def test_conv2d_t(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    (x*2).conv2d(w1).realize()
-  def test_double_conv2d(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    x.conv2d(w1).conv2d(w2).realize()
-  def test_resnet_conv2d(self):
-    x = Tensor.empty(1, 8, 32, 32)
-    w1 = Tensor.empty(8, 8, 3, 3)
-    w2 = Tensor.empty(8, 8, 1, 1)
-    x.conv2d(w1).conv2d(w2).realize()
-  def test_xception_conv2d(self):
-    # NOTE: this fusion is bad, it's recomputing the inner many times
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 1, 1)
-    w2 = Tensor.empty(8, 1, 3, 3)
-    x.conv2d(w1).conv2d(w2, groups=8).realize()
-  def test_conv_maxpool_contig(self): self.test_conv_maxpool(True)
-  def test_conv_maxpool(self, contig=False):
-    GlobalCounters.reset()
-    x = Tensor.empty(32, 16, 64, 64)
-    l1 = nn.Conv2d(16, 16, 3)
-    for p in nn.state.get_parameters(l1): p.replace(Tensor.empty(p.shape))
-    x = l1(x)
-    if contig: x = x.contiguous()
-    x.max_pool2d().realize()
-  def test_double_conv2d_half_contig(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    # NOTE: this contiguous doesn't help
-    x.conv2d(w1).contiguous(arg=(1,)).conv2d(w2).permute(0,2,3,1).contiguous().realize()
-  def test_double_conv2d_contig(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    x.conv2d(w1).contiguous().conv2d(w2).realize()
-  def test_transformer_ffn(self):
-    from tinygrad.apps.llm import TransformerBlock
-    from tinygrad import nn
-    blk = TransformerBlock(1024, 4096, 1, 1, 1e-5, head_dim=1024, rope_theta=10000.0)
-    for p in nn.state.get_parameters(blk): p.replace(Tensor.empty(p.shape))
-    x = Tensor.empty(128, 1024)
-    out = blk._feed_forward(x)
-    out.realize()
 # contiguous + reduce can support ranges?
 @unittest.skip("pm_rangeify no longer exists. test this in a different way")

View File

@@ -1755,7 +1755,7 @@ class TestSchedule(unittest.TestCase):
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_precompute_freqs_cis(self):
     from extra.models.llama import precompute_freqs_cis
-    args = {"dim":32 if CI else 128, "end":2048 if CI else 8192, "theta":10000}
+    args = {"dim":32, "end":2048, "theta":10000}
     fused = precompute_freqs_cis(**args)
     run_schedule(check_schedule(fused, 1))
     if getenv("CHECK", 1):

View File

@@ -3,7 +3,8 @@ import numpy as np
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.dtype import DType
 from tinygrad.nn.state import safe_load, safe_save, get_state_dict, torch_load
-from tinygrad.helpers import Timing, fetch, temp, CI, OSX
+from tinygrad.helpers import Timing, fetch, temp, OSX
+from test.helpers import slow
 from tinygrad.device import is_dtype_supported
 def compare_weights_both(url):
@@ -340,8 +341,8 @@ class TestDiskTensor(unittest.TestCase):
     on_dev = t.to(Device.DEFAULT).realize()
     np.testing.assert_equal(on_dev.numpy(), t.numpy())
+  @slow
   def test_copy_from_disk_huge(self):
-    if CI and not hasattr(Device["DISK"], 'io_uring'): self.skipTest("slow on ci without iouring")
     fn = pathlib.Path(temp("dt_copy_from_disk_huge"))
     fn.unlink(missing_ok=True)

View File

@@ -2,7 +2,8 @@ import unittest, math, operator, subprocess, struct
 from tinygrad.tensor import Tensor, dtypes, Device
 from tinygrad.dtype import DType, DTYPES_DICT, truncate, float_to_fp16, float_to_bf16, _to_np_dtype, least_upper_dtype, least_upper_float
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import getenv, CI, DEBUG
+from tinygrad.helpers import getenv, DEBUG
+from test.helpers import slow
 from hypothesis import given, settings, strategies as strat
 import numpy as np
 import torch
@@ -594,7 +595,7 @@ class TestAutoCastType(unittest.TestCase):
     dtypes.default_float = old_default_float
   @unittest.skipIf(Device.DEFAULT == "PYTHON", "very slow")
-  @unittest.skipIf(CI and Device.DEFAULT == "AMD", "very slow")
+  @slow
   @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Binding size is larger than the maximum storage buffer binding size")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_mean_half_precision_underflow(self):

View File

@@ -1,8 +1,8 @@
 from typing_extensions import Callable
 import hashlib, random, unittest
 from tinygrad import Tensor, Device, getenv, dtypes
+from test.helpers import slow
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import CI
 from tinygrad.uop.ops import UOp
 from tinygrad.engine.jit import TinyJit
@@ -58,7 +58,7 @@ class TestKeccak(unittest.TestCase):
     self.assertEqual(bytes(Tensor(b"abc").keccak().tolist()),
                      bytearray.fromhex("3a985da74fe225b2 045c172d6bd390bd 855f086e3e9d525b 46bfe24511431532"))
-  @unittest.skipIf(CI, "times out in ci")
+  @slow
   def test_long(self):
     data = b"\x00" * 4
     self.assertEqual(bytes(Tensor(data).keccak("shake_128").tolist()), hashlib.shake_128(data).digest(16))
@@ -74,7 +74,7 @@ class TestKeccak(unittest.TestCase):
     self.assertEqual(bytes(out[1].tolist()), bytearray.fromhex("3a985da74fe225b2 045c172d6bd390bd 855f086e3e9d525b 46bfe24511431532"))
     self.assertEqual(bytes(out[2].tolist()), bytearray.fromhex("8e0d8f672252acb0 ffc5093db8653b18 1513bf9a2097e737 b4f73533dcaf46df"))
-  @unittest.skipIf(CI, "redundant with test_variable_bs")
+  @slow
   def test_variable_bs_jit(self):
     def f(data):
       return data.keccak()

View File

@@ -1,6 +1,6 @@
 import ctypes, gzip, unittest, timeit
 from tinygrad import Variable
-from tinygrad.helpers import Context, ContextVar, argfix, colored, word_wrap, is_numpy_ndarray, CI, mv_address, get_contraction
+from tinygrad.helpers import Context, ContextVar, argfix, colored, word_wrap, is_numpy_ndarray, mv_address, get_contraction
 from tinygrad.helpers import merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten, from_mv, to_mv, polyN, time_to_str, cdiv, cmod, getbits
 from tinygrad.tensor import Tensor, get_shape
 import numpy as np
@@ -198,7 +198,7 @@ class TestMemoryview(unittest.TestCase):
     mv[0] = 2
     assert base[0] == 2
-  @unittest.skipIf(CI, "dangerous for CI, it allocates tons of memory")
+  @unittest.skip("allocates tons of memory")
   def test_to_mv(self):
     sizes = [
       (16, "16 B"),

View File

@@ -5,7 +5,8 @@ import numpy as np
 from tinygrad import Tensor, dtypes, Device, TinyJit
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import CI, all_same, prod
+from tinygrad.helpers import all_same, prod
+from test.helpers import slow
 random.seed(42)
@@ -1140,7 +1141,7 @@ def get_set_tensor(indexed: Tensor, indexer):
   set_tensor = Tensor.randint(set_count, high=set_count).reshape(set_size) #.cast(dtypes.float64)
   return set_tensor
-@unittest.skipIf(CI and Device.DEFAULT in ["CPU", "CL", "METAL", "NV", "AMD"], "slow")
+@slow
 class TestAdvancedIndexing(unittest.TestCase):
   def test_integer_array_indexing(self):
     # pick a random valid indexer type

View File

@@ -20,7 +20,7 @@ class TestRawShmBuffer(unittest.TestCase):
     assert np.allclose(t.numpy(), t2.numpy())
     s.unlink()
-  @unittest.skipIf(CI, "CI doesn't like big shared memory")
+  @unittest.skip("big shared memory")
   def test_e2e_big(self):
     # bigger than this doesn't work on Linux, maybe this is a limit somewhere?
     t = Tensor.randn(2048, 128, 8).realize()