Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-07 22:23:55 -05:00
mark slow tests as slow instead of as CI (#13736)
* mark slow tests as slow instead of as CI
* CI shouldn't have different behavior
* more skips / CI
* slow
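To make the change concrete, here is a minimal, self-contained sketch of the pattern this diff introduces. The TestExample class and test_heavy_model method are hypothetical names used only for illustration; the slow definition is the same one the commit adds to test/helpers.py.

import os, unittest

# same definition this commit adds to test/helpers.py
slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run")

class TestExample(unittest.TestCase):
  @slow  # previously something like @unittest.skipIf(CI, "slow")
  def test_heavy_model(self):
    self.assertEqual(1 + 1, 2)  # placeholder body for the sketch

if __name__ == "__main__":
  unittest.main()  # include slow tests by running with RUN_SLOW=1 set in the environment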
@@ -1,6 +1,7 @@
 import unittest, ctypes, struct, os, random, numpy as np
 from tinygrad import Device, Tensor, dtypes
-from tinygrad.helpers import getenv, CI, mv_address, DEBUG
+from tinygrad.helpers import getenv, mv_address, DEBUG
+from test.helpers import slow
 from tinygrad.device import Buffer, BufferSpec
 from tinygrad.runtime.support.hcq import HCQCompiled, HCQBuffer
 from tinygrad.runtime.autogen import libc
@@ -220,7 +221,7 @@ class TestHCQ(unittest.TestCase):
     mv_buf1 = buf1.as_buffer().cast('Q')
     assert libc.memcmp(mv_address(mv_buf1), buf2._buf.va_addr, sz) == 0

-  @unittest.skipIf(CI, "skip in CI")
+  @slow
   def test_copy_64bit(self):
     if TestHCQ.d0.hw_copy_queue_t is None: self.skipTest("device does not support copy queue")

@@ -2,12 +2,11 @@ import unittest
 from tinygrad import Device
 from tinygrad.device import Buffer
 from tinygrad.dtype import dtypes
-from tinygrad.helpers import CI
 from tinygrad.runtime.ops_cl import CLDevice, CLAllocator, CLCompiler, CLProgram

 @unittest.skipUnless(Device.DEFAULT == "CL", "Runs only on OpenCL")
 class TestCLError(unittest.TestCase):
-  @unittest.skipIf(CI, "dangerous for CI, it allocates tons of memory")
+  @unittest.skip("allocates tons of memory")
   def test_oom(self):
     with self.assertRaises(RuntimeError) as err:
       allocator = CLAllocator(CLDevice())
test/external/external_test_hcq.py (vendored, 4 changed lines)
@@ -261,7 +261,7 @@ class TestHCQ(unittest.TestCase):
     et = _time_queue(q, TestHCQ.d0)
     gb_s = (SZ/1e9)/et
     print(f"same device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
-    assert (0.3 if CI else 10) <= gb_s <= 1000
+    assert 0.3 <= gb_s <= 1000

   def test_cross_device_copy_bandwidth(self):
     SZ = 2_000_000_000
@@ -273,7 +273,7 @@ class TestHCQ(unittest.TestCase):
     et = _time_queue(q, TestHCQ.d0)
     gb_s = (SZ/1e9)/et
     print(f"cross device copy: {et*1e3:.2f} ms, {gb_s:.2f} GB/s")
-    assert (0.3 if CI else 2) <= gb_s <= 50
+    assert 0.3 <= gb_s <= 50

   def test_interleave_compute_and_copy(self):
     q = TestHCQ.compute_queue()
test/external/external_test_mamba.py (vendored, 4 changed lines)
@@ -1,12 +1,12 @@
 import unittest
-from tinygrad.helpers import CI
+from test.helpers import slow
 from examples.mamba import Mamba, generate
 from transformers import AutoTokenizer

 PROMPT = 'Why is gravity '
 TOKENIZER = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

-@unittest.skipIf(CI, "model is slow for CI")
+@slow
 class TestMamba(unittest.TestCase):
   def test_mamba_130M(self):
     OUT_130M = '''Why is gravity \nnot a good idea?\n\nA:'''
test/external/fuzz_shape_ops.py (vendored, 4 changed lines)
@@ -8,11 +8,11 @@ from hypothesis.extra import numpy as stn
 import numpy as np
 import torch
 from tinygrad import Tensor
-from tinygrad.helpers import CI, getenv
+from tinygrad.helpers import getenv


 settings.register_profile(__file__, settings.default,
-                          max_examples=100 if CI else 250, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
+                          max_examples=100, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))


 # torch wraparound for large numbers
@@ -1,4 +1,4 @@
-import time, struct, functools
+import os, time, struct, functools, unittest
 from typing import Any, Callable
 import numpy as np
 from tinygrad import Tensor, dtypes, Device
@@ -9,6 +9,9 @@ from tinygrad.dtype import DType
 from tinygrad.nn.state import get_parameters
 from tinygrad.helpers import T, CI
 from tinygrad.codegen import full_rewrite
+
+# decorator to skip slow tests by default, run with RUN_SLOW=1 to include them
+slow = unittest.skipUnless(os.getenv("RUN_SLOW"), "slow test, set RUN_SLOW=1 to run")
 from tinygrad.runtime.ops_python import PythonProgram, PythonRenderer, PythonCompiler

 def derandomize_model(model):
@@ -4,7 +4,8 @@ import numpy as np
 from PIL import Image

 from tinygrad import Tensor
-from tinygrad.helpers import getenv, CI
+from tinygrad.helpers import getenv
+from test.helpers import slow
 from extra.models.efficientnet import EfficientNet
 from extra.models.vit import ViT
 from extra.models.resnet import ResNet50
@@ -56,12 +57,12 @@ class TestEfficientNet(unittest.TestCase):
   def tearDownClass(cls):
     del cls.model

-  @unittest.skipIf(CI, "covered by test_chicken_car")
+  @slow
   def test_chicken(self):
     labels = _infer(self.model, chicken_img)
     self.assertEqual(_LABELS[labels[0]], "hen")

-  @unittest.skipIf(CI, "covered by test_chicken_car")
+  @slow
   def test_car(self):
     labels = _infer(self.model, car_img)
     self.assertEqual(_LABELS[labels[0]], "sports car, sport car")
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 import unittest
 import numpy as np
-from tinygrad import Tensor, Device
-from tinygrad.helpers import CI
+from tinygrad import Tensor
+from test.helpers import slow
 from tinygrad.nn.state import get_parameters
 from tinygrad.nn import optim, BatchNorm2d
 from extra.training import train, evaluate
@@ -49,7 +49,7 @@ class TinyConvNet:
     x = x.reshape(shape=[x.shape[0], -1])
     return x.dot(self.l1)

-@unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+@slow
 class TestMNIST(unittest.TestCase):
   def test_sgd_onestep(self):
     np.random.seed(1337)
@@ -5,11 +5,12 @@ from tinygrad.nn import optim
 from tinygrad.nn.state import get_parameters
 from tinygrad.engine.jit import TinyJit
 from tinygrad import Tensor, Device, GlobalCounters, dtypes, Variable
-from tinygrad.helpers import CI, Context
+from tinygrad.helpers import Context
+from test.helpers import slow
 from extra.lr_scheduler import OneCycleLR
 from test.helpers import derandomize_model

-from examples.gpt2 import Transformer as GPT2Transformer, MODEL_PARAMS as GPT2_MODEL_PARAMS
+from examples.gpt2 import Transformer as GPT2Transformer
 from examples.hlb_cifar10 import SpeedyResNet, hyp
 from examples.llama import Transformer as LLaMaTransformer
 from examples.stable_diffusion import UNetModel, unet_params
@@ -20,7 +21,7 @@ global_mem_used = 0
 def helper_test(nm, gen, model, max_memory_allowed, max_kernels_allowed, all_jitted=False):
   with Context(JIT=2):
     tms = []
-    for _ in range(2 if CI else 4):
+    for _ in range(2):
       early_gen = [x.realize() if isinstance(x, Tensor) else x for x in gen()]
       GlobalCounters.reset()
       Device[Device.DEFAULT].synchronize()
@@ -52,7 +53,7 @@ class TestRealWorld(unittest.TestCase):
   def tearDown(self):
     dtypes.default_float = self.old_float

-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow, covered by METAL")
+  @slow
   @unittest.skipUnless(is_dtype_supported(dtypes.float16), "need dtypes.float16")
   def test_stable_diffusion(self):
     params = unet_params
@@ -92,14 +93,14 @@ class TestRealWorld(unittest.TestCase):
     dtypes.default_float = dtypes.float16

     args_tiny = {"dim": 1024, "n_heads": 8, "n_layers": 8, "norm_eps": 1e-5, "vocab_size": 1000}
-    model = GPT2Transformer(**(args_tiny if CI else GPT2_MODEL_PARAMS["gpt2-medium"]))
+    model = GPT2Transformer(**args_tiny)
     derandomize_model(model)
     @TinyJit
     def test(t, v):
       with Context(JIT=0): return model(t, v).realize()
-    helper_test("test_gpt2", lambda: (Tensor([[1,]]),Variable("pos", 1, 100).bind(1)), test, 0.23 if CI else 0.9, 160 if CI else 468, all_jitted=True)
+    helper_test("test_gpt2", lambda: (Tensor([[1,]]),Variable("pos", 1, 100).bind(1)), test, 0.23, 160, all_jitted=True)

-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+  @slow
   def test_train_mnist(self):
     from examples.beautiful_mnist import Model
     with Tensor.train():
@@ -117,7 +118,7 @@ class TestRealWorld(unittest.TestCase):

     helper_test("train_mnist", lambda: (Tensor.randn(BS, 1, 28, 28),), train, 0.017, 103)

-  @unittest.skipIf(CI and Device.DEFAULT in {"CPU", "CL"}, "slow")
+  @slow
   def test_forward_cifar(self):
     BS = 32
     # with training batchnorm still though
@@ -127,7 +128,7 @@ class TestRealWorld(unittest.TestCase):
     def run(X): return model(X)
     helper_test("forward_cifar", lambda: (Tensor.randn(BS, 3, 32, 32),), run, 0.033, 27)

-  @unittest.skipIf(CI and Device.DEFAULT in {"CPU", "CL"}, "slow")
+  @slow
   def test_train_cifar(self):
     with Tensor.train():
       model = SpeedyResNet(Tensor.ones((12,3,2,2)))
@@ -157,7 +158,7 @@ class TestRealWorld(unittest.TestCase):
       final_div_factor=1./(initial_div_factor*final_lr_ratio), total_steps=4)
     assert not np.isnan(lr_scheduler.min_lr), "lr too small or initial_div_facotr too big for half"

-  @unittest.skipIf(CI and Device.DEFAULT == "CPU", "slow")
+  @slow
   def test_bert(self):
     with Tensor.train():
       args_tiny = {"attention_probs_dropout_prob": 0.0, "hidden_dropout_prob": 0.0, "vocab_size": 30522, "type_vocab_size": 2,
@@ -3,7 +3,8 @@ import numpy as np
 from tinygrad import Device
 from tinygrad.nn.state import get_parameters
 from tinygrad.nn import optim
-from tinygrad.helpers import getenv, CI
+from tinygrad.helpers import getenv
+from test.helpers import slow
 from extra.training import train
 from extra.models.convnext import ConvNeXt
 from extra.models.efficientnet import EfficientNet
@@ -38,7 +39,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()

-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_efficientnet(self):
     model = EfficientNet(0)
     X = np.zeros((BS,3,224,224), dtype=np.float32)
@@ -46,7 +47,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()

-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_vit(self):
     model = ViT()
     X = np.zeros((BS,3,224,224), dtype=np.float32)
@@ -54,7 +55,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()

-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_transformer(self):
     # this should be small GPT-2, but the param count is wrong
     # (real ff_dim is 768*4)
@@ -64,7 +65,7 @@ class TestTrain(unittest.TestCase):
     train_one_step(model,X,Y)
     check_gc()

-  @unittest.skipIf(CI, "slow")
+  @slow
   def test_resnet(self):
     X = np.zeros((BS, 3, 224, 224), dtype=np.float32)
     Y = np.zeros((BS), dtype=np.int32)
@@ -2,7 +2,8 @@ import unittest
 import pathlib
 from examples.whisper import init_whisper, load_file_waveform, transcribe_file, transcribe_waveform
 import examples.mlperf.metrics as metrics
-from tinygrad.helpers import CI, fetch, CPU_LLVM
+from tinygrad.helpers import fetch
+from test.helpers import slow
 from tinygrad import Device, dtypes
 from tinygrad.device import is_dtype_supported

@@ -75,11 +76,11 @@ class TestWhisper(unittest.TestCase):
   def test_transcribe_file1(self):
     self.assertEqual(transcribe_file(self.model, self.enc, TEST_FILE_1), TRANSCRIPTION_1)

-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too many tests for CI")
+  @slow
   def test_transcribe_file2(self):
     self.assertEqual(transcribe_file(self.model, self.enc, TEST_FILE_2), TRANSCRIPTION_2)

-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too many tests for CI")
+  @slow
   def test_transcribe_batch12(self):
     waveforms = [load_file_waveform(TEST_FILE_1), load_file_waveform(TEST_FILE_2)]
     transcriptions = transcribe_waveform(self.model, self.enc, waveforms)
@@ -95,14 +96,14 @@ class TestWhisper(unittest.TestCase):
     self.assertEqual(TRANSCRIPTION_1, transcriptions[1])

   @unittest.skip("file 3 url is broken")
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too long for CI")
+  @slow
   def test_transcribe_long(self):
     waveform = [load_file_waveform(fetch(TEST_FILE_3_URL))]
     transcription = transcribe_waveform(self.model, self.enc, waveform)
     self.assertWER(transcription, TRANSCRIPTION_3, 0.085)

   @unittest.skip("file 3 url is broken")
-  @unittest.skipIf(CI or (Device.DEFAULT == "CPU" and CPU_LLVM), "too long for CI")
+  @slow
   def test_transcribe_long_no_batch(self):
     waveforms = [load_file_waveform(fetch(TEST_FILE_3_URL)), load_file_waveform(TEST_FILE_1)]

@@ -7,7 +7,8 @@ from tinygrad.tensor import _to_np_dtype
 from tinygrad.uop.ops import Ops
 from tinygrad.dtype import DType
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import AMX, CI, AMD_LLVM, CPU_LLVM
+from tinygrad.helpers import AMX, AMD_LLVM, CPU_LLVM
+from test.helpers import slow
 from tinygrad.engine.realize import CompiledRunner, get_program
 from tinygrad.codegen.opt import Opt, OptOps, KernelOptError

@@ -119,7 +120,7 @@ class TestTensorCores(unittest.TestCase):
     helper_tc_ensure_uops_and_opts_count(tc.dims[0], tc.dims[1], tc.dims[2]//8, tc.dtype_in, tc.dtype_out, tc_opt=2, ensure_triggered=False)

   @unittest.skipIf(Device.DEFAULT == "PYTHON", "not generated on EMULATED device")
-  @unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
+  @slow
   @unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores")
   def test_tensor_cores_multi_reduce(self):
     for tc in Device[Device.DEFAULT].renderer.tensor_cores:
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device, Variable
-from tinygrad.helpers import CI, Context, getenv
+from tinygrad.helpers import Context, getenv
 from tinygrad.engine.realize import run_schedule
 from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
 from tinygrad.uop.ops import Ops
@@ -143,7 +143,7 @@ class TestIndexing(unittest.TestCase):

   def test_llama_embedding(self, noopt=1, op_limit=65536):
     # llama3 is 128256
-    vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
+    vocab_size, embed_size = (10, 3)
     emb = nn.Embedding(vocab_size, embed_size)
     emb_w = emb.weight.numpy()
     x = Tensor([1,2,3,4])
@@ -161,7 +161,7 @@ class TestIndexing(unittest.TestCase):
     # TODO: reshape to match torch, should we do this in nn?
     np.testing.assert_allclose(z.numpy().reshape(4, embed_size), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
   # at least the arange is being fused
-  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000 if CI else 5_898_240_000)
+  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000)

 if __name__ == "__main__":
   unittest.main()
@@ -3,7 +3,7 @@ import functools, unittest, ctypes

 from tinygrad.device import Device, Buffer
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.helpers import Context, CI, dedup, from_mv
+from tinygrad.helpers import Context, dedup, from_mv
 from tinygrad.dtype import dtypes
 from tinygrad.engine.jit import MultiGraphRunner
 from tinygrad.engine.realize import ExecItem, BufferXfer, get_runner, CompiledRunner
@@ -12,8 +12,8 @@ from test.helpers import needs_second_gpu

 np.random.seed(1337)
 Tensor.manual_seed(1337)
-BUF_SIZE = 4096 if CI else 4096 * 128
-RUN_CNT = 4 if CI else 32
+BUF_SIZE = 4096
+RUN_CNT = 4

 cached_prgs = {}
 def helper_exec_op(device, outbuf, inbufs):
@@ -2,12 +2,12 @@ import unittest, functools, random
 from tinygrad import Tensor, Device, nn, GlobalCounters, TinyJit, dtypes, Variable
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.helpers import CI, getenv, prod, Context
+from tinygrad.helpers import getenv, prod, Context
 from tinygrad.nn.state import get_parameters, get_state_dict
 from tinygrad.engine.realize import lower_schedule, BufferCopy, CompiledRunner, run_schedule
 import numpy as np
 from hypothesis import given, strategies as strat, settings
-from test.helpers import REAL_DEV, not_support_multi_device, needs_second_gpu
+from test.helpers import not_support_multi_device, needs_second_gpu, slow

 settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 settings.load_profile("my_profile")
@@ -420,7 +420,7 @@ class TestMultiTensor(unittest.TestCase):
     np.testing.assert_allclose(y.numpy(), y_shard.numpy(), atol=1e-6, rtol=1e-6)

   # NOTE: this is failing on LLVM CI, no idea why. Works locally.
-  @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
+  @slow
   def test_data_parallel_resnet(self):
     from extra.models.resnet import ResNet18

@@ -456,7 +456,7 @@ class TestMultiTensor(unittest.TestCase):
     # sometimes there is zeros in these grads... why?
     np.testing.assert_allclose(grad, shard_grad, atol=1e-5, rtol=1e-5)

-  @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "CPU", "AMD"), "slow, and flaky on CPU")
+  @slow
   @unittest.skip("TODO: pm_rangeify hangs")
   def test_data_parallel_resnet_train_step(self):
     from extra.models.resnet import ResNet18
@@ -4,14 +4,14 @@ import numpy as np
 import torch
 from tinygrad import Tensor, Device, TinyJit, dtypes
 from tinygrad.uop.ops import Ops
-from tinygrad.helpers import GlobalCounters, CI, Context
+from tinygrad.helpers import GlobalCounters, Context
 from tinygrad.nn import Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, Embedding
 from tinygrad.nn import BatchNorm, LayerNorm, LayerNorm2d, GroupNorm, InstanceNorm, RMSNorm, LSTMCell
 from tinygrad.nn.state import load_state_dict
 from tinygrad.engine.realize import run_schedule
-from test.helpers import not_support_multi_device, needs_second_gpu
+from test.helpers import not_support_multi_device, needs_second_gpu, slow

-@unittest.skipIf(CI and Device.DEFAULT in {"CUDA", "NV"}, "slow")
+@slow
 class TestNN(unittest.TestCase):
   def test_batchnorm2d(self, training=False, threed=False, track_running_stats=True):
     with Tensor.train(training):
@@ -11,8 +11,7 @@ if getenv("TINY_BACKEND"):
|
|||||||
import tinygrad.nn.torch # noqa: F401 # pylint: disable=unused-import
|
import tinygrad.nn.torch # noqa: F401 # pylint: disable=unused-import
|
||||||
torch.set_default_device("tiny")
|
torch.set_default_device("tiny")
|
||||||
|
|
||||||
if CI:
|
warnings.filterwarnings("ignore", message="Non-empty compiler output encountered")
|
||||||
warnings.filterwarnings("ignore", message="Non-empty compiler output encountered")
|
|
||||||
|
|
||||||
FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
|
FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
|
||||||
PRINT_TENSORS = getenv("PRINT_TENSORS", 0)
|
PRINT_TENSORS = getenv("PRINT_TENSORS", 0)
|
||||||
|
|||||||
@@ -3,9 +3,8 @@ import torch
 import unittest
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.nn.optim import Adam, SGD, AdamW, Muon, LAMB
-from tinygrad.helpers import CI
 from tinygrad.device import is_dtype_supported
-from test.helpers import needs_second_gpu
+from test.helpers import needs_second_gpu, slow

 np.random.seed(1337)
 x_init = np.random.randn(1,4).astype(np.float32)
@@ -42,7 +41,7 @@ def step(tensor, optim, steps=1, teeny=False, **kwargs):
   optim.step()
   return net.x.detach().numpy(), net.W.detach().numpy()

-@unittest.skipIf(CI and Device.DEFAULT in {"CUDA", "NV"}, "slow")
+@slow
 class TestOptim(unittest.TestCase):
   def setUp(self):
     self.old_training = Tensor.training
@@ -1,6 +1,6 @@
 import unittest
 from tinygrad import Tensor, nn, Device
-from tinygrad.helpers import Context, GlobalCounters, CI, getenv, PCONTIG, DEBUG
+from tinygrad.helpers import Context, GlobalCounters, getenv, PCONTIG, DEBUG
 from tinygrad.uop.ops import graph_rewrite, PatternMatcher, UPat, Ops
 from tinygrad.codegen.opt import OptOps, Opt
 from tinygrad.renderer.ptx import PTXRenderer
@@ -153,199 +153,6 @@ class TestPcontig(unittest.TestCase):
     opts += (Opt(OptOps.UPCAST, 4, 4),)
     self.test_flash_attention(opts)

-# *** non CI rangeify tests below this line ***
-
-N = 256
-
-@unittest.skipIf(CI, "useless in CI, doesn't test anything")
-class TestRangeifyOpt(unittest.TestCase):
-  def test_randperm(self):
-    Tensor.randperm(10000).realize()
-
-  def test_one_getitem(self):
-    X = Tensor.empty(10000)
-    sel = Tensor.arange(1000).contiguous().realize()
-    Xsel = X[sel]
-    Tensor.realize(Xsel)
-
-  def test_two_getitem(self):
-    # this is splitting on the child even when it really shouldn't
-    X = Tensor.empty(10000)
-    Y = Tensor.empty(10000)
-    sel = Tensor.arange(1000).contiguous().realize()
-    Xsel, Ysel = X[sel], Y[sel]
-    Tensor.realize(Xsel, Ysel)
-
-  def test_resnetconv(self):
-    conv1 = nn.Conv2d(3, 8, kernel_size=7, stride=2, bias=False, padding=3)
-    conv1.weight.replace(conv1.weight.empty_like())
-    x = Tensor.empty(1, 3, 56, 56)
-    x = conv1(x).pad([1,1,1,1])+1
-    x.realize()
-
-  # CPU=1 NOOPT=1 DEBUG=4 RANGEIFY=1 python3 test/test_rangeify.py TestRangeifyOpt.test_matmul_reshaped
-  def test_matmul_reshaped(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    (A@B).reshape(N*N).contiguous().realize()
-
-  def test_reduce_reshapes(self):
-    A = Tensor.empty(8,8,8,8).permute(1,0,3,2).flatten()
-    A.sum().realize()
-
-@unittest.skipIf(CI, "useless in CI, doesn't test anything")
-class TestRangeify(unittest.TestCase):
-  def test_groupnorm(self):
-    # ranges 1 and 3 are merging
-    x = nn.GroupNorm(32, 128)
-    x(Tensor.empty(1, 128, 64, 64)).realize()
-
-  def test_expand_children(self):
-    A = Tensor.empty(N, N).sum(axis=1)
-    ba = A.expand(N, N)
-    ((ba+1).sum(axis=1) + (ba+2).sum(axis=0)).realize()
-
-  def test_partial_contig(self):
-    A = Tensor.empty(64, 64, 64)
-    ret = A.sum(axis=2).contiguous(arg=(1,)).sum(axis=1)
-    ret.realize()
-
-  @unittest.skip("RANGEIFY=0 does nothing")
-  def test_double_gemm_real(self):
-    def go():
-      with Context(DEBUG=0):
-        Tensor.manual_seed(1337)
-        A,B,C = [Tensor.randn(N, N) for _ in range(3)]
-        Tensor.realize(A, B, C)
-      GlobalCounters.reset()
-      return (A@B@C).realize()
-    rng = go()
-    with Context(RANGEIFY=0, DEBUG=2):
-      ref = go()
-    mse = ((rng-ref)**2).sum().item()
-    print(f"mse: {mse}")
-    self.assertLessEqual(mse, 1e-2)
-
-  def test_double_gemm(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (A@B@C).realize()
-
-  def test_double_gemm_exp(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).exp()@C).exp()).realize()
-
-  def test_double_gemm_exp_child(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    # A@B is used with exp, and also on the sum. this is two kernels now, is this right?
-    ret = A@B
-    ((ret.exp()@C)+ret).realize()
-
-  def test_double_gemm_relu(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).relu()@C).relu()).realize()
-
-  def test_double_gemm_relu_half_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    (((A@B).relu().contiguous(arg=(1,))@C).relu()).realize()
-
-  def test_double_gemm_half_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    ((A@B).contiguous(arg=(1,))@C).realize()
-
-  def test_double_gemm_contig(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    ((A@B).contiguous()@C).realize()
-
-  def test_many_gemm(self):
-    A = Tensor.empty(N, N)
-    B = Tensor.empty(N, N)
-    C = Tensor.empty(N, N)
-    D = Tensor.empty(N, N)
-    E = Tensor.empty(N, N)
-    F = Tensor.empty(N, N)
-    (A@B@C@D@E@F).realize()
-
-  def test_conv2d(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    x.conv2d(w1).realize()
-
-  def test_conv2d_elu(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    x.conv2d(w1).elu().realize()
-
-  def test_conv2d_t(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    (x*2).conv2d(w1).realize()
-
-  def test_double_conv2d(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    x.conv2d(w1).conv2d(w2).realize()
-
-  def test_resnet_conv2d(self):
-    x = Tensor.empty(1, 8, 32, 32)
-    w1 = Tensor.empty(8, 8, 3, 3)
-    w2 = Tensor.empty(8, 8, 1, 1)
-    x.conv2d(w1).conv2d(w2).realize()
-
-  def test_xception_conv2d(self):
-    # NOTE: this fusion is bad, it's recomputing the inner many times
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 1, 1)
-    w2 = Tensor.empty(8, 1, 3, 3)
-    x.conv2d(w1).conv2d(w2, groups=8).realize()
-
-  def test_conv_maxpool_contig(self): self.test_conv_maxpool(True)
-  def test_conv_maxpool(self, contig=False):
-    GlobalCounters.reset()
-    x = Tensor.empty(32, 16, 64, 64)
-    l1 = nn.Conv2d(16, 16, 3)
-    for p in nn.state.get_parameters(l1): p.replace(Tensor.empty(p.shape))
-    x = l1(x)
-    if contig: x = x.contiguous()
-    x.max_pool2d().realize()
-
-  def test_double_conv2d_half_contig(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    # NOTE: this contiguous doesn't help
-    x.conv2d(w1).contiguous(arg=(1,)).conv2d(w2).permute(0,2,3,1).contiguous().realize()
-
-  def test_double_conv2d_contig(self):
-    x = Tensor.empty(1, 4, 32, 32)
-    w1 = Tensor.empty(8, 4, 3, 3)
-    w2 = Tensor.empty(12, 8, 3, 3)
-    x.conv2d(w1).contiguous().conv2d(w2).realize()
-
-  def test_transformer_ffn(self):
-    from tinygrad.apps.llm import TransformerBlock
-    from tinygrad import nn
-    blk = TransformerBlock(1024, 4096, 1, 1, 1e-5, head_dim=1024, rope_theta=10000.0)
-    for p in nn.state.get_parameters(blk): p.replace(Tensor.empty(p.shape))
-
-    x = Tensor.empty(128, 1024)
-    out = blk._feed_forward(x)
-    out.realize()
-
 # contiguous + reduce can support ranges?

 @unittest.skip("pm_rangeify no longer exists. test this in a different way")
@@ -1755,7 +1755,7 @@ class TestSchedule(unittest.TestCase):
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_precompute_freqs_cis(self):
     from extra.models.llama import precompute_freqs_cis
-    args = {"dim":32 if CI else 128, "end":2048 if CI else 8192, "theta":10000}
+    args = {"dim":32, "end":2048, "theta":10000}
     fused = precompute_freqs_cis(**args)
     run_schedule(check_schedule(fused, 1))
     if getenv("CHECK", 1):
@@ -3,7 +3,8 @@ import numpy as np
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.dtype import DType
 from tinygrad.nn.state import safe_load, safe_save, get_state_dict, torch_load
-from tinygrad.helpers import Timing, fetch, temp, CI, OSX
+from tinygrad.helpers import Timing, fetch, temp, OSX
+from test.helpers import slow
 from tinygrad.device import is_dtype_supported

 def compare_weights_both(url):
@@ -340,8 +341,8 @@ class TestDiskTensor(unittest.TestCase):
     on_dev = t.to(Device.DEFAULT).realize()
     np.testing.assert_equal(on_dev.numpy(), t.numpy())

+  @slow
   def test_copy_from_disk_huge(self):
-    if CI and not hasattr(Device["DISK"], 'io_uring'): self.skipTest("slow on ci without iouring")

     fn = pathlib.Path(temp("dt_copy_from_disk_huge"))
     fn.unlink(missing_ok=True)
@@ -2,7 +2,8 @@ import unittest, math, operator, subprocess, struct
 from tinygrad.tensor import Tensor, dtypes, Device
 from tinygrad.dtype import DType, DTYPES_DICT, truncate, float_to_fp16, float_to_bf16, _to_np_dtype, least_upper_dtype, least_upper_float
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import getenv, CI, DEBUG
+from tinygrad.helpers import getenv, DEBUG
+from test.helpers import slow
 from hypothesis import given, settings, strategies as strat
 import numpy as np
 import torch
@@ -594,7 +595,7 @@ class TestAutoCastType(unittest.TestCase):
     dtypes.default_float = old_default_float

   @unittest.skipIf(Device.DEFAULT == "PYTHON", "very slow")
-  @unittest.skipIf(CI and Device.DEFAULT == "AMD", "very slow")
+  @slow
   @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Binding size is larger than the maximum storage buffer binding size")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_mean_half_precision_underflow(self):
@@ -1,8 +1,8 @@
 from typing_extensions import Callable
 import hashlib, random, unittest
 from tinygrad import Tensor, Device, getenv, dtypes
+from test.helpers import slow
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import CI
 from tinygrad.uop.ops import UOp
 from tinygrad.engine.jit import TinyJit

@@ -58,7 +58,7 @@ class TestKeccak(unittest.TestCase):
     self.assertEqual(bytes(Tensor(b"abc").keccak().tolist()),
                      bytearray.fromhex("3a985da74fe225b2 045c172d6bd390bd 855f086e3e9d525b 46bfe24511431532"))

-  @unittest.skipIf(CI, "times out in ci")
+  @slow
   def test_long(self):
     data = b"\x00" * 4
     self.assertEqual(bytes(Tensor(data).keccak("shake_128").tolist()), hashlib.shake_128(data).digest(16))
@@ -74,7 +74,7 @@ class TestKeccak(unittest.TestCase):
     self.assertEqual(bytes(out[1].tolist()), bytearray.fromhex("3a985da74fe225b2 045c172d6bd390bd 855f086e3e9d525b 46bfe24511431532"))
     self.assertEqual(bytes(out[2].tolist()), bytearray.fromhex("8e0d8f672252acb0 ffc5093db8653b18 1513bf9a2097e737 b4f73533dcaf46df"))

-  @unittest.skipIf(CI, "redundant with test_variable_bs")
+  @slow
   def test_variable_bs_jit(self):
     def f(data):
       return data.keccak()
@@ -1,6 +1,6 @@
 import ctypes, gzip, unittest, timeit
 from tinygrad import Variable
-from tinygrad.helpers import Context, ContextVar, argfix, colored, word_wrap, is_numpy_ndarray, CI, mv_address, get_contraction
+from tinygrad.helpers import Context, ContextVar, argfix, colored, word_wrap, is_numpy_ndarray, mv_address, get_contraction
 from tinygrad.helpers import merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten, from_mv, to_mv, polyN, time_to_str, cdiv, cmod, getbits
 from tinygrad.tensor import Tensor, get_shape
 import numpy as np
@@ -198,7 +198,7 @@ class TestMemoryview(unittest.TestCase):
     mv[0] = 2
     assert base[0] == 2

-  @unittest.skipIf(CI, "dangerous for CI, it allocates tons of memory")
+  @unittest.skip("allocates tons of memory")
   def test_to_mv(self):
     sizes = [
       (16, "16 B"),
@@ -5,7 +5,8 @@ import numpy as np

 from tinygrad import Tensor, dtypes, Device, TinyJit
 from tinygrad.device import is_dtype_supported
-from tinygrad.helpers import CI, all_same, prod
+from tinygrad.helpers import all_same, prod
+from test.helpers import slow

 random.seed(42)

@@ -1140,7 +1141,7 @@ def get_set_tensor(indexed: Tensor, indexer):
   set_tensor = Tensor.randint(set_count, high=set_count).reshape(set_size) #.cast(dtypes.float64)
   return set_tensor

-@unittest.skipIf(CI and Device.DEFAULT in ["CPU", "CL", "METAL", "NV", "AMD"], "slow")
+@slow
 class TestAdvancedIndexing(unittest.TestCase):
   def test_integer_array_indexing(self):
     # pick a random valid indexer type
@@ -20,7 +20,7 @@ class TestRawShmBuffer(unittest.TestCase):
     assert np.allclose(t.numpy(), t2.numpy())
     s.unlink()

-  @unittest.skipIf(CI, "CI doesn't like big shared memory")
+  @unittest.skip("big shared memory")
   def test_e2e_big(self):
     # bigger than this doesn't work on Linux, maybe this is a limit somewhere?
     t = Tensor.randn(2048, 128, 8).realize()