mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
delete more tests (#12043)
* delete more tests * delete and simplify * flaky on windows * a few more, those remained
This commit is contained in:
@@ -20,12 +20,6 @@ repos:
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
- id: devicetests
|
||||
name: select GPU tests
|
||||
entry: env GPU=1 PYTHONPATH="." python3 -m pytest test/test_uops.py test/test_search.py
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
- id: tests
|
||||
name: subset of tests
|
||||
entry: env PYTHONPATH="." python3 -m pytest -n=4 test/test_ops.py test/test_dtype.py test/test_schedule.py test/test_assign.py
|
||||
|
||||
@@ -6,7 +6,7 @@ from tinygrad.runtime.support.hcq import HCQCompiled, HCQBuffer
|
||||
from tinygrad.runtime.autogen import libc
|
||||
from tinygrad.runtime.support.system import PCIIfaceBase
|
||||
from tinygrad.engine.realize import get_runner, CompiledRunner, get_program
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad import Variable
|
||||
|
||||
MOCKGPU = getenv("MOCKGPU")
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
import unittest, contextlib
|
||||
import unittest
|
||||
import numpy as np
|
||||
from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device, Variable
|
||||
from tinygrad.helpers import CI, Context, getenv
|
||||
from tinygrad.engine.realize import run_schedule
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps, Kernel, KernelOptError
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
|
||||
from tinygrad.codegen.opt.search import get_kernel_actions
|
||||
from tinygrad.uop.ops import Ops
|
||||
from tinygrad.codegen import apply_rewrites, rewrites_for_views
|
||||
|
||||
class TestArange(unittest.TestCase):
|
||||
def _get_flops(self, N, opts=None):
|
||||
@@ -49,28 +47,6 @@ class TestArange(unittest.TestCase):
|
||||
@unittest.skip("doesn't work yet")
|
||||
def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(OptOps.PADTO, axis=1, arg=32)])
|
||||
|
||||
def test_all_opts(self, opts=None, exclude=None):
|
||||
k = Kernel(apply_rewrites(Tensor.arange(256).schedule()[-1].ast, rewrites_for_views))
|
||||
if opts is not None:
|
||||
for o in opts: k.apply_opt(o)
|
||||
all_opts_256 = [kk.applied_opts for kk in get_kernel_actions(k, include_0=False).values()]
|
||||
k = Kernel(apply_rewrites(Tensor.arange(2560).schedule()[-1].ast, rewrites_for_views))
|
||||
if opts is not None:
|
||||
for o in opts: k.apply_opt(o)
|
||||
all_opts_2560 = [kk.applied_opts for kk in get_kernel_actions(k, include_0=False).values()]
|
||||
all_opts = [x for x in all_opts_256 if x in all_opts_2560]
|
||||
for opts in all_opts:
|
||||
if exclude is not None and opts[-1] in exclude: continue
|
||||
print(opts)
|
||||
self.test_complexity(opts)
|
||||
def test_all_opts_w_local(self):
|
||||
with contextlib.suppress(KernelOptError):
|
||||
return self.test_all_opts([Opt(OptOps.LOCAL, 0, 16)], [Opt(op=OptOps.PADTO, axis=1, arg=32)])
|
||||
def test_all_opts_w_upcast(self): return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4)])
|
||||
def test_all_opts_w_unroll(self): return self.test_all_opts([Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])
|
||||
def test_all_opts_w_upcast_and_unroll(self):
|
||||
return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])
|
||||
|
||||
class TestRand(unittest.TestCase):
|
||||
def test_fused_rand_less_ops(self, noopt=1):
|
||||
GlobalCounters.reset()
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
# ruff: noqa: E501
|
||||
import unittest
|
||||
from tinygrad import dtypes
|
||||
from tinygrad.codegen.opt.kernel import Kernel
|
||||
from tinygrad.codegen.opt.search import Opt, OptOps, bufs_from_lin
|
||||
from extra.optimization.helpers import time_linearizer
|
||||
|
||||
# stuff needed to unpack a kernel
|
||||
from tinygrad.uop.ops import UOp, Ops
|
||||
from tinygrad.shape.shapetracker import ShapeTracker
|
||||
from tinygrad.shape.view import View
|
||||
|
||||
def _test_overflow(ast, opts):
|
||||
lin = Kernel(ast)
|
||||
lin.apply_opts(opts)
|
||||
bufs = bufs_from_lin(lin)
|
||||
print(bufs)
|
||||
time_linearizer(lin, bufs)
|
||||
|
||||
# NOTE: if you want these to trigger, set launch bounds on HIP kernels
|
||||
@unittest.skip("unneeded without launch bounds")
|
||||
class TestLinearizerOverflow(unittest.TestCase):
|
||||
def test_overflow_1(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(51380224), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(64, 1, 64, 112, 112, 1, 1, 1), strides=(802816, 0, 12544, 112, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.MAX, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(9633792), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 64, 1, 3, 8, 230, 8, 230), strides=(0, 150528, 0, 50176, 0, 224, 0, 1), offset=-675, mask=((0, 1), (0, 64), (0, 1), (0, 3), (0, 8), (3, 227), (0, 8), (3, 227)), contiguous=False), View(shape=(64, 1, 64, 112, 112, 3, 7, 7), strides=(10156800, 0, 0, 3680, 2, 3385600, 425040, 231), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(9408), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(64, 1, 64, 112, 112, 3, 7, 7), strides=(0, 0, 147, 0, 0, 49, 7, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),
|
||||
x16:=UOp(Ops.CONST, dtypes.float, arg=0.0, src=(
|
||||
x17:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(64, 1, 64, 112, 112, 1, 1, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(64), arg=3, src=()),
|
||||
x20:=UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(64, 1, 64, 112, 112, 1, 1, 1), strides=(0, 0, 1, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),
|
||||
UOp(Ops.SQRT, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
x23:=UOp(Ops.CONST, dtypes.float, arg=1.0, src=(
|
||||
x17,)),
|
||||
UOp(Ops.RECIP, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
x23,
|
||||
UOp(Ops.CONST, dtypes.float, arg=1e-05, src=(
|
||||
x17,)),)),)),)),)),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(64), arg=4, src=()),
|
||||
x20,)),)),
|
||||
x16,)),)),))
|
||||
opts = [Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.LOCAL, axis=2, arg=16), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.UPCAST, axis=0, arg=4), Opt(op=OptOps.UPCAST, axis=2, arg=0)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# From BEAM on hlb_cifar.py
|
||||
def test_overflow_2(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(33554432), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(512, 1, 64, 32, 32, 1, 1, 1), strides=(65536, 0, 1024, 32, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(16777216), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 512, 1, 32, 4, 34, 4, 34), strides=(0, 32768, 0, 1024, 0, 32, 0, 1), offset=-33, mask=((0, 1), (0, 512), (0, 1), (0, 32), (0, 4), (1, 33), (0, 4), (1, 33)), contiguous=False), View(shape=(512, 1, 64, 32, 32, 32, 3, 3), strides=(591872, 0, 0, 136, 1, 18496, 4760, 35), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(18432), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(512, 1, 64, 32, 32, 32, 3, 3), strides=(0, 0, 288, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.LOCAL, axis=2, arg=4), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.UPCAST, axis=2, arg=0), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.UNROLL, axis=0, arg=0)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# from BEAM on default simple_conv.py (which is quite large):
|
||||
def test_overflow_3(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(33554432), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(16, 1, 128, 128, 128, 1, 1, 1), strides=(2097152, 0, 16384, 128, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(33554432), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 16, 1, 128, 4, 130, 4, 130), strides=(0, 2097152, 0, 16384, 0, 128, 0, 1), offset=-129, mask=((0, 1), (0, 16), (0, 1), (0, 128), (0, 4), (1, 129), (0, 4), (1, 129)), contiguous=False), View(shape=(16, 1, 128, 128, 128, 128, 3, 3), strides=(34611200, 0, 0, 520, 1, 270400, 68120, 131), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(147456), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(16, 1, 128, 128, 128, 128, 3, 3), strides=(0, 0, 1152, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.LOCAL, axis=2, arg=8), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.UPCAST, axis=3, arg=0), Opt(op=OptOps.UPCAST, axis=1, arg=2), Opt(op=OptOps.UPCAST, axis=2, arg=2)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# from BEAM on BS=4 simple_conv.py:
|
||||
def test_overflow_4(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(8388608), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(4, 1, 128, 128, 128, 1, 1, 1), strides=(2097152, 0, 16384, 128, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(8388608), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 4, 1, 128, 4, 130, 4, 130), strides=(0, 2097152, 0, 16384, 0, 128, 0, 1), offset=-129, mask=((0, 1), (0, 4), (0, 1), (0, 128), (0, 4), (1, 129), (0, 4), (1, 129)), contiguous=False), View(shape=(4, 1, 128, 128, 128, 128, 3, 3), strides=(34611200, 0, 0, 520, 1, 270400, 68120, 131), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(147456), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(4, 1, 128, 128, 128, 128, 3, 3), strides=(0, 0, 1152, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.UPCAST, axis=3, arg=4), Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.LOCAL, axis=2, arg=4), Opt(op=OptOps.UPCAST, axis=1, arg=2), Opt(op=OptOps.UPCAST, axis=2, arg=4)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# from BEAM on BS=2 simple_conv.py:
|
||||
def test_overflow_5(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(4194304), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(2, 1, 128, 128, 128, 1, 1, 1), strides=(2097152, 0, 16384, 128, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(4194304), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 2, 1, 128, 4, 130, 4, 130), strides=(0, 2097152, 0, 16384, 0, 128, 0, 1), offset=-129, mask=((0, 1), (0, 2), (0, 1), (0, 128), (0, 4), (1, 129), (0, 4), (1, 129)), contiguous=False), View(shape=(2, 1, 128, 128, 128, 128, 3, 3), strides=(34611200, 0, 0, 520, 1, 270400, 68120, 131), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(147456), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(2, 1, 128, 128, 128, 128, 3, 3), strides=(0, 0, 1152, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.UPCAST, axis=3, arg=0), Opt(op=OptOps.LOCAL, axis=2, arg=2), Opt(op=OptOps.UPCAST, axis=1, arg=2), Opt(op=OptOps.UPCAST, axis=2, arg=2)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# from BEAM on BS=3 simple_conv.py:
|
||||
def test_overflow_6(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(6291456), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(3, 1, 128, 128, 128, 1, 1, 1), strides=(2097152, 0, 16384, 128, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(6291456), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 3, 1, 128, 4, 130, 4, 130), strides=(0, 2097152, 0, 16384, 0, 128, 0, 1), offset=-129, mask=((0, 1), (0, 3), (0, 1), (0, 128), (0, 4), (1, 129), (0, 4), (1, 129)), contiguous=False), View(shape=(3, 1, 128, 128, 128, 128, 3, 3), strides=(34611200, 0, 0, 520, 1, 270400, 68120, 131), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(147456), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(3, 1, 128, 128, 128, 128, 3, 3), strides=(0, 0, 1152, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.UPCAST, axis=3, arg=0), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.LOCAL, axis=2, arg=8), Opt(op=OptOps.UPCAST, axis=1, arg=2), Opt(op=OptOps.UPCAST, axis=3, arg=2)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
# from BEAM on BS=3 simple_conv.py: (alt)
|
||||
def test_overflow_7(self):
|
||||
ast = UOp(Ops.SINK, None, arg=None, src=(
|
||||
UOp(Ops.STORE, None, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(6291456), arg=0, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(3, 1, 128, 128, 128, 1, 1, 1), strides=(2097152, 0, 16384, 128, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)), src=()),
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (7, 6, 5)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(6291456), arg=1, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(1, 3, 1, 128, 4, 130, 4, 130), strides=(0, 2097152, 0, 16384, 0, 128, 0, 1), offset=-129, mask=((0, 1), (0, 3), (0, 1), (0, 128), (0, 4), (1, 129), (0, 4), (1, 129)), contiguous=False), View(shape=(3, 1, 128, 128, 128, 128, 3, 3), strides=(34611200, 0, 0, 520, 1, 270400, 68120, 131), offset=0, mask=None, contiguous=False))), src=()),)),
|
||||
UOp(Ops.LOAD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(147456), arg=2, src=()),
|
||||
UOp(Ops.VIEW, None, arg=ShapeTracker(views=(View(shape=(3, 1, 128, 128, 128, 128, 3, 3), strides=(0, 0, 1152, 0, 0, 9, 3, 1), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
|
||||
opts = [Opt(op=OptOps.UPCAST, axis=3, arg=4), Opt(op=OptOps.LOCAL, axis=3, arg=16), Opt(op=OptOps.UPCAST, axis=1, arg=4), Opt(op=OptOps.LOCAL, axis=2, arg=8), Opt(op=OptOps.UPCAST, axis=1, arg=2), Opt(op=OptOps.UPCAST, axis=2, arg=4)]
|
||||
_test_overflow(ast, opts)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -2,7 +2,7 @@ import numpy as np
|
||||
import unittest
|
||||
from tinygrad import Tensor
|
||||
from tinygrad.helpers import get_single_element
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.engine.realize import CompiledRunner, ExecItem, get_program
|
||||
|
||||
class TestOptGemm(unittest.TestCase):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import unittest
|
||||
from tinygrad import Tensor, Device
|
||||
from tinygrad.helpers import RANGEIFY
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.engine.realize import get_program
|
||||
|
||||
@unittest.skipIf(RANGEIFY>0, "arg is partial contig in rangeify")
|
||||
|
||||
@@ -3,11 +3,9 @@ import numpy as np
|
||||
import unittest
|
||||
from dataclasses import replace
|
||||
from tinygrad import Tensor, Context, Device, dtypes
|
||||
from tinygrad.uop.ops import Ops, UOp # noqa: F401 # pylint: disable=unused-import
|
||||
from tinygrad.codegen.opt.kernel import Kernel, Opt, OptOps
|
||||
from tinygrad.uop.ops import Ops
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
from tinygrad.engine.realize import CompiledRunner, ExecItem, lower_schedule_item, get_program
|
||||
from tinygrad.codegen.opt.search import bufs_from_lin
|
||||
from tinygrad.shape.shapetracker import ShapeTracker, View # noqa: F401 # pylint: disable=unused-import
|
||||
|
||||
N = 512
|
||||
|
||||
@@ -236,129 +234,5 @@ class TestQuantizeOnnx(unittest.TestCase):
|
||||
opts = [Opt(op=OptOps.UPCAST, axis=0, arg=128), Opt(op=OptOps.UNROLL, axis=0, arg=4)]
|
||||
sexec(out, opts)
|
||||
|
||||
@unittest.skipIf(Device.DEFAULT != "DSP", "only tests for DSP")
|
||||
class TestDSPCache(unittest.TestCase):
|
||||
def test_cache_speed(self):
|
||||
# string becuase this breaks Python language server for syntax highlight for some reason
|
||||
ast = eval("""UOp(Ops.SINK, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.STORE, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.VIEW, dtypes.uchar.ptr(25088), arg=ShapeTracker(views=(View(shape=(1, 28, 28, 32, 1), strides=(0, 896, 32, 1, 0), offset=0, mask=None, contiguous=True),)), src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.uchar.ptr(25088), arg=0, src=()),)),
|
||||
UOp(Ops.CAST, dtypes.uchar, arg=None, src=(
|
||||
UOp(Ops.XOR, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.MAX, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.XOR, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.MAX, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.ADD, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.REDUCE_AXIS, dtypes.float, arg=(Ops.ADD, (4,)), src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.uchar, arg=None, src=(
|
||||
UOp(Ops.VIEW, dtypes.uchar.ptr(150528), arg=ShapeTracker(views=(View(shape=(1, 28, 28, 32, 192), strides=(0, 5376, 192, 0, 1), offset=0, mask=None, contiguous=False),)), src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.uchar.ptr(150528), arg=1, src=()),)),)),)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=0.012368360534310341, src=(
|
||||
x22:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 28, 28, 32, 192), strides=(0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.char, arg=None, src=(
|
||||
UOp(Ops.VIEW, dtypes.char.ptr(6144), arg=ShapeTracker(views=(View(shape=(32, 48, 4), strides=(4, 128, 1), offset=0, mask=None, contiguous=False), View(shape=(1, 28, 28, 32, 192), strides=(0, 0, 0, 192, 1), offset=0, mask=None, contiguous=False))), src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.char.ptr(6144), arg=2, src=()),)),)),)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=0.007441135589033365, src=(
|
||||
x22,)),)),)),)),
|
||||
UOp(Ops.MUL, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.CAST, dtypes.float, arg=None, src=(
|
||||
UOp(Ops.LOAD, dtypes.int, arg=None, src=(
|
||||
UOp(Ops.VIEW, dtypes.int.ptr(32), arg=ShapeTracker(views=(View(shape=(1, 28, 28, 32, 1), strides=(0, 0, 0, 1, 0), offset=0, mask=None, contiguous=False),)), src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(32), arg=3, src=()),)),)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=9.203465015161783e-05, src=(
|
||||
x36:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 28, 28, 32, 1), strides=(0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=33.812857328652136, src=(
|
||||
x36,)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=0.4999999, src=(
|
||||
x36,)),)),
|
||||
UOp(Ops.CONST, dtypes.float, arg=136.0, src=(
|
||||
x36,)),)),)),
|
||||
UOp(Ops.CONST, dtypes.int, arg=0, src=(
|
||||
x36,)),)),
|
||||
x41:=UOp(Ops.CONST, dtypes.int, arg=-1, src=(
|
||||
x36,)),)),
|
||||
UOp(Ops.CONST, dtypes.int, arg=-256, src=(
|
||||
x36,)),)),
|
||||
x41,)),)),)),))""")
|
||||
opts = [Opt(op=OptOps.UNROLL, axis=0, arg=8), Opt(op=OptOps.UPCAST, axis=1, arg=32), Opt(op=OptOps.UPCAST, axis=0, arg=4)]
|
||||
with Context(DEVECTORIZE=0, QUANTIZE=1):
|
||||
prg = get_program(ast, opts=opts)
|
||||
|
||||
new_src = """
|
||||
typedef int int32 __attribute__((aligned(128),vector_size(128)));
|
||||
typedef signed char signed_char128 __attribute__((aligned(128),vector_size(128)));
|
||||
typedef unsigned char unsigned_char8 __attribute__((aligned(8),vector_size(8)));
|
||||
typedef unsigned char unsigned_char4 __attribute__((aligned(4),vector_size(4)));
|
||||
typedef unsigned char unsigned_char128 __attribute__((aligned(128),vector_size(128)));
|
||||
__attribute__((noinline)) void r_196_32_4_24_8(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, signed char* restrict __attribute__((align_value(
|
||||
128))) data2, int* restrict __attribute__((align_value(128))) data3) {
|
||||
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
int32 val0 = *((int32*)((data3+0)));
|
||||
for (int ridx0 = 0; ridx0 < 196; ridx0++) {
|
||||
int32 acc0 = cast0;
|
||||
int32 acc1 = cast0;
|
||||
int32 acc2 = cast0;
|
||||
int32 acc3 = cast0;
|
||||
__builtin_HEXAGON_Y2_dcfetch(data1+ridx0*768);
|
||||
__builtin_HEXAGON_Y2_dcfetch(data1+ridx0*768+192);
|
||||
__builtin_HEXAGON_Y2_dcfetch(data1+ridx0*768+384);
|
||||
__builtin_HEXAGON_Y2_dcfetch(data1+ridx0*768+576);
|
||||
for (int ridx1 = 0; ridx1 < 24; ridx1++) {
|
||||
signed_char128 val1 = *((signed_char128*)((data2+(ridx1<<8))));
|
||||
signed_char128 val2 = *((signed_char128*)((data2+((1+(ridx1<<1))<<7))));
|
||||
|
||||
int alu0 = ((ridx0*768)+(ridx1<<3));
|
||||
|
||||
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu0)));
|
||||
__builtin_HEXAGON_Y2_dcfetch(((data1+alu0)+16));
|
||||
unsigned_char8 val4 = *((unsigned_char8*)((data1+(alu0+192))));
|
||||
__builtin_HEXAGON_Y2_dcfetch(((data1+(alu0+192))+16));
|
||||
unsigned_char8 val5 = *((unsigned_char8*)((data1+(alu0+384))));
|
||||
__builtin_HEXAGON_Y2_dcfetch(((data1+(alu0+384))+16));
|
||||
unsigned_char8 val6 = *((unsigned_char8*)((data1+(alu0+576))));
|
||||
__builtin_HEXAGON_Y2_dcfetch(((data1+(alu0+576))+16));
|
||||
|
||||
unsigned_char4 alu5 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3);
|
||||
unsigned_char4 alu6 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3);
|
||||
unsigned_char4 alu7 = __builtin_shufflevector(val5, val5, 0, 1, 2, 3);
|
||||
unsigned_char4 alu8 = __builtin_shufflevector(val6, val6, 0, 1, 2, 3);
|
||||
acc0 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc0, val1, (*((unsigned int*)&alu5)));
|
||||
acc1 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc1, val1, (*((unsigned int*)&alu6)));
|
||||
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val1, (*((unsigned int*)&alu7)));
|
||||
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val1, (*((unsigned int*)&alu8)));
|
||||
|
||||
unsigned_char4 alu9 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7);
|
||||
unsigned_char4 alu10 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7);
|
||||
unsigned_char4 alu11 = __builtin_shufflevector(val5, val5, 4, 5, 6, 7);
|
||||
unsigned_char4 alu12 = __builtin_shufflevector(val6, val6, 4, 5, 6, 7);
|
||||
acc0 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc0, val2, (*((unsigned int*)&alu9)));
|
||||
acc1 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc1, val2, (*((unsigned int*)&alu10)));
|
||||
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val2, (*((unsigned int*)&alu11)));
|
||||
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val2, (*((unsigned int*)&alu12)));
|
||||
}
|
||||
unsigned_char128 alu18 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((acc3+val0)*203)+32767)/65536)+136), (((((acc2+val0)*203)+32767)/65536)+136)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((acc1+val0)*203)+32767)/65536)+136), (((((acc0+val0)*203)+32767)/65536)+136)));
|
||||
*((unsigned_char128*)((data0+(ridx0<<7)))) = alu18;
|
||||
}
|
||||
}
|
||||
"""
|
||||
prg = replace(prg, src=new_src+prg.src.split("/* DSP boilerplate */ ")[1])
|
||||
rt = CompiledRunner(prg)
|
||||
#Device.default.compiler.disassemble(rt.lib)
|
||||
ei = ExecItem(rt, bufs_from_lin(Kernel(ast)))
|
||||
tm = ei.run(wait=True)
|
||||
print(f"final time {tm*1e6:.2f} us")
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
import unittest
|
||||
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps, Kernel
|
||||
from tinygrad.codegen.opt.search import bufs_from_lin, actions, beam_search
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.helpers import Context, GlobalCounters
|
||||
from tinygrad.engine.realize import capturing
|
||||
|
||||
class TestBEAM(unittest.TestCase):
|
||||
def test_dynamic_beam(self):
|
||||
# TODO: make this infra globally usable
|
||||
class Capture:
|
||||
def __init__(self): self.captured = []
|
||||
def add(self, x): self.captured.append(x)
|
||||
|
||||
capturing.append(Capture())
|
||||
kernel_count = GlobalCounters.kernel_count
|
||||
with Context(BEAM=1): Tensor.zeros(16).contiguous().realize()
|
||||
assert GlobalCounters.kernel_count == kernel_count + 1
|
||||
k_beam_1 = capturing[0].captured
|
||||
capturing.clear()
|
||||
|
||||
capturing.append(Capture())
|
||||
kernel_count = GlobalCounters.kernel_count
|
||||
with Context(BEAM=0): Tensor.zeros(16).contiguous().realize()
|
||||
assert GlobalCounters.kernel_count == kernel_count + 1
|
||||
k_beam_0 = capturing[0].captured
|
||||
capturing.clear()
|
||||
self.assertNotEqual(k_beam_0[-1].prg.p.src, k_beam_1[-1].prg.p.src)
|
||||
|
||||
def test_get_kernel_actions_dedup(self):
|
||||
from test.test_linearizer import helper_realized_ast
|
||||
from tinygrad.codegen.opt.search import get_kernel_actions
|
||||
a = Tensor.empty(4, 3)
|
||||
b = Tensor.empty(3)
|
||||
realized_ast, _ = helper_realized_ast(a @ b)
|
||||
candidates = [
|
||||
Opt(op=OptOps.UPCAST, axis=0, arg=0), Opt(op=OptOps.UPCAST, axis=0, arg=4),
|
||||
Opt(op=OptOps.LOCAL, axis=0, arg=0), Opt(op=OptOps.LOCAL, axis=0, arg=4),
|
||||
Opt(op=OptOps.UNROLL, axis=0, arg=0), Opt(op=OptOps.UNROLL, axis=0, arg=3),
|
||||
Opt(op=OptOps.GROUP, axis=0, arg=0), Opt(op=OptOps.GROUP, axis=0, arg=3),
|
||||
Opt(op=OptOps.GROUPTOP, axis=0, arg=0), Opt(op=OptOps.GROUPTOP, axis=0, arg=3),
|
||||
]
|
||||
lins = get_kernel_actions(Kernel(realized_ast), include_0=False, candidates=candidates).values()
|
||||
|
||||
# ensure amt=0 are not duplicated
|
||||
assert all(len(x.applied_opts) == 1 for x in lins)
|
||||
kernel_actions = [x.applied_opts[0] for x in lins]
|
||||
assert Opt(OptOps.UPCAST, axis=0, arg=4) not in kernel_actions, "did not de-dup UPCAST"
|
||||
assert Opt(OptOps.LOCAL, axis=0, arg=4) not in kernel_actions, "did not de-dup LOCAL"
|
||||
assert Opt(OptOps.UNROLL, axis=0, arg=3) not in kernel_actions, "did not de-dup UNROLL"
|
||||
assert Opt(OptOps.GROUP, axis=0, arg=3) not in kernel_actions, "did not de-dup GROUP"
|
||||
assert Opt(OptOps.GROUPTOP, axis=0, arg=3) not in kernel_actions, "did not de-dup GROUPTOP"
|
||||
|
||||
def test_get_kernel_actions_preserves_actions_state(self):
|
||||
from test.test_linearizer import helper_realized_ast
|
||||
from tinygrad.codegen.opt.search import get_kernel_actions
|
||||
a = Tensor.rand(16, 16)
|
||||
b = Tensor.rand(16, 16)
|
||||
realized_ast, _ = helper_realized_ast(a @ b)
|
||||
actions_before = actions.copy()
|
||||
get_kernel_actions(Kernel(realized_ast))
|
||||
actions_after = actions.copy()
|
||||
assert actions_after == actions_before, "actions state was not preserved"
|
||||
|
||||
def test_beam_unnamed_kernels(self):
|
||||
from test.test_linearizer import push_views
|
||||
a = Tensor.rand(100)
|
||||
b = Tensor.rand(100)
|
||||
si = (a+b).schedule()[-1]
|
||||
lin = Kernel(push_views(si.ast))
|
||||
bufs = bufs_from_lin(lin)
|
||||
# TODO: beam should have better instrumentation so we don't have to check this indirect thing
|
||||
kcount = len(Kernel.kernel_cnt)
|
||||
beam_search(lin, bufs, 3, disable_cache=True)
|
||||
self.assertEqual(kcount, len(Kernel.kernel_cnt))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
@@ -14,7 +14,7 @@ from tinygrad.engine.realize import CompiledRunner, get_program
|
||||
from tinygrad.codegen import full_rewrite
|
||||
from tinygrad.uop.symbolic import sym
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps
|
||||
from tinygrad.codegen.opt import Opt, OptOps
|
||||
|
||||
def to_uops_list(u:list[UOp], opts=None, skip_check=False) -> list[UOp]: return full_rewrite(UOp.sink(*u), opts)
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from tinygrad.renderer import Estimates
|
||||
from tinygrad.codegen import full_rewrite
|
||||
from tinygrad.uop.ops import Ops, UOp
|
||||
from tinygrad.dtype import dtypes
|
||||
from tinygrad.codegen.opt.kernel import Opt, OptOps, KernelOptError
|
||||
from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
|
||||
from tinygrad.device import Device
|
||||
|
||||
def flops_mem(uops, ignore_indexing=False):
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import unittest
|
||||
import unittest, sys
|
||||
import numpy as np
|
||||
from tinygrad import Tensor, GlobalCounters, dtypes, Context, nn
|
||||
from tinygrad.helpers import CI, Profiling, WINO
|
||||
|
||||
@unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows")
|
||||
class TestWinogradClose(unittest.TestCase):
|
||||
def test_close(self):
|
||||
inp = Tensor.rand(1, 16, 16, 16)
|
||||
@@ -18,6 +19,7 @@ class TestWinogradClose(unittest.TestCase):
|
||||
test = conv(inp).realize()
|
||||
np.testing.assert_allclose(cmp.numpy(), test.numpy(), atol=1e-5)
|
||||
|
||||
@unittest.skipIf(sys.platform.startswith("win"), "flaky on Windows")
|
||||
class TestWinograd(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.old = WINO.value
|
||||
|
||||
Reference in New Issue
Block a user