mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 23:48:01 -05:00
@@ -521,7 +521,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
first_x = UOp(Ops.LOAD, dtypes.float, (g1, x.lazydata.st.reshape((3, 27, 1, 32)).expand((3, 27, 32, 32)).to_uop()))
|
||||
first_reduce = UOp(Ops.REDUCE_AXIS, dtypes.float, (first_x,), (Ops.ADD, (3,)))
|
||||
neg_mean = first_reduce * ast_const(dtypes.float, -0.03125, (3, 27, 32, 1))
|
||||
# store = UOp(UOps.STORE, src=(g0, ShapeTracker.from_shape((3, 27, 32, 1)).to_uop(), mean))
|
||||
# store = UOp(Ops.STORE, src=(g0, ShapeTracker.from_shape((3, 27, 32, 1)).to_uop(), mean))
|
||||
# verify_lazyop(store)
|
||||
second_x = UOp(Ops.LOAD, dtypes.float, (g1, x.lazydata.st.reshape((3, 27, 32, 1)).to_uop()))
|
||||
squares = (second_x+neg_mean)*(second_x+neg_mean)
|
||||
@@ -854,7 +854,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
ranges = [i for i,u in enumerate(lin.uops) if u.op is Ops.RANGE]
|
||||
assert len(ranges) == 1 # NOTE: it collapses now
|
||||
# RANGE -> LOAD -> RANGE -> ASSIGN
|
||||
#assert any(x.op is UOps.LOAD for x in lin.uops[ranges[0]:ranges[1]])
|
||||
#assert any(x.op is Ops.LOAD for x in lin.uops[ranges[0]:ranges[1]])
|
||||
|
||||
def test_three_nested_range(self):
|
||||
a = Tensor.randn(2, ).realize()
|
||||
@@ -865,7 +865,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
# RANGE -> RANGE -> LOAD -> RANGE -> ASSIGN
|
||||
# NOTE: nothing should toposort between the first two ranges
|
||||
#assert ranges[0]+1 == ranges[1]
|
||||
#assert any(x.op is UOps.LOAD for x in lin.uops[ranges[1]:ranges[2]])
|
||||
#assert any(x.op is Ops.LOAD for x in lin.uops[ranges[1]:ranges[2]])
|
||||
|
||||
def test_two_nested_range_alt_indexing(self):
|
||||
a = Tensor([2, 2]).realize()
|
||||
@@ -895,14 +895,14 @@ class TestLinearizer(unittest.TestCase):
|
||||
assert len(ranges) == 1 # NOTE: it collapses now
|
||||
#if getenv("PTX"):
|
||||
# LOAD -> RANGE -> CAST -> ALU -> ALU -> LOAD -> ALU -> RANGE -> ALU -> ASSIGN
|
||||
# assert lin.uops[ranges[0]-2].op is UOps.LOAD
|
||||
# assert lin.uops[ranges[0]-2].op is Ops.LOAD
|
||||
# assert ranges[1] == ranges[0]+6
|
||||
# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [UOps.LOAD, UOps.ALU]
|
||||
# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [Ops.LOAD, Ops.ALU]
|
||||
# LOAD -> RANGE -> LOAD -> ALU -> RANGE -> ASSIGN
|
||||
#else:
|
||||
# assert lin.uops[ranges[0]-2].op is UOps.LOAD
|
||||
# assert lin.uops[ranges[0]-2].op is Ops.LOAD
|
||||
# assert ranges[1] == ranges[0]+3
|
||||
# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [UOps.LOAD, UOps.ALU]
|
||||
# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [Ops.LOAD, Ops.ALU]
|
||||
|
||||
def test_range_outer_op_after_phi(self):
|
||||
a = Tensor.randn(4, 1).realize()
|
||||
@@ -1306,7 +1306,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
# check that the float4 cast collapses
|
||||
store_vals = [u.src[-1] for u in k.uops if u.op is Ops.STORE]
|
||||
for val in store_vals:
|
||||
assert val.dtype == dtypes.float.vec(4) # and val.op is not UOps.VECTORIZE
|
||||
assert val.dtype == dtypes.float.vec(4) # and val.op is not Ops.VECTORIZE
|
||||
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
|
||||
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
|
||||
@@ -1345,7 +1345,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
barrier = [u for u in k.uops if u.op is Ops.BARRIER][0]
|
||||
# check that the float4 cast collapses for all stores
|
||||
for store in local_stores+global_stores:
|
||||
assert store.src[-1].dtype.count > 1 # and store.src[2].op is not UOps.VECTORIZE
|
||||
assert store.src[-1].dtype.count > 1 # and store.src[2].op is not Ops.VECTORIZE
|
||||
# # check the children's vins
|
||||
# TODO: the src ALUs are not the same; should they be?
|
||||
# assert barrier.src == tuple(local_stores)
|
||||
@@ -1362,7 +1362,7 @@ class TestLinearizer(unittest.TestCase):
|
||||
|
||||
# the float4 value stores directly in lds and we skip upcast
|
||||
self.assertEqual(stores[0].src[-1].dtype, dtypes.float.vec(4))
|
||||
#assert stores[0].src[-1].op is not UOps.VECTORIZE
|
||||
#assert stores[0].src[-1].op is not Ops.VECTORIZE
|
||||
|
||||
# the global store doesn't change
|
||||
assert stores[1].src[-1].dtype == dtypes.float
|
||||
|
||||
@@ -117,7 +117,7 @@ class TestLinearizerFailures(unittest.TestCase):
|
||||
ast_const(dtypes.int, 10, st_src=(
|
||||
UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(10, 1), strides=(0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),))
|
||||
opts = [Opt(op=OptOps.UPCAST, axis=0, arg=2), Opt(op=OptOps.UPCAST, axis=0, arg=0)]
|
||||
# COMPILE FAILED, KeyError: UOps.CONST
|
||||
# COMPILE FAILED, KeyError: Ops.CONST
|
||||
helper_test_lin(Kernel(ast), opts, failed_platforms=[])
|
||||
|
||||
def test_failure_7(self):
|
||||
@@ -804,7 +804,7 @@ class TestLinearizerFailures(unittest.TestCase):
|
||||
helper_test_lin(Kernel(ast), opts=opts, failed_platforms=[], atol=0.1, rtol=0.05)
|
||||
|
||||
def test_failure_33(self):
|
||||
# UOps.UNMUL left after linearize
|
||||
# Ops.UNMUL left after linearize
|
||||
ast = UOp(Ops.SINK, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.STORE, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=0, src=()),
|
||||
@@ -868,7 +868,7 @@ class TestLinearizerFailures(unittest.TestCase):
|
||||
|
||||
# from world fuzz_linearizer: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_N=100 FUZZ_NTH=84 python3 ./test/external/fuzz_linearizer.py
|
||||
def test_failure_36(self):
|
||||
# UOps.UNMUL left after linearize
|
||||
# Ops.UNMUL left after linearize
|
||||
ast = UOp(Ops.SINK, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.STORE, dtypes.void, arg=None, src=(
|
||||
UOp(Ops.DEFINE_GLOBAL, dtypes.uchar.ptr(), arg=0, src=()),
|
||||
|
||||
@@ -325,8 +325,8 @@ class Kernel:
|
||||
-1: iterates through all available tensor cores in order and uses the first one that matches the requirements (dims and dtypes)
|
||||
[0-N]: uses only the n'th tensor core available; useful for search
|
||||
tc_opt -- controls which kinds of kernels may be eligible for tensor cores application (default 2 during BEAM, 0 otherwise)
|
||||
0: applies to only kernels with a single reduce axis and direct UOps.LOAD into Ops.MUL
|
||||
1: allows kernels with multiple reduce axes and also multiplication of UOps.CAST'd buffers
|
||||
0: applies to only kernels with a single reduce axis and direct Ops.LOAD into Ops.MUL
|
||||
1: allows kernels with multiple reduce axes and also multiplication of Ops.CAST'd buffers
|
||||
2: allows kernels with M, N, K axes that are not multiples of the tensor core dimensions by padding those axes as needed
|
||||
"""
|
||||
if tc_select is None: tc_select = TC_SELECT.value
|
||||
|
||||
@@ -638,7 +638,7 @@ class UOp(MathTrait, metaclass=UOpMetaClass):
|
||||
if self.op is Ops.RANGE: return self.src[0].vmin, (self.src[1]-1).vmax
|
||||
if self.op is Ops.BIND: return self.src[0]._min_max # ignore the bound value
|
||||
if self.op in {Ops.UNROLL, Ops.VECTORIZE}: return min(x.vmin for x in self.src), max(x.vmax for x in self.src)
|
||||
# TODO: UOps.SPECIAL is UOps.DEFINE_VAR
|
||||
# TODO: Ops.SPECIAL is Ops.DEFINE_VAR
|
||||
if self.op is Ops.SPECIAL: return 0, self.arg[1]-1 if isinstance(self.arg[1], int) else self.arg[1].vmax
|
||||
if self.op is Ops.CONST: return self.arg, self.arg
|
||||
if self.op is Ops.VCONST: return (min(self.arg), max(self.arg))
|
||||
|
||||
@@ -121,8 +121,8 @@ class Renderer:
|
||||
has_local: bool = True
|
||||
has_shared: bool = True
|
||||
# NOTE: these two should be in (x,y,z) order to match the max_sizes argument in get_grouped_dims
|
||||
global_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: UOps.SPECIAL int32 indexes right now
|
||||
local_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: UOps.SPECIAL int32 indexes right now
|
||||
global_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: Ops.SPECIAL int32 indexes right now
|
||||
local_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: Ops.SPECIAL int32 indexes right now
|
||||
shared_max: int = 32768
|
||||
tensor_cores: list[TensorCore] = []
|
||||
extra_matcher: Optional[PatternMatcher] = None
|
||||
|
||||
@@ -108,7 +108,7 @@ spec = PatternMatcher([
|
||||
(UPat(Ops.BARRIER, dtypes.void, src=UPat(Ops.STORE, allow_any_len=True)), lambda: True), # NOTE: all pointers must be local
|
||||
|
||||
# NOTE: for testing, we let sinks be anything
|
||||
#(UPat(UOps.SINK, src=UPat(UOps.STORE)), lambda: True),
|
||||
#(UPat(Ops.SINK, src=UPat(Ops.STORE)), lambda: True),
|
||||
(UPat(Ops.SINK, dtypes.void), lambda: True),
|
||||
(UPat((Ops.NOOP, Ops.CUSTOM)), lambda: True),
|
||||
|
||||
|
||||
Reference in New Issue
Block a user