rename ops to have unique names (#7522)

Author: George Hotz
Date: 2024-11-04 17:09:45 +08:00
Committed by: GitHub
Parent: 9a7cc04843
Commit: 0c19b6298b

9 changed files with 18 additions and 15 deletions

View File

@@ -50,7 +50,7 @@ def fuzz_schedule(outs:List[LazyBuffer]):
   rawbufs: Dict[LazyBuffer, Buffer] = {}
   for lsi in ts:
     for out in lsi.outputs:
-      base = rawbufs[lsi.inputs[0]].base if out.op is MetaOps.VIEW else None
+      base = rawbufs[lsi.inputs[0]].base if out.op is MetaOps.BUFFER_VIEW else None
       rawbufs[out] = Buffer(out.buffer.device, out.buffer.size, out.buffer.dtype, base=base)
       if out.op is MetaOps.ASSIGN: rawbufs[out].ensure_allocated().copyin(prerealized[out])
     for x in lsi.inputs:
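
Note: the rename matters here because a BUFFER_VIEW output owns no storage of its own; the fuzzer must hand it a Buffer that aliases the base of its first input, or the comparison runs would diverge. A minimal plain-Python illustration of the aliasing (not the tinygrad Buffer API):

    base = bytearray(8)           # stands in for the input's base buffer
    view = memoryview(base)[2:6]  # a view: shares storage, no allocation
    base[2] = 7
    assert view[0] == 7           # writes to the base are visible in the view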

View File

@@ -144,13 +144,15 @@ class TestMultiTensor(unittest.TestCase):
     O = X.shrink(((0, 2), None)) * W.shrink(((0, 2), None)) < 2
     np.testing.assert_allclose(O.numpy(), X.numpy()[0:2]*W.numpy()[0:2] < 2)

-  @given(strat.sampled_from((4, 5)), strat.sampled_from((devices_2, devices_3)), strat.sampled_from((ReduceOps.SUM, ReduceOps.PROD, ReduceOps.MAX)),
+  @given(strat.sampled_from((4, 5)), strat.sampled_from((devices_2, devices_3)),
+         strat.sampled_from((ReduceOps.SUM, ReduceOps.PROD, ReduceOps.REDUCE_MAX)),
          strat.sampled_from((None, 0, 1)), strat.sampled_from((None, 0, 1)), strat.sampled_from((1, 0, -1)))
   def test_simple_reduce(self, N, devices, rop, shard_axis, reduce_axis, sign):
     X = Tensor.rand(N*N).reshape(N, N).mul(sign)
     n = X.numpy()
     X.shard_(devices, shard_axis)
-    f = {ReduceOps.SUM: lambda x: x.sum(reduce_axis), ReduceOps.PROD: lambda x: x.prod(reduce_axis), ReduceOps.MAX: lambda x: x.max(reduce_axis)}[rop]
+    f = {ReduceOps.SUM: lambda x: x.sum(reduce_axis), ReduceOps.PROD: lambda x: x.prod(reduce_axis),
+         ReduceOps.REDUCE_MAX: lambda x: x.max(reduce_axis)}[rop]
     fX = f(X)
     fn = f(n)
     np.testing.assert_allclose(fX.numpy(), fn, rtol=1e-6, atol=1e-6)
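
For reference, the three reduce ops the test samples correspond to the usual numpy reductions it checks against; a quick standalone check of those semantics (plain numpy, illustration only):

    import numpy as np
    n = np.arange(4, dtype=np.float32).reshape(2, 2)  # [[0, 1], [2, 3]]
    assert np.allclose(n.sum(0),  [2., 4.])   # ReduceOps.SUM
    assert np.allclose(n.prod(0), [0., 3.])   # ReduceOps.PROD
    assert np.allclose(n.max(0),  [2., 3.])   # ReduceOps.REDUCE_MAX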

View File

@@ -1495,7 +1495,7 @@ class TestIndexing(unittest.TestCase):
   def test_arange_view_op(self):
     a = Tensor.arange(12).reshape(4, 3).shrink(((1, 2), (1, 3))).contiguous()
     assert isinstance(a.lazydata, LazyBuffer)
-    self.assertIs(a.lazydata.base.op, MetaOps.VIEW)
+    self.assertIs(a.lazydata.base.op, MetaOps.BUFFER_VIEW)
     self.check_schedule(a, 1)
     np.testing.assert_equal(a.numpy(), [[4, 5]])
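
Why this schedules as a BUFFER_VIEW: arange(12).reshape(4, 3) is row-major, so the shrunk region (row 1, columns 1..2, i.e. the values 4 and 5) sits at consecutive flat offsets, and .contiguous() can be served by an offset view of the existing buffer instead of a copy kernel. A numpy sketch of the equivalence (illustration only):

    import numpy as np
    flat = np.arange(12)
    region = flat.reshape(4, 3)[1:2, 1:3]               # the shrink: [[4, 5]]
    assert (region == flat[4:4+2].reshape(1, 2)).all()  # same data at flat offset 4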

View File

@@ -48,7 +48,7 @@ class TestVerifyAST(unittest.TestCase):
   def test_no_implicit_broadcasting(self):
     bufs = [UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), (), i) for i in range(2)]
     a = UOp(Ops.LOAD, dtypes.float, (bufs[1], ShapeTracker.from_shape((4, 32)).to_uop()))
-    b = a + UOp(Ops.REDUCE_AXIS, dtypes.float, (a,), (ReduceOps.MAX, (1,)))
+    b = a + UOp(Ops.REDUCE_AXIS, dtypes.float, (a,), (ReduceOps.REDUCE_MAX, (1,)))
     st = UOp(Ops.STORE, dtypes.void, (bufs[0], ShapeTracker.from_shape((4, 32)).to_uop(), b))
     with self.assertRaises(InvalidASTException): helper_test_verify_ast(st)
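
The AST is invalid because REDUCE_AXIS over axis 1 of the (4, 32) load yields a (4, 1)-shaped value, and adding that to the (4, 32) load would require an implicit broadcast; inside a kernel AST every operand must carry an explicit, matching ShapeTracker. What the forbidden broadcast would compute, in plain numpy (illustration only):

    import numpy as np
    a = np.zeros((4, 32))
    r = a.max(axis=1, keepdims=True)  # shape (4, 1) after the reduce
    assert (a + r).shape == (4, 32)   # numpy broadcasts; the verifier rejects this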

View File

@@ -151,7 +151,7 @@ def get_realizes(outs:List[LazyBuffer], ctx) -> Tuple[List[List[UOp]], Dict[Buff
   for r in reduce_of_const:
     group = {tr:None for tr,rop in reduce_for_op.items() if rop is r}
     if any(tr.forced_realize for tr in group) or any(x.base in group for x in outs): continue
-    kernel_children = {c for tr in group for c in children[tr] if c.op not in {MetaOps.COPY, MetaOps.VIEW}}
+    kernel_children = {c for tr in group for c in children[tr] if c.op not in {MetaOps.COPY, MetaOps.BUFFER_VIEW}}
     if len(kernel_children) == 0: continue
     for tr in group:
       del realizes[tr]

View File

@@ -34,7 +34,7 @@ class LazyBuffer(MathTrait):
     self.op, self.arg, self.srcs = op, arg, srcs # this is a UOp, except the src is LazyBuffers and not UOps
     assert self.op is not MetaOps.ASSIGN or srcs[0].base.realized is not None, "assign target must be realized"
-    if self.op is MetaOps.VIEW:
+    if self.op is MetaOps.BUFFER_VIEW:
       # some LazyBuffers can be processed with only a view, no AST required
       self.buffer: Buffer = srcs[0].base.buffer.view(st.size, self.dtype, srcs[0].st.views[0].offset * srcs[0].dtype.itemsize)
     else:
@@ -89,7 +89,7 @@ class LazyBuffer(MathTrait):
   def contiguous(self, allow_buffer_view=True):
     if not self.st.contiguous or self.size != self.base.size or self.is_unrealized_const():
-      ret = self.alu(MetaOps.VIEW) if allow_buffer_view and self.can_view() else self.alu(MetaOps.CONTIGUOUS)
+      ret = self.alu(MetaOps.BUFFER_VIEW) if allow_buffer_view and self.can_view() else self.alu(MetaOps.CONTIGUOUS)
       if (sti := self.st.invert(self.base.shape)) is not None: self.base.contiguous_child = ref(ret), sti
       return ret
     self.base.forced_realize = True
@@ -111,7 +111,8 @@ class LazyBuffer(MathTrait):
     elif getenv("CAST_BEFORE_VIEW", 1) and dtype.itemsize <= self.dtype.itemsize and self is not self.base:
       # TODO: applying this makes gpt2 slower
       return self.base.cast(dtype, bitcast)._view(self.st)
-    cast_op: Union[MetaOps, UnaryOps] = (MetaOps.VIEW if self.can_view() and allow_buffer_view else UnaryOps.BITCAST) if bitcast else UnaryOps.CAST
+    cast_op: Union[MetaOps, UnaryOps] = \
+      (MetaOps.BUFFER_VIEW if self.can_view() and allow_buffer_view else UnaryOps.BITCAST) if bitcast else UnaryOps.CAST
     return create_lazybuffer(self.device, ShapeTracker.from_shape(new_shape), dtype, cast_op, dtype, (self,))
   def is_unrealized_const(self): return self.base.realized is None and self.base.op is MetaOps.CONST and not isinstance(self.base.arg, UOp)
@@ -188,7 +189,7 @@ class LazyBuffer(MathTrait):
     if self.is_unrealized_unmasked_const() and all_int(self.shape):
       if op is ReduceOps.SUM: return self.const_with_shape(self.base.arg * prod(self.shape[i] for i in axis), new_shape)
       if op is ReduceOps.PROD: return self.const_with_shape(self.base.arg ** prod(self.shape[i] for i in axis), new_shape)
-      if op is ReduceOps.MAX: return self.const_with_shape(self.base.arg, new_shape)
+      if op is ReduceOps.REDUCE_MAX: return self.const_with_shape(self.base.arg, new_shape)
     # TODO: can we split symbolic shape if the reduce axis is not symbolic?
     if not SPLIT_REDUCEOP or not all_int(self.shape) or (0 in self.shape) or \
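
The BUFFER_VIEW construction in the first hunk is base.buffer.view(size_in_elements, dtype, offset_in_bytes), with the byte offset derived from the view's element offset times the source itemsize. A numpy sketch of that arithmetic (illustration only, not tinygrad's Buffer API):

    import numpy as np
    base = np.arange(12, dtype=np.int32)
    offset_elems, size = 4, 2
    view = np.frombuffer(base, dtype=np.int32, count=size,
                         offset=offset_elems * base.itemsize)
    assert (view == [4, 5]).all()  # shares memory with base; no copy was made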

View File

@@ -15,7 +15,7 @@ from tinygrad.device import Buffer
 sys.setrecursionlimit(10000)
 BUF_LIMIT = {"METAL":32}
-METAOPS = {MetaOps.COPY:Ops.COPY, MetaOps.EMPTY:Ops.EMPTY, MetaOps.VIEW:Ops.BUFFER_VIEW}
+METAOPS = {MetaOps.COPY:Ops.COPY, MetaOps.EMPTY:Ops.EMPTY, MetaOps.BUFFER_VIEW:Ops.BUFFER_VIEW}

 # **** ScheduleItem return type
View File

@@ -156,7 +156,7 @@ class Prod(Function):
 class Max(Function):
   def forward(self, x:LazyBuffer, axis:Tuple[int, ...]) -> LazyBuffer:
-    self.x, self.ret, self.axis = x, x.r(ReduceOps.MAX, axis), axis
+    self.x, self.ret, self.axis = x, x.r(ReduceOps.REDUCE_MAX, axis), axis
     return self.ret
   def backward(self, grad_output:LazyBuffer) -> LazyBuffer:
View File

@@ -31,9 +31,9 @@ class TernaryOps(FastEnum):
   WHERE = auto(); MULACC = auto() # noqa: E702
 class ReduceOps(FastEnum):
   """A -> B (reduce)"""
-  SUM = auto(); PROD = auto(); MAX = auto() # noqa: E702
+  SUM = auto(); PROD = auto(); REDUCE_MAX = auto() # noqa: E702
 class MetaOps(FastEnum):
-  EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); ASSIGN = auto(); VIEW = auto() # noqa: E702
+  EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); ASSIGN = auto(); BUFFER_VIEW = auto() # noqa: E702
 Op = Union[UnaryOps, BinaryOps, ReduceOps, MetaOps, TernaryOps]
 class SimpleMathTrait:
@@ -118,7 +118,7 @@ class MathTrait(SimpleMathTrait): # pylint: disable=abstract-method
 # do not preserve f(0) = 0
 UNSAFE_PAD_OPS = {UnaryOps.RECIP, UnaryOps.LOG2, UnaryOps.EXP2, BinaryOps.IDIV}
-REDUCE_ALU: Dict[ReduceOps, BinaryOps] = {ReduceOps.SUM:BinaryOps.ADD, ReduceOps.PROD:BinaryOps.MUL, ReduceOps.MAX:BinaryOps.MAX}
+REDUCE_ALU: Dict[ReduceOps, BinaryOps] = {ReduceOps.SUM:BinaryOps.ADD, ReduceOps.PROD:BinaryOps.MUL, ReduceOps.REDUCE_MAX:BinaryOps.MAX}
 # https://en.wikipedia.org/wiki/Identity_element
 def identity_element(op:BinaryOps, dt:DType): return dtypes.as_const({BinaryOps.ADD:0, BinaryOps.MUL:1, BinaryOps.MAX:dtypes.min(dt)}[op], dt)
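
The point of the rename, per the commit title: member names no longer collide across the op enums. ReduceOps.MAX shared a name with BinaryOps.MAX, and MetaOps.VIEW now matches the Ops.BUFFER_VIEW it maps to in the scheduler's METAOPS table above, presumably so values from different enums can share one namespace. A hypothetical sketch of the collision this avoids (plain enums, not tinygrad's FastEnum):

    from enum import Enum, auto
    class BinaryOps(Enum): ADD = auto(); MUL = auto(); MAX = auto()
    class ReduceOps(Enum): SUM = auto(); PROD = auto(); REDUCE_MAX = auto()
    merged = {op.name: op for ops in (BinaryOps, ReduceOps) for op in ops}
    assert len(merged) == 6  # with ReduceOps.MAX this would silently drop to 5

REDUCE_ALU also stays unambiguous to read: ReduceOps.REDUCE_MAX lowers to BinaryOps.MAX, whose identity element is dtypes.min(dt).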