UOps. -> Ops. [pr] (#9044)

updated the comments and docs, except those under extra/
chenyu authored 2025-02-12 12:53:23 -05:00, committed by GitHub
parent 6811688d29
commit f53b819648
6 changed files with 19 additions and 19 deletions

View File

@@ -521,7 +521,7 @@ class TestLinearizer(unittest.TestCase):
first_x = UOp(Ops.LOAD, dtypes.float, (g1, x.lazydata.st.reshape((3, 27, 1, 32)).expand((3, 27, 32, 32)).to_uop()))
first_reduce = UOp(Ops.REDUCE_AXIS, dtypes.float, (first_x,), (Ops.ADD, (3,)))
neg_mean = first_reduce * ast_const(dtypes.float, -0.03125, (3, 27, 32, 1))
-# store = UOp(UOps.STORE, src=(g0, ShapeTracker.from_shape((3, 27, 32, 1)).to_uop(), mean))
+# store = UOp(Ops.STORE, src=(g0, ShapeTracker.from_shape((3, 27, 32, 1)).to_uop(), mean))
# verify_lazyop(store)
second_x = UOp(Ops.LOAD, dtypes.float, (g1, x.lazydata.st.reshape((3, 27, 32, 1)).to_uop()))
squares = (second_x+neg_mean)*(second_x+neg_mean)
@@ -854,7 +854,7 @@ class TestLinearizer(unittest.TestCase):
ranges = [i for i,u in enumerate(lin.uops) if u.op is Ops.RANGE]
assert len(ranges) == 1 # NOTE: it collapses now
# RANGE -> LOAD -> RANGE -> ASSIGN
-#assert any(x.op is UOps.LOAD for x in lin.uops[ranges[0]:ranges[1]])
+#assert any(x.op is Ops.LOAD for x in lin.uops[ranges[0]:ranges[1]])
def test_three_nested_range(self):
a = Tensor.randn(2, ).realize()
@@ -865,7 +865,7 @@ class TestLinearizer(unittest.TestCase):
# RANGE -> RANGE -> LOAD -> RANGE -> ASSIGN
# NOTE: nothing should toposort between the first two ranges
#assert ranges[0]+1 == ranges[1]
-#assert any(x.op is UOps.LOAD for x in lin.uops[ranges[1]:ranges[2]])
+#assert any(x.op is Ops.LOAD for x in lin.uops[ranges[1]:ranges[2]])
def test_two_nested_range_alt_indexing(self):
a = Tensor([2, 2]).realize()
@@ -895,14 +895,14 @@ class TestLinearizer(unittest.TestCase):
assert len(ranges) == 1 # NOTE: it collapses now
#if getenv("PTX"):
# LOAD -> RANGE -> CAST -> ALU -> ALU -> LOAD -> ALU -> RANGE -> ALU -> ASSIGN
-# assert lin.uops[ranges[0]-2].op is UOps.LOAD
+# assert lin.uops[ranges[0]-2].op is Ops.LOAD
# assert ranges[1] == ranges[0]+6
-# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [UOps.LOAD, UOps.ALU]
+# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [Ops.LOAD, Ops.ALU]
# LOAD -> RANGE -> LOAD -> ALU -> RANGE -> ASSIGN
#else:
-# assert lin.uops[ranges[0]-2].op is UOps.LOAD
+# assert lin.uops[ranges[0]-2].op is Ops.LOAD
# assert ranges[1] == ranges[0]+3
-# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [UOps.LOAD, UOps.ALU]
+# assert [x.op for x in lin.uops[ranges[1]-2:ranges[1]]] == [Ops.LOAD, Ops.ALU]
def test_range_outer_op_after_phi(self):
a = Tensor.randn(4, 1).realize()
@@ -1306,7 +1306,7 @@ class TestLinearizer(unittest.TestCase):
# check that the float4 cast collapses
store_vals = [u.src[-1] for u in k.uops if u.op is Ops.STORE]
for val in store_vals:
-assert val.dtype == dtypes.float.vec(4) # and val.op is not UOps.VECTORIZE
+assert val.dtype == dtypes.float.vec(4) # and val.op is not Ops.VECTORIZE
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_shared, "test requires shared")
@@ -1345,7 +1345,7 @@ class TestLinearizer(unittest.TestCase):
barrier = [u for u in k.uops if u.op is Ops.BARRIER][0]
# check that the float4 cast collapses for all stores
for store in local_stores+global_stores:
-assert store.src[-1].dtype.count > 1 # and store.src[2].op is not UOps.VECTORIZE
+assert store.src[-1].dtype.count > 1 # and store.src[2].op is not Ops.VECTORIZE
# # check the children's vins
# TODO: src ALU are not the same, should it?
# assert barrier.src == tuple(local_stores)
@@ -1362,7 +1362,7 @@ class TestLinearizer(unittest.TestCase):
# the float4 value stores directly in lds and we skip upcast
self.assertEqual(stores[0].src[-1].dtype, dtypes.float.vec(4))
-#assert stores[0].src[-1].op is not UOps.VECTORIZE
+#assert stores[0].src[-1].op is not Ops.VECTORIZE
# the global store doesn't change
assert stores[1].src[-1].dtype == dtypes.float

View File

@@ -117,7 +117,7 @@ class TestLinearizerFailures(unittest.TestCase):
ast_const(dtypes.int, 10, st_src=(
UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(10, 1), strides=(0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),))
opts = [Opt(op=OptOps.UPCAST, axis=0, arg=2), Opt(op=OptOps.UPCAST, axis=0, arg=0)]
-# COMPILE FAILED, KeyError: UOps.CONST
+# COMPILE FAILED, KeyError: Ops.CONST
helper_test_lin(Kernel(ast), opts, failed_platforms=[])
def test_failure_7(self):
@@ -804,7 +804,7 @@ class TestLinearizerFailures(unittest.TestCase):
helper_test_lin(Kernel(ast), opts=opts, failed_platforms=[], atol=0.1, rtol=0.05)
def test_failure_33(self):
-# UOps.UNMUL left after linearize
+# Ops.UNMUL left after linearize
ast = UOp(Ops.SINK, dtypes.void, arg=None, src=(
UOp(Ops.STORE, dtypes.void, arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=0, src=()),
@@ -868,7 +868,7 @@ class TestLinearizerFailures(unittest.TestCase):
# from world fuzz_linearizer: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_N=100 FUZZ_NTH=84 python3 ./test/external/fuzz_linearizer.py
def test_failure_36(self):
-# UOps.UNMUL left after linearize
+# Ops.UNMUL left after linearize
ast = UOp(Ops.SINK, dtypes.void, arg=None, src=(
UOp(Ops.STORE, dtypes.void, arg=None, src=(
UOp(Ops.DEFINE_GLOBAL, dtypes.uchar.ptr(), arg=0, src=()),

View File

@@ -325,8 +325,8 @@ class Kernel:
-1: iterates through all available tensor cores in order and uses the first one that matches the requirements (dims and dtypes)
[0-N]: uses only the n'th tensor core available; useful for search
tc_opt -- controls which kinds of kernels may be eligible for tensor cores application (default 2 during BEAM, 0 otherwise)
-0: applies to only kernels with a single reduce axis and direct UOps.LOAD into Ops.MUL
-1: allows kernels with multiple reduce axes and also multiplication of UOps.CAST'd buffers
+0: applies to only kernels with a single reduce axis and direct Ops.LOAD into Ops.MUL
+1: allows kernels with multiple reduce axes and also multiplication of Ops.CAST'd buffers
2: allows kernels with M, N, K axes that are not multiples of the tensor core dimensions by applying padding those axes as needed
"""
if tc_select is None: tc_select = TC_SELECT.value
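The options documented above can also be driven directly from Python. A minimal sketch, assuming Kernel.apply_tensor_cores keeps the tc_select/tc_opt keyword arguments shown in this docstring and that the last scheduled kernel for a realized matmul is the matmul itself:

# hedged sketch, not part of this commit
from tinygrad import Tensor, Device
from tinygrad.codegen.kernel import Kernel

a = Tensor.rand(64, 64).realize()
b = Tensor.rand(64, 64).realize()
ast = (a @ b).schedule()[-1].ast                       # AST of the matmul kernel (assumption)
k = Kernel(ast, opts=Device[Device.DEFAULT].renderer)
# tc_select=-1: try every available tensor core; tc_opt=2: allow padding M/N/K axes
applied = k.apply_tensor_cores(tc_select=-1, tc_opt=2)
print("tensor cores applied:", applied)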

View File

@@ -638,7 +638,7 @@ class UOp(MathTrait, metaclass=UOpMetaClass):
if self.op is Ops.RANGE: return self.src[0].vmin, (self.src[1]-1).vmax
if self.op is Ops.BIND: return self.src[0]._min_max # ignore the bound value
if self.op in {Ops.UNROLL, Ops.VECTORIZE}: return min(x.vmin for x in self.src), max(x.vmax for x in self.src)
-# TODO: UOps.SPECIAL is UOps.DEFINE_VAR
+# TODO: Ops.SPECIAL is Ops.DEFINE_VAR
if self.op is Ops.SPECIAL: return 0, self.arg[1]-1 if isinstance(self.arg[1], int) else self.arg[1].vmax
if self.op is Ops.CONST: return self.arg, self.arg
if self.op is Ops.VCONST: return (min(self.arg), max(self.arg))
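Taken together, these rules compute a conservative [vmin, vmax] interval per uop. As a self-contained toy (illustrative only; the real logic lives on UOp and is cached), the same rules for a few ops:

from dataclasses import dataclass

@dataclass(frozen=True)
class Node:
    op: str            # "CONST" | "VCONST" | "VECTORIZE" | "RANGE"
    arg: object = None
    src: tuple = ()

def min_max(n: Node) -> tuple[int, int]:
    if n.op == "CONST": return n.arg, n.arg                 # a constant bounds itself
    if n.op == "VCONST": return min(n.arg), max(n.arg)      # vector const: min/max over lanes
    if n.op == "VECTORIZE":                                 # bounds must cover every lane
        return min(min_max(x)[0] for x in n.src), max(min_max(x)[1] for x in n.src)
    if n.op == "RANGE":                                     # loop over [start, end): counter <= end-1
        return min_max(n.src[0])[0], min_max(n.src[1])[1] - 1
    raise NotImplementedError(n.op)

# a loop counter over range(0, 10) is bounded by [0, 9]
assert min_max(Node("RANGE", src=(Node("CONST", 0), Node("CONST", 10)))) == (0, 9)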

View File

@@ -121,8 +121,8 @@ class Renderer:
has_local: bool = True
has_shared: bool = True
# NOTE: these two should be in (x,y,z) order to match the max_sizes argument in get_grouped_dims
-global_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: UOps.SPECIAL int32 indexes right now
-local_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: UOps.SPECIAL int32 indexes right now
+global_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: Ops.SPECIAL int32 indexes right now
+local_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * (3) # TODO: Ops.SPECIAL int32 indexes right now
shared_max: int = 32768
tensor_cores: list[TensorCore] = []
extra_matcher: Optional[PatternMatcher] = None
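These attributes are per-backend launch limits, in (x, y, z) order per the NOTE above. A hypothetical illustration of a backend narrowing the defaults; ToyRenderer stands in for Renderer, and the subclass values are the usual CUDA hardware caps, not numbers from this diff:

from typing import Optional

class ToyRenderer:
    global_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * 3  # int32-indexed for now
    local_max: Optional[tuple[int, ...]] = (0x8FFFFFFF,) * 3
    shared_max: int = 32768

class ToyCUDARenderer(ToyRenderer):
    global_max = (2147483647, 65535, 65535)  # max grid dims, (x, y, z)
    local_max = (1024, 1024, 64)             # max threads per block dim, (x, y, z)
    shared_max = 49152                       # 48 KiB default shared memory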

View File

@@ -108,7 +108,7 @@ spec = PatternMatcher([
(UPat(Ops.BARRIER, dtypes.void, src=UPat(Ops.STORE, allow_any_len=True)), lambda: True), # NOTE: all pointers must be local
# NOTE: for testing, we let sinks be anything
-#(UPat(UOps.SINK, src=UPat(UOps.STORE)), lambda: True),
+#(UPat(Ops.SINK, src=UPat(Ops.STORE)), lambda: True),
(UPat(Ops.SINK, dtypes.void), lambda: True),
(UPat((Ops.NOOP, Ops.CUSTOM)), lambda: True),
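The spec is a list of (pattern, verdict) rules tried in order against each uop. A toy version with plain predicates in place of UPat (illustrative only; the real matcher does structural matching on op, dtype, and src):

from dataclasses import dataclass

@dataclass(frozen=True)
class U:
    op: str
    dtype: str = "void"
    src: tuple = ()

# first rule whose predicate matches decides whether the uop is well-formed
rules = [
    (lambda u: u.op == "BARRIER" and len(u.src) > 0 and all(s.op == "STORE" for s in u.src), True),
    (lambda u: u.op == "SINK" and u.dtype == "void", True),
    (lambda u: u.op in {"NOOP", "CUSTOM"}, True),
]

def verify(u: U) -> bool:
    return next((ok for match, ok in rules if match(u)), False)

assert verify(U("BARRIER", src=(U("STORE"), U("STORE"))))  # barrier over stores: ok
assert not verify(U("BARRIER", src=(U("LOAD"),)))          # barrier over a load: rejected
assert verify(U("SINK"))                                   # a void SINK passes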