remove custom op support, we waste time maintaining this (#6991)

* remove custom op support, we waste time maintaining this * customop is over
2026-04-29 03:00:14 -04:00 · 2024-10-11 14:31:09 +08:00
parent c08521e823
commit e441794c4b
6 changed files with 5 additions and 119 deletions
--- a/test/external/fuzz_schedule.py
+++ b/test/external/fuzz_schedule.py
@@ -2,7 +2,7 @@ import itertools
 import numpy as np
 from typing import DefaultDict, Dict, List, Set, Tuple, TypeVar, Union
 from tinygrad.device import Buffer
-from tinygrad.engine.realize import CustomOp, capturing, lower_schedule_item
+from tinygrad.engine.realize import capturing, lower_schedule_item
 from tinygrad.helpers import DEBUG, MULTIOUTPUT, colored, getenv
 from tinygrad.engine.lazy import LazyBuffer
 from tinygrad.engine.schedule import LBScheduleItem, _graph_schedule, ScheduleItem
@@ -72,7 +72,6 @@ def fuzz_schedule(outs:List[LazyBuffer]):
 def _exec_si(si:ScheduleItem, seed:int):
  ei = lower_schedule_item(si)
  if len(capturing): capturing[0].add(ei)
-  if isinstance(ei.prg, CustomOp): Tensor._seed = seed
  ei.run()

 T = TypeVar("T")
--- a/test/test_custom_function.py
+++ b/test/test_custom_function.py
@@ -1,105 +0,0 @@
-# this is an example of how you can write terrible DSP compute breaking ops like warpPerspective
-# here we use a CUSTOM op to write atan2
-
-import unittest
-import numpy as np
-from typing import Optional, Tuple
-from tinygrad.helpers import prod
-from tinygrad.dtype import dtypes
-
-# *** first, we implement the atan2 op at the lowest level ***
-# `atan2_gpu` for GPUBuffers and `atan2_cpu` for CPUBuffers
-from tinygrad.engine.lazy import Buffer, create_lazybuffer
-from tinygrad.device import Device
-from tinygrad.shape.shapetracker import ShapeTracker
-from tinygrad.engine.realize import CompiledRunner
-from tinygrad.renderer import Program
-
-# we don't always have GPU support, so the type signature is the abstract CompiledBuffer instead of GPUBuffer
-def atan2_gpu(ret:Buffer, a:Buffer, b:Buffer):
-  assert a.dtype == b.dtype and a.dtype == dtypes.float32, "gpu function only supports float32"
-  src = """
-  __kernel void atan2_gpu(global float *c, global float *a, global float *b) {
-    int idx = get_global_id(0);
-    c[idx] = atan2(a[idx], b[idx]);
-  }"""
-  CompiledRunner(Program("atan2_gpu", src, ret.device, global_size=[ret.size,1,1])).exec([ret, a, b])
-
-def atan2_cpu(ret:Buffer, a:Buffer, b:Buffer): ret.copyin(np.require(np.arctan2(a._buf, b._buf), requirements='C').data)
-
-# *** second, we write the ATan2 mlop ***
-# NOTE: The derivative of atan2 doesn't need a custom op! https://www.liquisearch.com/atan2/derivative
-# In general, it is also optional to write a backward function, just your backward pass won't work without it
-
-from tinygrad.ops import MetaOps
-from tinygrad.engine.lazy import LazyBuffer
-from tinygrad.tensor import Function
-
-class ATan2(Function):
-  def forward(self, a:LazyBuffer, b:LazyBuffer) -> LazyBuffer:
-    assert prod(a.shape) == prod(b.shape) and a.device == b.device, "shape or device mismatch"
-    self.a, self.b = a, b
-    return create_lazybuffer(a.device, ShapeTracker.from_shape(a.shape), max(a.dtype, b.dtype), MetaOps.CUSTOM,
-                             arg={"GPU": atan2_gpu, "CPU": atan2_cpu}[a.device], srcs=(a.contiguous(), b.contiguous()))
-  def backward(self, grad_output:LazyBuffer) -> Tuple[Optional[LazyBuffer], Optional[LazyBuffer]]:
-    recip = (self.a * self.a + self.b * self.b).recip()
-    return (grad_output * self.b * recip) if self.needs_input_grad[0] else None, \
-           (grad_output * -self.a * recip) if self.needs_input_grad[1] else None
-
-# *** third, we use our lovely new mlop in some tests ***
-
-from tinygrad.tensor import Tensor
-
-@unittest.skipUnless(Device.DEFAULT in ["CPU", "GPU"], "atan2 is only implemented for CPU and GPU")
-class TestCustomFunction(unittest.TestCase):
-  def test_atan2_forward(self):
-    # create some random Tensors, permute them just because we can
-    a = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-    b = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-
-    # run the forward pass. note: up until the .numpy(), it's all lazy
-    c = ATan2.apply(a, b)
-    print(c.numpy())
-
-    # check the forward pass (in numpy)
-    np.testing.assert_allclose(c.numpy(), np.arctan2(a.numpy(), b.numpy()), atol=1e-5)
-
-  # fun fact, this never actually calls forward, so it works in all the backends
-  def test_atan2_backward(self):
-    # have to go forward before we can go backward
-    a = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-    b = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-    c = ATan2.apply(a, b)
-
-    # run the backward pass
-    c.mean().backward()
-    assert a.grad is not None and b.grad is not None, "tinygrad didn't compute gradients"
-    print(a.grad.numpy())
-    print(b.grad.numpy())
-
-    # check the backward pass (in torch)
-    import torch
-    ta, tb = torch.tensor(a.numpy(), requires_grad=True), torch.tensor(b.numpy(), requires_grad=True)
-    tc = torch.atan2(ta, tb)
-    tc.mean().backward()
-    assert ta.grad is not None and tb.grad is not None, "torch didn't compute gradients"
-    np.testing.assert_allclose(a.grad.numpy(), ta.grad.numpy(), atol=1e-5)
-    np.testing.assert_allclose(b.grad.numpy(), tb.grad.numpy(), atol=1e-5)
-
-  @unittest.skipIf(Device.DEFAULT in ["CPU"], "atan2_cpu not jittable")
-  def test_atan2_jit(self):
-    # custom ops even work in the JIT!
-    from tinygrad.engine.jit import TinyJit
-
-    @TinyJit
-    def jitted_atan2(a:Tensor, b:Tensor) -> Tensor:
-      return ATan2.apply(a, b).realize()
-
-    for _ in range(5):
-      a = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-      b = Tensor.randn(4,4,requires_grad=True).permute(1,0)
-      c = jitted_atan2(a, b)
-      np.testing.assert_allclose(c.numpy(), np.arctan2(a.numpy(), b.numpy()), atol=1e-5)
-
-if __name__ == "__main__":
-  unittest.main()