From 4efe0169bbdede12e2b8bde16f527f9354e66274 Mon Sep 17 00:00:00 2001
From: George Hotz <geohot@gmail.com>
Date: Sat, 18 Feb 2023 13:51:48 -0800
Subject: [PATCH] remove old metal implementation

---
 accel/metal/ops_metal.py | 175 ---------------------------------------
 1 file changed, 175 deletions(-)
 delete mode 100644 accel/metal/ops_metal.py

diff --git a/accel/metal/ops_metal.py b/accel/metal/ops_metal.py
deleted file mode 100644
index 5df2b03c89..0000000000
--- a/accel/metal/ops_metal.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# pip3 install pyobjc-framework-MetalPerformanceShaders
-from tinygrad.tensor import Function
-from tinygrad.helpers import binary_broadcast
-import numpy as np
-import Metal
-import MetalPerformanceShaders
-
-device = Metal.MTLCreateSystemDefaultDevice()
-mtl_queue = device.newCommandQueue()
-mtl_buffers = []
-
-def cmd_buffer():
-  ret = mtl_queue.commandBuffer()
-  mtl_buffers.append(ret)
-  return ret
-
-class MetalBuffer:
-  def __init__(self, shape, hostbuf=None):
-    self.sz = np.prod(shape)*4
-    # TODO: fix this limit
-    assert self.sz < 16384
-    if hostbuf is not None:
-      if isinstance(hostbuf, MetalBuffer):
-        self.mtl = hostbuf.mtl
-      else:
-        self.mtl = device.newBufferWithBytes_length_options_(
-          hostbuf.astype(np.float32).data,
-          self.sz,
-          Metal.MTLResourceStorageModeShared)
-    else:
-      self.mtl = device.newBufferWithLength_options_(
-        self.sz,
-        Metal.MTLResourceStorageModeShared)
-    self.shape = shape
-    self.dtype = np.float32
-
-    self.descriptor = Metal.MTLTextureDescriptor.alloc().init()
-    self.descriptor.setPixelFormat_(Metal.MTLPixelFormatR32Float)
-    self.descriptor.setWidth_(np.prod(shape))
-
-    tsz = (self.sz+15)
-    tsz -= tsz%16
-    self.texture = self.mtl.newTextureWithDescriptor_offset_bytesPerRow_(self.descriptor, 0, tsz)
-
-  @staticmethod
-  def fromCPU(data):
-    return MetalBuffer(data.shape, data)
-
-  def toCPU(self):
-    global mtl_buffers
-    for b in mtl_buffers:
-      b.waitUntilCompleted()
-    mtl_buffers = []
-    return np.frombuffer(b''.join(self.mtl.contents()[0:self.sz]), dtype=self.dtype).reshape(self.shape)
-
-relu_shader = MetalPerformanceShaders.MPSImageThresholdToZero.alloc().initWithDevice_thresholdValue_linearGrayColorTransform_(device, 0, None)
-inv_relu_shader = MetalPerformanceShaders.MPSImageThresholdBinary.alloc().initWithDevice_thresholdValue_maximumValue_linearGrayColorTransform_(device, 0, 1, None)
-add_shader = MetalPerformanceShaders.MPSImageAdd.alloc().initWithDevice_(device)
-sub_shader = MetalPerformanceShaders.MPSImageSubtract.alloc().initWithDevice_(device)
-mul_shader = MetalPerformanceShaders.MPSImageMultiply.alloc().initWithDevice_(device)
-sum_shader = MetalPerformanceShaders.MPSImageReduceRowSum.alloc().initWithDevice_(device)
-
-def unary_op(shader, input):
-  out = MetalBuffer(input.shape, None)
-  mtl_buffer = cmd_buffer()
-  shader.encodeToCommandBuffer_sourceTexture_destinationTexture_(
-    mtl_buffer, input.texture, out.texture
-  )
-  mtl_buffer.commit()
-  return out
-
-def binary_op(shader, x, y):
-  ret = MetalBuffer(x.shape, None)
-  mtl_buffer = cmd_buffer()
-  shader.setPrimaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp)
-  shader.setSecondaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp)
-  shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_(
-    mtl_buffer, x.texture, y.texture, ret.texture
-  )
-  mtl_buffer.commit()
-  return ret
-
-class Sum(Function):
-  def forward(ctx, input, axis=None):
-    assert axis is None or len(axis) == len(input.shape)
-    ctx.save_for_backward(input.shape, axis)
-    out = MetalBuffer((1,), None)
-    mtl_buffer = cmd_buffer()
-    sum_shader.encodeToCommandBuffer_sourceTexture_destinationTexture_(
-      mtl_buffer, input.texture, out.texture
-    )
-    mtl_buffer.commit()
-    return out
-
-  def backward(ctx, grad_output):
-    shape, axis = ctx.saved_tensors
-    out = MetalBuffer(shape, None)
-    return binary_op(add_shader, out, grad_output)
-
-class ReLU(Function):
-  def forward(ctx, input):
-    ctx.save_for_backward(input)
-    return unary_op(relu_shader, input)
-
-  def backward(ctx, grad_output):
-    input, = ctx.saved_tensors
-    return binary_op(mul_shader, unary_op(inv_relu_shader, input), grad_output)
-
-class Add(Function):
-  def forward(ctx, x, y):
-    ctx.save_for_backward(x, y)
-    return binary_op(add_shader, x, y)
-
-  def backward(ctx, grad_output):
-    x,y = ctx.saved_tensors
-    return grad_output, grad_output
-
-class Sub(Function):
-  def forward(ctx, x, y):
-    ctx.save_for_backward(x, y)
-    return binary_op(sub_shader, x, y)
-
-  def backward(ctx, grad_output):
-    x,y = ctx.saved_tensors
-    out = MetalBuffer(y.shape, None)
-    return grad_output, binary_op(sub_shader, out, grad_output)
-
-class Mul(Function):
-  def forward(ctx, x, y):
-    ctx.save_for_backward(x, y)
-    return binary_op(mul_shader, x, y)
-
-  def backward(ctx, grad_output):
-    x,y = ctx.saved_tensors
-    grad_x = binary_op(mul_shader, y, grad_output)
-    grad_y = binary_op(mul_shader, x, grad_output)
-    return grad_x, grad_y
-
-class Reshape(Function):
-  def forward(ctx, x, shape):
-    ctx.save_for_backward(x.shape)
-    # TODO: move this into global reshape?
-    shape = tuple(-np.prod(x.shape) // np.prod(shape) if s == -1 else s for s in shape)
-    return MetalBuffer(shape, x)
-
-  def backward(ctx, grad_output):
-    in_shape, = ctx.saved_tensors
-    return MetalBuffer(in_shape, grad_output)
-
-# METAL=1 python3 test/test_ops.py TestOps.test_relu
-if __name__ == "__main__":
-  b1 = MetalBuffer(10, np.ones(10))
-  b2 = MetalBuffer(10, np.ones(10))
-  out = MetalBuffer(10, None)
-
-  mtl_buffer = cmd_buffer()
-  add_shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_(
-    mtl_buffer, b1.texture, b2.texture, out.texture
-  )
-  mtl_buffer.commit()
-
-  print(b1.toCPU())
-  print(b2.toCPU())
-  print(out.toCPU())
-
-  from tinygrad.tensor import Tensor, Device
-
-  r1 = Tensor([-2,-1,0,2,4], device=Device.METAL)
-  r2 = r1.relu()
-  r3 = r2.sum()
-  r3.backward()
-  print(r1.cpu())
-  print(r2.cpu())
-  print(r3.cpu())
-