From 4efe0169bbdede12e2b8bde16f527f9354e66274 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Sat, 18 Feb 2023 13:51:48 -0800 Subject: [PATCH] remove old metal implementation --- accel/metal/ops_metal.py | 175 --------------------------------------- 1 file changed, 175 deletions(-) delete mode 100644 accel/metal/ops_metal.py diff --git a/accel/metal/ops_metal.py b/accel/metal/ops_metal.py deleted file mode 100644 index 5df2b03c89..0000000000 --- a/accel/metal/ops_metal.py +++ /dev/null @@ -1,175 +0,0 @@ -# pip3 install pyobjc-framework-MetalPerformanceShaders -from tinygrad.tensor import Function -from tinygrad.helpers import binary_broadcast -import numpy as np -import Metal -import MetalPerformanceShaders - -device = Metal.MTLCreateSystemDefaultDevice() -mtl_queue = device.newCommandQueue() -mtl_buffers = [] - -def cmd_buffer(): - ret = mtl_queue.commandBuffer() - mtl_buffers.append(ret) - return ret - -class MetalBuffer: - def __init__(self, shape, hostbuf=None): - self.sz = np.prod(shape)*4 - # TODO: fix this limit - assert self.sz < 16384 - if hostbuf is not None: - if isinstance(hostbuf, MetalBuffer): - self.mtl = hostbuf.mtl - else: - self.mtl = device.newBufferWithBytes_length_options_( - hostbuf.astype(np.float32).data, - self.sz, - Metal.MTLResourceStorageModeShared) - else: - self.mtl = device.newBufferWithLength_options_( - self.sz, - Metal.MTLResourceStorageModeShared) - self.shape = shape - self.dtype = np.float32 - - self.descriptor = Metal.MTLTextureDescriptor.alloc().init() - self.descriptor.setPixelFormat_(Metal.MTLPixelFormatR32Float) - self.descriptor.setWidth_(np.prod(shape)) - - tsz = (self.sz+15) - tsz -= tsz%16 - self.texture = self.mtl.newTextureWithDescriptor_offset_bytesPerRow_(self.descriptor, 0, tsz) - - @staticmethod - def fromCPU(data): - return MetalBuffer(data.shape, data) - - def toCPU(self): - global mtl_buffers - for b in mtl_buffers: - b.waitUntilCompleted() - mtl_buffers = [] - return np.frombuffer(b''.join(self.mtl.contents()[0:self.sz]), dtype=self.dtype).reshape(self.shape) - -relu_shader = MetalPerformanceShaders.MPSImageThresholdToZero.alloc().initWithDevice_thresholdValue_linearGrayColorTransform_(device, 0, None) -inv_relu_shader = MetalPerformanceShaders.MPSImageThresholdBinary.alloc().initWithDevice_thresholdValue_maximumValue_linearGrayColorTransform_(device, 0, 1, None) -add_shader = MetalPerformanceShaders.MPSImageAdd.alloc().initWithDevice_(device) -sub_shader = MetalPerformanceShaders.MPSImageSubtract.alloc().initWithDevice_(device) -mul_shader = MetalPerformanceShaders.MPSImageMultiply.alloc().initWithDevice_(device) -sum_shader = MetalPerformanceShaders.MPSImageReduceRowSum.alloc().initWithDevice_(device) - -def unary_op(shader, input): - out = MetalBuffer(input.shape, None) - mtl_buffer = cmd_buffer() - shader.encodeToCommandBuffer_sourceTexture_destinationTexture_( - mtl_buffer, input.texture, out.texture - ) - mtl_buffer.commit() - return out - -def binary_op(shader, x, y): - ret = MetalBuffer(x.shape, None) - mtl_buffer = cmd_buffer() - shader.setPrimaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp) - shader.setSecondaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp) - shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_( - mtl_buffer, x.texture, y.texture, ret.texture - ) - mtl_buffer.commit() - return ret - -class Sum(Function): - def forward(ctx, input, axis=None): - assert axis is None or len(axis) == len(input.shape) - ctx.save_for_backward(input.shape, axis) - out = MetalBuffer((1,), None) - mtl_buffer = cmd_buffer() - sum_shader.encodeToCommandBuffer_sourceTexture_destinationTexture_( - mtl_buffer, input.texture, out.texture - ) - mtl_buffer.commit() - return out - - def backward(ctx, grad_output): - shape, axis = ctx.saved_tensors - out = MetalBuffer(shape, None) - return binary_op(add_shader, out, grad_output) - -class ReLU(Function): - def forward(ctx, input): - ctx.save_for_backward(input) - return unary_op(relu_shader, input) - - def backward(ctx, grad_output): - input, = ctx.saved_tensors - return binary_op(mul_shader, unary_op(inv_relu_shader, input), grad_output) - -class Add(Function): - def forward(ctx, x, y): - ctx.save_for_backward(x, y) - return binary_op(add_shader, x, y) - - def backward(ctx, grad_output): - x,y = ctx.saved_tensors - return grad_output, grad_output - -class Sub(Function): - def forward(ctx, x, y): - ctx.save_for_backward(x, y) - return binary_op(sub_shader, x, y) - - def backward(ctx, grad_output): - x,y = ctx.saved_tensors - out = MetalBuffer(y.shape, None) - return grad_output, binary_op(sub_shader, out, grad_output) - -class Mul(Function): - def forward(ctx, x, y): - ctx.save_for_backward(x, y) - return binary_op(mul_shader, x, y) - - def backward(ctx, grad_output): - x,y = ctx.saved_tensors - grad_x = binary_op(mul_shader, y, grad_output) - grad_y = binary_op(mul_shader, x, grad_output) - return grad_x, grad_y - -class Reshape(Function): - def forward(ctx, x, shape): - ctx.save_for_backward(x.shape) - # TODO: move this into global reshape? - shape = tuple(-np.prod(x.shape) // np.prod(shape) if s == -1 else s for s in shape) - return MetalBuffer(shape, x) - - def backward(ctx, grad_output): - in_shape, = ctx.saved_tensors - return MetalBuffer(in_shape, grad_output) - -# METAL=1 python3 test/test_ops.py TestOps.test_relu -if __name__ == "__main__": - b1 = MetalBuffer(10, np.ones(10)) - b2 = MetalBuffer(10, np.ones(10)) - out = MetalBuffer(10, None) - - mtl_buffer = cmd_buffer() - add_shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_( - mtl_buffer, b1.texture, b2.texture, out.texture - ) - mtl_buffer.commit() - - print(b1.toCPU()) - print(b2.toCPU()) - print(out.toCPU()) - - from tinygrad.tensor import Tensor, Device - - r1 = Tensor([-2,-1,0,2,4], device=Device.METAL) - r2 = r1.relu() - r3 = r2.sum() - r3.backward() - print(r1.cpu()) - print(r2.cpu()) - print(r3.cpu()) -