remove old metal implementation

This commit is contained in:
George Hotz
2023-02-18 13:51:48 -08:00
parent 7e8b0305f3
commit 4efe0169bb

View File

@@ -1,175 +0,0 @@
# pip3 install pyobjc-framework-MetalPerformanceShaders
from tinygrad.tensor import Function
from tinygrad.helpers import binary_broadcast
import numpy as np
import Metal
import MetalPerformanceShaders
device = Metal.MTLCreateSystemDefaultDevice()
mtl_queue = device.newCommandQueue()
mtl_buffers = []
def cmd_buffer():
ret = mtl_queue.commandBuffer()
mtl_buffers.append(ret)
return ret
class MetalBuffer:
def __init__(self, shape, hostbuf=None):
self.sz = np.prod(shape)*4
# TODO: fix this limit
assert self.sz < 16384
if hostbuf is not None:
if isinstance(hostbuf, MetalBuffer):
self.mtl = hostbuf.mtl
else:
self.mtl = device.newBufferWithBytes_length_options_(
hostbuf.astype(np.float32).data,
self.sz,
Metal.MTLResourceStorageModeShared)
else:
self.mtl = device.newBufferWithLength_options_(
self.sz,
Metal.MTLResourceStorageModeShared)
self.shape = shape
self.dtype = np.float32
self.descriptor = Metal.MTLTextureDescriptor.alloc().init()
self.descriptor.setPixelFormat_(Metal.MTLPixelFormatR32Float)
self.descriptor.setWidth_(np.prod(shape))
tsz = (self.sz+15)
tsz -= tsz%16
self.texture = self.mtl.newTextureWithDescriptor_offset_bytesPerRow_(self.descriptor, 0, tsz)
@staticmethod
def fromCPU(data):
return MetalBuffer(data.shape, data)
def toCPU(self):
global mtl_buffers
for b in mtl_buffers:
b.waitUntilCompleted()
mtl_buffers = []
return np.frombuffer(b''.join(self.mtl.contents()[0:self.sz]), dtype=self.dtype).reshape(self.shape)
relu_shader = MetalPerformanceShaders.MPSImageThresholdToZero.alloc().initWithDevice_thresholdValue_linearGrayColorTransform_(device, 0, None)
inv_relu_shader = MetalPerformanceShaders.MPSImageThresholdBinary.alloc().initWithDevice_thresholdValue_maximumValue_linearGrayColorTransform_(device, 0, 1, None)
add_shader = MetalPerformanceShaders.MPSImageAdd.alloc().initWithDevice_(device)
sub_shader = MetalPerformanceShaders.MPSImageSubtract.alloc().initWithDevice_(device)
mul_shader = MetalPerformanceShaders.MPSImageMultiply.alloc().initWithDevice_(device)
sum_shader = MetalPerformanceShaders.MPSImageReduceRowSum.alloc().initWithDevice_(device)
def unary_op(shader, input):
out = MetalBuffer(input.shape, None)
mtl_buffer = cmd_buffer()
shader.encodeToCommandBuffer_sourceTexture_destinationTexture_(
mtl_buffer, input.texture, out.texture
)
mtl_buffer.commit()
return out
def binary_op(shader, x, y):
ret = MetalBuffer(x.shape, None)
mtl_buffer = cmd_buffer()
shader.setPrimaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp)
shader.setSecondaryEdgeMode_(MetalPerformanceShaders.MPSImageEdgeModeClamp)
shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_(
mtl_buffer, x.texture, y.texture, ret.texture
)
mtl_buffer.commit()
return ret
class Sum(Function):
def forward(ctx, input, axis=None):
assert axis is None or len(axis) == len(input.shape)
ctx.save_for_backward(input.shape, axis)
out = MetalBuffer((1,), None)
mtl_buffer = cmd_buffer()
sum_shader.encodeToCommandBuffer_sourceTexture_destinationTexture_(
mtl_buffer, input.texture, out.texture
)
mtl_buffer.commit()
return out
def backward(ctx, grad_output):
shape, axis = ctx.saved_tensors
out = MetalBuffer(shape, None)
return binary_op(add_shader, out, grad_output)
class ReLU(Function):
def forward(ctx, input):
ctx.save_for_backward(input)
return unary_op(relu_shader, input)
def backward(ctx, grad_output):
input, = ctx.saved_tensors
return binary_op(mul_shader, unary_op(inv_relu_shader, input), grad_output)
class Add(Function):
def forward(ctx, x, y):
ctx.save_for_backward(x, y)
return binary_op(add_shader, x, y)
def backward(ctx, grad_output):
x,y = ctx.saved_tensors
return grad_output, grad_output
class Sub(Function):
def forward(ctx, x, y):
ctx.save_for_backward(x, y)
return binary_op(sub_shader, x, y)
def backward(ctx, grad_output):
x,y = ctx.saved_tensors
out = MetalBuffer(y.shape, None)
return grad_output, binary_op(sub_shader, out, grad_output)
class Mul(Function):
def forward(ctx, x, y):
ctx.save_for_backward(x, y)
return binary_op(mul_shader, x, y)
def backward(ctx, grad_output):
x,y = ctx.saved_tensors
grad_x = binary_op(mul_shader, y, grad_output)
grad_y = binary_op(mul_shader, x, grad_output)
return grad_x, grad_y
class Reshape(Function):
def forward(ctx, x, shape):
ctx.save_for_backward(x.shape)
# TODO: move this into global reshape?
shape = tuple(-np.prod(x.shape) // np.prod(shape) if s == -1 else s for s in shape)
return MetalBuffer(shape, x)
def backward(ctx, grad_output):
in_shape, = ctx.saved_tensors
return MetalBuffer(in_shape, grad_output)
# METAL=1 python3 test/test_ops.py TestOps.test_relu
if __name__ == "__main__":
b1 = MetalBuffer(10, np.ones(10))
b2 = MetalBuffer(10, np.ones(10))
out = MetalBuffer(10, None)
mtl_buffer = cmd_buffer()
add_shader.encodeToCommandBuffer_primaryTexture_secondaryTexture_destinationTexture_(
mtl_buffer, b1.texture, b2.texture, out.texture
)
mtl_buffer.commit()
print(b1.toCPU())
print(b2.toCPU())
print(out.toCPU())
from tinygrad.tensor import Tensor, Device
r1 = Tensor([-2,-1,0,2,4], device=Device.METAL)
r2 = r1.relu()
r3 = r2.sum()
r3.backward()
print(r1.cpu())
print(r2.cpu())
print(r3.cpu())