Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-08 22:48:25 -05:00)

Commit: less lines (#197)
@@ -93,9 +93,7 @@ class Pad2D(Function):
   @staticmethod
   def forward(ctx, x, padding=None):
     ctx.save_for_backward(padding)
-    return np.pad(x,
-                  ((0,0), (0,0),
-                   (padding[2], padding[3]), (padding[0], padding[1])))
+    return np.pad(x, ((0,0), (0,0), tuple(padding[2:4]), tuple(padding[0:2])))
 
   @staticmethod
   def backward(ctx, grad_output):
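Note: the condensed call works because padding[0:2] pads the width axis (3) and padding[2:4] pads the height axis (2). A quick standalone numpy check with toy shapes (not part of the commit) that the one-liner matches the expanded call it replaces:

import numpy as np

x = np.arange(2*3*4*5, dtype=np.float32).reshape(2, 3, 4, 5)  # toy NCHW input
padding = (1, 2, 3, 4)  # slots 0-1 pad W, slots 2-3 pad H, per the axis mapping above

old = np.pad(x, ((0,0), (0,0), (padding[2], padding[3]), (padding[0], padding[1])))
new = np.pad(x, ((0,0), (0,0), tuple(padding[2:4]), tuple(padding[0:2])))
assert np.array_equal(old, new)
print(new.shape)  # (2, 3, 11, 8)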
@@ -127,8 +125,7 @@ class ReLU(Function):
   @staticmethod
   def backward(ctx, grad_output):
     input, = ctx.saved_tensors
-    grad_input = grad_output * (input >= 0)
-    return grad_input
+    return grad_output * (input >= 0)
 register('relu', ReLU)
 
 class Sigmoid(Function):
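Note: the temporary is dropped; the single expression masks the upstream gradient with the sign of the saved input. A minimal numpy sketch of that masking, with toy values rather than repo code:

import numpy as np

input = np.array([-2.0, -0.5, 0.0, 1.5])       # what forward saved
grad_output = np.ones_like(input)               # upstream gradient
print(grad_output * (input >= 0))               # [0. 0. 1. 1.]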
@@ -146,8 +143,7 @@ class Sigmoid(Function):
   @staticmethod
   def backward(ctx, grad_output):
     ret, = ctx.saved_tensors
-    grad_input = grad_output * (ret * (1 - ret))
-    return grad_input
+    return grad_output * (ret * (1 - ret))
 register('sigmoid', Sigmoid)
 
 class LogSoftmax(Function):
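Note: backward needs only the saved forward output ret, since sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). A toy numpy check (not repo code) that the output-based form matches the direct derivative:

import numpy as np

x = np.array([-1.0, 0.0, 2.0])
ret = 1 / (1 + np.exp(-x))                       # what forward would have saved
grad_output = np.ones_like(x)
direct = grad_output * np.exp(-x) / (1 + np.exp(-x))**2   # d/dx sigmoid(x)
assert np.allclose(grad_output * (ret * (1 - ret)), direct)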
@@ -185,12 +181,10 @@ class Conv2D(Function):
 
     gx = x.reshape(bs,ctx.groups,cin,x.shape[2],x.shape[3])
     tx = np.lib.stride_tricks.as_strided(gx,
       shape=(bs, ctx.groups, cin, oy, ox, H, W),
-      strides=(gx.strides[0], gx.strides[1], gx.strides[2],
-        gx.strides[3]*ys, gx.strides[4]*xs,
-        gx.strides[3], gx.strides[4]),
+      strides=(*gx.strides[0:3], gx.strides[3]*ys, gx.strides[4]*xs, *gx.strides[3:5]),
       writeable=False,
     )
     tw = w.reshape(ctx.groups, rcout, cin, H, W)
     ctx.save_for_backward(tx, tw, x.shape)
 
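Note: the strides tuple now splices the existing strides with * unpacking; the resulting view is unchanged. A reduced 2-D sketch of the same stride trick (toy array, not repo code), building a read-only view of all HxW windows at stride (ys, xs) without copying data:

import numpy as np

img = np.arange(36, dtype=np.float32).reshape(6, 6)
H, W, ys, xs = 3, 3, 2, 2
oy, ox = (6 - H) // ys + 1, (6 - W) // xs + 1    # 2, 2

windows = np.lib.stride_tricks.as_strided(
  img,
  shape=(oy, ox, H, W),
  strides=(img.strides[0]*ys, img.strides[1]*xs, *img.strides),
  writeable=False,
)
assert np.array_equal(windows[1, 1], img[2:5, 2:5])   # window at output position (1, 1)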
@@ -258,9 +252,7 @@ class MaxPool2D(Function):
   @staticmethod
   def backward(ctx, grad_output):
     idxs,s = ctx.saved_tensors
-    return unstack_for_pool(
-      lambda idx: grad_output * (idxs == idx),
-      s, *ctx.kernel_size)
+    return unstack_for_pool(lambda idx: grad_output * (idxs == idx), s, *ctx.kernel_size)
 register('max_pool2d', MaxPool2D)
 
 class AvgPool2D(Function):
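Note: the lambda is now inlined into the call; the saved idxs hold the winning position per window, so the gradient flows only to the element that produced the max. A standalone sketch of that idea for a single 2x2 window (toy values, not the repo's unstack_for_pool helper):

import numpy as np

x = np.array([[1., 3.],
              [2., 0.]])            # one pooling window
idxs = np.argmax(x)                 # flat index of the max: 1
grad_output = 5.0                   # upstream grad for this window's single output
grad_input = np.array([grad_output * (idxs == idx) for idx in range(4)]).reshape(2, 2)
print(grad_input)                   # [[0. 5.] [0. 0.]]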
@@ -274,8 +266,6 @@ class AvgPool2D(Function):
   def backward(ctx, grad_output):
     s, = ctx.saved_tensors
     py, px = ctx.kernel_size
-    return unstack_for_pool(
-      lambda idx: grad_output/py/px,
-      s, py, px)
+    return unstack_for_pool(lambda idx: grad_output/py/px, s, py, px)
 register('avg_pool2d', AvgPool2D)
 
@@ -346,8 +346,7 @@ class Reshape(Function):
   @staticmethod
   def backward(ctx, grad_output):
     in_shape, = ctx.saved_tensors
-    grad_output = GPUBuffer(in_shape, hostbuf=grad_output)
-    return grad_output
+    return GPUBuffer(in_shape, hostbuf=grad_output)
 register('reshape', Reshape, device=Tensor.GPU)
 
 # ************* activation ops *************
@@ -449,6 +448,10 @@ class Conv2D(Function):
     # output buffer
     ret = buffer_new(ctx, (bs, cout, oy, ox))
 
+    # input  = (bs, groups, cin, iy, ix)
+    # weight = (groups, rcout, cin, H, W)
+    # output = (bs, groups, rcout, oy, ox)
+
     conv = clbuild(ctx.cl_ctx, "conv", """
     __kernel void conv(__global const float *input, __global const float *weight, __global float *output,
       int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix, int ys, int xs) {
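Note: these layout comments move to the Python side, next to the buffer setup (they are removed from the kernel body in the next hunk). A hedged Python sketch of the row-major offset arithmetic the "output = (bs, groups, rcout, oy, ox)" layout implies; the helper name is mine, not from the kernel:

import numpy as np

def output_offset(b, g, c, y, x, groups, rcout, oy, ox):
  # flat index of out[b, g, c, y, x] in a contiguous (bs, groups, rcout, oy, ox) buffer
  return (((b * groups + g) * rcout + c) * oy + y) * ox + x

bs, groups, rcout, oy, ox = 2, 3, 4, 5, 6
out = np.arange(bs * groups * rcout * oy * ox).reshape(bs, groups, rcout, oy, ox)
assert out[1, 2, 3, 4, 5] == output_offset(1, 2, 3, 4, 5, groups, rcout, oy, ox)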
@@ -462,9 +465,6 @@ class Conv2D(Function):
       int IY = Y*ys;
       int IX = X*xs;
 
-      // input = (bs, groups, cin, iy, ix)
-      // weight = (groups, rcout, cin, H, W)
-      // output = (bs, groups, rcout, oy, ox)
       float acc = 0.0;
       for (int ci = 0; ci < cin; ci++) {
         for (int y = IY; y < IY+H; y++) {
@@ -8,9 +8,8 @@ from collections import defaultdict
 
 DEBUG = os.getenv("DEBUG", None) is not None
 if DEBUG:
-  import collections, atexit, time
-  debug_counts = collections.defaultdict(int)
-  debug_times = collections.defaultdict(float)
+  import atexit, time
+  debug_counts, debug_times = defaultdict(int), defaultdict(float)
   def print_debug_exit():
     for name, _ in sorted(debug_times.items(), key=lambda x: -x[1]):
       print(f"{name:>20} : {debug_counts[name]:>6} {debug_times[name]:>10.2f} ms")
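Note: the two defaultdict lines collapse into one and reuse the defaultdict already imported at the top of the file, so the local collections import can go. A minimal standalone sketch of the same profiling pattern (toy code, not from the repo): per-name call counts and accumulated times, dumped when the process exits:

import atexit, time
from collections import defaultdict

debug_counts, debug_times = defaultdict(int), defaultdict(float)

def timed(name, fn, *args):
  st = time.time()
  out = fn(*args)
  debug_counts[name] += 1
  debug_times[name] += (time.time() - st) * 1000
  return out

atexit.register(lambda: [print(f"{name:>20} : {debug_counts[name]:>6} {t:>10.2f} ms")
                         for name, t in sorted(debug_times.items(), key=lambda x: -x[1])])

timed("add", lambda a, b: a + b, 2, 3)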
@@ -88,9 +87,7 @@ class Tensor:
         Tensor.did_float_warning = True
       self.device = Tensor.CPU
 
-    self.data = data
-    self.grad = None
-    self.requires_grad = requires_grad
+    self.data, self.grad, self.requires_grad = data, None, requires_grad
 
     if gpu:
       self.cuda_()
@@ -157,12 +154,11 @@ class Tensor:
       if len(t0._ctx.parents) == 1:
         grads = [grads]
       for t,g in zip(t0._ctx.parents, grads):
-        if g is None:
-          continue
-        assert g.shape == t.shape, \
-          f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
-        gt = Tensor(g, requires_grad=False)
-        t.grad = gt if t.grad is None else (t.grad + gt)
+        if g is not None:
+          assert g.shape == t.shape, \
+            f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
+          gt = Tensor(g, requires_grad=False)
+          t.grad = gt if t.grad is None else (t.grad + gt)
 
   # ***** tinygrad supports CPU and GPU *****
 
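Note: the continue guard is inverted into a single "if g is not None:" block; behaviour is unchanged. A toy sketch (not repo code) of the accumulation line inside that block: the first gradient that arrives initialises t.grad, later ones add to it, and None entries are skipped:

import numpy as np

class T:
  grad = None                                     # stands in for a parent tensor

t = T()
for g in (np.ones(3), None, 2 * np.ones(3)):      # None simulates an op with no grad for this parent
  if g is not None:
    gt = g                                        # stands in for Tensor(g, requires_grad=False)
    t.grad = gt if t.grad is None else (t.grad + gt)
print(t.grad)                                     # [3. 3. 3.]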
@@ -197,8 +193,7 @@ class Tensor:
       if self.grad:
         ret.grad = self.grad.cuda()
       return ret
-    else:
-      return self
+    return self
 
   def ane(self):
     assert(not self.gpu)