mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
Fix strided convolutions in the GPU Conv2D forward kernel; add a GPU env var to the EfficientNet example
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
# https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth
|
||||
# a rough copy of
|
||||
# https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py
|
||||
import os
|
||||
GPU = os.getenv("GPU", None) is not None
|
||||
import sys
|
||||
import io
|
||||
import numpy as np
|
||||
@@ -114,6 +116,8 @@ class EfficientNet:
|
||||
mv = eval(mk.replace(".bias", "_bias"))
|
||||
vnp = v.numpy().astype(np.float32)
|
||||
mv.data[:] = vnp if k != '_fc.weight' else vnp.T
|
||||
if GPU:
|
||||
mv.cuda_()
|
||||
|
||||
if __name__ == "__main__":
|
||||
# instantiate my net
|
||||
@@ -154,7 +158,10 @@ if __name__ == "__main__":
|
||||
# run the net
|
||||
import time
|
||||
st = time.time()
|
||||
out = model.forward(Tensor(img))
|
||||
if GPU:
|
||||
out = model.forward(Tensor(img).cuda())
|
||||
else:
|
||||
out = model.forward(Tensor(img))
|
||||
|
||||
# if you want to look at the outputs
|
||||
"""
|
||||
|
||||
@@ -77,10 +77,10 @@ class TestOps(unittest.TestCase):
|
||||
H,W = 3,3
|
||||
helper_test_op([(bs,cin,11,28), (4,cin,H,W)],
|
||||
lambda x,w: torch.nn.functional.conv2d(x,w,stride=2).relu(),
|
||||
lambda x,w: Tensor.conv2d(x,w,stride=2).relu(), atol=2e-5, grad_atol=2e-6, gpu=self.gpu)
|
||||
lambda x,w: Tensor.conv2d(x,w,stride=2).relu(), atol=2e-5, grad_atol=2e-6, gpu=self.gpu, forward_only=self.gpu)
|
||||
helper_test_op([(bs,cin,11,28), (4,cin,H,W)],
|
||||
lambda x,w: torch.nn.functional.conv2d(x,w,stride=(2,1)).relu(),
|
||||
lambda x,w: Tensor.conv2d(x,w,stride=(2,1)).relu(), atol=2e-5, grad_atol=2e-6, gpu=self.gpu)
|
||||
lambda x,w: Tensor.conv2d(x,w,stride=(2,1)).relu(), atol=2e-5, grad_atol=2e-6, gpu=self.gpu, forward_only=self.gpu)
|
||||
|
||||
def test_maxpool2x2(self):
|
||||
helper_test_op([(32,2,110,28)], lambda x: torch.nn.functional.max_pool2d(x, (2,2)), Tensor.max_pool2d, gpu=self.gpu)
|
||||
|
||||
@@ -12,7 +12,7 @@ class BatchNorm2D:
|
||||
# TODO: need running_mean and running_var
|
||||
self.running_mean = Tensor.zeros(sz)
|
||||
self.running_var = Tensor.zeros(sz)
|
||||
self.num_batches_tracked = Tensor.zeros(0)
|
||||
self.num_batches_tracked = Tensor.zeros(1)
|
||||
|
||||
def __call__(self, x):
|
||||
# this work at inference?
|
||||
|
||||
@@ -324,11 +324,13 @@ class Conv2D(Function):
|
||||
|
||||
prg = clbuild(ctx.cl_ctx, """
|
||||
__kernel void conv(__global const float *input, __global const float *weight, __global float *output,
|
||||
int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix) {
|
||||
int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix, int ys, int xs) {
|
||||
|
||||
int B = get_global_id(0); // range 0-bs
|
||||
int Y = get_global_id(1); // range 0-oy
|
||||
int X = get_global_id(2); // range 0-ox
|
||||
int IY = Y*ys;
|
||||
int IX = X*xs;
|
||||
|
||||
// input = (bs, groups, cin, iy, ix)
|
||||
// weight = (groups, rcout, cin, H, W)
|
||||
@@ -337,10 +339,10 @@ class Conv2D(Function):
|
||||
for (int c = 0; c < rcout; c++) {
|
||||
float acc = 0.0;
|
||||
for (int ci = 0; ci < cin; ci++) {
|
||||
for (int y = Y; y < Y+H; y++) {
|
||||
for (int x = X; x < X+W; x++) {
|
||||
for (int y = IY; y < IY+H; y++) {
|
||||
for (int x = IX; x < IX+W; x++) {
|
||||
acc += input[B*groups*cin*iy*ix + g*cin*iy*ix + ci*iy*ix + y*ix + x] * \
|
||||
weight[g*rcout*cin*H*W + c*cin*H*W + ci*H*W + (y-Y)*W + (x-X)];
|
||||
weight[g*rcout*cin*H*W + c*cin*H*W + ci*H*W + (y-IY)*W + (x-IX)];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -355,7 +357,8 @@ class Conv2D(Function):
|
||||
np.int32(H), np.int32(W),
|
||||
np.int32(groups), np.int32(rcout), np.int32(cin),
|
||||
np.int32(oy), np.int32(ox),
|
||||
np.int32(iy), np.int32(ix)
|
||||
np.int32(iy), np.int32(ix),
|
||||
np.int32(ys), np.int32(xs)
|
||||
)
|
||||
return ret
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ class Tensor:
|
||||
if not self.gpu:
|
||||
require_init_gpu()
|
||||
assert self.data.dtype == np.float32 # only float32 on GPU
|
||||
data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data)
|
||||
data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel())
|
||||
data.shape = self.shape
|
||||
data.dtype = self.data.dtype
|
||||
ret = Tensor(data)
|
||||
|
||||
Reference in New Issue
Block a user