.cpu().numpy() -> .numpy() (#1594)

* .cpu().numpy() -> .numpy()

* restore ops_torch

* restore test_speed_v_torch
Yixiang Gao
2023-08-21 11:53:29 -05:00
committed by GitHub
parent 35bf21276f
commit 8d6662a741
18 changed files with 84 additions and 198 deletions
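
Context for the hunks below: on a tinygrad Tensor, .numpy() already realizes the tensor and copies it to host memory, so the leading .cpu() hop it replaces is redundant. A minimal sketch of the equivalence (hypothetical values; assumes the 2023-era tinygrad.tensor.Tensor API, in which both calls exist):

import numpy as np
from tinygrad.tensor import Tensor

t = Tensor([1.0, 2.0, 3.0]) * 2

old_style = t.cpu().numpy()  # explicit hop to the CPU device, then to numpy
new_style = t.numpy()        # .numpy() alone returns the same host ndarray

assert np.array_equal(old_style, new_style)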


@@ -1,114 +0,0 @@
import numpy as np
import torch
import time
import platform
from torch import nn
from torch import optim
from extra.datasets import fetch_cifar
from tinygrad.helpers import getenv

# allow TF32
torch.set_float32_matmul_precision('high')

OSX = platform.system() == "Darwin"
device = 'mps' if OSX else 'cuda'

num_classes = 10

class ConvGroup(nn.Module):
  def __init__(self, channels_in, channels_out, short, se=True):
    super().__init__()
    self.short, self.se = short, se and not short
    self.conv = nn.ModuleList([nn.Conv2d(channels_in if i == 0 else channels_out, channels_out, kernel_size=3, padding=1, bias=False) for i in range(1 if short else 3)])
    self.norm = nn.ModuleList([nn.BatchNorm2d(channels_out, track_running_stats=False, eps=1e-12, momentum=0.8) for _ in range(1 if short else 3)])
    if self.se: self.se1, self.se2 = nn.Linear(channels_out, channels_out//16), nn.Linear(channels_out//16, channels_out)

  def forward(self, x):
    x = nn.functional.max_pool2d(self.conv[0](x), 2)
    x = self.norm[0](x).relu()
    if self.short: return x
    residual = x
    mult = self.se2(self.se1(residual.mean((2,3))).relu()).sigmoid().reshape(x.shape[0], x.shape[1], 1, 1) if self.se else 1.0
    x = self.norm[1](self.conv[1](x)).relu()
    x = self.norm[2](self.conv[2](x) * mult).relu()
    return x + residual

class GlobalMaxPool(nn.Module):
  def forward(self, x): return torch.amax(x, dim=(2,3))

class SpeedyResNet(nn.Module):
  def __init__(self):
    super().__init__()
    # TODO: add whitening
    self.net = nn.ModuleList([
      nn.Conv2d(3, 64, kernel_size=1),
      nn.BatchNorm2d(64, track_running_stats=False, eps=1e-12, momentum=0.8),
      nn.ReLU(),
      ConvGroup(64, 128, short=False),
      ConvGroup(128, 256, short=True),
      ConvGroup(256, 512, short=False),
      GlobalMaxPool(),
      nn.Linear(512, num_classes, bias=False)
    ])

  # note, pytorch just uses https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html instead of log_softmax
  def forward(self, x):
    for layer in self.net:
      x = layer(x)
    return x.log_softmax(-1)

def train_step_jitted(model, optimizer, X, Y):
  out = model(X)
  loss = (out * Y).mean()
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()
  correct = out.detach().argmax(axis=1) == Y.detach().argmin(axis=1)
  return loss, correct

def fetch_batch(X_train, Y_train, BS):
  # fetch a batch
  samp = np.random.randint(0, X_train.shape[0], size=(BS))
  Y = np.zeros((BS, num_classes), np.float32)
  Y[range(BS),Y_train[samp]] = -1.0*num_classes
  X = torch.tensor(X_train[samp])
  Y = torch.tensor(Y.reshape(BS, num_classes))
  return X.to(device), Y.to(device)

def train_cifar():
  BS = getenv("BS", 512)
  if getenv("FAKEDATA"):
    N = 2048
    X_train = np.random.default_rng().standard_normal(size=(N, 3, 32, 32), dtype=np.float32)
    Y_train = np.random.randint(0,10,size=(N), dtype=np.int32)
    X_test, Y_test = X_train, Y_train
  else:
    X_train,Y_train = fetch_cifar(train=True)
    X_test,Y_test = fetch_cifar(train=False)
  print(X_train.shape, Y_train.shape)
  Xt, Yt = fetch_batch(X_test, Y_test, BS=BS)
  model = SpeedyResNet().to(device)
  model.train()
  optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.85, nesterov=True)
  X, Y = fetch_batch(X_train, Y_train, BS=BS)
  for i in range(getenv("STEPS", 10)):
    #for param_group in optimizer.param_groups: print(param_group['lr'])
    if i%10 == 0:
      # use training batchnorm (and no_grad would change the kernels)
      out = model(Xt).detach()
      loss = (out * Yt).mean().cpu().numpy()
      outs = out.cpu().numpy().argmax(axis=1)
      correct = outs == Yt.detach().cpu().numpy().argmin(axis=1)
      print(f"eval {sum(correct)}/{len(correct)} {sum(correct)/len(correct)*100.0:.2f}%, {loss:7.2f} val_loss")
    st = time.monotonic()
    loss, correct = train_step_jitted(model, optimizer, X, Y)
    et = time.monotonic()
    X, Y = fetch_batch(X_train, Y_train, BS=BS)  # do this here
    loss_cpu = loss.detach().cpu().item()
    correct = correct.cpu().numpy()
    cl = time.monotonic()
    print(f"{i:3d} {(cl-st)*1000.0:7.2f} ms run, {(et-st)*1000.0:7.2f} ms python, {(cl-et)*1000.0:7.2f} ms CL, {loss_cpu:7.2f} loss, {sum(correct)/len(correct)*100.0:7.2f}% acc")

if __name__ == "__main__":
  train_cifar()
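
An aside on the label encoding in the deleted trainer above: the model emits log_softmax outputs, and fetch_batch writes -1.0*num_classes into the one-hot targets, so (out * Y).mean() collapses to the standard NLL loss (the mean over BS*num_classes entries cancels the num_classes factor). A small numpy check of that identity, with made-up values:

import numpy as np

BS, num_classes = 4, 10
logp = np.log(np.random.dirichlet(np.ones(num_classes), size=BS))  # stand-in for log_softmax output
target = np.random.randint(0, num_classes, size=BS)

Y = np.zeros((BS, num_classes), np.float32)
Y[range(BS), target] = -1.0 * num_classes

# (logp * Y).mean() sums -num_classes * logp[b, target[b]] over the batch,
# then divides by BS*num_classes, leaving the negative mean target log-probability
assert np.isclose((logp * Y).mean(), -logp[range(BS), target].mean())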


@@ -59,7 +59,7 @@ def train_discriminator(optimizer, data_real, data_fake):
   loss_real.backward()
   loss_fake.backward()
   optimizer.step()
-  return (loss_real + loss_fake).cpu().numpy()
+  return (loss_real + loss_fake).numpy()
 
 def train_generator(optimizer, data_fake):
   real_labels = make_labels(batch_size, 1)
@@ -68,7 +68,7 @@ def train_generator(optimizer, data_fake):
   loss = (output * real_labels).mean()
   loss.backward()
   optimizer.step()
-  return loss.cpu().numpy()
+  return loss.numpy()
 
 if __name__ == "__main__":
   # data for training and validation
@@ -100,7 +100,7 @@ if __name__ == "__main__":
       data_fake = generator.forward(noise)
       loss_g += train_generator(optim_g, data_fake)
     if (epoch + 1) % sample_interval == 0:
-      fake_images = generator.forward(ds_noise).detach().cpu().numpy()
+      fake_images = generator.forward(ds_noise).detach().numpy()
       fake_images = (fake_images.reshape(-1, 1, 28, 28) + 1) / 2  # 0 - 1 range.
       save_image(make_grid(torch.tensor(fake_images)), output_dir / f"image_{epoch+1}.jpg")
     t.set_description(f"Generator loss: {loss_g/n_steps}, Discriminator loss: {loss_d/n_steps}")


@@ -72,14 +72,14 @@ class BigConvNet:
     with open(filename+'.npy', 'wb') as f:
       for par in get_parameters(self):
         #if par.requires_grad:
-        np.save(f, par.cpu().numpy())
+        np.save(f, par.numpy())
 
   def load(self, filename):
     with open(filename+'.npy', 'rb') as f:
       for par in get_parameters(self):
         #if par.requires_grad:
         try:
-          par.cpu().numpy()[:] = np.load(f)
+          par.numpy()[:] = np.load(f)
           if GPU:
             par.gpu()
         except:


@@ -89,8 +89,8 @@ if __name__ == "__main__":
     opt_time = (time.time()-st)*1000.0
 
     st = time.time()
-    loss = loss.cpu().numpy()
-    cat = np.argmax(out.cpu().numpy(), axis=1)
+    loss = loss.numpy()
+    cat = np.argmax(out.numpy(), axis=1)
     accuracy = (cat == Y).mean()
     finish_time = (time.time()-st)*1000.0


@@ -44,6 +44,6 @@ img -= 0.5
 img /= 0.5
 
 out = m.forward(Tensor(img))
-outnp = out.cpu().numpy().ravel()
+outnp = out.numpy().ravel()
 choice = outnp.argmax()
 print(out.shape, choice, outnp[choice], lbls[choice])


@@ -13,7 +13,7 @@ from extra.utils import fetch
 def show_labels(prediction, confidence=0.5, num_classes=80):
   coco_labels = fetch('https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names')
   coco_labels = coco_labels.decode('utf-8').split('\n')
-  prediction = prediction.detach().cpu().numpy()
+  prediction = prediction.detach().numpy()
   conf_mask = (prediction[:,:,4] > confidence)
   prediction *= np.expand_dims(conf_mask, 2)
   labels = []
@@ -82,7 +82,7 @@ def bbox_iou(box1, box2):
   return iou
 
 def process_results(prediction, confidence=0.9, num_classes=80, nms_conf=0.4):
-  prediction = prediction.detach().cpu().numpy()
+  prediction = prediction.detach().numpy()
   conf_mask = (prediction[:,:,4] > confidence)
   conf_mask = np.expand_dims(conf_mask, 2)
   prediction = prediction * conf_mask
@@ -176,7 +176,7 @@ def predict_transform(prediction, inp_dim, anchors, num_classes):
   prediction = prediction.reshape(shape=(batch_size, bbox_attrs*num_anchors, grid_size*grid_size))
   prediction = prediction.transpose(1, 2)
   prediction = prediction.reshape(shape=(batch_size, grid_size*grid_size*num_anchors, bbox_attrs))
-  prediction_cpu = prediction.cpu().numpy()
+  prediction_cpu = prediction.numpy()
   for i in (0, 1, 4):
     prediction_cpu[:,:,i] = 1 / (1 + np.exp(-prediction_cpu[:,:,i]))
   # Add the center offsets
@@ -233,7 +233,7 @@ class Darknet:
         size, stride = int(x["size"]), int(x["stride"])
         module.append(lambda x: x.max_pool2d(kernel_size=(size, size), stride=stride))
       elif module_type == "upsample":
-        module.append(lambda x: Tensor(x.cpu().numpy().repeat(2, axis=-2).repeat(2, axis=-1)))
+        module.append(lambda x: Tensor(x.numpy().repeat(2, axis=-2).repeat(2, axis=-1)))
      elif module_type == "route":
        x["layers"] = x["layers"].split(",")
        # Start of route
@@ -272,11 +272,11 @@ class Darknet:
         print(self.blocks[i + 1]["type"], "weights", i)
         model = self.module_list[i]
         conv = model[0]
-        print(conv.weight.cpu().numpy()[0][0][0])
+        print(conv.weight.numpy()[0][0][0])
         if conv.bias is not None:
           print("biases")
           print(conv.bias.shape)
-          print(conv.bias.cpu().numpy()[0][0:5])
+          print(conv.bias.numpy()[0][0:5])
         else:
           print("None biases for layer", i)
 
@@ -352,7 +352,7 @@ class Darknet:
           if (layers[1]) > 0: layers[1] = layers[1] - i
           map1 = outputs[i + layers[0]]
           map2 = outputs[i + layers[1]]
-          x = Tensor(np.concatenate((map1.cpu().numpy(), map2.cpu().numpy()), axis=1))
+          x = Tensor(np.concatenate((map1.numpy(), map2.numpy()), axis=1))
       elif module_type == "shortcut":
         from_ = int(module["from"])
         x = outputs[i - 1] + outputs[i + from_]
@@ -364,7 +364,7 @@ class Darknet:
       if not write:
         detections, write = x, True
       else:
-        detections = Tensor(np.concatenate((detections.cpu().numpy(), x.cpu().numpy()), axis=1))
+        detections = Tensor(np.concatenate((detections.numpy(), x.numpy()), axis=1))
 
       outputs[i] = x
     return detections
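
For reference, the "upsample" module above round-trips through numpy because repeating along both spatial axes is exactly 2x nearest-neighbor upsampling; a tiny standalone demonstration:

import numpy as np

a = np.array([[1, 2],
              [3, 4]])
up = a.repeat(2, axis=-2).repeat(2, axis=-1)
# up is now
# [[1 1 2 2]
#  [1 1 2 2]
#  [3 3 4 4]
#  [3 3 4 4]]
assert up.shape == (4, 4)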


@@ -93,7 +93,7 @@ def postprocess(preds, img, orig_imgs):
   print('copying to CPU now for post processing')
   #if you are on CPU, this causes an overflow runtime error. doesn't "seem" to make any difference in the predictions though.
   # TODO: make non_max_suppression in tinygrad - to make this faster
-  preds = preds.cpu().numpy() if isinstance(preds, Tensor) else preds
+  preds = preds.numpy() if isinstance(preds, Tensor) else preds
   preds = non_max_suppression(prediction=preds, conf_thres=0.25, iou_thres=0.7, agnostic=False, max_det=300)
   all_preds = []
   for i, pred in enumerate(preds):


@@ -33,10 +33,10 @@ def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categoric
     # printing
     if not noloss:
-      cat = np.argmax(out.cpu().numpy(), axis=-1)
+      cat = np.argmax(out.numpy(), axis=-1)
       accuracy = (cat == y).mean()
 
-      loss = loss.detach().cpu().numpy()
+      loss = loss.detach().numpy()
       losses.append(loss)
       accuracies.append(accuracy)
       t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
@@ -51,7 +51,7 @@ def evaluate(model, X_test, Y_test, num_classes=None, BS=128, return_predict=Fal
   for i in trange((len(Y_test)-1)//BS+1, disable=getenv('CI', False)):
     x = Tensor(transform(X_test[i*BS:(i+1)*BS]))
     out = model.forward(x) if hasattr(model, 'forward') else model(x)
-    Y_test_preds_out[i*BS:(i+1)*BS] = out.cpu().numpy()
+    Y_test_preds_out[i*BS:(i+1)*BS] = out.numpy()
   Y_test_preds = np.argmax(Y_test_preds_out, axis=-1)
   Y_test = target_transform(Y_test)
   return (Y_test == Y_test_preds).mean(), Y_test_preds


@@ -50,7 +50,7 @@ class Transformer:
   def forward(self, x):
     bs = x.shape[0]
-    xnp = x.cpu().numpy().astype(np.int32)
+    xnp = x.numpy().astype(np.int32)
     onehot = np.zeros((bs, x.shape[1], self.maxlen+self.syms), dtype=np.float32)
     for i in range(x.shape[1]):
       onehot[range(bs), i, i] = 1


@@ -63,14 +63,14 @@ class TestYOLOv8(unittest.TestCase):
     onnx_session = ort.InferenceSession(weights_location_onnx)
     onnx_input_name = onnx_session.get_inputs()[0].name
     onnx_output_name = onnx_session.get_outputs()[0].name
-    onnx_output = onnx_session.run([onnx_output_name], {onnx_input_name: input_image.cpu().numpy()})
+    onnx_output = onnx_session.run([onnx_output_name], {onnx_input_name: input_image.numpy()})
 
     tiny_output = TinyYolov8(input_image)
 
     # currently rtol is 0.025 because there is a 1-2% difference in our predictions
     # because of the zero padding in SPPF module (line 280) maxpooling layers rather than the -infinity in torch.
     # This difference does not make a difference "visually".
-    np.testing.assert_allclose(onnx_output[0], tiny_output.cpu().numpy(), atol=5e-4, rtol=0.025)
+    np.testing.assert_allclose(onnx_output[0], tiny_output.numpy(), atol=5e-4, rtol=0.025)
 
 if __name__ == '__main__':
   unittest.main()
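
On the rtol=0.025 comment above: torch's max_pool2d pads implicitly with -infinity, so padding can never win the max, while zero padding can win wherever every real activation in a window is negative. A small torch illustration of that gap, with a made-up input:

import torch
import torch.nn.functional as F

x = -torch.ones(1, 1, 3, 3)  # all-negative activations

implicit = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)  # -inf padding: output stays -1 everywhere
zero_pad = F.max_pool2d(F.pad(x, (1, 1, 1, 1), value=0.0), kernel_size=3, stride=1)  # zeros win at the border

print(implicit.max().item(), zero_pad.max().item())  # -1.0 vs 0.0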


@@ -31,7 +31,7 @@ class TestRNNT(unittest.TestCase):
     for _ in range(3):
       x = Tensor.randn(SQ, BS, IS)
       z, hc = layer(x, None)
-      torch_x = torch.tensor(x.cpu().numpy())
+      torch_x = torch.tensor(x.numpy())
       torch_z, torch_hc = torch_layer(torch_x)
       np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)
@@ -39,7 +39,7 @@ class TestRNNT(unittest.TestCase):
     for _ in range(3):
       x = Tensor.randn(SQ, BS, IS)
       z, hc = layer(x, hc)
-      torch_x = torch.tensor(x.cpu().numpy())
+      torch_x = torch.tensor(x.numpy())
       torch_z, torch_hc = torch_layer(torch_x, torch_hc)
       np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)


@@ -104,7 +104,7 @@ class TestJit(unittest.TestCase):
     def f(a, b): return (a+b).realize()
     a = Tensor([1, 2, 3])
     for i in range(5):
-      np.testing.assert_allclose(f(a, Tensor([i])).cpu().numpy(), (a+i).cpu().numpy(), atol=1e-4, rtol=1e-5)
+      np.testing.assert_allclose(f(a, Tensor([i])).numpy(), (a+i).numpy(), atol=1e-4, rtol=1e-5)
     assert len(f.jit_cache) == 1
 
   def test_jit_output_non_tensor_fail(self):


@@ -44,7 +44,7 @@ class TestNN(unittest.TestCase):
     outt = bn(inn)
 
     # in torch
-    toutt = tbn(torch.tensor(inn.cpu().numpy()))
+    toutt = tbn(torch.tensor(inn.numpy()))
 
     # close
     np.testing.assert_allclose(outt.numpy(), toutt.detach().numpy(), rtol=5e-4, atol=1e-6)
@@ -68,7 +68,7 @@ class TestNN(unittest.TestCase):
     torch_layer = torch.nn.Linear(in_dim, out_dim).eval()
     torch_layer.weight[:] = torch.tensor(model.weight.numpy(), dtype=torch.float32)
     torch_layer.bias[:] = torch.tensor(model.bias.numpy(), dtype=torch.float32)
-    torch_x = torch.tensor(x.cpu().numpy(), dtype=torch.float32)
+    torch_x = torch.tensor(x.numpy(), dtype=torch.float32)
     torch_z = torch_layer(torch_x)
 
     # test
@@ -94,7 +94,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.uniform(BS, C1, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)
@@ -114,7 +114,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.uniform(BS, C1, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)
@@ -135,7 +135,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.uniform(BS, C1, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)
@@ -156,7 +156,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.uniform(BS, C1, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)
@@ -175,7 +175,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.randn(BS, C, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)
@@ -194,7 +194,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.randn(N, C, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)
@@ -213,7 +213,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.randn(N, C, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x.permute(0,2,3,1)).permute(0,3,1,2)
 
   def test_instancenorm_2d(self):
@@ -231,7 +231,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.randn(N, C, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)
@@ -251,7 +251,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor.randn(N, C, D, H, W)
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy())
+    torch_x = torch.tensor(x.numpy())
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-3, rtol=5e-3)
@@ -269,7 +269,7 @@ class TestNN(unittest.TestCase):
     # test
     x = Tensor(np.random.randint(0, VS, (B, T)).astype(np.float32))
     z = layer(x)
-    torch_x = torch.tensor(x.cpu().numpy().astype(np.int32))
+    torch_x = torch.tensor(x.numpy().astype(np.int32))
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
@@ -281,7 +281,7 @@ class TestNN(unittest.TestCase):
     for _ in range(3):
       x = Tensor(np.random.randint(0, VS, (B, T)).astype(np.float32))
       z = layer_jit(x)
-      torch_x = torch.tensor(x.cpu().numpy().astype(np.int32))
+      torch_x = torch.tensor(x.numpy().astype(np.int32))
       torch_z = torch_layer(torch_x)
       np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)


@@ -45,7 +45,7 @@ def kstest(l1, l2):
 def normal_test(func, shape=(20, 23), alpha=0.05):
   Tensor.manual_seed(1337)
   np.random.seed(1337)
-  x = func(*shape).cpu().numpy().flatten()
+  x = func(*shape).numpy().flatten()
   y = np.random.randn(*shape).flatten()
   return kstest(x, y) >= alpha
@@ -53,7 +53,7 @@ def equal_distribution(tiny_func, torch_func, numpy_func=None, shape=(20, 23), a
   Tensor.manual_seed(1337)
   torch.manual_seed(1337)
   np.random.seed(1337)
-  x = tiny_func(*shape).cpu().numpy().flatten()
+  x = tiny_func(*shape).numpy().flatten()
   if numpy_func is not None: y = numpy_func(shape).flatten()
   z = torch_func(shape).numpy().flatten()
   return (numpy_func is None or kstest(x, y) >= alpha) and kstest(x, z) >= alpha


@@ -74,7 +74,7 @@ def helper_test_speed(f1, *args):
     if i >= 1: ets.append(et)
     if GlobalCounters.global_ops:
       save_ops, save_mem = GlobalCounters.global_ops, GlobalCounters.global_mem
-  return ret.cpu().numpy(), np.min(ets)
+  return ret.numpy(), np.min(ets)
 
 def helper_test_generic_square(name, N, f1, f2, onearg=False):
   torch.manual_seed(0)


@@ -14,8 +14,8 @@ class TestSymbolicJit(unittest.TestCase):
     vi = Variable("i", 1, 10)
     for i in range(1, 5):
       a = Tensor.rand(3, i)
-      symbolic = jf(a.reshape(3, vi)).reshape(3, i).cpu().numpy()
-      expected = f(a).cpu().numpy()
+      symbolic = jf(a.reshape(3, vi)).reshape(3, i).numpy()
+      expected = f(a).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -26,8 +26,8 @@ class TestSymbolicJit(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(3, i)
-      symbolic = jf(a.reshape(3, vi), b.reshape(3, vi)).reshape(3, i).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = jf(a.reshape(3, vi), b.reshape(3, vi)).reshape(3, i).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -38,8 +38,8 @@ class TestSymbolicJit(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(i, 5)
-      symbolic = jf(a.reshape(3, vi), b.reshape(vi, 5)).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = jf(a.reshape(3, vi), b.reshape(vi, 5)).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -53,8 +53,8 @@ class TestSymbolicJit(unittest.TestCase):
       vi = Variable("i", 1, 10)
       a = Tensor.rand(3, i)
       b = Tensor.rand(i, 5)
-      symbolic = jf(a.reshape(3, vi), b.reshape(vi, 5)).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = jf(a.reshape(3, vi), b.reshape(vi, 5)).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 2
@@ -67,8 +67,8 @@ class TestSymbolicJit(unittest.TestCase):
       q = Tensor.rand(2, 1, 4, 8)
       k = Tensor.rand(2, i, 4, 8)
       v = Tensor.rand(2, i, 4, 8)
-      symbolic = jf(q, k.reshape(2, vi, 4, 8), v.reshape(2, vi, 4, 8)).reshape(2, 4, 1, 8).cpu().numpy()
-      expected = f(q, k, v).cpu().numpy()
+      symbolic = jf(q, k.reshape(2, vi, 4, 8), v.reshape(2, vi, 4, 8)).reshape(2, 4, 1, 8).numpy()
+      expected = f(q, k, v).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 6
@@ -79,8 +79,8 @@ class TestSymbolicJit(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(i, 3)
       b = Tensor.rand(2, 3)
-      symbolic = jf(a.reshape(vi, 3), b).reshape(i+2, 3).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = jf(a.reshape(vi, 3), b).reshape(i+2, 3).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -91,8 +91,8 @@ class TestSymbolicJit(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(3, 2)
-      symbolic = jf(a.reshape(3, vi), b).reshape(3, i+2).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = jf(a.reshape(3, vi), b).reshape(3, i+2).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -105,8 +105,8 @@ class TestSymbolicJit(unittest.TestCase):
       for j in range(1, 5):
         a = Tensor.rand(i, 3)
         b = Tensor.rand(j, 3)
-        symbolic = jf(a.reshape(vi, 3), b.reshape(vj, 3)).reshape(i+j, 3).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = jf(a.reshape(vi, 3), b.reshape(vj, 3)).reshape(i+j, 3).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -119,8 +119,8 @@ class TestSymbolicJit(unittest.TestCase):
      for j in range(1, 5):
         a = Tensor.rand(3, i)
         b = Tensor.rand(3, j)
-        symbolic = jf(a.reshape(3, vi), b.reshape(3, vj)).reshape(3, i+j).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = jf(a.reshape(3, vi), b.reshape(3, vj)).reshape(3, i+j).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1
@@ -133,8 +133,8 @@ class TestSymbolicJit(unittest.TestCase):
      for j in range(1, 5):
         a = Tensor.rand(i, 3)
         b = Tensor.rand(3, j)
-        symbolic = jf(a.reshape(vi, 3), b.reshape(3, vj)).reshape(i, j).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = jf(a.reshape(vi, 3), b.reshape(3, vj)).reshape(i, j).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
     assert len(jf.jit_cache) == 1


@@ -12,8 +12,8 @@ class TestSymbolicOps(unittest.TestCase):
     vi = Variable("i", 1, 10)
     for i in range(1, 5):
       a = Tensor.rand(3, i)
-      symbolic = f(a.reshape(3, vi)).reshape(3, i).cpu().numpy()
-      expected = f(a).cpu().numpy()
+      symbolic = f(a.reshape(3, vi)).reshape(3, i).numpy()
+      expected = f(a).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_add(self):
@@ -22,8 +22,8 @@ class TestSymbolicOps(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(3, i)
-      symbolic = f(a.reshape(3, vi), b.reshape(3, vi)).reshape(3, i).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = f(a.reshape(3, vi), b.reshape(3, vi)).reshape(3, i).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_matmul(self):
@@ -32,8 +32,8 @@ class TestSymbolicOps(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(i, 5)
-      symbolic = f(a.reshape(3, vi), b.reshape(vi, 5)).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = f(a.reshape(3, vi), b.reshape(vi, 5)).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_matmul_same_var_different_val(self):
@@ -42,7 +42,7 @@ class TestSymbolicOps(unittest.TestCase):
     a = Tensor.rand(3, 4)
     b = Tensor.rand(7, 5)
     with self.assertRaises(AssertionError):
-      f(a.reshape(3, vi), b.reshape(vi, 5)).cpu().numpy()
+      f(a.reshape(3, vi), b.reshape(vi, 5)).numpy()
 
   @unittest.skipIf(Device.DEFAULT == "CLANG" and CI, "broken on CLANG CI")
   def test_attention(self):
@@ -52,8 +52,8 @@ class TestSymbolicOps(unittest.TestCase):
       q = Tensor.rand(2, 1, 4, 8)
       k = Tensor.rand(2, i, 4, 8)
       v = Tensor.rand(2, i, 4, 8)
-      symbolic = f(q, k.reshape(2, vi, 4, 8), v.reshape(2, vi, 4, 8)).reshape(2, 4, 1, 8).cpu().numpy()
-      expected = f(q, k, v).cpu().numpy()
+      symbolic = f(q, k.reshape(2, vi, 4, 8), v.reshape(2, vi, 4, 8)).reshape(2, 4, 1, 8).numpy()
+      expected = f(q, k, v).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_cat_dim0(self):
@@ -62,8 +62,8 @@ class TestSymbolicOps(unittest.TestCase):
     for i in range(1, 5):
      a = Tensor.rand(i, 3)
       b = Tensor.rand(2, 3)
-      symbolic = f(a.reshape(vi, 3), b).reshape(i+2, 3).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = f(a.reshape(vi, 3), b).reshape(i+2, 3).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_cat_dim1(self):
@@ -72,8 +72,8 @@ class TestSymbolicOps(unittest.TestCase):
     for i in range(1, 5):
       a = Tensor.rand(3, i)
       b = Tensor.rand(3, 2)
-      symbolic = f(a.reshape(3, vi), b).reshape(3, i+2).cpu().numpy()
-      expected = f(a, b).cpu().numpy()
+      symbolic = f(a.reshape(3, vi), b).reshape(3, i+2).numpy()
+      expected = f(a, b).numpy()
       np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_cat_dim0_two_vars(self):
@@ -84,8 +84,8 @@ class TestSymbolicOps(unittest.TestCase):
      for j in range(1, 5):
         a = Tensor.rand(i, 3)
         b = Tensor.rand(j, 3)
-        symbolic = f(a.reshape(vi, 3), b.reshape(vj, 3)).reshape(i+j, 3).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = f(a.reshape(vi, 3), b.reshape(vj, 3)).reshape(i+j, 3).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_cat_dim1_two_vars(self):
@@ -96,8 +96,8 @@ class TestSymbolicOps(unittest.TestCase):
      for j in range(1, 5):
         a = Tensor.rand(3, i)
         b = Tensor.rand(3, j)
-        symbolic = f(a.reshape(3, vi), b.reshape(3, vj)).reshape(3, i+j).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = f(a.reshape(3, vi), b.reshape(3, vj)).reshape(3, i+j).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
   def test_two_vars_plus1(self):
@@ -108,8 +108,8 @@ class TestSymbolicOps(unittest.TestCase):
      for j in range(1, 5):
         a = Tensor.rand(i, 3)
         b = Tensor.rand(3, j)
-        symbolic = f(a.reshape(vi, 3), b.reshape(3, vj)).reshape(i, j).cpu().numpy()
-        expected = f(a, b).cpu().numpy()
+        symbolic = f(a.reshape(vi, 3), b.reshape(3, vj)).reshape(i, j).numpy()
+        expected = f(a, b).numpy()
         np.testing.assert_allclose(symbolic, expected, atol=1e-6, rtol=1e-6)
 
 if __name__ == '__main__':


@@ -37,7 +37,7 @@ class TestTinygrad(unittest.TestCase):
       out = out.log_softmax()
       out = out.mul(m).add(m).sum()
       out.backward()
-      return out.cpu().numpy(), x.grad.cpu().numpy(), W.grad.cpu().numpy()
+      return out.numpy(), x.grad.numpy(), W.grad.numpy()
 
     def test_pytorch():
       x = torch.tensor(x_init, requires_grad=True)
@@ -64,7 +64,7 @@ class TestTinygrad(unittest.TestCase):
       out = out.log_softmax()
       out = out.sum()
       out.backward()
-      return out.cpu().numpy(), u.cpu().grad.numpy(), v.cpu().grad.numpy(), w.cpu().grad.numpy()
+      return out.numpy(), u.grad.numpy(), v.grad.numpy(), w.grad.numpy()
 
    def test_pytorch():
       u = torch.tensor(U_init, requires_grad=True)
@@ -100,7 +100,7 @@ class TestTinygrad(unittest.TestCase):
     Tensor.training = True
     n, rate = 1_000_000, 0.1
     w = Tensor.ones(n).dropout(rate)
-    non_zeros = np.count_nonzero(w.cpu().numpy())
+    non_zeros = np.count_nonzero(w.numpy())
     expected = n * (1 - rate)
     np.testing.assert_allclose(non_zeros, expected, rtol=2e-3)