All devices are equal! (#196)

* Update all devices to be tested. ANE, CPU and OCL all now support all tests. However, tests are not currently passing on GPU and I cannot test on CPU. Failing GPU tests are not an issue caused by this update. Tests have not been passing due to a missing "six" required installation. OpenCL tests have not been run since commit: 1a1c63a08b. Devices have 3 types and are handled by a new DeviceTypes enum. (The goal is to revert to Tensor.<type>, but this current setup allows for keyword argument defaults: `device=DeviceType.CPU`) All references to Tensor.GPU/CPU/ANE have been converted to the corresponding `DeviceTypes` enum. Refactor of the conversion code to allow for any device to any device conversion. * Add six dependency in requirements.txt * Resolve failure to run tests Move six into gpu required installs. Remove six from standard installation. * Remove repeated data conversion * Refactor method names Also reduce code with .to and .to_ * Dynamic device handlers * Refactor DeviceTypes -> Device * Add mem copy profiling back * test_backward_pass_diamond_model passing * Resolve Sum issue on GPU * Revert batchnorm2d tests * Update README with updated API * ANE testing with * Last minute line gains
2026-01-08 22:48:25 -05:00 · 2020-12-16 08:44:08 +01:00
parent 78210b5e40
commit bcf1518309
15 changed files with 246 additions and 181 deletions
--- a/extra/training.py
+++ b/extra/training.py
@@ -2,21 +2,22 @@ import os
 import numpy as np
 from tqdm import trange
 from extra.utils import get_parameters
-from tinygrad.tensor import Tensor, GPU
+from tinygrad.tensor import Tensor, GPU, Device

-def train(model, X_train, Y_train, optim, steps, num_classes=None, BS=128, gpu=False, lossfn = lambda out,y: out.mul(y).mean()):
-  if gpu is True: [x.cuda_() for x in get_parameters([model, optim])]
+def train(model, X_train, Y_train, optim, steps, num_classes=None, BS=128, device=Device.CPU, lossfn = lambda out,y: out.mul(y).mean()):
+  if device == Device.GPU: [x.gpu_() for x in get_parameters([model, optim])]
+  elif device == Device.ANE: [x.ane_() for x in get_parameters([model, optim])]
  if num_classes is None: num_classes = Y_train.max().astype(int)+1
  losses, accuracies = [], []
  for i in (t := trange(steps, disable=os.getenv('CI') is not None)):
    samp = np.random.randint(0, X_train.shape[0], size=(BS))

-    x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32), gpu=gpu)
+    x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32), device=device)
    Y = Y_train[samp]
    y = np.zeros((len(samp),num_classes), np.float32)
    # correct loss for NLL, torch NLL loss returns one per row
    y[range(y.shape[0]),Y] = -1.0*num_classes
-    y = Tensor(y, gpu=gpu)
+    y = Tensor(y, device=device)

    # network
    out = model.forward(x)
@@ -36,11 +37,11 @@ def train(model, X_train, Y_train, optim, steps, num_classes=None, BS=128, gpu=F
    accuracies.append(accuracy)
    t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))

-def evaluate(model, X_test, Y_test, num_classes=None, gpu=False, BS=128):
+def evaluate(model, X_test, Y_test, num_classes=None, device=Device.CPU, BS=128):
  def numpy_eval(num_classes):
    Y_test_preds_out = np.zeros((len(Y_test),num_classes))
    for i in trange(len(Y_test)//BS, disable=os.getenv('CI') is not None):
-      Y_test_preds_out[i*BS:(i+1)*BS] = model.forward(Tensor(X_test[i*BS:(i+1)*BS].reshape((-1, 28*28)).astype(np.float32), gpu=gpu)).cpu().data
+      Y_test_preds_out[i*BS:(i+1)*BS] = model.forward(Tensor(X_test[i*BS:(i+1)*BS].reshape((-1, 28*28)).astype(np.float32), device=device)).cpu().data
    Y_test_preds = np.argmax(Y_test_preds_out, axis=1)
    return (Y_test == Y_test_preds).mean()