mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-19 02:44:40 -05:00
add a return statement to the train function in order to provide access to the losses and accuracies lists
64 lines
2.3 KiB
Python
64 lines
2.3 KiB
Python
import numpy as np
|
|
from tqdm import trange
|
|
from tinygrad.tensor import Tensor, Device
|
|
from tinygrad.helpers import getenv
|
|
|
|
def sparse_categorical_crossentropy(out, Y):
    """Return a scalar loss Tensor for integer class labels ``Y``.

    A dense target array is built that holds ``-num_classes`` at each row's
    label index and zero elsewhere; it is multiplied elementwise with ``out``
    and the mean over all elements is returned.

    Args:
        out: model output whose last axis enumerates the classes.
            NOTE(review): presumably log-probabilities (e.g. log-softmax
            output) with shape ``Y.shape + (num_classes,)`` -- confirm
            against callers.
        Y: NumPy array of class indices; it is flattened and cast to int32.

    Returns:
        The result of ``out.mul(target).mean()``.
    """
    n_classes = out.shape[-1]
    labels = Y.flatten().astype(np.int32)

    target = np.zeros((labels.shape[0], n_classes), np.float32)
    # The final mean averages over every element (rows * classes), so each
    # selected entry is pre-scaled by the class count; the result then
    # matches a per-row NLL average (torch's NLL loss returns one value per row).
    target[np.arange(labels.shape[0]), labels] = -1.0 * n_classes

    # Restore the label array's original leading shape, classes last.
    target = target.reshape(list(Y.shape) + [n_classes])
    return out.mul(Tensor(target)).mean()
|
|
|
|
def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categorical_crossentropy,
          transform=lambda x: x, target_transform=lambda x: x, noloss=False):
    """Run ``steps`` optimisation steps on randomly sampled mini-batches.

    Sets ``Tensor.training = True`` before the loop and does not reset it.

    Args:
        model: object exposing ``forward(x)``, or a plain callable.
        X_train: indexable training inputs; ``X_train.shape[0]`` is the
            number of samples to draw from.
        Y_train: training targets, indexed with the same sample indices.
        optim: optimizer providing ``zero_grad()`` and ``step()``.
        steps: number of iterations to run.
        BS: mini-batch size (indices are sampled with replacement).
        lossfn: callable ``(out, y) -> loss`` whose result supports
            ``backward()``.
        transform: applied to each input batch before it is wrapped in a Tensor.
        target_transform: applied to each target batch.
        noloss: when true, skip loss/accuracy bookkeeping and the progress
            description entirely.

    Returns:
        ``[losses, accuracies]`` -- two lists with one entry per step
        (both empty when ``noloss`` is true).
    """
    Tensor.training = True
    losses = []
    accuracies = []

    progress = trange(steps, disable=getenv('CI', False))
    for _ in progress:
        # Draw a random batch of sample indices.
        batch_idx = np.random.randint(0, X_train.shape[0], size=(BS))
        x = Tensor(transform(X_train[batch_idx]), requires_grad=False)
        y = target_transform(Y_train[batch_idx])

        # Forward pass: prefer an explicit forward() method when present.
        if hasattr(model, 'forward'):
            out = model.forward(x)
        else:
            out = model(x)

        loss = lossfn(out, y)
        optim.zero_grad()
        loss.backward()
        if noloss:
            # Drop the reference before stepping; presumably so the loss can
            # be released early -- TODO confirm.
            del loss
        optim.step()
        if noloss:
            continue

        # Bookkeeping and progress-bar text.
        predicted = np.argmax(out.cpu().numpy(), axis=-1)
        accuracy = (predicted == y).mean()
        loss = loss.detach().cpu().numpy()

        losses.append(loss)
        accuracies.append(accuracy)
        progress.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))

    return [losses, accuracies]
|
|
|
|
|
|
def evaluate(model, X_test, Y_test, num_classes=None, BS=128, return_predict=False, transform=lambda x: x,
             target_transform=lambda y: y):
    """Compute and print the accuracy of ``model`` on a test set.

    Sets ``Tensor.training = False`` and does not reset it. The test inputs
    are processed in consecutive slices of ``BS`` samples.

    Args:
        model: object exposing ``forward(x)``, or a plain callable.
        X_test: sliceable test inputs.
        Y_test: NumPy array of test labels.
        num_classes: size of the class axis; when ``None`` it is taken as
            ``Y_test.max() + 1``.
        BS: batch size used for slicing.
        return_predict: when true, also return the predicted class indices.
        transform: applied to each input slice before it is wrapped in a Tensor.
        target_transform: applied to ``Y_test`` before comparison with the
            predictions.

    Returns:
        The accuracy, or ``(accuracy, predictions)`` when ``return_predict``
        is true.
    """
    Tensor.training = False

    if num_classes is None:
        num_classes = Y_test.max().astype(int) + 1

    # One score vector per label position, filled in batch by batch.
    scores = np.zeros(list(Y_test.shape) + [num_classes])
    n_batches = (len(Y_test) - 1) // BS + 1  # ceiling division
    for batch in trange(n_batches, disable=getenv('CI', False)):
        lo, hi = batch * BS, (batch + 1) * BS
        x = Tensor(transform(X_test[lo:hi]))
        if hasattr(model, 'forward'):
            out = model.forward(x)
        else:
            out = model(x)
        scores[lo:hi] = out.cpu().numpy()

    Y_test_pred = np.argmax(scores, axis=-1)
    expected = target_transform(Y_test)
    acc = (expected == Y_test_pred).mean()

    print("test set accuracy is %f" % acc)
    if return_predict:
        return (acc, Y_test_pred)
    return acc
|
|
|