diff --git a/examples/train_efficientnet.py b/examples/train_efficientnet.py
index 688793c105..4346ef534b 100644
--- a/examples/train_efficientnet.py
+++ b/examples/train_efficientnet.py
@@ -12,7 +12,7 @@ if __name__ == "__main__":
 
   img = np.zeros((BS,3,224,224), dtype=np.float32)
 
   for i in range(32):
-    print("running batch %d" % i)
+    print("running batch %d, %d tensors allocated" % (i, Tensor.allocated))
     st = time.time()
     out = model.forward(Tensor(img))
@@ -31,3 +31,5 @@ if __name__ == "__main__":
 
     et = time.time()
     print("backward %.2f s" % (et-st))
+    del out, y, loss
+
diff --git a/test/test_gc.py b/test/test_gc.py
new file mode 100644
index 0000000000..524c9d0206
--- /dev/null
+++ b/test/test_gc.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+import unittest
+from tinygrad.tensor import Tensor, GPU
+
+class TestGC(unittest.TestCase):
+  gpu = False
+  def test_gc(self):
+    a = Tensor.zeros(4,4, gpu=self.gpu)
+    b = Tensor.zeros(4,4, gpu=self.gpu)
+    (a*b).mean().backward()
+    assert(Tensor.allocated > 0)
+    del a,b
+    assert(Tensor.allocated == 0)
+
+  def test_gc_complex(self):
+    a = Tensor.zeros(4,4, gpu=self.gpu)
+    b = Tensor.zeros(4,4, gpu=self.gpu)
+    assert(Tensor.allocated == 2)
+    (a*b).mean().backward()
+    assert(Tensor.allocated == 4)
+    del b
+    assert(Tensor.allocated == 2)
+    b = Tensor.zeros(4,4, gpu=self.gpu)
+    print(Tensor.allocated)
+    (a*b).mean().backward()
+    print(Tensor.allocated)
+    assert(Tensor.allocated == 4)
+    del b
+    assert(Tensor.allocated == 2)
+
+
+
+if GPU:
+  class TestGCGPU(TestGC):
+    gpu = True
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index ba46bb008e..838acb7629 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -59,9 +59,21 @@ class GPUBuffer:
 
 # **** start with two base classes ****
 
+def deepwalk(node, visited=None, nodes=None):
+  if visited == None and nodes == None:
+    visited, nodes = set(), []
+  visited.add(node)
+  if node._ctx:
+    for i in node._ctx.parents:
+      if i not in visited:
+        deepwalk(i, visited, nodes)
+  nodes.append(node)
+  return nodes
+
 class Tensor:
   did_float_warning = False
   default_gpu = False
+  allocated = 0
 
   def __init__(self, data, gpu=None):
     if gpu is None:
@@ -89,6 +101,12 @@
 
     # internal variables used for autograd graph construction
     self._ctx = None
+    Tensor.allocated += 1
+
+  def __del__(self):
+    #print("cleanup", self.shape)
+    Tensor.allocated -= 1
+
 
   def __repr__(self):
     return "Tensor %r with grad %r" % (self.data, self.grad.data if self.grad else None)
@@ -104,20 +122,20 @@
     return self.data.dtype
 
   @staticmethod
-  def zeros(*shape):
-    return Tensor(np.zeros(shape, dtype=np.float32))
+  def zeros(*shape, gpu=None):
+    return Tensor(np.zeros(shape, dtype=np.float32), gpu)
 
   @staticmethod
-  def ones(*shape):
-    return Tensor(np.ones(shape, dtype=np.float32))
+  def ones(*shape, gpu=None):
+    return Tensor(np.ones(shape, dtype=np.float32), gpu)
 
   @staticmethod
-  def randn(*shape):
-    return Tensor(np.random.randn(*shape).astype(np.float32))
+  def randn(*shape, gpu=None):
+    return Tensor(np.random.randn(*shape).astype(np.float32), gpu)
 
   @staticmethod
-  def eye(dim):
-    return Tensor(np.eye(dim).astype(np.float32))
+  def eye(dim, gpu=None):
+    return Tensor(np.eye(dim).astype(np.float32), gpu)
 
   def backward(self, allow_fill=True):
     if self._ctx is None:
@@ -129,17 +147,7 @@
       assert self.shape == (1,)
       self.grad = Tensor(np.ones(self.shape, dtype=self.dtype), gpu=self.gpu)
 
-    visited, nodes = set(), []
-    def deepwalk(node):
-      visited.add(node)
-      if node._ctx:
-        for i in node._ctx.parents:
-          if i not in visited:
-            deepwalk(i)
-      nodes.append(node)
-    deepwalk(self)
-
-    for t0 in reversed(nodes):
+    for t0 in reversed(deepwalk(self)):
       assert (t0.grad is not None)
       with ProfileOp(t0._ctx.__class__.__name__, [t0.grad], backward=True):
         grads = t0._ctx.backward(t0._ctx, t0.grad.data)
@@ -151,6 +159,8 @@
           assert g.shape == t.shape, \
             "grad shape must match tensor shape in %r, %r != %r" % (self._ctx, g.shape, t.shape)
           t.grad = Tensor(g) if t.grad is None else (t.grad + Tensor(g))
+          del t.grad._ctx # no backward pass through the add
+
 
 # ***** tinygrad supports CPU and GPU *****
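
Note (illustration, not part of the patch): a minimal sketch of the lifecycle
the new counter tracks, mirroring test/test_gc.py. Tensor.__init__ increments
Tensor.allocated and __del__ decrements it, so tensors are reclaimed by
CPython's reference counting the moment the last reference disappears. The
`del t.grad._ctx` in backward() helps here: per the inline comment, it drops
the accumulation op's context, so an accumulated grad neither retains its
operands nor gets traversed by a later backward pass.

  from tinygrad.tensor import Tensor

  a = Tensor.zeros(4, 4)     # Tensor.allocated == 1
  b = Tensor.zeros(4, 4)     # Tensor.allocated == 2
  (a*b).mean().backward()    # unreferenced intermediates are freed as the
                             # statement ends; a.grad and b.grad remain,
                             # so Tensor.allocated == 4
  del a, b                   # the grads die with their tensors, and with
                             # grad._ctx severed nothing else holds them
  assert Tensor.allocated == 0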