From bf9ba8718ace27ff45c62551753aa2389db44dfc Mon Sep 17 00:00:00 2001
From: Liam <3579535@myuwc.ac.za>
Date: Sat, 12 Dec 2020 21:15:47 +0100
Subject: [PATCH] Profile GPU and CPU copying. (#182)

Moving memory is slow, and therefore monitoring the time spent converting
and limiting the number of copy operations can improve performance.
---
 tinygrad/tensor.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index a6e5a12782..976ee534d7 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -157,11 +157,12 @@ class Tensor:
 
   def cpu(self):
     if self.gpu:
-      ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
-      cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
-      if self.grad:
-        ret.grad = self.grad.cpu()
-      return ret
+      with ProfileOp("toCPU", [self]):
+        ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
+        cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
+        if self.grad:
+          ret.grad = self.grad.cpu()
+        return ret
     else:
       return self
 
@@ -173,11 +174,12 @@
     if not GPU:
       raise Exception("No GPU Support, install pyopencl")
     if not self.gpu:
-      require_init_gpu()
-      ret = Tensor(GPUBuffer(self.shape, self.data))
-      if self.grad:
-        ret.grad = self.grad.cuda()
-      return ret
+      with ProfileOp("toGPU", [self]):
+        require_init_gpu()
+        ret = Tensor(GPUBuffer(self.shape, self.data))
+        if self.grad:
+          ret.grad = self.grad.cuda()
+        return ret
     else:
       return self
 