From bf9ba8718ace27ff45c62551753aa2389db44dfc Mon Sep 17 00:00:00 2001
From: Liam <3579535@myuwc.ac.za>
Date: Sat, 12 Dec 2020 21:15:47 +0100
Subject: [PATCH] Profile GPU and CPU copying. (#182)

Moving memory is slow, and therefore monitoring the time spent converting
and limiting the number of copy operations can improve performance.
---
 tinygrad/tensor.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index a6e5a12782..976ee534d7 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -157,11 +157,12 @@ class Tensor:
 
   def cpu(self):
     if self.gpu:
-      ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
-      cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
-      if self.grad:
-        ret.grad = self.grad.cpu()
-      return ret
+      with ProfileOp("toCPU", [self]):
+        ret = Tensor(np.empty(self.shape, dtype=np.float32), gpu=False)
+        cl.enqueue_copy(cl_queue, ret.data, self.data.cl, is_blocking=True)
+        if self.grad:
+          ret.grad = self.grad.cpu()
+        return ret
     else:
       return self
 
@@ -173,11 +174,12 @@
     if not GPU:
       raise Exception("No GPU Support, install pyopencl")
     if not self.gpu:
-      require_init_gpu()
-      ret = Tensor(GPUBuffer(self.shape, self.data))
-      if self.grad:
-        ret.grad = self.grad.cuda()
-      return ret
+      with ProfileOp("toGPU", [self]):
+        require_init_gpu()
+        ret = Tensor(GPUBuffer(self.shape, self.data))
+        if self.grad:
+          ret.grad = self.grad.cuda()
+        return ret
     else:
       return self
 