diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py
index 68c25b64c4..94577046df 100644
--- a/tinygrad/lazy.py
+++ b/tinygrad/lazy.py
@@ -184,8 +184,7 @@ class LazyBuffer:
     # NOTE: dtypes.from_np(self.dtype.np) to deal with image types
     return self.loadop(LoadOps.CONST, tuple(), dtypes.from_np(self.dtype.np), self.device, arg=val).reshape((1,)*len(self.shape)).expand(self.shape)
 
-  # NOTE: we also have to copy the numpy array on the way out...otherwise the underlying Tensor could be freed and use after free. improve this?
-  def toCPU(self):
+  def toCPU(self) -> np.ndarray:
     assert self.dtype.np, f"{self.dtype} is not supported in toCPU"
     realized = self.cast((dtypes.from_np(self.dtype.np), False)).contiguous().realize().realized
     return cast(RawBuffer, realized).toCPU().reshape(self.shape)
@@ -371,6 +370,8 @@ def _realize_custom(buffer: LazyBuffer) -> None:
 
 def _realize_from(buffer: LazyBuffer) -> None:
   rawbuf = buffer.op.src[0].realize()
+  assert rawbuf.realized, "realize failed?"
+  if DEBUG >= 3: print(f"*** copy {buffer.device} <- {rawbuf.device} size {rawbuf.realized.size} dtype {rawbuf.realized.dtype}")
   # TODO: make this generic
   if isinstance(rawbuf.realized, RawDiskBuffer) and issubclass(Device[buffer.device].buffer, RawBufferMapped):
     buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 75c08fde22..60d78374ed 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -109,7 +109,7 @@ class Tensor:
     return self
 
   def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
+  def numpy(self) -> np.ndarray: return self.to('CPU').lazydata.toCPU()
 
   # TODO: if things are realized this won't work
   def to_(self, device:str):
@@ -117,7 +117,7 @@ class Tensor:
     self.lazydata.device = device
     if self.grad: self.grad.to_(device)
 
-  def to(self, device:str):
+  def to(self, device:str) -> Tensor:
     ret = Tensor(self.lazydata, device)
     if self.grad: ret.grad = self.grad.to(device)
     return ret
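
A minimal usage sketch of the API-visible change (a sketch only, assuming the Tensor constructor and default-device behavior at this commit; the input values are arbitrary). Routing numpy() through to('CPU') appears to address the NOTE removed from toCPU(): the data is realized into a fresh CPU buffer on the way out, so the returned ndarray no longer aliases a buffer that could be freed along with the original Tensor.

import numpy as np
from tinygrad.tensor import Tensor

t = Tensor([1.0, 2.0, 3.0])   # lazy buffer on the default device
out = t.numpy()               # now copies via t.to('CPU') before toCPU()
assert isinstance(out, np.ndarray)
del t                         # the returned ndarray should stay valid
print(out)                    # expected: [1. 2. 3.]

Running with DEBUG=3 set in the environment should also exercise the new print in _realize_from, logging the device-to-device copy whenever the source tensor lives off-CPU.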