Fix evaluation stage in examples/transformer.py when using CUDA (#1150)

* make test data a contiguous array
* standardise on contiguous arrays for all input data in CUDA ops
* swap to x.ravel()
2026-01-09 23:18:04 -05:00 · 2023-07-07 09:07:10 +08:00
parent 9975f24452
commit c5aea13a65
1 changed file with 1 addition and 1 deletion
--- a/tinygrad/runtime/ops_cuda.py
+++ b/tinygrad/runtime/ops_cuda.py
@@ -49,7 +49,7 @@ else:
  import pycuda.driver as cuda # type: ignore
  class RawCUDABuffer(RawBufferCopyInOut): # type: ignore
    def __init__(self, size, dtype): super().__init__(size, dtype, cuda.mem_alloc(size * dtype.itemsize)) # type: ignore
-    def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x, stream) # type: ignore
+    def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x.ravel(), stream) # type: ignore
    def _copyout(self, x:np.ndarray): cuda.memcpy_dtoh(x, self._buf) # type: ignore

 class CUDAProgram: