mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
Fix evaluation stage in examples/transformer.py when using CUDA (#1150)
* make test data as contiguous array
* standardise contiguous array for all input data in cuda ops
* swap to x.ravel
This commit is contained in:
@@ -49,7 +49,7 @@ else:
|
||||
import pycuda.driver as cuda # type: ignore
|
||||
class RawCUDABuffer(RawBufferCopyInOut): # type: ignore
|
||||
def __init__(self, size, dtype): super().__init__(size, dtype, cuda.mem_alloc(size * dtype.itemsize)) # type: ignore
|
||||
def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x, stream) # type: ignore
|
||||
def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x.ravel(), stream) # type: ignore
|
||||
def _copyout(self, x:np.ndarray): cuda.memcpy_dtoh(x, self._buf) # type: ignore
|
||||
|
||||
class CUDAProgram:
|
||||
|
||||
Reference in New Issue
Block a user