Fix evaluation stage in examples/transformer.py when using CUDA (#1150)

* make the test data a contiguous array

* standardise on contiguous arrays for all input data in CUDA ops

* swap to x.ravel()
This commit is contained in:
Barath
2023-07-07 09:07:10 +08:00
committed by GitHub
parent 9975f24452
commit c5aea13a65

View File

@@ -49,7 +49,7 @@ else:
import pycuda.driver as cuda # type: ignore
# Raw buffer for the CUDA backend, built on RawBufferCopyInOut.
# NOTE: this is a diff hunk, so the old and the new `_copyin` lines both appear below.
class RawCUDABuffer(RawBufferCopyInOut): # type: ignore
# Allocates size * dtype.itemsize bytes with cuda.mem_alloc and passes the allocation to the base constructor.
def __init__(self, size, dtype): super().__init__(size, dtype, cuda.mem_alloc(size * dtype.itemsize)) # type: ignore
# Removed by this commit: the host array `x` was handed to memcpy_htod_async unchanged.
def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x, stream) # type: ignore
# Added by this commit: `x.ravel()` is passed instead of `x`; per the commit message the intent is that the
# copy always receives a contiguous array.
# NOTE(review): ravel() may return a temporary copy for non-contiguous input -- confirm it stays alive until
# the asynchronous copy on `stream` has completed.
def _copyin(self, x:np.ndarray, stream:Optional[cuda.Stream]=None): cuda.memcpy_htod_async(self._buf, x.ravel(), stream) # type: ignore
# Copies from self._buf into the host array `x` via memcpy_dtoh (presumably self._buf is the allocation made
# in __init__ -- set by the base class, not visible here).
def _copyout(self, x:np.ndarray): cuda.memcpy_dtoh(x, self._buf) # type: ignore
class CUDAProgram: