Fix CUDA tensor core emulation test (#5663)

* fix accumulator folding for NV tensor cores

* fix correctness of reduce_before_expand

* fix test for emulated CUDA tensor cores

* add test_gemm_fp16 on some devices
This commit is contained in:
George Hotz
2024-07-23 15:04:25 -07:00
committed by GitHub
parent c34f9db0f7
commit e3f00ac77d
2 changed files with 6 additions and 3 deletions

View File

@@ -795,10 +795,13 @@ class TestOps(unittest.TestCase):
np.arange(64,128,dtype=np.float32).reshape(8,8)])
def test_small_gemm_eye(self):
helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.eye(8).astype(np.float32), np.eye(8).astype(np.float32)])
@unittest.skipIf(Device.DEFAULT in ["NV", "LLVM", "GPU", "CUDA"], "not supported on these in CI")
def test_gemm_fp16(self):
helper_test_op([(64,64), (64,64)], lambda x,y: x.half().matmul(y.half()))
def test_gemm(self):
helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y), Tensor.dot)
helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y))
def test_big_gemm(self):
helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-4)
helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), atol=1e-4)
@unittest.skipIf(IMAGE>0, "no 0 in shape matmul on images")
def test_gemm_with_zeros_shape(self):
helper_test_op([(8,8), (8,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)