diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a5a04c1929..258288bd58 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,7 +43,7 @@ jobs: PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py - name: Test emulated CUDA tensor cores - run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm + run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16 - name: Full test tensor cores run: | PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores diff --git a/test/test_ops.py b/test/test_ops.py index 43e2d3795e..24bb3d6798 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -795,10 +795,13 @@ class TestOps(unittest.TestCase): np.arange(64,128,dtype=np.float32).reshape(8,8)]) def test_small_gemm_eye(self): helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.eye(8).astype(np.float32), np.eye(8).astype(np.float32)]) + @unittest.skipIf(Device.DEFAULT in ["NV", "LLVM", "GPU", "CUDA"], "not supported on these in CI") + def test_gemm_fp16(self): + helper_test_op([(64,64), (64,64)], lambda x,y: x.half().matmul(y.half())) def test_gemm(self): - helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y), Tensor.dot) + helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y)) def test_big_gemm(self): - helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-4) + helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), atol=1e-4) @unittest.skipIf(IMAGE>0, "no 0 in shape matmul on images") def test_gemm_with_zeros_shape(self): helper_test_op([(8,8), (8,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)