From e3f00ac77db2050c6fd26a7309322eee4e696031 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 23 Jul 2024 15:04:25 -0700
Subject: [PATCH] Fix cuda tc emu test (#5663)

* fix acc folding for NV tensor cores

* fix correctness of reduce_before_expand

* fix test emulated CUDA tensor cores

* test_gemm_fp16 on some devices
---
 .github/workflows/test.yml | 2 +-
 test/test_ops.py           | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a5a04c1929..258288bd58 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -43,7 +43,7 @@ jobs:
         PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
         PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
     - name: Test emulated CUDA tensor cores
-      run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
+      run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
     - name: Full test tensor cores
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
diff --git a/test/test_ops.py b/test/test_ops.py
index 43e2d3795e..24bb3d6798 100644
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -795,10 +795,13 @@ class TestOps(unittest.TestCase):
                                                                          np.arange(64,128,dtype=np.float32).reshape(8,8)])
   def test_small_gemm_eye(self):
     helper_test_op(None, lambda x,y: x.matmul(y), lambda x,y: x@y, vals=[np.eye(8).astype(np.float32), np.eye(8).astype(np.float32)])
+  @unittest.skipIf(Device.DEFAULT in ["NV", "LLVM", "GPU", "CUDA"], "not supported on these in CI")
+  def test_gemm_fp16(self):
+    helper_test_op([(64,64), (64,64)], lambda x,y: x.half().matmul(y.half()))
   def test_gemm(self):
-    helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y), Tensor.dot)
+    helper_test_op([(64,64), (64,64)], lambda x,y: x.matmul(y))
   def test_big_gemm(self):
-    helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-4)
+    helper_test_op([(256,256), (256,256)], lambda x,y: x.matmul(y), atol=1e-4)
   @unittest.skipIf(IMAGE>0, "no 0 in shape matmul on images")
   def test_gemm_with_zeros_shape(self):
     helper_test_op([(8,8), (8,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
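
Note (not part of the patch): a minimal sketch of what the new test_gemm_fp16 exercises, a 64x64 fp16 matmul through tinygrad's Tensor API checked against a numpy reference. The numpy fp16 reference and the tolerances below are illustrative assumptions; the real test goes through helper_test_op, which handles casting, backward passes, and per-dtype tolerances itself.

    # sketch of the fp16 GEMM path covered by TestOps.test_gemm_fp16 (assumed tolerances)
    import numpy as np
    from tinygrad import Tensor

    a = np.random.uniform(-1, 1, (64, 64)).astype(np.float32)
    b = np.random.uniform(-1, 1, (64, 64)).astype(np.float32)

    # fp16 matmul on the default device; under EMULATE_CUDA=1 PYTHON=1 this is the
    # path that hits the emulated CUDA tensor cores
    out = Tensor(a).half().matmul(Tensor(b).half()).numpy()

    # fp16 reference in numpy; loose tolerances since fp16 accumulation order differs per backend
    ref = (a.astype(np.float16) @ b.astype(np.float16)).astype(np.float32)
    assert np.allclose(out, ref, atol=1e-2, rtol=1e-2)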