lower test_gemm_8192 amd_tflops to 65 (#9364)

This commit is contained in:
chenyu
2025-03-05 14:06:11 -05:00
committed by GitHub
parent b3ac60ce53
commit 2cb2fce8d9

View File

@@ -91,7 +91,7 @@ class TestKernelSpeed(unittest.TestCase):
# theoretical is nv_tflops=165, amd_tflops=123
def test_gemm_4096(self):
  # Delegates to _test_matmul with 4096 and the per-vendor TFLOPS figures.
  # NOTE(review): the figures are presumably the minimum throughput the test
  # accepts (theoretical values are listed in the comment above) -- confirm
  # against _test_matmul.
  self._test_matmul(
    4096,
    nv_tflops=115,
    amd_tflops=80,
  )
# NOTE(review): this appears to be the pre-change side of the diff hunk
# (amd_tflops=70). The definition that follows reuses the same method name
# with amd_tflops=65, so if both were present in one class body this one
# would be shadowed.
def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=125, amd_tflops=70)
# Calls _test_matmul with 8192, nv_tflops=125 and amd_tflops=65; the commit
# title states that amd_tflops for this test was lowered to 65.
# NOTE(review): presumably these are the minimum accepted TFLOPS per vendor --
# confirm against _test_matmul.
def test_gemm_8192(self): self._test_matmul(8192, nv_tflops=125, amd_tflops=65)
# theoretical is nv_gbs=1008, amd_gbs=960
def test_gemv_16384_4096(self):
  # Delegates to _test_matmul with three positional arguments and the
  # per-vendor GB/s figures.
  # NOTE(review): the positional values are presumably the matmul dimensions
  # and the GB/s figures the minimum accepted bandwidth -- confirm against
  # _test_matmul.
  positional = (16384, 4096, 1)
  self._test_matmul(*positional, nv_gbs=840, amd_gbs=750)