Fix masked load (#262)

* Fix the issue with masked load

Cherry-picked from a0b60eb187

* Remove tests in test_gemm that use too much LDS

---------

Co-authored-by: Shucai Xiao <shucai.xiao@amd.com>
This commit is contained in:
Lixun Zhang
2023-07-26 10:45:21 -05:00
committed by GitHub
parent 1cccf14f62
commit 2fbffe2784
2 changed files with 39 additions and 35 deletions

View File

@@ -31,11 +31,11 @@ def matmul_no_scf_kernel(
@pytest.mark.parametrize('SIZE_M,SIZE_N,SIZE_K,NUM_WARPS', [
[128, 256, 32, 4],
[128, 64, 32, 4],
[256, 128, 16, 4],
[128, 16, 32, 4],
[128, 32, 32, 4],
[32, 128, 64, 4],
[128, 128, 64, 4],
[128, 32, 64, 4],
[64, 128, 128, 4],
[64, 128, 128, 2],
])
@@ -106,16 +106,16 @@ def get_variant_golden(a, b):
[128, 64, 128, 4, 128, 64, 128],
# K-Forloop
[64, 32, 128, 4, 64, 32, 64],
[128, 16, 128, 4, 128, 16, 32],
[32, 16, 128, 4, 32, 16, 32],
[128, 32, 128, 4, 128, 32, 32],
[32, 32, 128, 4, 32, 32, 32],
[32, 64, 128, 4, 32, 64, 32],
[32, 128, 256, 4, 32, 128, 64],
[64, 128, 64, 4, 64, 128, 32],
[64, 64, 128, 4, 64, 64, 32],
[128, 128, 64, 4, 128, 128, 32],
[128, 128, 128, 4, 128, 128, 32],
[128, 128, 256, 4, 128, 128, 64],
[128, 256, 128, 4, 128, 256, 32],
[128, 64, 64, 4, 128, 64, 32],
[128, 64, 128, 4, 128, 64, 32],
[64, 64, 256, 4, 64, 64, 64],
[128, 64, 128, 4, 128, 64, 32],
[256, 128, 64, 4, 256, 128, 16],
[128, 64, 128, 4, 128, 64, 32],
])