mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Fix masked load (#262)
* Fix the issue with masked load
Cherry-picked from a0b60eb187
* Remove tests in test_gemm that use too much LDS
---------
Co-authored-by: Shucai Xiao <shucai.xiao@amd.com>
This commit is contained in:
@@ -31,11 +31,11 @@ def matmul_no_scf_kernel(
|
||||
|
||||
|
||||
@pytest.mark.parametrize('SIZE_M,SIZE_N,SIZE_K,NUM_WARPS', [
|
||||
[128, 256, 32, 4],
|
||||
[128, 64, 32, 4],
|
||||
[256, 128, 16, 4],
|
||||
[128, 16, 32, 4],
|
||||
[128, 32, 32, 4],
|
||||
[32, 128, 64, 4],
|
||||
[128, 128, 64, 4],
|
||||
[128, 32, 64, 4],
|
||||
[64, 128, 128, 4],
|
||||
[64, 128, 128, 2],
|
||||
])
|
||||
@@ -106,16 +106,16 @@ def get_variant_golden(a, b):
|
||||
[128, 64, 128, 4, 128, 64, 128],
|
||||
# K-Forloop
|
||||
[64, 32, 128, 4, 64, 32, 64],
|
||||
[128, 16, 128, 4, 128, 16, 32],
|
||||
[32, 16, 128, 4, 32, 16, 32],
|
||||
[128, 32, 128, 4, 128, 32, 32],
|
||||
[32, 32, 128, 4, 32, 32, 32],
|
||||
[32, 64, 128, 4, 32, 64, 32],
|
||||
[32, 128, 256, 4, 32, 128, 64],
|
||||
[64, 128, 64, 4, 64, 128, 32],
|
||||
[64, 64, 128, 4, 64, 64, 32],
|
||||
[128, 128, 64, 4, 128, 128, 32],
|
||||
[128, 128, 128, 4, 128, 128, 32],
|
||||
[128, 128, 256, 4, 128, 128, 64],
|
||||
[128, 256, 128, 4, 128, 256, 32],
|
||||
[128, 64, 64, 4, 128, 64, 32],
|
||||
[128, 64, 128, 4, 128, 64, 32],
|
||||
[64, 64, 256, 4, 64, 64, 64],
|
||||
[128, 64, 128, 4, 128, 64, 32],
|
||||
[256, 128, 64, 4, 256, 128, 16],
|
||||
[128, 64, 128, 4, 128, 64, 32],
|
||||
])
|
||||
|
||||
Reference in New Issue
Block a user