Add configurable wavefront size support for Navi/MI.

[To squash] Configurable warp size in test_core_amd.py::test_convert2d

Note: the test_core_amd.py::test_convert2d unit tests have been changed
because some of the old layouts exceed the shared memory limit (64 KiB).
This commit is contained in:
Xinya Zhang
2023-06-20 18:59:46 +00:00
parent 2cb9ecd009
commit 75b86da598
11 changed files with 82 additions and 69 deletions

View File

@@ -149,7 +149,8 @@ private:
SmallVector<Value> mmaColIdx(4);
SmallVector<Value> mmaRowIdx(2);
Value threadId = getThreadId(rewriter, loc);
Value warpSize = i32_val(32);
unsigned iWaveSize = triton::gpu::getWarpSize(layout);
Value warpSize = i32_val(iWaveSize);
Value laneId = urem(threadId, warpSize);
Value warpId = udiv(threadId, warpSize);
// TODO: fix the bug in MMAEncodingAttr document
@@ -207,7 +208,8 @@ private:
SmallVector<Value> mfmaColIdx(4);
SmallVector<Value> mfmaRowIdx(16);
Value threadId = getThreadId(rewriter, loc);
Value warpSize = i32_val(64);
unsigned iWaveSize = triton::gpu::getWarpSize(layout);
Value warpSize = i32_val(iWaveSize);
Value laneId = urem(threadId, warpSize);
Value warpId = udiv(threadId, warpSize);
// TODO: fix the bug in MMAEncodingAttr document