Add configurable wavefront size support for Navi/MI.

[To squash] Configurable warp size in test_core_amd.py::test_convert2d. Note: the test_core_amd.py::test_convert2d unit tests have been changed because some of the old layouts exceed the shared memory limit (64 KiB).
2026-04-05 03:01:17 -04:00 · 2023-06-20 18:59:46 +00:00
parent 2cb9ecd009
commit 75b86da598
11 changed files with 82 additions and 69 deletions
--- a/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
+++ b/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
@@ -149,7 +149,8 @@ private:
      SmallVector<Value> mmaColIdx(4);
      SmallVector<Value> mmaRowIdx(2);
      Value threadId = getThreadId(rewriter, loc);
-      Value warpSize = i32_val(32);
+      unsigned iWaveSize = triton::gpu::getWarpSize(layout);
+      Value warpSize = i32_val(iWaveSize);
      Value laneId = urem(threadId, warpSize);
      Value warpId = udiv(threadId, warpSize);
      // TODO: fix the bug in MMAEncodingAttr document
@@ -207,7 +208,8 @@ private:
      SmallVector<Value> mfmaColIdx(4);
      SmallVector<Value> mfmaRowIdx(16);
      Value threadId = getThreadId(rewriter, loc);
-      Value warpSize = i32_val(64);
+      unsigned iWaveSize = triton::gpu::getWarpSize(layout);
+      Value warpSize = i32_val(iWaveSize);
      Value laneId = urem(threadId, warpSize);
      Value warpId = udiv(threadId, warpSize);
      // TODO: fix the bug in MMAEncodingAttr document