mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Add configurable wavefront size support for Navi/MI.
[To squash] Configurable warp size in test_core_amd.py::test_convert2d. Note: the test_core_amd.py::test_convert2d unit tests have been changed because some of the old layouts exceed the shared memory limit (64 KiB).
This commit is contained in:
@@ -149,7 +149,8 @@ private:
|
||||
SmallVector<Value> mmaColIdx(4);
|
||||
SmallVector<Value> mmaRowIdx(2);
|
||||
Value threadId = getThreadId(rewriter, loc);
|
||||
Value warpSize = i32_val(32);
|
||||
unsigned iWaveSize = triton::gpu::getWarpSize(layout);
|
||||
Value warpSize = i32_val(iWaveSize);
|
||||
Value laneId = urem(threadId, warpSize);
|
||||
Value warpId = udiv(threadId, warpSize);
|
||||
// TODO: fix the bug in MMAEncodingAttr document
|
||||
@@ -207,7 +208,8 @@ private:
|
||||
SmallVector<Value> mfmaColIdx(4);
|
||||
SmallVector<Value> mfmaRowIdx(16);
|
||||
Value threadId = getThreadId(rewriter, loc);
|
||||
Value warpSize = i32_val(64);
|
||||
unsigned iWaveSize = triton::gpu::getWarpSize(layout);
|
||||
Value warpSize = i32_val(iWaveSize);
|
||||
Value laneId = urem(threadId, warpSize);
|
||||
Value warpId = udiv(threadId, warpSize);
|
||||
// TODO: fix the bug in MMAEncodingAttr document
|
||||
|
||||
Reference in New Issue
Block a user