mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[BACKEND] Fix reductions when the number of unique elements is smaller than the layout (#1913)
Fix the calculation of the number of unique threads within a warp: the number of elements per thread must be taken into account. Also change the layout test to use an integer sum so that it catches bugs involving unique data, since a max reduction may hide those kinds of problems.
This commit is contained in:
@@ -918,7 +918,8 @@ unsigned ModuleAxisInfoAnalysis::getPtrContiguity(Value ptr) {
|
||||
auto order = triton::gpu::getOrder(layout);
|
||||
unsigned align = getPtrAlignment(ptr);
|
||||
|
||||
auto uniqueContigPerThread = triton::gpu::getUniqueContigPerThread(tensorTy);
|
||||
auto uniqueContigPerThread =
|
||||
triton::gpu::getUniqueContigPerThread(layout, tensorTy.getShape());
|
||||
assert(order[0] < uniqueContigPerThread.size() &&
|
||||
"Unxpected uniqueContigPerThread size");
|
||||
unsigned contiguity = uniqueContigPerThread[order[0]];
|
||||
|
||||
Reference in New Issue
Block a user