mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[BACKEND] Add a configurable parameter for the number of threads per warp (#1719)
Add a configurable parameter for the number of threads per warp to support other GPUs, such as Intel GPUs. The parameter defaults to 32, so the code logic on CUDA/AMD GPUs is unchanged. Note: The Intel GPU GenX ISA is explicit SIMD and can support a variable number of thread lanes per HW execution unit.
This commit is contained in:
@@ -72,7 +72,9 @@ SmallVector<SmallVector<unsigned>> ReduceOpHelper::getScratchConfigsFast() {
|
||||
/// shared memory block1:
|
||||
auto mod = op->getParentOfType<ModuleOp>();
|
||||
unsigned numWarps = triton::gpu::TritonGPUDialect::getNumWarps(mod);
|
||||
smemShapes[1].push_back(numWarps * 32);
|
||||
unsigned threadsPerWarp =
|
||||
triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod);
|
||||
smemShapes[1].push_back(numWarps * threadsPerWarp);
|
||||
|
||||
return smemShapes;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user