[BACKEND] Add a configurable parameter for the number of threads per warp (#1719)

Add a configurable parameter for the number of threads per warp for other GPU. Like: Intel GPU. Make it default to be 32 not change code logic on the CUDA/AMD GPU. Note: The Intel GPU GenX ISA is explicit SIMD and can support variant number of threads lane per HW execution unit.
2026-04-05 03:01:17 -04:00 · 2023-06-03 07:55:06 +08:00
parent 035381aa28
commit 45ba9af6ed
12 changed files with 81 additions and 40 deletions
--- a/lib/Analysis/Utility.cpp
+++ b/lib/Analysis/Utility.cpp
@@ -72,7 +72,9 @@ SmallVector<SmallVector<unsigned>> ReduceOpHelper::getScratchConfigsFast() {
  /// shared memory block1:
  auto mod = op->getParentOfType<ModuleOp>();
  unsigned numWarps = triton::gpu::TritonGPUDialect::getNumWarps(mod);
-  smemShapes[1].push_back(numWarps * 32);
+  unsigned threadsPerWarp =
+      triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod);
+  smemShapes[1].push_back(numWarps * threadsPerWarp);

  return smemShapes;
 }