[FEATURE] Add associative_scan support (#1858)

Implement associative_scan in the front end and implement lowering to LLVM for blocked layouts where the scan happens on the fastest-moving dimension. This will later be generalized to support more layouts.
2026-04-05 03:01:17 -04:00 · 2023-06-29 14:37:51 -07:00
parent a5fb71eed8
commit 3be060849a
20 changed files with 801 additions and 10 deletions
--- a/lib/Analysis/Allocation.cpp
+++ b/lib/Analysis/Allocation.cpp
@@ -168,6 +168,10 @@ private:
      ReduceOpHelper helper(reduceOp);
      unsigned bytes = helper.getScratchSizeInBytes();
      allocation->addBuffer<BufferT::BufferKind::Scratch>(op, bytes);
+    } else if (auto scanOp = dyn_cast<triton::ScanOp>(op)) {
+      ScanLoweringHelper helper(scanOp);
+      unsigned bytes = helper.getScratchSizeInBytes();
+      allocation->addBuffer<BufferT::BufferKind::Scratch>(op, bytes);
    } else if (auto cvtLayout = dyn_cast<triton::gpu::ConvertLayoutOp>(op)) {
      auto srcTy = cvtLayout.getSrc().getType().cast<RankedTensorType>();
      auto dstTy = cvtLayout.getResult().getType().cast<RankedTensorType>();