mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[FEATURE] Add associative_scan support (#1858)
Implement associative_scan in the front end and implement lowering to LLVM for blocked layout where the scan happens on the fastest moving dimension. This will later be generalized to support more layout.
This commit is contained in:
@@ -168,6 +168,10 @@ private:
|
||||
ReduceOpHelper helper(reduceOp);
|
||||
unsigned bytes = helper.getScratchSizeInBytes();
|
||||
allocation->addBuffer<BufferT::BufferKind::Scratch>(op, bytes);
|
||||
} else if (auto scanOp = dyn_cast<triton::ScanOp>(op)) {
|
||||
ScanLoweringHelper helper(scanOp);
|
||||
unsigned bytes = helper.getScratchSizeInBytes();
|
||||
allocation->addBuffer<BufferT::BufferKind::Scratch>(op, bytes);
|
||||
} else if (auto cvtLayout = dyn_cast<triton::gpu::ConvertLayoutOp>(op)) {
|
||||
auto srcTy = cvtLayout.getSrc().getType().cast<RankedTensorType>();
|
||||
auto dstTy = cvtLayout.getResult().getType().cast<RankedTensorType>();
|
||||
|
||||
Reference in New Issue
Block a user