Files
concrete/compilers/concrete-compiler/compiler/lib/Transforms/SCFForallToSCFFor.cpp
Andi Drebes 5a7bf5aed2 feat(compiler): Add pass converting scf.forall loops to nested scf.for operations
This adds a new pass that converts `scf.forall` loops into nested
`scf.for` operations. The conversion carries parallel output tensors
from the original loop as dependencies through the loop nest and
replaces any occurrence of `tensor.parallel_insert_slice` operations
in the `scf.forall.in_parallel` terminator with equivalent
`tensor.insert_slice` operations.
2024-03-14 06:32:19 +01:00

172 lines
6.1 KiB
C++

// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
// for license information.
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "concretelang/Transforms/Passes.h"
namespace {
// Returns a copy of `operands` in which every entry that holds an
// `mlir::Value` is replaced by the result of looking that value up
// in `mapping` (a value without a mapping is kept as is). Entries
// that do not hold a value are forwarded unchanged.
llvm::SmallVector<mlir::OpFoldResult>
remapMixedOperands(llvm::ArrayRef<mlir::OpFoldResult> operands,
                   const mlir::IRMapping &mapping) {
  llvm::SmallVector<mlir::OpFoldResult> remapped;
  remapped.reserve(operands.size());

  for (mlir::OpFoldResult operand : operands) {
    if (operand.is<mlir::Value>()) {
      remapped.push_back(mlir::OpFoldResult(
          mapping.lookupOrDefault(operand.get<mlir::Value>())));
    } else {
      remapped.push_back(operand);
    }
  }

  return remapped;
}
// Rewrite pattern converting an `scf.forall` operation into a nest of
// `scf.for` operations with one loop per induction variable of the
// original operation.
//
// The outputs of the `scf.forall` operation are threaded through the
// loop nest as iteration arguments. Each
// `tensor.parallel_insert_slice` operation found in the
// `scf.forall.in_parallel` terminator is replaced with a
// `tensor.insert_slice` operation whose result becomes the new value
// of the respective output and is ultimately yielded by the
// innermost loop.
struct SCFForallToSCFForPattern
    : public mlir::OpRewritePattern<mlir::scf::ForallOp> {
  SCFForallToSCFForPattern(::mlir::MLIRContext *context,
                           mlir::PatternBenefit benefit = 1)
      : ::mlir::OpRewritePattern<mlir::scf::ForallOp>(context, benefit) {}

  // Replaces `forallOp` with an equivalent nest of `scf.for`
  // operations created right before it. Returns failure without
  // modifying the IR if `forallOp` has no induction variable, and
  // success otherwise.
  ::mlir::LogicalResult
  matchAndRewrite(mlir::scf::ForallOp forallOp,
                  ::mlir::PatternRewriter &rewriter) const override {
    mlir::ValueRange ivs = forallOp.getInductionVars();

    // Without induction variables no loop would be generated and
    // there would be no operation providing the replacement values.
    // Bail out before any IR has been created.
    if (ivs.empty()) {
      return rewriter.notifyMatchFailure(
          forallOp, "scf.forall without induction variables is not supported");
    }

    rewriter.setInsertionPoint(forallOp);

    mlir::Location loc = forallOp.getLoc();

    llvm::SmallVector<mlir::Value> lbs =
        mlir::getAsValues(rewriter, loc, forallOp.getMixedLowerBound());
    llvm::SmallVector<mlir::Value> ubs =
        mlir::getAsValues(rewriter, loc, forallOp.getMixedUpperBound());
    llvm::SmallVector<mlir::Value> steps =
        mlir::getAsValues(rewriter, loc, forallOp.getMixedStep());
    llvm::SmallVector<mlir::Value> iterArgs =
        llvm::to_vector_of<mlir::Value>(forallOp.getOutputs());

    llvm::SmallVector<mlir::scf::ForOp> forOps;

    // Build an empty loop nest with the right bounds and iteration
    // arguments, propagating the iteration arguments inward
    mlir::IRMapping mapping;

    for (auto [lb, ub, step, iv] : llvm::zip_equal(lbs, ubs, steps, ivs)) {
      mlir::scf::ForOp forOp =
          rewriter.create<mlir::scf::ForOp>(loc, lb, ub, step, iterArgs);

      // Continue building inside of the loop that has just been
      // created, such that the next loop becomes its child
      mlir::Block *body = forOp.getBody();
      rewriter.setInsertionPoint(body, body->begin());

      iterArgs = llvm::to_vector_of<mlir::Value>(forOp.getRegionIterArgs());
      mapping.map(iv, forOp.getInductionVar());
      forOps.push_back(forOp);
    }

    // Map the outputs of the original forall loop to the region
    // iteration arguments of the innermost loop
    for (auto [outArg, iterArg] :
         llvm::zip_equal(forallOp.getRegionOutArgs(), iterArgs)) {
      mapping.map(outArg, iterArg);
    }

    // Clone all operations of the original loop body, except the
    // scf.forall.in_parallel terminator
    for (mlir::Operation &bodyOp : forallOp.getBody()->without_terminator()) {
      rewriter.clone(bodyOp, mapping);
    }

    // Rewrite the contents of the scf.forall.in_parallel terminator,
    // replacing all `tensor.parallel_insert_slice` instances with
    // equivalent `tensor.insert_slice` ops.
    mlir::scf::InParallelOp ip = forallOp.getTerminator();

    for (mlir::Operation &terminatorOp : *ip.getBody()) {
      if (mlir::tensor::ParallelInsertSliceOp pis =
              llvm::dyn_cast<mlir::tensor::ParallelInsertSliceOp>(
                  terminatorOp)) {
        mlir::Value updatedTensor =
            rewriter.create<mlir::tensor::InsertSliceOp>(
                pis.getLoc(), mapping.lookupOrDefault(pis.getSource()),
                mapping.lookupOrDefault(pis.getDest()),
                remapMixedOperands(pis.getMixedOffsets(), mapping),
                remapMixedOperands(pis.getMixedSizes(), mapping),
                remapMixedOperands(pis.getMixedStrides(), mapping));

        // Subsequent insertions into the same destination and the
        // final yield must use the updated tensor
        mapping.map(pis.getDest(), updatedTensor);
      } else {
        rewriter.clone(terminatorOp, mapping);
      }
    }

    // Create an `scf.yield` operation for each of the loops in the
    // loop nest, returning the updated tensors corresponding to the
    // output tensors of the forall operation for the innermost loop
    // and returning the produced values of the contained loop for the
    // outer loops.
    mlir::SmallVector<mlir::Value> retVals = llvm::to_vector(
        llvm::map_range(forallOp.getRegionOutArgs(), [&](mlir::Value v) {
          return mapping.lookupOrDefault(v);
        }));

    for (mlir::scf::ForOp forOp : llvm::reverse(forOps)) {
      mlir::Block *body = forOp.getBody();

      // A loop created without iteration arguments already received
      // a default `scf.yield` terminator from the builder; adding
      // another one would result in invalid IR.
      const bool hasTerminator =
          !body->empty() &&
          body->back().hasTrait<mlir::OpTrait::IsTerminator>();

      if (!hasTerminator) {
        rewriter.setInsertionPointToEnd(body);
        rewriter.create<mlir::scf::YieldOp>(loc, retVals);
      }

      retVals = forOp.getResults();
    }

    // The results of the outermost loop stand in for the results of
    // the original forall operation
    mlir::scf::ForOp outermostFor = forOps.front();
    rewriter.replaceOp(forallOp, outermostFor.getResults());

    return ::mlir::success();
  }
};
// Pass applying `SCFForallToSCFForPattern` through a partial dialect
// conversion on the operation the pass runs on. The pass is flagged
// as failed if the conversion does not succeed.
struct SCFForallToSCFForPass
    : public SCFForallToSCFForBase<SCFForallToSCFForPass> {
  SCFForallToSCFForPass() {}

  void runOnOperation() override {
    mlir::MLIRContext *ctx = &getContext();
    auto root = this->getOperation();

    mlir::RewritePatternSet patterns(ctx);
    patterns.add<SCFForallToSCFForPattern>(ctx);

    mlir::ConversionTarget target(*ctx);

    // The forall operation, its terminator and the parallel
    // insertions contained in the terminator are illegal after the
    // conversion
    target.addIllegalOp<mlir::scf::ForallOp, mlir::scf::InParallelOp,
                        mlir::tensor::ParallelInsertSliceOp>();

    // Any operation without an explicit legalization action is
    // treated as legal
    target.markUnknownOpDynamicallyLegal(
        [](mlir::Operation *) { return true; });

    // Run the conversion and propagate a failure to the pass manager
    if (mlir::failed(
            mlir::applyPartialConversion(root, target, std::move(patterns)))) {
      this->signalPassFailure();
    }
  }
};
} // namespace
namespace mlir {
namespace concretelang {
/// Creates a new instance of the pass converting `scf.forall`
/// operations to nested `scf.for` operations.
std::unique_ptr<OperationPass<ModuleOp>> createSCFForallToSCFForPass() {
  std::unique_ptr<OperationPass<ModuleOp>> pass =
      std::make_unique<SCFForallToSCFForPass>();
  return pass;
}
} // namespace concretelang
} // namespace mlir