mirror of
https://github.com/zama-ai/concrete.git
synced 2026-02-08 19:44:57 -05:00
feat(compiler): add a parallel loop coalescing pass.
This commit is contained in:
committed by
Quentin Bourgerie
parent
4e53b83045
commit
aa2e0479b3
@@ -17,6 +17,8 @@
|
||||
namespace mlir {
|
||||
namespace concretelang {
|
||||
|
||||
/// Creates the module-level pass declared in Passes.td under the name
/// `collapse-parallel-loops`, which coalesces nested `scf.for` operations
/// marked with the custom attribute `parallel = true` into a single `scf.for`.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
|
||||
createCollapseParallelLoops();
|
||||
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createForLoopToParallel();
|
||||
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createBatchingPass();
|
||||
} // namespace concretelang
|
||||
|
||||
@@ -3,6 +3,15 @@
|
||||
|
||||
include "mlir/Pass/PassBase.td"
|
||||
|
||||
// Declarative specification of the `collapse-parallel-loops` pass. It is
// anchored on `mlir::ModuleOp`, instantiated through
// `mlir::concretelang::createCollapseParallelLoops()`, and lists the SCF
// dialect as a dependent dialect.
def CollapseParallelLoops : Pass<"collapse-parallel-loops", "mlir::ModuleOp"> {
|
||||
let summary =
|
||||
"Coalesce nested scf.for operations that are marked with "
|
||||
"the custom attribute parallel = true into a single scf.for "
|
||||
"loop which can subsequently be converted to scf.parallel.";
|
||||
let constructor = "mlir::concretelang::createCollapseParallelLoops()";
|
||||
let dependentDialects = ["mlir::scf::SCFDialect"];
|
||||
}
|
||||
|
||||
def ForLoopToParallel : Pass<"for-loop-to-parallel", "mlir::ModuleOp"> {
|
||||
let summary =
|
||||
"Transform scf.for marked with the custom attribute parallel = true loop "
|
||||
|
||||
@@ -341,7 +341,8 @@ lowerStdToLLVMDialect(mlir::MLIRContext &context, mlir::ModuleOp &module,
|
||||
pm, mlir::concretelang::createBufferizeDataflowTaskOpsPass(), enablePass);
|
||||
|
||||
if (parallelizeLoops) {
|
||||
addPotentiallyNestedPass(pm, mlir::createLoopCoalescingPass(), enablePass);
|
||||
addPotentiallyNestedPass(
|
||||
pm, mlir::concretelang::createCollapseParallelLoops(), enablePass);
|
||||
addPotentiallyNestedPass(pm, mlir::concretelang::createForLoopToParallel(),
|
||||
enablePass);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
add_mlir_library(
|
||||
ConcretelangTransforms
|
||||
Batching.cpp
|
||||
CollapseParallelLoops.cpp
|
||||
ForLoopToParallel.cpp
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${PROJECT_SOURCE_DIR}/include/concretelang/Transforms
|
||||
|
||||
100
compiler/lib/Transforms/CollapseParallelLoops.cpp
Normal file
100
compiler/lib/Transforms/CollapseParallelLoops.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
||||
// Exceptions. See
|
||||
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
|
||||
// for license information.
|
||||
|
||||
#include "concretelang/Transforms/Passes.h"
|
||||
|
||||
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
|
||||
#include "mlir/Dialect/SCF/IR/SCF.h"
|
||||
#include "mlir/Dialect/SCF/Utils/Utils.h"
|
||||
#include "mlir/IR/BlockAndValueMapping.h"
|
||||
#include "mlir/IR/Operation.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
#include "mlir/Transforms/Passes.h"
|
||||
#include "mlir/Transforms/RegionUtils.h"
|
||||
#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
|
||||
|
||||
namespace {
|
||||
struct CollapseParallelLoopsPass
|
||||
: public CollapseParallelLoopsBase<CollapseParallelLoopsPass> {
|
||||
|
||||
/// Walk either an scf.for or an affine.for to find a band to coalesce.
|
||||
template <typename LoopOpTy> static void walkLoop(LoopOpTy op) {}
|
||||
|
||||
void runOnOperation() override {
|
||||
mlir::ModuleOp module = getOperation();
|
||||
module.walk([&](mlir::scf::ForOp forOp) {
|
||||
// Ignore nested loops.
|
||||
if (forOp->getParentOfType<mlir::scf::ForOp>())
|
||||
return;
|
||||
|
||||
// Determine which sequences of nested loops can be coalesced
|
||||
// TODO: add loop interchange and hoisting to find more
|
||||
// opportunities by getting multiple parallel loops in sequence
|
||||
mlir::SmallVector<mlir::scf::ForOp, 4> loops;
|
||||
getPerfectlyNestedLoops(loops, forOp);
|
||||
mlir::SmallVector<unsigned, 4> coalesceableLoopRanges(loops.size());
|
||||
for (unsigned i = 0, e = loops.size(); i < e; ++i) {
|
||||
// Any loop is coalesceable to itself
|
||||
coalesceableLoopRanges[i] = i;
|
||||
|
||||
// The outermost loop doesn't have any outer loop to collapse into
|
||||
if (i == 0)
|
||||
continue;
|
||||
|
||||
// A loop will only be coalesced with another if both are
|
||||
// parallel. Otherwise it is irrelevant in this pass.
|
||||
// If this loop itself is not parallel, then nothing we can do.
|
||||
auto attr = loops[i]->getAttrOfType<mlir::BoolAttr>("parallel");
|
||||
if (attr == nullptr || attr.getValue() == false)
|
||||
continue;
|
||||
|
||||
// Find how many loops are able to be coalesced
|
||||
for (unsigned j = 0; j < i; ++j) {
|
||||
if (mlir::areValuesDefinedAbove(loops[i].getOperands(),
|
||||
loops[j].getRegion())) {
|
||||
coalesceableLoopRanges[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Now ensure that all loops in this sequence
|
||||
// [coalesceableLoopRanges[i], i] are parallel. Otherwise
|
||||
// update the range's lower bound.
|
||||
for (int k = i - 1; k >= (int)coalesceableLoopRanges[i]; --k) {
|
||||
auto attrK = loops[k]->getAttrOfType<mlir::BoolAttr>("parallel");
|
||||
if (attrK == nullptr || attrK.getValue() == false) {
|
||||
coalesceableLoopRanges[i] = k + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned end = loops.size(); end > 0; --end) {
|
||||
unsigned start = 0;
|
||||
for (; start < end - 1; ++start) {
|
||||
auto maxPos = *std::max_element(
|
||||
std::next(coalesceableLoopRanges.begin(), start),
|
||||
std::next(coalesceableLoopRanges.begin(), end));
|
||||
if (maxPos > start)
|
||||
continue;
|
||||
|
||||
auto band =
|
||||
llvm::makeMutableArrayRef(loops.data() + start, end - start);
|
||||
(void)mlir::coalesceLoops(band);
|
||||
break;
|
||||
}
|
||||
// If a band was found and transformed, keep looking at the loops above
|
||||
// the outermost transformed loop.
|
||||
if (start != end - 1)
|
||||
end = start + 1;
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/// Factory for the pass: returns a newly allocated
/// `CollapseParallelLoopsPass` owned by the returned pointer.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
|
||||
mlir::concretelang::createCollapseParallelLoops() {
|
||||
return std::make_unique<CollapseParallelLoopsPass>();
|
||||
}
|
||||
Reference in New Issue
Block a user