feat(compiler): add a parallel loop coalescing pass.

2026-02-08 19:44:57 -05:00 · 2022-12-19 09:54:59 +00:00
parent 4e53b83045
commit aa2e0479b3
5 changed files with 114 additions and 1 deletions
--- a/compiler/include/concretelang/Transforms/Passes.h
+++ b/compiler/include/concretelang/Transforms/Passes.h
@@ -17,6 +17,8 @@
 namespace mlir {
 namespace concretelang {

+/// Creates the pass that coalesces nested scf.for operations marked with the
+/// custom attribute parallel = true into a single scf.for loop.
+std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
+createCollapseParallelLoops();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createForLoopToParallel();
 std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createBatchingPass();
 } // namespace concretelang
--- a/compiler/include/concretelang/Transforms/Passes.td
+++ b/compiler/include/concretelang/Transforms/Passes.td
@@ -3,6 +3,15 @@

 include "mlir/Pass/PassBase.td"

+// Pass specification for "collapse-parallel-loops" on mlir::ModuleOp; instances
+// are built by mlir::concretelang::createCollapseParallelLoops().
+def CollapseParallelLoops : Pass<"collapse-parallel-loops", "mlir::ModuleOp"> {
+  let summary =
+      "Coalesce nested scf.for operations that are marked with "
+      "the custom attribute parallel = true into a single scf.for "
+      "loop which can subsequently be converted to scf.parallel.";
+  let constructor = "mlir::concretelang::createCollapseParallelLoops()";
+  let dependentDialects = ["mlir::scf::SCFDialect"];
+}
+
 def ForLoopToParallel : Pass<"for-loop-to-parallel", "mlir::ModuleOp"> {
  let summary =
      "Transform scf.for marked with the custom attribute parallel = true loop "
--- a/compiler/lib/Support/Pipeline.cpp
+++ b/compiler/lib/Support/Pipeline.cpp
@@ -341,7 +341,8 @@ lowerStdToLLVMDialect(mlir::MLIRContext &context, mlir::ModuleOp &module,
      pm, mlir::concretelang::createBufferizeDataflowTaskOpsPass(), enablePass);

  if (parallelizeLoops) {
-    addPotentiallyNestedPass(pm, mlir::createLoopCoalescingPass(), enablePass);
+    addPotentiallyNestedPass(
+        pm, mlir::concretelang::createCollapseParallelLoops(), enablePass);
    addPotentiallyNestedPass(pm, mlir::concretelang::createForLoopToParallel(),
                             enablePass);
  }
--- a/compiler/lib/Transforms/CMakeLists.txt
+++ b/compiler/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_mlir_library(
  ConcretelangTransforms
  Batching.cpp
+  CollapseParallelLoops.cpp
  ForLoopToParallel.cpp
  ADDITIONAL_HEADER_DIRS
  ${PROJECT_SOURCE_DIR}/include/concretelang/Transforms
--- a/compiler/lib/Transforms/CollapseParallelLoops.cpp
+++ b/compiler/lib/Transforms/CollapseParallelLoops.cpp
@@ -0,0 +1,100 @@
+// Part of the Concrete Compiler Project, under the BSD3 License with Zama
+// Exceptions. See
+// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
+// for license information.
+
+#include "concretelang/Transforms/Passes.h"
+
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/SCF/Utils/Utils.h"
+#include "mlir/IR/BlockAndValueMapping.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/RegionUtils.h"
+#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
+
+namespace {
+/// Coalesces bands of perfectly nested scf.for loops marked parallel = true.
+struct CollapseParallelLoopsPass
+    : public CollapseParallelLoopsBase<CollapseParallelLoopsPass> {
+  /// Returns true iff `loop` has a boolean attribute "parallel" set to true.
+  static bool isMarkedParallel(mlir::scf::ForOp loop) {
+    auto attr = loop->getAttrOfType<mlir::BoolAttr>("parallel");
+    return attr && attr.getValue();
+  }
+
+  void runOnOperation() override {
+    mlir::ModuleOp module = getOperation();
+    module.walk([&](mlir::scf::ForOp forOp) {
+      // Ignore nested loops.
+      if (forOp->getParentOfType<mlir::scf::ForOp>())
+        return;
+
+      // Determine which sequences of nested loops can be coalesced
+      // TODO: add loop interchange and hoisting to find more
+      // opportunities by getting multiple parallel loops in sequence
+      mlir::SmallVector<mlir::scf::ForOp, 4> loops;
+      getPerfectlyNestedLoops(loops, forOp);
+      // coalesceableLoopRanges[i]: outermost loop index coalesceable with i.
+      mlir::SmallVector<unsigned, 4> coalesceableLoopRanges(loops.size());
+      for (unsigned i = 0, e = loops.size(); i < e; ++i) {
+        // Any loop is coalesceable to itself
+        coalesceableLoopRanges[i] = i;
+
+        // The outermost loop doesn't have any outer loop to collapse into
+        if (i == 0)
+          continue;
+
+        // Only a parallel loop can be coalesced into the loops above it.
+        if (!isMarkedParallel(loops[i]))
+          continue;
+
+        // Find the outermost loop that loop i's operands are defined above.
+        for (unsigned j = 0; j < i; ++j) {
+          if (mlir::areValuesDefinedAbove(loops[i].getOperands(),
+                                          loops[j].getRegion())) {
+            coalesceableLoopRanges[i] = j;
+            break;
+          }
+        }
+        // Now ensure that all loops in [coalesceableLoopRanges[i], i] are
+        // parallel, otherwise raise the range's lower bound. k is one past
+        // the loop under test so that the unsigned index cannot wrap around.
+        for (unsigned k = i; k > coalesceableLoopRanges[i]; --k) {
+          if (!isMarkedParallel(loops[k - 1])) {
+            coalesceableLoopRanges[i] = k;
+            break;
+          }
+        }
+      }
+
+      // Coalesce the widest valid band ending at each `end`, innermost first.
+      for (unsigned end = loops.size(); end > 0; --end) {
+        unsigned start = 0;
+        for (; start < end - 1; ++start) {
+          auto maxPos = *std::max_element(
+              std::next(coalesceableLoopRanges.begin(), start),
+              std::next(coalesceableLoopRanges.begin(), end));
+          if (maxPos > start)
+            continue;
+
+          auto band =
+              llvm::makeMutableArrayRef(loops.data() + start, end - start);
+          (void)mlir::coalesceLoops(band);
+          break;
+        }
+        // A band was coalesced: continue with the loops above it.
+        if (start != end - 1)
+          end = start + 1;
+      }
+    });
+  }
+};
+} // namespace
+
+/// Builds a new instance of the parallel loop collapsing pass.
+std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
+mlir::concretelang::createCollapseParallelLoops() {
+  auto pass = std::make_unique<CollapseParallelLoopsPass>();
+  return pass;
+}