mirror of
https://github.com/zama-ai/concrete.git
synced 2026-01-14 23:38:10 -05:00
This commit rebases the compiler onto commit 465ee9bfb26d from
llvm-project with locally maintained patches on top, i.e.:
* 5d8669d669ee: Fix the element alignment (size) for memrefCopy
* 4239163ea337: fix: Do not fold the memref.subview if the offset are
!= 0 and strides != 1
* 72c5decfcc21: remove github stuff from llvm
* 8d0ce8f9eca1: Support arbitrary element types in named operations
via attributes
* 94f64805c38c: Copy attributes of scf.for on bufferization and make
it an allocation hoisting barrier
Main upstream changes from llvm-project that required modification of
concretecompiler:
* Switch to C++17
* Various changes in the interfaces for linalg named operations
* Transition from `llvm::Optional` to `std::optional`
* Use of enums instead of string values for iterator types in linalg
* Changed default naming convention of getter methods in
ODS-generated operation classes from `some_value()` to
`getSomeValue()`
* Renaming of Arithmetic dialect to Arith
* Refactoring of side effect interfaces (i.e., renaming from
`NoSideEffect` to `Pure`)
* Re-design of the data flow analysis framework
* Refactoring of build targets for Python bindings
* Refactoring of array attributes with integer values
* Renaming of `linalg.init_tensor` to `tensor.empty`
* Emission of `linalg.map` operations in bufferization of the Tensor
dialect requiring another linalg conversion pass and registration
of the bufferization op interfaces for linalg operations
* Refactoring of the one-shot bufferizer
* Necessity to run the expand-strided-metadata, affine-to-std and
finalize-memref-to-llvm passes before conversion to the LLVM
dialect
* Renaming of `BlockAndValueMapping` to `IRMapping`
* Changes in the build function of `LLVM::CallOp`
* Refactoring of the construction of `llvm::ArrayRef` and
`llvm::MutableArrayRef` (direct invocation of constructor instead
of builder functions for some cases)
* New naming conventions for generated SSA values requiring rewrite
of some check tests
* Refactoring of `mlir::LLVM::lookupOrCreateMallocFn()`
* Interface changes in generated type parsers
* New dependencies on mlir_float16_utils and
MLIRSparseTensorRuntime for the runtime
* Overhaul of MLIR-c deleting `mlir-c/Registration.h`
* Deletion of library MLIRLinalgToSPIRV
* Deletion of library MLIRLinalgAnalysis
* Deletion of library MLIRMemRefUtils
* Deletion of library MLIRQuantTransforms
* Deletion of library MLIRVectorToROCDL
100 lines
3.7 KiB
C++
100 lines
3.7 KiB
C++
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
|
|
// Exceptions. See
|
|
// https://github.com/zama-ai/concrete-compiler-internal/blob/main/LICENSE.txt
|
|
// for license information.
|
|
|
|
#include "concretelang/Transforms/Passes.h"
|
|
|
|
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
|
|
#include "mlir/Dialect/SCF/IR/SCF.h"
|
|
#include "mlir/Dialect/SCF/Utils/Utils.h"
|
|
#include "mlir/IR/IRMapping.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/Transforms/DialectConversion.h"
|
|
#include "mlir/Transforms/Passes.h"
|
|
#include "mlir/Transforms/RegionUtils.h"
|
|
#include <mlir/Transforms/GreedyPatternRewriteDriver.h>
|
|
|
|
namespace {
|
|
struct CollapseParallelLoopsPass
|
|
: public CollapseParallelLoopsBase<CollapseParallelLoopsPass> {
|
|
|
|
/// Walk either an scf.for or an affine.for to find a band to coalesce.
|
|
template <typename LoopOpTy> static void walkLoop(LoopOpTy op) {}
|
|
|
|
void runOnOperation() override {
|
|
mlir::ModuleOp module = getOperation();
|
|
module.walk([&](mlir::scf::ForOp forOp) {
|
|
// Ignore nested loops.
|
|
if (forOp->getParentOfType<mlir::scf::ForOp>())
|
|
return;
|
|
|
|
// Determine which sequences of nested loops can be coalesced
|
|
// TODO: add loop interchange and hoisting to find more
|
|
// opportunities by getting multiple parallel loops in sequence
|
|
mlir::SmallVector<mlir::scf::ForOp, 4> loops;
|
|
getPerfectlyNestedLoops(loops, forOp);
|
|
mlir::SmallVector<unsigned, 4> coalesceableLoopRanges(loops.size());
|
|
for (unsigned i = 0, e = loops.size(); i < e; ++i) {
|
|
// Any loop is coalesceable to itself
|
|
coalesceableLoopRanges[i] = i;
|
|
|
|
// The outermost loop doesn't have any outer loop to collapse into
|
|
if (i == 0)
|
|
continue;
|
|
|
|
// A loop will only be coalesced with another if both are
|
|
// parallel. Otherwise it is irrelevant in this pass.
|
|
// If this loop itself is not parallel, then nothing we can do.
|
|
auto attr = loops[i]->getAttrOfType<mlir::BoolAttr>("parallel");
|
|
if (attr == nullptr || attr.getValue() == false)
|
|
continue;
|
|
|
|
// Find how many loops are able to be coalesced
|
|
for (unsigned j = 0; j < i; ++j) {
|
|
if (mlir::areValuesDefinedAbove(loops[i].getOperands(),
|
|
loops[j].getRegion())) {
|
|
coalesceableLoopRanges[i] = j;
|
|
break;
|
|
}
|
|
}
|
|
// Now ensure that all loops in this sequence
|
|
// [coalesceableLoopRanges[i], i] are parallel. Otherwise
|
|
// update the range's lower bound.
|
|
for (int k = i - 1; k >= (int)coalesceableLoopRanges[i]; --k) {
|
|
auto attrK = loops[k]->getAttrOfType<mlir::BoolAttr>("parallel");
|
|
if (attrK == nullptr || attrK.getValue() == false) {
|
|
coalesceableLoopRanges[i] = k + 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (unsigned end = loops.size(); end > 0; --end) {
|
|
unsigned start = 0;
|
|
for (; start < end - 1; ++start) {
|
|
auto maxPos = *std::max_element(
|
|
std::next(coalesceableLoopRanges.begin(), start),
|
|
std::next(coalesceableLoopRanges.begin(), end));
|
|
if (maxPos > start)
|
|
continue;
|
|
|
|
auto band = llvm::MutableArrayRef(loops.data() + start, end - start);
|
|
(void)mlir::coalesceLoops(band);
|
|
break;
|
|
}
|
|
// If a band was found and transformed, keep looking at the loops above
|
|
// the outermost transformed loop.
|
|
if (start != end - 1)
|
|
end = start + 1;
|
|
}
|
|
});
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
|
|
mlir::concretelang::createCollapseParallelLoops() {
|
|
return std::make_unique<CollapseParallelLoopsPass>();
|
|
}
|