[Refine] Pass mlir::Attribute by value instead of by const reference (#1486)

Pass `mlir::Attribute` by value rather than by `const &`, following the MLIR documentation and C++ Core Guidelines F.16 ("for 'in' parameters, pass cheaply-copied types by value and others by reference to const").

References:
- https://mlir.llvm.org/docs/DefiningDialects/AttributesAndTypes/
- https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#f16-for-in-parameters-pass-cheaply-copied-types-by-value-and-others-by-reference-to-const

From the MLIR documentation:

```
The C++ Attribute and Type classes in MLIR (like Ops, and many other things) are value-typed. This means that instances of Attribute or Type are passed around by-value, as opposed to by-pointer or by-reference. The Attribute and Type classes act as wrappers around internal storage objects that are uniqued within an instance of an MLIRContext.
```
2026-04-05 03:01:17 -04:00 · 2023-04-09 01:38:59 +08:00
parent 82ec1a89ea
commit f7ad8ae022
6 changed files with 25 additions and 25 deletions
--- a/include/triton/Dialect/TritonGPU/IR/Dialect.h
+++ b/include/triton/Dialect/TritonGPU/IR/Dialect.h
@@ -23,23 +23,23 @@ namespace gpu {

 unsigned getElemsPerThread(Type type);

-SmallVector<unsigned> getThreadsPerWarp(const Attribute &layout);
+SmallVector<unsigned> getThreadsPerWarp(Attribute layout);

-SmallVector<unsigned> getWarpsPerCTA(const Attribute &layout);
+SmallVector<unsigned> getWarpsPerCTA(Attribute layout);

-SmallVector<unsigned> getSizePerThread(const Attribute &layout);
+SmallVector<unsigned> getSizePerThread(Attribute layout);

-SmallVector<unsigned> getContigPerThread(const Attribute &layout);
+SmallVector<unsigned> getContigPerThread(Attribute layout);

-SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout);
+SmallVector<unsigned> getThreadsPerCTA(Attribute layout);

 SmallVector<unsigned>
-getShapePerCTA(const Attribute &layout,
+getShapePerCTA(Attribute layout,
               ArrayRef<int64_t> tensorShape = ArrayRef<int64_t>());

-SmallVector<unsigned> getOrder(const Attribute &layout);
+SmallVector<unsigned> getOrder(Attribute layout);

-bool isaDistributedLayout(const Attribute &layout);
+bool isaDistributedLayout(Attribute layout);

 } // namespace gpu
 } // namespace triton
--- a/lib/Analysis/Allocation.cpp
+++ b/lib/Analysis/Allocation.cpp
@@ -33,7 +33,7 @@ namespace triton {
 constexpr int kPtrBitWidth = 64;

 static std::pair<SmallVector<unsigned>, SmallVector<unsigned>>
-getCvtOrder(const Attribute &srcLayout, const Attribute &dstLayout) {
+getCvtOrder(Attribute srcLayout, Attribute dstLayout) {
  auto srcMmaLayout = srcLayout.dyn_cast<MmaEncodingAttr>();
  auto srcDotLayout = srcLayout.dyn_cast<DotOperandEncodingAttr>();
  auto dstMmaLayout = dstLayout.dyn_cast<MmaEncodingAttr>();
--- a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h
+++ b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMBase.h
@@ -502,7 +502,7 @@ public:

  SmallVector<Value> emitBaseIndexForLayout(Location loc,
                                            ConversionPatternRewriter &rewriter,
-                                            const Attribute &layout,
+                                            Attribute layout,
                                            RankedTensorType type) const {
    IndexCacheKeyT key = std::make_pair(layout, type);
    auto cache = indexCacheInfo.baseIndexCache;
@@ -532,7 +532,7 @@ public:
  }

  SmallVector<SmallVector<unsigned>>
-  emitOffsetForLayout(const Attribute &layout, RankedTensorType type) const {
+  emitOffsetForLayout(Attribute layout, RankedTensorType type) const {
    if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>())
      return emitOffsetForBlockedLayout(blockedLayout, type);
    if (auto mmaLayout = layout.dyn_cast<MmaEncodingAttr>()) {
@@ -549,7 +549,7 @@ public:
  // -----------------------------------------------------------------------
  SmallVector<SmallVector<Value>> emitIndices(Location loc,
                                              ConversionPatternRewriter &b,
-                                              const Attribute &layout,
+                                              Attribute layout,
                                              RankedTensorType type) const {
    IndexCacheKeyT key(layout, type);
    auto cache = indexCacheInfo.indexCache;
@@ -861,8 +861,8 @@ private:
  // Emit indices calculation within each ConversionPattern, and returns a
  // [elemsPerThread X rank] index matrix.
  SmallVector<SmallVector<Value>> emitIndicesForDistributedLayout(
-      Location loc, ConversionPatternRewriter &rewriter,
-      const Attribute &layout, RankedTensorType type) const {
+      Location loc, ConversionPatternRewriter &rewriter, Attribute layout,
+      RankedTensorType type) const {
    // step 1, delinearize threadId to get the base index
    auto multiDimBase = emitBaseIndexForLayout(loc, rewriter, layout, type);
    // step 2, get offset of each element
--- a/lib/Dialect/TritonGPU/IR/Dialect.cpp
+++ b/lib/Dialect/TritonGPU/IR/Dialect.cpp
@@ -48,7 +48,7 @@ unsigned getElemsPerThread(Type type) {
                           tensorType.getElementType());
 }

-SmallVector<unsigned> getThreadsPerWarp(const Attribute &layout) {
+SmallVector<unsigned> getThreadsPerWarp(Attribute layout) {
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
    return SmallVector<unsigned>(blockedLayout.getThreadsPerWarp().begin(),
                                 blockedLayout.getThreadsPerWarp().end());
@@ -63,7 +63,7 @@ SmallVector<unsigned> getThreadsPerWarp(const Attribute &layout) {
  return {};
 }

-SmallVector<unsigned> getWarpsPerCTA(const Attribute &layout) {
+SmallVector<unsigned> getWarpsPerCTA(Attribute layout) {
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
    return SmallVector<unsigned>(blockedLayout.getWarpsPerCTA().begin(),
                                 blockedLayout.getWarpsPerCTA().end());
@@ -76,7 +76,7 @@ SmallVector<unsigned> getWarpsPerCTA(const Attribute &layout) {
  return {};
 }

-SmallVector<unsigned> getSizePerThread(const Attribute &layout) {
+SmallVector<unsigned> getSizePerThread(Attribute layout) {
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
    return SmallVector<unsigned>(blockedLayout.getSizePerThread().begin(),
                                 blockedLayout.getSizePerThread().end());
@@ -120,7 +120,7 @@ SmallVector<unsigned> getSizePerThread(const Attribute &layout) {
  }
 }

-SmallVector<unsigned> getContigPerThread(const Attribute &layout) {
+SmallVector<unsigned> getContigPerThread(Attribute layout) {
  if (auto mmaLayout = layout.dyn_cast<MmaEncodingAttr>()) {
    assert(mmaLayout.isVolta() || mmaLayout.isAmpere());
    return {1, 2};
@@ -129,7 +129,7 @@ SmallVector<unsigned> getContigPerThread(const Attribute &layout) {
  }
 }

-SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout) {
+SmallVector<unsigned> getThreadsPerCTA(Attribute layout) {
  SmallVector<unsigned> threads;
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
    for (int d = 0, n = blockedLayout.getOrder().size(); d < n; ++d)
@@ -148,7 +148,7 @@ SmallVector<unsigned> getThreadsPerCTA(const Attribute &layout) {
  return threads;
 }

-SmallVector<unsigned> getShapePerCTA(const Attribute &layout,
+SmallVector<unsigned> getShapePerCTA(Attribute layout,
                                     ArrayRef<int64_t> tensorShape) {
  SmallVector<unsigned> shape;
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
@@ -202,7 +202,7 @@ SmallVector<unsigned> getShapePerCTA(const Attribute &layout,
  return shape;
 }

-SmallVector<unsigned> getOrder(const Attribute &layout) {
+SmallVector<unsigned> getOrder(Attribute layout) {
  if (auto blockedLayout = layout.dyn_cast<BlockedEncodingAttr>()) {
    return SmallVector<unsigned>(blockedLayout.getOrder().begin(),
                                 blockedLayout.getOrder().end());
@@ -232,7 +232,7 @@ SmallVector<unsigned> getOrder(const Attribute &layout) {
  }
 };

-bool isaDistributedLayout(const Attribute &layout) {
+bool isaDistributedLayout(Attribute layout) {
  return layout.isa<BlockedEncodingAttr>() || layout.isa<MmaEncodingAttr>() ||
         layout.isa<SliceEncodingAttr>();
 }
@@ -241,7 +241,7 @@ bool isaDistributedLayout(const Attribute &layout) {
 } // namespace triton
 } // namespace mlir

-static LogicalResult parseIntAttrValue(AsmParser &parser, const Attribute &attr,
+static LogicalResult parseIntAttrValue(AsmParser &parser, Attribute attr,
                                       unsigned &value, StringRef desc) {
  auto intAttr = attr.dyn_cast<IntegerAttr>();
  if (!intAttr) {
--- a/lib/Dialect/TritonGPU/Transforms/Utility.cpp
+++ b/lib/Dialect/TritonGPU/Transforms/Utility.cpp
@@ -134,7 +134,7 @@ bool expensiveToRemat(Operation *op, Attribute &targetEncoding) {
 int simulateBackwardRematerialization(
    Operation *initOp, SetVector<Operation *> &processed,
    SetVector<Attribute> &layout, llvm::MapVector<Value, Attribute> &toConvert,
-    const Attribute &targetEncoding) {
+    Attribute targetEncoding) {
  // DFS
  std::vector<std::pair<Operation *, Attribute>> queue;
  queue.emplace_back(initOp, targetEncoding);
--- a/lib/Dialect/TritonGPU/Transforms/Utility.h
+++ b/lib/Dialect/TritonGPU/Transforms/Utility.h
@@ -19,7 +19,7 @@ bool expensiveToRemat(Operation *op, Attribute &targetEncoding);
 int simulateBackwardRematerialization(
    Operation *initOp, SetVector<Operation *> &processed,
    SetVector<Attribute> &layout, llvm::MapVector<Value, Attribute> &toConvert,
-    const Attribute &targetEncoding);
+    Attribute targetEncoding);

 Operation *cloneWithInferType(mlir::PatternRewriter &rewriter, Operation *op,
                              IRMapping &mapping);