mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
This PR merges the `triton-mlir` branch, in which we have been quietly rewriting the Triton backend from scratch to increase maintainability, stability and ultimately performance. Changes to the runtime are minimal, and this new version aims to remain backward-compatible with the previous commit. The legacy backend is now officially deprecated, but can still be accessed via the `legacy-backend` tag. Co-authored-by: Keren Zhou <kerenzhou@openai.com> Co-authored-by: Yan Chunwei <yanchunwei@outlook.com> Co-authored-by: goostavz <109190422+goostavz@users.noreply.github.com> Co-authored-by: Shintaro Iwasaki <siwasaki@fb.com> Co-authored-by: Yan Da <dyanab@connect.ust.hk> Co-authored-by: Jun Yang <yangjunpro@gmail.com> Co-authored-by: Ian Bearman <ianb@microsoft.com> Co-authored-by: Jason Ansel <jansel@jansel.net> Co-authored-by: Qingyi Liu <qingyil@nvidia.com> Co-authored-by: ben-zhang-609 <110140741+ben-zhang-609@users.noreply.github.com> Co-authored-by: Chenggang Zhao <lyricz@yeah.net> Co-authored-by: ben-zhang-609 <benzh609@gmail.com> Co-authored-by: dongdongl <dongdongl@nvidia.com>
83 lines
1.9 KiB
C++
83 lines
1.9 KiB
C++
#ifndef TRITON_ANALYSIS_UTILITY_H
|
|
#define TRITON_ANALYSIS_UTILITY_H
|
|
|
|
#include "triton/Dialect/TritonGPU/IR/Dialect.h"

#include <algorithm>
#include <functional>
#include <numeric>
#include <string>
|
|
|
|
namespace mlir {
|
|
|
|
class ReduceOpHelper {
|
|
public:
|
|
explicit ReduceOpHelper(triton::ReduceOp op) : op(op) {
|
|
srcTy = op.operand().getType().cast<RankedTensorType>();
|
|
}
|
|
|
|
ArrayRef<int64_t> getSrcShape() { return srcTy.getShape(); }
|
|
|
|
Attribute getSrcLayout() { return srcTy.getEncoding(); }
|
|
|
|
bool isFastReduction();
|
|
|
|
unsigned getInterWarpSize();
|
|
|
|
unsigned getIntraWarpSize();
|
|
|
|
unsigned getThreadsReductionAxis();
|
|
|
|
SmallVector<unsigned> getScratchConfigBasic();
|
|
|
|
SmallVector<SmallVector<unsigned>> getScratchConfigsFast();
|
|
|
|
unsigned getScratchSizeInBytes();
|
|
|
|
private:
|
|
triton::ReduceOp op;
|
|
RankedTensorType srcTy{};
|
|
};
|
|
|
|
// Free-function helpers, defined out-of-line in the corresponding .cpp.

// Whether `value`'s type carries a shared-memory layout encoding
// (presumably a TritonGPU shared encoding — see the definition for the
// exact check).
bool isSharedEncoding(Value value);

// Whether `op` may allocate shared memory — TODO confirm against the
// out-of-line definition.
bool maybeSharedAllocationOp(Operation *op);

// Whether `op` may alias an existing buffer rather than allocate a new one
// — see the out-of-line definition.
bool maybeAliasOp(Operation *op);

// Whether `op` can be lowered to an MMA instruction of the given
// architecture `version`.
bool supportMMA(triton::DotOp op, int version);

// Overload checking a single dot operand instead of the whole op.
bool supportMMA(Value value, int version);

// Element type of `value` (scalar type if `value` is a tensor — verify
// against the definition).
Type getElementType(Value value);

// Printable operand name of `value` using `state` for SSA name lookup.
std::string getValueOperandName(Value value, AsmState &state);
|
|
|
|
template <typename T_OUT, typename T_IN>
|
|
inline SmallVector<T_OUT> convertType(ArrayRef<T_IN> in) {
|
|
SmallVector<T_OUT> out;
|
|
for (const T_IN &i : in)
|
|
out.push_back(T_OUT(i));
|
|
return out;
|
|
}
|
|
|
|
/// Product of all elements of `arr`; returns Int{1} for an empty array.
///
/// The accumulator is seeded with `Int{1}` rather than the plain literal `1`:
/// std::accumulate folds in the type of its init argument, so seeding with an
/// `int` would perform the multiplication in `int` and truncate/overflow when
/// `Int` is a 64-bit type (e.g. tensor-shape extents).
template <typename Int> Int product(llvm::ArrayRef<Int> arr) {
  return std::accumulate(arr.begin(), arr.end(), Int{1},
                         std::multiplies<Int>());
}
|
|
|
|
/// Integer ceiling division of `m` by `n`, computed as (m + n - 1) / n.
template <typename Int> Int ceil(Int m, Int n) {
  const Int biased = m + n - 1;
  return biased / n;
}
|
|
|
|
// output[i] = input[order[i]]
|
|
template <typename T, typename RES_T = T>
|
|
SmallVector<RES_T> reorder(ArrayRef<T> input, ArrayRef<unsigned> order) {
|
|
size_t rank = order.size();
|
|
assert(input.size() == rank);
|
|
SmallVector<RES_T> result(rank);
|
|
for (auto it : llvm::enumerate(order)) {
|
|
result[it.index()] = input[it.value()];
|
|
}
|
|
return result;
|
|
}
|
|
|
|
} // namespace mlir
|
|
|
|
#endif // TRITON_ANALYSIS_UTILITY_H
|