mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-27 03:01:52 -04:00
[MFMA] Switch between MFMA types (#352)
This PR introduces the `matrix_instr_nonkdim` flag to switch between MFMA 16 and MFMA 32.
This commit is contained in:
@@ -115,7 +115,7 @@ bool maybeSharedAllocationOp(Operation *op);
|
||||
bool maybeAliasOp(Operation *op);
|
||||
|
||||
#ifdef USE_ROCM
|
||||
bool supportMFMA(triton::DotOp op, int64_t nonKDim);
|
||||
bool supportMFMA(triton::DotOp op);
|
||||
#endif
|
||||
|
||||
bool supportMMA(triton::DotOp op, int version);
|
||||
|
||||
@@ -16,6 +16,10 @@ std::unique_ptr<Pass> createTritonGPUStreamPipelinePass();
|
||||
std::unique_ptr<Pass>
|
||||
createTritonGPUAccelerateMatmulPass(int computeCapability = 80);
|
||||
|
||||
std::unique_ptr<Pass>
|
||||
createTritonAMDGPUAccelerateMatmulPass(int matrixCoreVersion = 0,
|
||||
int matrixInstructionSize = 0);
|
||||
|
||||
std::unique_ptr<Pass> createTritonGPUPrefetchPass();
|
||||
|
||||
std::unique_ptr<Pass> createTritonGPUCanonicalizeLoopsPass();
|
||||
|
||||
@@ -85,6 +85,29 @@ def TritonGPUAccelerateMatmul : Pass<"tritongpu-accelerate-matmul", "mlir::Modul
|
||||
];
|
||||
}
|
||||
|
||||
def TritonAMDGPUAccelerateMatmul : Pass<"tritonamdgpu-accelerate-matmul", "mlir::ModuleOp"> {
|
||||
let summary = "accelerate matmul";
|
||||
|
||||
let description = [{
|
||||
Optimize the input/output layout of `dot` instruction to make them compatible hardware accelerators
|
||||
(e.g., AMD matrix cores)
|
||||
}];
|
||||
|
||||
let constructor = "mlir::createTritonAMDGPUAccelerateMatmulPass()";
|
||||
|
||||
let dependentDialects = ["mlir::triton::gpu::TritonGPUDialect",
|
||||
"mlir::triton::TritonDialect"];
|
||||
|
||||
let options = [
|
||||
Option<"matrixCoreVersion", "matrix-core-version",
|
||||
"int32_t", /*default*/"0",
|
||||
"device matrix core version">,
|
||||
Option<"matrixInstructionSize", "matrix-instruction-size",
|
||||
"int32_t", /*default*/"0",
|
||||
"enforce matrix instruction MN size">
|
||||
];
|
||||
}
|
||||
|
||||
def TritonGPUOptimizeDotOperands : Pass<"tritongpu-optimize-dot-operands", "mlir::ModuleOp"> {
|
||||
let summary = "fuse transpositions";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user