[MFMA] Switch between MFMA types (#352)

This PR introduces the matrix_instr_nonkdim flag to switch between the MFMA 16 and MFMA 32 instruction variants.
Author: Alexander Efimov
Date: 2023-10-18 16:57:34 +02:00
Committed by: GitHub
Parent: 4d539d7dae
Commit: 20f316b19a
14 changed files with 371 additions and 247 deletions
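
For orientation, here is a minimal sketch of how such a switch can map to MFMA tile shapes. The helper below is illustrative only (none of these names appear in this PR); on CDNA hardware, MFMA 32 commonly corresponds to a 32x32x8 fp16 tile and MFMA 16 to a 16x16x16 tile.

#include <cstdint>

// Illustrative sketch, not code from this PR: map the non-K dimension
// selected by matrix_instr_nonkdim to an MFMA tile shape.
struct MfmaShape { int m, n, k; };

static MfmaShape pickMfmaShape(int64_t nonKDim) {
  if (nonKDim == 16)
    return {16, 16, 16}; // e.g. mfma_f32_16x16x16f16
  return {32, 32, 8};    // e.g. mfma_f32_32x32x8f16; 0 leaves the choice to the pass
}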


@@ -115,7 +115,7 @@ bool maybeSharedAllocationOp(Operation *op);
 bool maybeAliasOp(Operation *op);
 #ifdef USE_ROCM
-bool supportMFMA(triton::DotOp op);
+bool supportMFMA(triton::DotOp op, int64_t nonKDim);
 #endif
 bool supportMMA(triton::DotOp op, int version);
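
The new nonKDim parameter lets callers ask whether a dot operation can be lowered with a specific MFMA M/N size. Below is a sketch of what such a predicate might check, assuming standard MLIR accessors; the PR's actual implementation lives in the .cpp changes, which this excerpt does not show.

#include "mlir/IR/BuiltinTypes.h"
#include "triton/Dialect/Triton/IR/Dialect.h"

// Sketch only, not the PR's logic: a dot fits a given MFMA variant if its
// result tile divides evenly by the chosen non-K dimension.
static bool supportMFMASketch(mlir::triton::DotOp op, int64_t nonKDim) {
  if (nonKDim == 0)
    return true; // 0 means "no size enforced" (assumption)
  auto resTy = op.getResult().getType().cast<mlir::RankedTensorType>();
  auto shape = resTy.getShape(); // [M, N]
  return shape[0] % nonKDim == 0 && shape[1] % nonKDim == 0;
}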


@@ -16,6 +16,10 @@ std::unique_ptr<Pass> createTritonGPUStreamPipelinePass();
 std::unique_ptr<Pass>
 createTritonGPUAccelerateMatmulPass(int computeCapability = 80);
+
+std::unique_ptr<Pass>
+createTritonAMDGPUAccelerateMatmulPass(int matrixCoreVersion = 0,
+                                       int matrixInstructionSize = 0);
 std::unique_ptr<Pass> createTritonGPUPrefetchPass();
 std::unique_ptr<Pass> createTritonGPUCanonicalizeLoopsPass();
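
A minimal usage sketch for the new entry point, assuming an existing mlir::MLIRContext named `context`; the argument values (matrix core version 2, forced MFMA 16) are illustrative:

mlir::PassManager pm(&context);
pm.addPass(mlir::createTritonAMDGPUAccelerateMatmulPass(
    /*matrixCoreVersion=*/2, /*matrixInstructionSize=*/16));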


@@ -85,6 +85,29 @@ def TritonGPUAccelerateMatmul : Pass<"tritongpu-accelerate-matmul", "mlir::Modul
   ];
 }
 
+def TritonAMDGPUAccelerateMatmul : Pass<"tritonamdgpu-accelerate-matmul", "mlir::ModuleOp"> {
+  let summary = "accelerate matmul";
+
+  let description = [{
+    Optimize the input/output layout of the `dot` instruction to make it
+    compatible with hardware accelerators (e.g., AMD matrix cores).
+  }];
+
+  let constructor = "mlir::createTritonAMDGPUAccelerateMatmulPass()";
+
+  let dependentDialects = ["mlir::triton::gpu::TritonGPUDialect",
+                           "mlir::triton::TritonDialect"];
+
+  let options = [
+    Option<"matrixCoreVersion", "matrix-core-version",
+           "int32_t", /*default*/"0",
+           "device matrix core version">,
+    Option<"matrixInstructionSize", "matrix-instruction-size",
+           "int32_t", /*default*/"0",
+           "enforce matrix instruction MN size">
+  ];
+}
+
 def TritonGPUOptimizeDotOperands : Pass<"tritongpu-optimize-dot-operands", "mlir::ModuleOp"> {
   let summary = "fuse transpositions";
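
Stepping out of the diff: given the Option declarations in the TritonAMDGPUAccelerateMatmul definition above, MLIR's pass tablegen backend also exposes these knobs on the command line. A hypothetical triton-opt invocation that forces MFMA 32 on a matrix-core-version-2 target might look like:

triton-opt input.mlir --tritonamdgpu-accelerate-matmul="matrix-core-version=2 matrix-instruction-size=32"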