mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[MFMA] Support BFloat16 on MI100 (#295)
* [MFMA] Support BFloat16 on MI100. This PR makes use of the mfma_f32_32x32x4bf16 instruction, available on MI100.
* fix tests, fix mfma encoding comment, fix switch between mfma versions.
* replace kDim from mfma layout with kWidth from dotOp layout
* rebase fix
* fix mfma to dot op shortcut for bfloat16
* fix review comments
This commit is contained in:
@@ -388,6 +388,7 @@ bool isMfmaToDotShortcut(RankedTensorType &srcTy, RankedTensorType &dstTy) {
|
||||
// layout when opIdx == 1.
|
||||
return mfmaLayout.getWarpsPerCTA()[1] == 1 &&
|
||||
dotOperandLayout.getOpIdx() == 0 &&
|
||||
dotOperandLayout.getKWidth() == 8 &&
|
||||
dotOperandLayout.getParent() == mfmaLayout &&
|
||||
mfmaLayout.getIsTransposed() && (srcTy.getElementType().isF16() || srcTy.getElementType().isBF16());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user