[MFMA] Support BFloat16 on MI100 (#295)

* [MFMA] Support BFloat16 on MI100

  This PR makes use of the mfma_f32_32x32x4bf16 instruction, available on MI100.

* Fix tests, fix the mfma encoding comment, and fix the switch between mfma versions.
* Replace kDim from the mfma layout with kWidth from the dotOp layout.
* Rebase fix.
* Fix the mfma-to-dot-op shortcut for bfloat16.
* Fix review comments.
2026-04-05 03:01:17 -04:00 · 2023-09-08 22:08:34 +02:00
parent 491eb9ddfe
commit 6691de65db
12 changed files with 109 additions and 71 deletions
--- a/lib/Analysis/Utility.cpp
+++ b/lib/Analysis/Utility.cpp
@@ -388,6 +388,7 @@ bool isMfmaToDotShortcut(RankedTensorType &srcTy, RankedTensorType &dstTy) {
  // layout when opIdx == 1.
  return mfmaLayout.getWarpsPerCTA()[1] == 1 &&
         dotOperandLayout.getOpIdx() == 0 &&
+         dotOperandLayout.getKWidth() == 8 &&
         dotOperandLayout.getParent() == mfmaLayout &&
         mfmaLayout.getIsTransposed() && (srcTy.getElementType().isF16() || srcTy.getElementType().isBF16());
 }