[MFMA] Support BFloat16 on MI100 (#295)

* [MFMA] Support BFloat16 on MI100 This PR makes use of mfma_f32_32x32x4bf16 instruction, available on MI100. * fix tests, fix mfma encoding comment, fix switch between mfma versions. * replace kDim from mfma layout with kWidth from dotOp layout * rebase fix * fix mfma to dot op shortcut for bfloat16 * fix review comments
2026-04-05 03:01:17 -04:00 · 2023-09-08 22:08:34 +02:00
parent 491eb9ddfe
commit 6691de65db
12 changed files with 109 additions and 71 deletions
--- a/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
+++ b/lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
@@ -675,7 +675,13 @@ private:
      // TODO: Support types other than float16 and
      // bf16 (represented as int16 in llvm ir).
      assert((type::isFloat(elemTy) || type::isInt(elemTy)) && elemSize == 16);
-      unsigned vecSize = 4;
+      // vecSize is the number of sequential elements stored by one thread
+      // - For MFMA (nonKDim == 32) encoding it is 4
+      // - For MFMA (nonKDim == 32) operand encoding it is dotOperandEncoding::kWidth,
+      //   which is 4 for fp16 and bfloat16 dtypes
+      //
+      // For the mentioned types, the MFMA and MFMA operand layouts are the same
+      const unsigned vecSize = 4;
      Type vecTy = vec_ty(elemTy, vecSize);
      types = SmallVector<Type>(elems / vecSize, vecTy);
      for (unsigned i = 0; i < elems; i += vecSize) {