ROCM IFU: Fix minimize_alloc

ROCM IFU: Small fixes
2026-04-05 03:01:17 -04:00 · 2023-10-02 13:25:46 -05:00
parent 88ce3b8985
commit 31fe8aadc5
3 changed files with 22 additions and 21 deletions
--- a/python/test/unit/language/test_core_amd.py
+++ b/python/test/unit/language/test_core_amd.py
@@ -2580,7 +2580,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 :
    %21 = tt.load %13 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<32x32xf16, #blocked>
    %22 = triton_gpu.convert_layout %21 : (tensor<32x32xf16, #blocked>) -> tensor<32x32xf16, #shared2>
    %23 = triton_gpu.convert_layout %22 : (tensor<32x32xf16, #shared2>) -> tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>>
-    %24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=8}>> -> tensor<32x32xf32, #mfma>
+    %24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>> -> tensor<32x32xf32, #mfma>
    %25 = triton_gpu.convert_layout %24 : (tensor<32x32xf32, #mfma>) -> tensor<32x32xf32, #blocked>
    %26 = arith.truncf %25 : tensor<32x32xf32, #blocked> to tensor<32x32xf16, #blocked>
    tt.store %17, %26 {cache = 1 : i32, evict = 1 : i32} : tensor<32x32xf16, #blocked>
--- a/python/triton/compiler/compiler.py
+++ b/python/triton/compiler/compiler.py
@@ -76,7 +76,7 @@ def optimize_ttir(mod, arch):
 def ttir_to_ttgir(mod, num_warps, warpsize, num_ctas, arch):
    pm = ir.pass_manager(mod.context)
    pm.enable_debug()
-     if is_hip():
+    if is_hip():
        pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, 0)
    else:
        pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, arch)