ROCM IFU: Fix minimize_alloc

ROCM IFU: Small fixes
This commit is contained in:
Michael Melesse
2023-10-02 13:25:46 -05:00
committed by Jason Furmanek
parent 88ce3b8985
commit 31fe8aadc5
3 changed files with 22 additions and 21 deletions

View File

@@ -2580,7 +2580,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 :
%21 = tt.load %13 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<32x32xf16, #blocked>
%22 = triton_gpu.convert_layout %21 : (tensor<32x32xf16, #blocked>) -> tensor<32x32xf16, #shared2>
%23 = triton_gpu.convert_layout %22 : (tensor<32x32xf16, #shared2>) -> tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>>
%24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=8}>> -> tensor<32x32xf32, #mfma>
%24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>> -> tensor<32x32xf32, #mfma>
%25 = triton_gpu.convert_layout %24 : (tensor<32x32xf32, #mfma>) -> tensor<32x32xf32, #blocked>
%26 = arith.truncf %25 : tensor<32x32xf32, #blocked> to tensor<32x32xf16, #blocked>
tt.store %17, %26 {cache = 1 : i32, evict = 1 : i32} : tensor<32x32xf16, #blocked>

View File

@@ -76,7 +76,7 @@ def optimize_ttir(mod, arch):
def ttir_to_ttgir(mod, num_warps, warpsize, num_ctas, arch):
pm = ir.pass_manager(mod.context)
pm.enable_debug()
if is_hip():
if is_hip():
pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, 0)
else:
pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, arch)