mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
ROCM IFU: Fix minimize_alloc
ROCM IFU: Small fixes
This commit is contained in:
committed by
Jason Furmanek
parent
88ce3b8985
commit
31fe8aadc5
@@ -2580,7 +2580,7 @@ module attributes {"triton_gpu.num-ctas" = 1 : i32, "triton_gpu.num-warps" = 4 :
|
||||
%21 = tt.load %13 {cache = 1 : i32, evict = 1 : i32, isVolatile = false} : tensor<32x32xf16, #blocked>
|
||||
%22 = triton_gpu.convert_layout %21 : (tensor<32x32xf16, #blocked>) -> tensor<32x32xf16, #shared2>
|
||||
%23 = triton_gpu.convert_layout %22 : (tensor<32x32xf16, #shared2>) -> tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>>
|
||||
%24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=8}>> -> tensor<32x32xf32, #mfma>
|
||||
%24 = tt.dot %20, %23, %cst {allowTF32 = false} : tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 0, parent = #mfma, kWidth=4}>> * tensor<32x32xf16, #triton_gpu.dot_op<{opIdx = 1, parent = #mfma, kWidth=4}>> -> tensor<32x32xf32, #mfma>
|
||||
%25 = triton_gpu.convert_layout %24 : (tensor<32x32xf32, #mfma>) -> tensor<32x32xf32, #blocked>
|
||||
%26 = arith.truncf %25 : tensor<32x32xf32, #blocked> to tensor<32x32xf16, #blocked>
|
||||
tt.store %17, %26 {cache = 1 : i32, evict = 1 : i32} : tensor<32x32xf16, #blocked>
|
||||
|
||||
@@ -76,7 +76,7 @@ def optimize_ttir(mod, arch):
|
||||
def ttir_to_ttgir(mod, num_warps, warpsize, num_ctas, arch):
|
||||
pm = ir.pass_manager(mod.context)
|
||||
pm.enable_debug()
|
||||
if is_hip():
|
||||
if is_hip():
|
||||
pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, 0)
|
||||
else:
|
||||
pm.add_convert_triton_to_tritongpu_pass(num_warps, warpsize, num_ctas, arch)
|
||||
|
||||
Reference in New Issue
Block a user