[BACKEND] Add back dot.wait when generating async_dot (#2478)

Based on discussion, the dot.wait is needed to ensure there is no race condition when reading shared memory.
2026-04-05 03:01:17 -04:00 · 2023-10-10 21:45:28 -07:00
parent 4749072fbd
commit 6f46c93b9e
3 changed files with 16 additions and 3 deletions
--- a/test/TritonGPU/wspipeline.mlir
+++ b/test/TritonGPU/wspipeline.mlir
@@ -22,9 +22,10 @@
 // CHECK: triton_gpu.extract_slice
 // CHECK: triton_gpu.extract_slice
 // CHECK: triton_nvidia_gpu.dot_async
+// CHECK: triton_nvidia_gpu.dot_wait {{.*}} pendings = 1
 // CHECK: triton_nvidia_gpu.consumer_release
 // CHECK: scf.yield
-// CHECK: triton_nvidia_gpu.dot_wait
+// CHECK: triton_nvidia_gpu.dot_wait {{.*}} pendings = 0
 // CHECK: async_agent = dense<1> : vector<1xi32>

 #blocked = #triton_gpu.blocked<{sizePerThread = [8, 1], threadsPerWarp = [4, 8], warpsPerCTA = [1, 4], order = [0, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>