mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[BACKEND] Add back dot.wait when generating async_dot (#2478)
Based on discussion, this is needed to ensure there is no race condition when reading shared memory.
This commit is contained in:
@@ -22,9 +22,10 @@
|
||||
// CHECK: triton_gpu.extract_slice
|
||||
// CHECK: triton_gpu.extract_slice
|
||||
// CHECK: triton_nvidia_gpu.dot_async
|
||||
// CHECK: triton_nvidia_gpu.dot_wait {{.*}} pendings = 1
|
||||
// CHECK: triton_nvidia_gpu.consumer_release
|
||||
// CHECK: scf.yield
|
||||
// CHECK: triton_nvidia_gpu.dot_wait
|
||||
// CHECK: triton_nvidia_gpu.dot_wait {{.*}} pendings = 0
|
||||
// CHECK: async_agent = dense<1> : vector<1xi32>
|
||||
|
||||
#blocked = #triton_gpu.blocked<{sizePerThread = [8, 1], threadsPerWarp = [4, 8], warpsPerCTA = [1, 4], order = [0, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [0, 1]}>
|
||||
|
||||
Reference in New Issue
Block a user