mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
[OPTIMIZER] Thread local reduction optimization (#2542)
Co-authored-by: Phil Tillet <phil@openai.com>
This commit is contained in:
@@ -148,6 +148,8 @@ def optimize_ttgir(mod, num_stages, num_warps, num_ctas, target,
|
||||
if capability // 10 >= 9:
|
||||
pm.add_tritongpu_fence_insertion_pass()
|
||||
pm.add_tritongpu_ws_fixup_missing_attrs_pass()
|
||||
pm.add_tritongpu_optimize_thread_locality_pass()
|
||||
pm.add_canonicalizer_pass()
|
||||
pm.run(mod)
|
||||
return mod
|
||||
|
||||
|
||||
Reference in New Issue
Block a user