Enable usage of block pointer semantics for AMD GPUs (#301)

* Enable usage of block pointer semantics for AMD GPUs

This commit enables the use of block pointer semantics on AMD GPUs by
enabling the rewrite_tensor_pointer_pass, which rewrites block pointer
loads/stores into legacy loads/stores.
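
Conceptually, the rewrite replaces a structured block pointer (base, strides, offsets, block shape) with the explicit per-element offset arithmetic that a legacy load/store consumes. A minimal pure-Python sketch of that equivalence (block_ptr_offsets is a hypothetical helper for illustration, not a Triton API):

```python
def block_ptr_offsets(base, strides, offsets, block_shape):
    """Expand a 2D block pointer into the flat per-element offsets
    that a legacy (pointer-plus-offset-tensor) load would use."""
    stride_m, stride_n = strides
    off_m, off_n = offsets
    bm, bn = block_shape
    # Each element (i, j) of the block maps to one flat memory offset.
    return [
        [base + (off_m + i) * stride_m + (off_n + j) * stride_n
         for j in range(bn)]
        for i in range(bm)
    ]

# Example: a 4x4 row-major tensor (strides (4, 1)), loading the
# (2, 4)-shaped block starting at logical offset (2, 0).
offs = block_ptr_offsets(base=0, strides=(4, 1), offsets=(2, 0),
                         block_shape=(2, 4))
print(offs)  # -> [[8, 9, 10, 11], [12, 13, 14, 15]]
```

The pass performs this lowering in the IR, so backends without native block pointer support (such as the AMD path here) can still execute kernels written with block pointer semantics.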

* Update the FA forward kernel in the tutorial to use block pointers

* Use compute capability 90 for AMD GPUs in python/triton/compiler/compiler.py

Co-authored-by: Alexander Efimov <efimov.alexander@gmail.com>

---------

Co-authored-by: Ognjen Plavsic <ognjen.plavsic@dxc.com>
Co-authored-by: Lixun Zhang <lixun.zhang@amd.com>
Co-authored-by: Aleksandr Efimov <130555951+alefimov-amd@users.noreply.github.com>
Co-authored-by: Alexander Efimov <efimov.alexander@gmail.com>
Author: jayfurmanek
Date: 2023-08-24 13:05:12 -05:00
Committed by: GitHub
Parent: fa429316d4
Commit: ff7e707f87
6 changed files with 171 additions and 123 deletions


@@ -23,7 +23,7 @@ def block_copy_kernel(a_ptr, b_ptr, N, BLOCK_SIZE: tl.constexpr, padding_option:
                           for padding in ("zero", "nan")])
 def test_block_copy(dtype_str, n, padding_option):
     capability = torch.cuda.get_device_capability()
-    if capability[0] >= 9:
+    if torch.version.hip is None and capability[0] >= 9:
         pytest.skip("Hopper support is working in progress")
     dtype = getattr(torch, dtype_str)
@@ -82,7 +82,7 @@ def matmul_no_scf_with_advance_kernel(
 ])
 def test_block_ptr_matmul_no_scf(shape, num_warps):
     capability = torch.cuda.get_device_capability()
-    if capability[0] >= 9:
+    if torch.version.hip is None and capability[0] >= 9:
         pytest.skip("Hopper support is working in progress")
     m, n, k = shape
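
The gating condition added in both hunks can be sketched as a small predicate (should_skip_hopper is a hypothetical name; the real tests inline the check). On ROCm builds torch.version.hip is a version string rather than None, so the Hopper skip only fires on CUDA devices:

```python
def should_skip_hopper(hip_version, capability):
    """Return True when the test should be skipped: a CUDA device
    (hip_version is None) with compute capability >= 9 (Hopper).
    ROCm devices never skip, even though AMD capability tuples may
    also report a major version >= 9."""
    return hip_version is None and capability[0] >= 9

print(should_skip_hopper(None, (9, 0)))   # CUDA Hopper -> True (skip)
print(should_skip_hopper("5.6", (9, 0)))  # ROCm -> False (run)
print(should_skip_hopper(None, (8, 0)))   # CUDA Ampere -> False (run)
```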