mirror of
https://github.com/ROCm/ROCm.git
synced 2026-04-05 03:01:17 -04:00
Enable usage of block pointer semantics for AMD gpus (#301)
* Enable usage of block pointer semantics for AMD gpus This commit enables usage of block pointer semantics by enabling rewrite_tensor_pointer_pass that rewrites block pointer loads/stores to legacy loads/stores. * Update FA fwd in tutorial to use the block pointers * use 90 compute capability for amd gpus in python/triton/compiler/compiler.py Co-authored-by: Alexander Efimov <efimov.alexander@gmail.com> --------- Co-authored-by: Ognjen Plavsic <ognjen.plavsic@dxc.com> Co-authored-by: Lixun Zhang <lixun.zhang@amd.com> Co-authored-by: Aleksandr Efimov <130555951+alefimov-amd@users.noreply.github.com> Co-authored-by: Alexander Efimov <efimov.alexander@gmail.com>
This commit is contained in:
@@ -23,7 +23,7 @@ def block_copy_kernel(a_ptr, b_ptr, N, BLOCK_SIZE: tl.constexpr, padding_option:
                          for padding in ("zero", "nan")])
 def test_block_copy(dtype_str, n, padding_option):
     capability = torch.cuda.get_device_capability()
-    if capability[0] >= 9:
+    if torch.version.hip is None and capability[0] >= 9:
         pytest.skip("Hopper support is working in progress")

     dtype = getattr(torch, dtype_str)
@@ -82,7 +82,7 @@ def matmul_no_scf_with_advance_kernel(
 ])
 def test_block_ptr_matmul_no_scf(shape, num_warps):
     capability = torch.cuda.get_device_capability()
-    if capability[0] >= 9:
+    if torch.version.hip is None and capability[0] >= 9:
         pytest.skip("Hopper support is working in progress")

     m, n, k = shape
||||
Reference in New Issue
Block a user