Files
ROCm/python/test/unit/hopper/ttgir_tests/test_tma.py
Justin Lebar df08301e76 Reformat Python code with yapf. (#2589)
I've added an option to yapf to do what we want for long lines, see
https://github.com/google/yapf/pull/1177.  We can now have a real Python
formatter, yay!

To make this PR, I ran my modified yapf over the repository, then looked
over the full diff.  Where yapf was mangling the param list of long
function decls/calls (mostly kernels), I manually added `#` to put
linebreaks where we want.  I fixed up other formatting too -- mostly
adding or removing a trailing comma from lists.

Overall, trailing `#` was sufficient to get formatting similar to our
current code.  I didn't have to disable yapf anywhere.

---------

Co-authored-by: Phil Tillet <phil@openai.com>
2023-11-02 20:44:17 -07:00

67 lines
2.5 KiB
Python

# Copyright (c) 2023 NVIDIA Corporation & Affiliates. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import os
import pytest
import torch
from torch.testing import assert_close
import triton
@pytest.mark.parametrize('TTGIR,TRANS_A,TRANS_B', [
    # TODO: uncomment when it's done
    # ["wgmma_tma_64_64_16_f16.ttgir", False, True],
])
def test_tma_wgmma_64_64_16_f16(TTGIR, TRANS_A, TRANS_B):
    """Compile a TTGIR file, launch it on a 64x64x16 fp16 problem, and check it against torch.matmul.

    Args:
        TTGIR: File name of the TTGIR source, resolved relative to the
            directory containing this test file.
        TRANS_A: When truthy, ``a`` is allocated as (K, M) and passed as its
            transposed view, so its strides differ from the contiguous case.
        TRANS_B: Same as ``TRANS_A`` but for ``b`` (allocated as (N, K)).

    The test is skipped on devices whose major compute capability is below 9.
    NOTE(review): the parameter list is currently empty, so pytest collects
    no cases for this test until the TODO entry is enabled.
    """
    major, _minor = torch.cuda.get_device_capability()
    if major < 9:
        pytest.skip("Only test wgmma on devices with sm >= 90")

    m, n, k = 64, 64, 16
    fp16_cuda = dict(device='cuda', dtype=torch.float16)

    # Inputs are always logically (M, K) and (K, N); the transposed variants
    # only change the underlying memory layout via `.T`.
    a = torch.randn((k, m), **fp16_cuda).T if TRANS_A else torch.randn((m, k), **fp16_cuda)
    b = torch.randn((n, k), **fp16_cuda).T if TRANS_B else torch.randn((k, n), **fp16_cuda)
    c = torch.empty((m, n), device=a.device, dtype=torch.float32)

    ttgir_path = "/".join((os.path.dirname(__file__), TTGIR))
    kernel = triton.compile(ttgir_path)

    launch_args = (
        a.data_ptr(), b.data_ptr(), c.data_ptr(),  # addresses of the three buffers
        m, n, k,  # problem sizes
        *a.stride(),  # both strides of a
        *b.stride(),  # both strides of b
        c.stride(0),  # only the leading stride of c is passed
    )
    kernel[(1, 1, 1)](*launch_args)

    expected = torch.matmul(a, b)
    # Print tensors in full, without scientific notation, from here on.
    torch.set_printoptions(profile="full", sci_mode=False)
    assert_close(c, expected, rtol=1e-2, atol=1e-3, check_dtype=False)