mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
lds bank count tests from pmc counters (#13667)
* lds bank count tests from pmc counters
* these tests run on the RDNA3 card too
* rename duration to cycles, other rename comment
* add SQ_LDS_IDX_ACTIVE to gfx9 defaults
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import ctypes, pathlib, argparse, pickle, re, functools, dataclasses, itertools, threading
|
||||
from tabulate import tabulate
|
||||
from typing import Generator
|
||||
from tinygrad.helpers import temp, unwrap, DEBUG
|
||||
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileProgramEvent
|
||||
@@ -161,6 +160,7 @@ def decode(profile:list[ProfileEvent]) -> _ROCParseCtx:
|
||||
|
||||
def print_pmc(events:list[ProfilePMCEvent]) -> None:
|
||||
from tinygrad.viz.serve import unpack_pmc
|
||||
from tabulate import tabulate
|
||||
for e in events:
|
||||
print("**", e.kern)
|
||||
data = unpack_pmc(e)
|
||||
|
||||
@@ -6,8 +6,11 @@ import unittest
|
||||
import functools, contextlib
|
||||
import numpy as np
|
||||
from tinygrad import Tensor, Context, Device
|
||||
from tinygrad.uop.ops import UOp, KernelInfo, AxisType
|
||||
from tinygrad.dtype import dtypes, AddrSpace
|
||||
from tinygrad.uop.ops import UOp, Ops, KernelInfo, AxisType
|
||||
from tinygrad.runtime.ops_amd import ProfilePMCEvent
|
||||
from tinygrad.engine.realize import get_runner
|
||||
from tinygrad.viz.serve import unpack_pmc
|
||||
from extra.sqtt.roc import print_pmc
|
||||
|
||||
def copy_kernel(B, A, stride=1):
|
||||
@@ -19,6 +22,16 @@ def copy_kernel(B, A, stride=1):
|
||||
index = (i * stride) % A.size
|
||||
return B[index].store(A[index]).sink(arg=KernelInfo(name=f"copy_{A.size}_stride_{stride}", opts_to_apply=()))
|
||||
|
||||
def lds_kernel(offset:UOp, size:int, inst:str) -> UOp:
  """Build the sink UOp of a kernel that wraps a single inline-asm instruction.

  offset: UOp that is indexed by a LOCAL-axis range of `offset.size`; the indexed value is the asm input operand.
  size:   element count of the float32 REG placeholder used as the asm output operand.
  inst:   instruction mnemonic spliced into the asm text (callers in this file pass ds_read_b32/b64/b128).

  Returns a sink named "test_lds" with an empty opts_to_apply.
  """
  lane = UOp.range(offset.size, 0, AxisType.LOCAL)
  out_regs = UOp.placeholder((size,), dtypes.float32, 1, AddrSpace.REG)
  #lds = UOp.placeholder((1024,), dtypes.float32, 2, AddrSpace.LOCAL)
  # chain of CUSTOM ops, each one taking the previous as its only source, that ends up as a source of the asm op
  fence = UOp(Ops.CUSTOM, arg='__builtin_amdgcn_s_waitcnt(0);')
  for builtin in ('__builtin_amdgcn_s_barrier();', '__builtin_amdgcn_sched_barrier(0);'):
    fence = UOp(Ops.CUSTOM, arg=builtin, src=(fence,))
  # only the mnemonic is interpolated; {0} and {1} remain literal text (presumably filled from src at render time)
  asm_text = f'asm volatile("{inst} ' + '%0, %1" : "=v"({0}) : "v"({1}));'
  read = UOp(Ops.CUSTOM, arg=asm_text, src=(out_regs, offset[lane], fence))
  return UOp.sink(read, arg=KernelInfo(name="test_lds", opts_to_apply=()))
|
||||
|
||||
# Device entry selected by Device.DEFAULT, resolved once at module import
dev = Device[Device.DEFAULT]
|
||||
|
||||
@contextlib.contextmanager
|
||||
@@ -45,5 +58,30 @@ class TestPMC(unittest.TestCase):
|
||||
|
||||
def test_copy_uncoalesced(self): return self.test_copy(stride=17)
|
||||
|
||||
# test with two threads issuing ds_reads at different offsets
|
||||
def test_ds_read(self, size=1, inst='ds_read_b32'):
  """Run the lds_kernel once per offset pair and print the pairs whose PMC sample reports a bank conflict.

  size: forwarded to lds_kernel as the output placeholder size.
  inst: forwarded to lds_kernel as the instruction mnemonic.

  Each input tensor holds the two values 0 and b*4 for b in 1..255. One runner is built from the
  first input's kernel and then invoked on every input's buffer while PMC events are collected.
  Nothing is asserted; the offsets with a non-zero conflict counter are only printed.
  """
  # SQ on gfx9, renamed to SQC after gfx10
  conflict_counters = {"SQ_LDS_BANK_CONFLICT", "SQC_LDS_BANK_CONFLICT"}
  test_banks = 256
  inputs = [Tensor([0, bank*4]) for bank in range(1, test_banks)]
  with Context(DEBUG=0):
    Tensor.realize(*inputs)
  build_ast = functools.partial(lds_kernel, size=size, inst=inst)
  kernel = Tensor.custom_kernel(inputs[0], fxn=build_ast)[0]
  # sample all kernels
  with save_pmc() as pmc_events:
    runner = get_runner(Device.DEFAULT, kernel.schedule()[0].ast)
    # TODO: llvm eliminates lds definition from the ELF, is there another way to pin lds size?
    runner._prg.group_segment_size = 1024
    for t in inputs:
      runner([t.uop.buffer])
  # find read offsets that created bank conflicts from the pmc counters
  found:list[Tensor] = []
  for idx, event in enumerate(pmc_events):
    rows = unpack_pmc(event)["rows"]
    conflicts = next(total for name, total, _instances in rows if name in conflict_counters)
    if conflicts > 0:
      found.append(inputs[idx])
  print("Found bank conflicts at offsets:", [t.numpy() for t in found])
|
||||
|
||||
def test_ds_read_b64(self): self.test_ds_read(2, 'ds_read_b64')
|
||||
|
||||
def test_ds_read_b128(self): self.test_ds_read(4, 'ds_read_b128')
|
||||
|
||||
# run the unittest runner only when this file is executed as a script
if __name__ == "__main__":
  unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user