amd: alive wgps (#14149)

* amd: disabled wgps

* l

* wgp

* uoops

* mockgpu

* drm

* add this

* fix

* reg
This commit is contained in:
nimlgen
2026-01-23 00:08:45 +03:00
committed by GitHub
parent a738c4bb22
commit 8cd22df2dd
6 changed files with 3359 additions and 4 deletions

View File

@@ -40,13 +40,13 @@ jobs:
mesa: 'true'
pydeps: 'pyyaml mako'
- name: Install autogen support packages
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev libdrm-dev
- name: Regenerate autogen files
run: |
find tinygrad/runtime/autogen -type f -name "*.py" -not -name "__init__.py" -not -name "comgr_3.py" -not -name "metal.py" -not -name "iokit.py" -not -name "corefoundation.py" -not -name "libclang.py" -delete
python3 -c "from tinygrad.runtime.autogen import opencl"
python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv_580, nv"
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd"
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd, amdgpu_drm"
python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v14_0_2"
python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
python3 -c "from tinygrad.runtime.autogen import llvm"

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
import pathlib, re, ctypes, mmap, collections, functools, copy, os
import tinygrad.runtime.autogen.kfd as kfd
import tinygrad.runtime.autogen.am.am as am
import tinygrad.runtime.autogen.amdgpu_drm as amdgpu_drm
from tinygrad.helpers import from_mv
from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props
@@ -33,6 +34,16 @@ class DRMFileDesc(VirtFileDesc):
super().__init__(fd)
self.driver, self.gpu = driver, gpu
def ioctl(self, fd, request, argp):
  # Mock DRM ioctl: argp is read as the address of a struct_drm_amdgpu_info; fd and request are not inspected.
  # Only the AMDGPU_INFO_DEV_INFO query is answered (returns 0); every other query raises NotImplementedError.
  struct = amdgpu_drm.struct_drm_amdgpu_info.from_address(argp)
  if struct.query != amdgpu_drm.AMDGPU_INFO_DEV_INFO:
    raise NotImplementedError(f"unknown DRM ioctl query {struct.query}")

  # the caller supplies the destination buffer through return_pointer
  dev_info = amdgpu_drm.struct_drm_amdgpu_info_device.from_address(struct.return_pointer)
  # mock of gfx1100: walk the 4x4 bitmap row by row, the first 12 entries get 0xff and the remaining 4 get 0
  for flat_idx in range(4 * 4):
    row, col = divmod(flat_idx, 4)
    dev_info.cu_bitmap[row][col] = 0xff if flat_idx < 12 else 0
  return 0
def mmap(self, start, sz, prot, flags, fd, offset):
  # The caller's fd and offset are dropped: the mapping is requested as anonymous memory (fd=-1, offset=0).
  anon_flags = flags | mmap.MAP_ANONYMOUS
  return libc.mmap(start, sz, prot, anon_flags, -1, 0)
class AMDDriver(VirtDriver):

View File

@@ -105,6 +105,7 @@ def __getattr__(nm):
case "amd_gpu": return load("amd_gpu", None, [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset",
"sienna_cichlid_ip_offset"]],
args=["-I/opt/rocm/include", "-x", "c++"])
case "amdgpu_drm": return load("amdgpu_drm", None, [ "/usr/include/drm/drm.h", *[root/f"extra/hip_gpu_driver/{s}.h" for s in ["amdgpu_drm"]]])
case "kgsl": return load("kgsl", None, [root/"extra/qcom_gpu_driver/msm_kgsl.h"], args=["-D__user="])
case "qcom_dsp":
return load("qcom_dsp", None, [root/f"extra/dsp/include/{s}.h" for s in ["ion", "msm_ion", "adsprpc_shared", "remote_default", "apps_std"]])

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,7 @@ from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, Profil
from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, ceildiv
from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer
from tinygrad.renderer.llvmir import AMDLLVMRenderer
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd, amdgpu_drm
from tinygrad.runtime.autogen.am import am
from tinygrad.runtime.support.elf import elf_loader
from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
@@ -181,11 +181,13 @@ class AMDComputeQueue(HWQueue):
for xcc in range(s.xcc):
with self.pred_exec(xcc_mask=1 << xcc):
for inst, se_idx, sa_idx, wgp_idx in itertools.product(range(s.inst), range(s.se), range(s.sa), range(s.wgp)):
loff = next(offset)
if s.wgp > 1 and not self.dev.iface.is_wgp_active(xcc, se_idx, sa_idx, wgp_idx): continue
self.set_grbm(**({'instance':inst} if s.inst > 1 else ({'se':se_idx}|({'sh':sa_idx, 'wgp':wgp_idx} if self.dev.target[0] != 9 else {}))))
# Copy counter to memory (src_sel = perf, dst_sel = tc_l2)
lo, hi = getattr(self.gc, f'{s.regsample}_LO'), getattr(self.gc, f'{s.regsample}_HI', None)
self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+(loff:=next(offset))))
self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+loff))
if hi is not None: self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, hi.addr[0], 0, *data64_le(buf.va_addr+loff+4))
return self.pmc_reset_counters(en=True)
@@ -806,6 +808,13 @@ class KFDIface:
else:
raise RuntimeError("PMC/SQTT requires stable power state: run `amd-smi set -l stable_std` for KFD iface")
@functools.cached_property
def drm_dev_info(self) -> amdgpu_drm.struct_drm_amdgpu_info_device:
  # Issues the AMDGPU_INFO_DEV_INFO query on self.drm_fd and returns the struct handed to the ioctl as its output buffer.
  # cached_property means the ioctl runs on first access only; later reads reuse the same struct.
  dev_info = amdgpu_drm.struct_drm_amdgpu_info_device()
  amdgpu_drm.DRM_IOCTL_AMDGPU_INFO(self.drm_fd, query=amdgpu_drm.AMDGPU_INFO_DEV_INFO, return_pointer=ctypes.addressof(dev_info),
                                   return_size=ctypes.sizeof(dev_info))
  return dev_info
def is_wgp_active(self, xcc, se, sa, wgp) -> bool:
  # cu_bitmap is addressed as a 4-row table: se values of 4 and above wrap back onto rows 0..3 with the column shifted by 2.
  # NOTE(review): this looks like the kernel's gfx11 cu_info packing -- confirm for other generations. xcc is not used here.
  row, col = se % 4, sa + (se // 4) * 2
  cu_pair = (self.drm_dev_info.cu_bitmap[row][col] >> (2 * wgp)) & 0x3
  # each WGP owns two adjacent bits; it is reported active only when both are set
  return cu_pair == 0x3
class PCIIface(PCIIfaceBase):
gpus:ClassVar[list[str]] = []
@@ -816,6 +825,7 @@ class PCIIface(PCIIfaceBase):
self.pci_dev.write_config(pci.PCI_COMMAND, self.pci_dev.read_config(pci.PCI_COMMAND, 2) | pci.PCI_COMMAND_MASTER, 2)
def require_profile_mode(self):
  # Unconditionally returns True for this interface.
  return True
def is_wgp_active(self, xcc, se, sa, wgp) -> bool:
  # TODO: account for WGP disablement on some asics.
  # Until that is done every WGP is reported as active, whatever coordinates are passed.
  return True
def _setup_adev(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None):
self.dev_impl:AMDev = AMDev(pci_dev, dma_regions)