mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
amd: alive wgps (#14149)
* amd: disabled wgps * l * wgp * uoops * mockgpu * drm * ad this * fi * reg
This commit is contained in:
4
.github/workflows/autogen.yml
vendored
4
.github/workflows/autogen.yml
vendored
@@ -40,13 +40,13 @@ jobs:
|
||||
mesa: 'true'
|
||||
pydeps: 'pyyaml mako'
|
||||
- name: Install autogen support packages
|
||||
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev
|
||||
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev libdrm-dev
|
||||
- name: Regenerate autogen files
|
||||
run: |
|
||||
find tinygrad/runtime/autogen -type f -name "*.py" -not -name "__init__.py" -not -name "comgr_3.py" -not -name "metal.py" -not -name "iokit.py" -not -name "corefoundation.py" -not -name "libclang.py" -delete
|
||||
python3 -c "from tinygrad.runtime.autogen import opencl"
|
||||
python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv_580, nv"
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd"
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd, amdgpu_drm"
|
||||
python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v14_0_2"
|
||||
python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
|
||||
python3 -c "from tinygrad.runtime.autogen import llvm"
|
||||
|
||||
1740
extra/hip_gpu_driver/amdgpu_drm.h
Normal file
1740
extra/hip_gpu_driver/amdgpu_drm.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
||||
import pathlib, re, ctypes, mmap, collections, functools, copy, os
|
||||
import tinygrad.runtime.autogen.kfd as kfd
|
||||
import tinygrad.runtime.autogen.am.am as am
|
||||
import tinygrad.runtime.autogen.amdgpu_drm as amdgpu_drm
|
||||
from tinygrad.helpers import from_mv
|
||||
from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
|
||||
from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props
|
||||
@@ -33,6 +34,16 @@ class DRMFileDesc(VirtFileDesc):
|
||||
super().__init__(fd)
|
||||
self.driver, self.gpu = driver, gpu
|
||||
|
||||
def ioctl(self, fd, request, argp):
|
||||
struct = amdgpu_drm.struct_drm_amdgpu_info.from_address(argp)
|
||||
if struct.query == amdgpu_drm.AMDGPU_INFO_DEV_INFO:
|
||||
dev_info = amdgpu_drm.struct_drm_amdgpu_info_device.from_address(struct.return_pointer)
|
||||
# mock of gfx1100
|
||||
for se in range(4):
|
||||
for sa in range(4): dev_info.cu_bitmap[se][sa] = 0xff if (se * 4 + sa) < 12 else 0
|
||||
return 0
|
||||
raise NotImplementedError(f"unknown DRM ioctl query {struct.query}")
|
||||
|
||||
def mmap(self, start, sz, prot, flags, fd, offset): return libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
|
||||
|
||||
class AMDDriver(VirtDriver):
|
||||
|
||||
@@ -105,6 +105,7 @@ def __getattr__(nm):
|
||||
case "amd_gpu": return load("amd_gpu", None, [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset",
|
||||
"sienna_cichlid_ip_offset"]],
|
||||
args=["-I/opt/rocm/include", "-x", "c++"])
|
||||
case "amdgpu_drm": return load("amdgpu_drm", None, [ "/usr/include/drm/drm.h", *[root/f"extra/hip_gpu_driver/{s}.h" for s in ["amdgpu_drm"]]])
|
||||
case "kgsl": return load("kgsl", None, [root/"extra/qcom_gpu_driver/msm_kgsl.h"], args=["-D__user="])
|
||||
case "qcom_dsp":
|
||||
return load("qcom_dsp", None, [root/f"extra/dsp/include/{s}.h" for s in ["ion", "msm_ion", "adsprpc_shared", "remote_default", "apps_std"]])
|
||||
|
||||
1593
tinygrad/runtime/autogen/amdgpu_drm.py
Normal file
1593
tinygrad/runtime/autogen/amdgpu_drm.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@ from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, Profil
|
||||
from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, ceildiv
|
||||
from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer
|
||||
from tinygrad.renderer.llvmir import AMDLLVMRenderer
|
||||
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd
|
||||
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt, amdgpu_kd, amdgpu_drm
|
||||
from tinygrad.runtime.autogen.am import am
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
|
||||
@@ -181,11 +181,13 @@ class AMDComputeQueue(HWQueue):
|
||||
for xcc in range(s.xcc):
|
||||
with self.pred_exec(xcc_mask=1 << xcc):
|
||||
for inst, se_idx, sa_idx, wgp_idx in itertools.product(range(s.inst), range(s.se), range(s.sa), range(s.wgp)):
|
||||
loff = next(offset)
|
||||
if s.wgp > 1 and not self.dev.iface.is_wgp_active(xcc, se_idx, sa_idx, wgp_idx): continue
|
||||
self.set_grbm(**({'instance':inst} if s.inst > 1 else ({'se':se_idx}|({'sh':sa_idx, 'wgp':wgp_idx} if self.dev.target[0] != 9 else {}))))
|
||||
|
||||
# Copy counter to memory (src_sel = perf, dst_sel = tc_l2)
|
||||
lo, hi = getattr(self.gc, f'{s.regsample}_LO'), getattr(self.gc, f'{s.regsample}_HI', None)
|
||||
self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+(loff:=next(offset))))
|
||||
self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, lo.addr[0], 0, *data64_le(buf.va_addr+loff))
|
||||
if hi is not None: self.pkt3(self.pm4.PACKET3_COPY_DATA, (2 << 8) | 4, hi.addr[0], 0, *data64_le(buf.va_addr+loff+4))
|
||||
|
||||
return self.pmc_reset_counters(en=True)
|
||||
@@ -806,6 +808,13 @@ class KFDIface:
|
||||
else:
|
||||
raise RuntimeError("PMC/SQTT requires stable power state: run `amd-smi set -l stable_std` for KFD iface")
|
||||
|
||||
@functools.cached_property
|
||||
def drm_dev_info(self) -> amdgpu_drm.struct_drm_amdgpu_info_device:
|
||||
amdgpu_drm.DRM_IOCTL_AMDGPU_INFO(self.drm_fd, query=amdgpu_drm.AMDGPU_INFO_DEV_INFO,
|
||||
return_pointer=ctypes.addressof(inf:=amdgpu_drm.struct_drm_amdgpu_info_device()), return_size=ctypes.sizeof(inf))
|
||||
return inf
|
||||
def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return ((self.drm_dev_info.cu_bitmap[se % 4][sa + (se // 4) * 2] >> (2 * wgp)) & 0x3) == 0x3
|
||||
|
||||
class PCIIface(PCIIfaceBase):
|
||||
gpus:ClassVar[list[str]] = []
|
||||
|
||||
@@ -816,6 +825,7 @@ class PCIIface(PCIIfaceBase):
|
||||
self.pci_dev.write_config(pci.PCI_COMMAND, self.pci_dev.read_config(pci.PCI_COMMAND, 2) | pci.PCI_COMMAND_MASTER, 2)
|
||||
|
||||
def require_profile_mode(self): return True
|
||||
def is_wgp_active(self, xcc, se, sa, wgp) -> bool: return True # TODO: account for WGP disablement on some asics.
|
||||
|
||||
def _setup_adev(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None):
|
||||
self.dev_impl:AMDev = AMDev(pci_dev, dma_regions)
|
||||
|
||||
Reference in New Issue
Block a user