am: download regs (#10419)

* am: download regs

* x

* linter

* mypy

* after merge

* raise

* fixed name

* fix

* xx

* remove

* missing reg

* missing reg

* move to online

* ops
This commit is contained in:
nimlgen
2025-05-20 18:59:56 +03:00
committed by GitHub
parent 965f9e0696
commit 2895198c36
25 changed files with 48 additions and 1417994 deletions

View File

@@ -349,50 +349,6 @@ generate_am() {
-o $BASE/am/soc24.py
fixup $BASE/am/soc24.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_13_0_0_offset.h \
$AMKERN_INC/asic_reg/mp/mp_13_0_0_sh_mask.h \
-o $BASE/am/mp_13_0_0.py
fixup $BASE/am/mp_13_0_0.py
# 14_0_3 reuses 14_0_2
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_14_0_2_offset.h \
$AMKERN_INC/asic_reg/mp/mp_14_0_2_sh_mask.h \
-o $BASE/am/mp_14_0_3.py
fixup $BASE/am/mp_14_0_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mp/mp_11_0_offset.h \
$AMKERN_INC/asic_reg/mp/mp_11_0_sh_mask.h \
-o $BASE/am/mp_11_0.py
fixup $BASE/am/mp_11_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_9_4_3_offset.h \
$AMKERN_INC/asic_reg/gc/gc_9_4_3_sh_mask.h \
extra/amdpci/overlay/gc_9_4_3.h \
-o $BASE/am/gc_9_4_3.py
fixup $BASE/am/gc_9_4_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_10_3_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_10_3_0_sh_mask.h \
-o $BASE/am/gc_10_3_0.py
fixup $BASE/am/gc_10_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_11_0_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_11_0_0_sh_mask.h \
-o $BASE/am/gc_11_0_0.py
fixup $BASE/am/gc_11_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/gc/gc_12_0_0_offset.h \
$AMKERN_INC/asic_reg/gc/gc_12_0_0_sh_mask.h \
-o $BASE/am/gc_12_0_0.py
fixup $BASE/am/gc_12_0_0.py
clang2py -k cdefstum \
extra/hip_gpu_driver/sdma_registers.h \
$AMKERN_AMD/amdgpu/vega10_sdma_pkt_open.h \
@@ -414,72 +370,6 @@ generate_am() {
-o $BASE/am/sdma_6_0_0.py
fixup $BASE/am/sdma_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h \
-o $BASE/am/mmhub_3_0_0.py
fixup $BASE/am/mmhub_3_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h \
-o $BASE/am/mmhub_3_0_2.py
fixup $BASE/am/mmhub_3_0_2.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_2_3_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_2_3_sh_mask.h \
-o $BASE/am/nbio_2_3_0.py
fixup $BASE/am/nbio_2_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_7_2_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_7_2_0_sh_mask.h \
-o $BASE/am/nbio_7_2_0.py
fixup $BASE/am/nbio_7_2_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_offset.h \
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h \
-o $BASE/am/mmhub_4_1_0.py
fixup $BASE/am/mmhub_4_1_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_sh_mask.h \
-o $BASE/am/nbio_4_3_0.py
fixup $BASE/am/nbio_4_3_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_offset.h \
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_sh_mask.h \
-o $BASE/am/nbif_6_3_1.py
fixup $BASE/am/nbif_6_3_1.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_sh_mask.h \
-o $BASE/am/nbio_7_9_0.py
fixup $BASE/am/nbio_7_9_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_offset.h \
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_sh_mask.h \
-o $BASE/am/nbio_7_11_0.py
fixup $BASE/am/nbio_7_11_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_offset.h \
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_sh_mask.h \
-o $BASE/am/osssys_6_0_0.py
fixup $BASE/am/osssys_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_offset.h \
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_sh_mask.h \
-o $BASE/am/osssys_7_0_0.py
fixup $BASE/am/osssys_7_0_0.py
clang2py -k cdefstum \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h \
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h \
@@ -495,18 +385,6 @@ generate_am() {
--clang-args="-include stdint.h" \
-o $BASE/am/smu_v14_0_3.py
fixup $BASE/am/smu_v14_0_3.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_offset.h \
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_sh_mask.h \
-o $BASE/am/hdp_6_0_0.py
fixup $BASE/am/hdp_6_0_0.py
clang2py -k cdefstum \
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_offset.h \
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_sh_mask.h \
-o $BASE/am/hdp_7_0_0.py
fixup $BASE/am/hdp_7_0_0.py
}
generate_sqtt() {

View File

@@ -1,7 +1,6 @@
import unittest
from tinygrad.runtime.support.am.amdev import AMMemoryManager, AMPageTableTraverseContext
from tinygrad.runtime.support.am.ip import AM_GMC
from tinygrad.runtime.support.amd import import_module
from tinygrad.runtime.support.hcq import MMIOInterface
from tinygrad.runtime.autogen.am import am
from tinygrad.helpers import mv_address
@@ -177,19 +176,5 @@ class TestAMPageTable(unittest.TestCase):
must_cover_checker(va, sz)
not_cover_checker(va, sz)
class TestAM(unittest.TestCase):
  """Checks which autogen module import_module resolves for a given GC IP version."""
  def test_imports(self):
    # a version with no autogen module must fail with ImportError
    with self.assertRaises(ImportError):
      import_module("gc", (7, 0, 0))

    # (requested version, name of the module that is expected to be loaded)
    expectations = [
      ((11, 0, 0), "tinygrad.runtime.autogen.am.gc_11_0_0"),
      ((11, 6, 0), "tinygrad.runtime.autogen.am.gc_11_0_0"),
      ((12, 0, 0), "tinygrad.runtime.autogen.am.gc_12_0_0"),
      ((10, 3, 0), "tinygrad.runtime.autogen.am.gc_10_3_0"),
      ((10, 3, 3), "tinygrad.runtime.autogen.am.gc_10_3_0"),
    ]
    for requested, module_name in expectations:
      x = import_module("gc", requested)
      assert x.__name__ == module_name
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
  unittest.main()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -128,7 +128,7 @@ class AMDComputeQueue(HWQueue):
self.wreg(self.gc.regCOMPUTE_TG_CHUNK_SIZE, 1)
for xcc_id in range(self.dev.xccs):
with self.pred_exec(xcc_mask=1 << xcc_id):
self.wreg(self.gc.regCOMPUTE_CURRENT_LOGIC_XCC_ID, xcc_id)
self.wreg(self.dev.regCOMPUTE_CURRENT_LOGIC_XCC_ID, xcc_id)
return self
def spi_config(self, tracing:bool):
@@ -873,6 +873,9 @@ class AMDDevice(HCQCompiled):
self.sdma = import_module('sdma', min(self.dev_iface.ip_versions[am.SDMA0_HWIP], (6, 0, 0)))
self.gc = AMDIP('gc', self.dev_iface.ip_versions[am.GC_HWIP], self.dev_iface.ip_offsets[am.GC_HWIP])
# Define the regCOMPUTE_CURRENT_LOGIC_XCC_ID register, which is missing from the asic_regs files.
if self.target[:2] == (9,4): self.regCOMPUTE_CURRENT_LOGIC_XCC_ID = AMDReg("regCOMPUTE_CURRENT_LOGIC_XCC_ID", 0xe25, 0, {}, self.gc.bases)
nbio_name = 'nbio' if self.target[0] < 12 else 'nbif'
nbio_pad = (0,) if self.target[0] == 9 else ()
self.nbio = AMDIP(nbio_name, self.dev_iface.ip_versions[am.NBIF_HWIP], nbio_pad+self.dev_iface.ip_offsets[am.NBIF_HWIP])

View File

@@ -1,9 +1,9 @@
from __future__ import annotations
import ctypes, collections, time, dataclasses, functools, fcntl, os, hashlib
from tinygrad.helpers import mv_address, getenv, round_up, DEBUG, temp, fetch
from tinygrad.runtime.autogen.am import am, mp_11_0
from tinygrad.runtime.autogen.am import am
from tinygrad.runtime.support.hcq import MMIOInterface
from tinygrad.runtime.support.amd import AMDReg, collect_registers, import_module
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
from tinygrad.runtime.support.allocator import TLSFAllocator
from tinygrad.runtime.support.am.ip import AM_SOC, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
@@ -383,9 +383,10 @@ class AMDev:
def _ip_module(self, prefix:str, hwip, prever_prefix:str=""):
  """Import the autogen module `prefix` for the version stored in `self.ip_ver[hwip]`."""
  ip_version = self.ip_ver[hwip]
  return import_module(prefix, ip_version, prever_prefix)
def _build_regs(self):
mods = [("MP0", self._ip_module("mp", am.MP0_HWIP)), ("HDP", self._ip_module("hdp", am.HDP_HWIP)), ("GC", self._ip_module("gc", am.GC_HWIP)),
("MP1", mp_11_0), ("MMHUB", self._ip_module("mmhub", am.MMHUB_HWIP)), ("OSSSYS", self._ip_module("osssys", am.OSSSYS_HWIP)),
("NBIO", self._ip_module("nbio" if self.ip_ver[am.GC_HWIP] < (12,0,0) else "nbif", am.NBIO_HWIP))]
mods = [("mp", am.MP0_HWIP), ("hdp", am.HDP_HWIP), ("gc", am.GC_HWIP), ("mmhub", am.MMHUB_HWIP), ("osssys", am.OSSSYS_HWIP),
("nbio" if self.ip_ver[am.GC_HWIP] < (12,0,0) else "nbif", am.NBIO_HWIP)]
for prefix, hwip in mods:
self.__dict__.update(import_asic_regs(prefix, self.ip_ver[hwip], cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[hwip][0])))
self.__dict__.update(import_asic_regs('mp', (11, 0), cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[am.MP1_HWIP][0])))
for ip, mod in mods:
self.__dict__.update(collect_registers(mod, cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[getattr(am, f"{ip}_HWIP")][0])))

View File

@@ -1,7 +1,7 @@
import functools, importlib
import functools, importlib, re, urllib
from collections import defaultdict
from dataclasses import dataclass
from tinygrad.helpers import getbits, round_up
from tinygrad.helpers import getbits, round_up, fetch
from tinygrad.runtime.autogen import pci
from tinygrad.runtime.support.usb import ASM24Controller
@@ -20,16 +20,13 @@ class AMDIP:
name:str; version:tuple[int, ...]; bases:tuple[int, ...] # noqa: E702
@functools.cached_property
def module(self): return import_module(self.name, self.version)
@functools.cached_property
def regs(self): return collect_registers(self.module, cls=functools.partial(AMDReg, bases=self.bases))
def regs(self): return import_asic_regs(self.name, self.version, cls=functools.partial(AMDReg, bases=self.bases))
def __getattr__(self, name:str):
if name in self.regs: return self.regs[name]
# NOTE: gfx10 gc registers always start with mm, no reg prefix
if (mmname:=name.replace('reg', 'mm')) in self.regs: return self.regs[mmname]
return getattr(self.module, name)
return self.regs[name.replace('reg', 'mm')]
def fixup_ip_version(ip:str, version:tuple[int, ...]) -> list[tuple[int, ...]]:
# override versions
@@ -39,19 +36,9 @@ def fixup_ip_version(ip:str, version:tuple[int, ...]) -> list[tuple[int, ...]]:
return version
if ip in ['nbio', 'nbif']: version = _apply_ovrd({(3,3): (2,3,0)})
return [version, version[:2]+(0,), version[:1]+(0, 0)]
elif ip == 'mp': version = _apply_ovrd({(14,0,3): (14,0,2)})
def collect_registers(module, cls=AMDReg) -> dict[str, AMDReg]:
  """Build register objects from the constants found in `module.__dict__`.

  Names whose part before the first uppercase character is `reg` or `mm` are register offsets, the matching
  `<name>_BASE_IDX` entries give the segment, and `<REG>__<FIELD>_MASK` entries describe the bit fields.
  Returns a dict mapping each register name to `cls(name=, offset=, segment=, fields=)`.
  """
  def _cut_at_upper(ident):
    # "regFOO_BAR" -> ("reg", "FOO_BAR"): the prefix is everything before the first uppercase character
    cut = next((idx for idx,ch in enumerate(ident) if ch.isupper()), len(ident))
    return ident[:cut], ident[cut:]

  symbols = module.__dict__

  # one pass over the symbols: offsets keep their full name, base indices are keyed by the register they belong to
  reg_offsets, reg_bases = {}, {}
  for sym, val in symbols.items():
    if _cut_at_upper(sym)[0] not in {'reg', 'mm'}: continue
    if sym.endswith('_BASE_IDX'): reg_bases[sym[:-len('_BASE_IDX')]] = val
    else: reg_offsets[sym] = val

  fields: defaultdict[str, dict[str, tuple[int, int]]] = defaultdict(dict)
  for mask_name, mask in symbols.items():
    if '__' not in mask_name or not mask_name.endswith('_MASK'): continue
    owner, field_name = mask_name[:-len('_MASK')].split('__')
    # (lowest set bit, highest set bit) of the mask
    lo_bit, hi_bit = (mask & -mask).bit_length()-1, mask.bit_length()-1
    fields[owner][field_name.lower()] = (lo_bit, hi_bit)

  # NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them
  regs = {}
  for reg, off in reg_offsets.items():
    if reg not in reg_bases: continue
    regs[reg] = cls(name=reg, offset=off, segment=reg_bases[reg], fields=fields[_cut_at_upper(reg)[1]])
  return regs
return [version, version[:2]+(0,), version[:1]+(0, 0)]
def import_module(name:str, version:tuple[int, ...], version_prefix:str=""):
for ver in fixup_ip_version(name, version):
@@ -59,6 +46,35 @@ def import_module(name:str, version:tuple[int, ...], version_prefix:str=""):
except ImportError: pass
raise ImportError(f"Failed to load autogen module for {name.upper()} {'.'.join(map(str, version))}")
def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[str, AMDReg]:
  """Build the register table of an IP block from the kernel's asic_reg headers.

  For each candidate version returned by `fixup_ip_version(prefix, version)`, in order, the headers
  `<prefix>_<ver>_offset.h` and `<prefix>_<ver>_sh_mask.h` are requested through `fetch`. The first candidate
  for which both requests succeed is parsed and returned.

  Args:
    prefix: IP name used in the header file names; "osssys" headers are looked up in the "oss" directory.
    version: requested IP version.
    cls: factory called as `cls(name=, offset=, segment=, fields=)` for every register.

  Returns:
    Dict mapping each register name (including its reg/mm prefix) to the object created by `cls`.

  Raises:
    ImportError: no candidate version could be loaded (every attempt ended in HTTP 404).
    urllib.error.HTTPError: any HTTP failure other than 404 is propagated unchanged.
  """
  # `import urllib` alone does not load the `urllib.error` submodule; import it here so the except clause below
  # does not depend on some other module having imported it first
  import urllib.error

  # compiled once, the headers are scanned line by line
  define_re = re.compile(r'#define\s+(\S+)\s+(0x[\da-fA-F]+|\d+)')

  def _split_name(name:str) -> tuple[str, str]:
    # "regFOO_BAR" -> ("reg", "FOO_BAR"): split at the first uppercase character
    pos = next((i for i,c in enumerate(name) if c.isupper()), len(name))
    return name[:pos], name[pos:]

  def _extract_regs(txt:str) -> dict[str, int]:
    # keep only `#define NAME <hex or decimal literal>` lines
    return {m.group(1): int(m.group(2), 0) for line in txt.splitlines() if (m:=define_re.match(line))}

  def _download_file(ver, suff) -> str:
    dir_prefix = {"osssys": "oss"}.get(prefix, prefix)
    # NOTE(review): the remote file is named after the candidate `ver`, while the `name` handed to fetch uses the
    # requested `version` -- presumably so a fallback hit is stored under the requested name; confirm against fetch
    fetch_name, file_name = f"{prefix}_{'_'.join(map(str, ver))}_{suff}.h", f"{prefix}_{'_'.join(map(str, version))}_{suff}.h"
    url = "https://gitlab.com/linux-kernel/linux-next/-/raw/cf6d949a409e09539477d32dbe7c954e4852e744/drivers/gpu/drm/amd/include/asic_reg"
    return fetch(f"{url}/{dir_prefix}/{fetch_name}", name=file_name, subdir="asic_regs").read_text()

  for ver in fixup_ip_version(prefix, version):
    try: offs, sh_masks = _extract_regs(_download_file(ver, "offset")), _extract_regs(_download_file(ver, "sh_mask"))
    except urllib.error.HTTPError as e:
      # a missing header just means this candidate version does not exist, try the next one
      if e.code == 404: continue
      raise

    # split the reg/mm defines into offsets and their `_BASE_IDX` companions in one pass
    offsets, bases = {}, {}
    for k,v in offs.items():
      if _split_name(k)[0] not in {'reg', 'mm'}: continue
      if k.endswith('_BASE_IDX'): bases[k[:-len('_BASE_IDX')]] = v
      else: offsets[k] = v

    fields: defaultdict[str, dict[str, tuple[int, int]]] = defaultdict(dict)
    for field_name, field_mask in sh_masks.items():
      if not ('__' in field_name and field_name.endswith('_MASK')): continue
      reg_name, reg_field_name = field_name[:-len('_MASK')].split('__')
      # (lowest set bit, highest set bit) of the mask
      fields[reg_name][reg_field_name.lower()] = ((field_mask & -field_mask).bit_length()-1, field_mask.bit_length()-1)

    # NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them
    return {reg:cls(name=reg, offset=off, segment=bases[reg], fields=fields[_split_name(reg)[1]]) for reg,off in offsets.items() if reg in bases}

  raise ImportError(f"Failed to load ASIC registers for {prefix.upper()} {'.'.join(map(str, version))}")
def setup_pci_bars(usb:ASM24Controller, gpu_bus:int, mem_base:int, pref_mem_base:int) -> dict[int, tuple[int, int]]:
for bus in range(gpu_bus):
# All 3 values must be written at the same time.