mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
am: download regs (#10419)
* am: download regs * x * linter * mypy * after merge * raise * fixed name * fix * xx * remove * missing reg * missing reg * move to online * ops
This commit is contained in:
122
autogen_stubs.sh
122
autogen_stubs.sh
@@ -349,50 +349,6 @@ generate_am() {
|
||||
-o $BASE/am/soc24.py
|
||||
fixup $BASE/am/soc24.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mp/mp_13_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/mp/mp_13_0_0_sh_mask.h \
|
||||
-o $BASE/am/mp_13_0_0.py
|
||||
fixup $BASE/am/mp_13_0_0.py
|
||||
|
||||
# 14_0_3 reuses 14_0_2
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mp/mp_14_0_2_offset.h \
|
||||
$AMKERN_INC/asic_reg/mp/mp_14_0_2_sh_mask.h \
|
||||
-o $BASE/am/mp_14_0_3.py
|
||||
fixup $BASE/am/mp_14_0_3.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mp/mp_11_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/mp/mp_11_0_sh_mask.h \
|
||||
-o $BASE/am/mp_11_0.py
|
||||
fixup $BASE/am/mp_11_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/gc/gc_9_4_3_offset.h \
|
||||
$AMKERN_INC/asic_reg/gc/gc_9_4_3_sh_mask.h \
|
||||
extra/amdpci/overlay/gc_9_4_3.h \
|
||||
-o $BASE/am/gc_9_4_3.py
|
||||
fixup $BASE/am/gc_9_4_3.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/gc/gc_10_3_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/gc/gc_10_3_0_sh_mask.h \
|
||||
-o $BASE/am/gc_10_3_0.py
|
||||
fixup $BASE/am/gc_10_3_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/gc/gc_11_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/gc/gc_11_0_0_sh_mask.h \
|
||||
-o $BASE/am/gc_11_0_0.py
|
||||
fixup $BASE/am/gc_11_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/gc/gc_12_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/gc/gc_12_0_0_sh_mask.h \
|
||||
-o $BASE/am/gc_12_0_0.py
|
||||
fixup $BASE/am/gc_12_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/hip_gpu_driver/sdma_registers.h \
|
||||
$AMKERN_AMD/amdgpu/vega10_sdma_pkt_open.h \
|
||||
@@ -414,72 +370,6 @@ generate_am() {
|
||||
-o $BASE/am/sdma_6_0_0.py
|
||||
fixup $BASE/am/sdma_6_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_0_sh_mask.h \
|
||||
-o $BASE/am/mmhub_3_0_0.py
|
||||
fixup $BASE/am/mmhub_3_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_offset.h \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_3_0_2_sh_mask.h \
|
||||
-o $BASE/am/mmhub_3_0_2.py
|
||||
fixup $BASE/am/mmhub_3_0_2.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_2_3_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_2_3_sh_mask.h \
|
||||
-o $BASE/am/nbio_2_3_0.py
|
||||
fixup $BASE/am/nbio_2_3_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_2_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_2_0_sh_mask.h \
|
||||
-o $BASE/am/nbio_7_2_0.py
|
||||
fixup $BASE/am/nbio_7_2_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/mmhub/mmhub_4_1_0_sh_mask.h \
|
||||
-o $BASE/am/mmhub_4_1_0.py
|
||||
fixup $BASE/am/mmhub_4_1_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_4_3_0_sh_mask.h \
|
||||
-o $BASE/am/nbio_4_3_0.py
|
||||
fixup $BASE/am/nbio_4_3_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbif/nbif_6_3_1_sh_mask.h \
|
||||
-o $BASE/am/nbif_6_3_1.py
|
||||
fixup $BASE/am/nbif_6_3_1.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_9_0_sh_mask.h \
|
||||
-o $BASE/am/nbio_7_9_0.py
|
||||
fixup $BASE/am/nbio_7_9_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/nbio/nbio_7_11_0_sh_mask.h \
|
||||
-o $BASE/am/nbio_7_11_0.py
|
||||
fixup $BASE/am/nbio_7_11_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/oss/osssys_6_0_0_sh_mask.h \
|
||||
-o $BASE/am/osssys_6_0_0.py
|
||||
fixup $BASE/am/osssys_6_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/oss/osssys_7_0_0_sh_mask.h \
|
||||
-o $BASE/am/osssys_7_0_0.py
|
||||
fixup $BASE/am/osssys_7_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h \
|
||||
@@ -495,18 +385,6 @@ generate_am() {
|
||||
--clang-args="-include stdint.h" \
|
||||
-o $BASE/am/smu_v14_0_3.py
|
||||
fixup $BASE/am/smu_v14_0_3.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/hdp/hdp_6_0_0_sh_mask.h \
|
||||
-o $BASE/am/hdp_6_0_0.py
|
||||
fixup $BASE/am/hdp_6_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_offset.h \
|
||||
$AMKERN_INC/asic_reg/hdp/hdp_7_0_0_sh_mask.h \
|
||||
-o $BASE/am/hdp_7_0_0.py
|
||||
fixup $BASE/am/hdp_7_0_0.py
|
||||
}
|
||||
|
||||
generate_sqtt() {
|
||||
|
||||
15
test/external/external_test_am.py
vendored
15
test/external/external_test_am.py
vendored
@@ -1,7 +1,6 @@
|
||||
import unittest
|
||||
from tinygrad.runtime.support.am.amdev import AMMemoryManager, AMPageTableTraverseContext
|
||||
from tinygrad.runtime.support.am.ip import AM_GMC
|
||||
from tinygrad.runtime.support.amd import import_module
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.runtime.autogen.am import am
|
||||
from tinygrad.helpers import mv_address
|
||||
@@ -177,19 +176,5 @@ class TestAMPageTable(unittest.TestCase):
|
||||
must_cover_checker(va, sz)
|
||||
not_cover_checker(va, sz)
|
||||
|
||||
class TestAM(unittest.TestCase):
  """Checks which autogen module `import_module` resolves for a given GC IP version."""

  def test_imports(self):
    # A version with no matching autogen module must fail with ImportError.
    with self.assertRaises(ImportError):
      import_module("gc", (7, 0, 0))

    # (requested version, suffix of the module that is expected to be loaded)
    expected = [
      ((11, 0, 0), "gc_11_0_0"),
      ((11, 6, 0), "gc_11_0_0"),
      ((12, 0, 0), "gc_12_0_0"),
      ((10, 3, 0), "gc_10_3_0"),
      ((10, 3, 3), "gc_10_3_0"),
    ]
    for requested, suffix in expected:
      x = import_module("gc", requested)
      assert x.__name__ == "tinygrad.runtime.autogen.am." + suffix
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -128,7 +128,7 @@ class AMDComputeQueue(HWQueue):
|
||||
self.wreg(self.gc.regCOMPUTE_TG_CHUNK_SIZE, 1)
|
||||
for xcc_id in range(self.dev.xccs):
|
||||
with self.pred_exec(xcc_mask=1 << xcc_id):
|
||||
self.wreg(self.gc.regCOMPUTE_CURRENT_LOGIC_XCC_ID, xcc_id)
|
||||
self.wreg(self.dev.regCOMPUTE_CURRENT_LOGIC_XCC_ID, xcc_id)
|
||||
return self
|
||||
|
||||
def spi_config(self, tracing:bool):
|
||||
@@ -873,6 +873,9 @@ class AMDDevice(HCQCompiled):
|
||||
self.sdma = import_module('sdma', min(self.dev_iface.ip_versions[am.SDMA0_HWIP], (6, 0, 0)))
|
||||
self.gc = AMDIP('gc', self.dev_iface.ip_versions[am.GC_HWIP], self.dev_iface.ip_offsets[am.GC_HWIP])
|
||||
|
||||
# Define the regCOMPUTE_CURRENT_LOGIC_XCC_ID register, which is missing from the asic_regs files.
|
||||
if self.target[:2] == (9,4): self.regCOMPUTE_CURRENT_LOGIC_XCC_ID = AMDReg("regCOMPUTE_CURRENT_LOGIC_XCC_ID", 0xe25, 0, {}, self.gc.bases)
|
||||
|
||||
nbio_name = 'nbio' if self.target[0] < 12 else 'nbif'
|
||||
nbio_pad = (0,) if self.target[0] == 9 else ()
|
||||
self.nbio = AMDIP(nbio_name, self.dev_iface.ip_versions[am.NBIF_HWIP], nbio_pad+self.dev_iface.ip_offsets[am.NBIF_HWIP])
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from __future__ import annotations
|
||||
import ctypes, collections, time, dataclasses, functools, fcntl, os, hashlib
|
||||
from tinygrad.helpers import mv_address, getenv, round_up, DEBUG, temp, fetch
|
||||
from tinygrad.runtime.autogen.am import am, mp_11_0
|
||||
from tinygrad.runtime.autogen.am import am
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.runtime.support.amd import AMDReg, collect_registers, import_module
|
||||
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
|
||||
from tinygrad.runtime.support.allocator import TLSFAllocator
|
||||
from tinygrad.runtime.support.am.ip import AM_SOC, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
|
||||
|
||||
@@ -383,9 +383,10 @@ class AMDev:
|
||||
def _ip_module(self, prefix:str, hwip, prever_prefix:str=""): return import_module(prefix, self.ip_ver[hwip], prever_prefix)
|
||||
|
||||
def _build_regs(self):
|
||||
mods = [("MP0", self._ip_module("mp", am.MP0_HWIP)), ("HDP", self._ip_module("hdp", am.HDP_HWIP)), ("GC", self._ip_module("gc", am.GC_HWIP)),
|
||||
("MP1", mp_11_0), ("MMHUB", self._ip_module("mmhub", am.MMHUB_HWIP)), ("OSSSYS", self._ip_module("osssys", am.OSSSYS_HWIP)),
|
||||
("NBIO", self._ip_module("nbio" if self.ip_ver[am.GC_HWIP] < (12,0,0) else "nbif", am.NBIO_HWIP))]
|
||||
mods = [("mp", am.MP0_HWIP), ("hdp", am.HDP_HWIP), ("gc", am.GC_HWIP), ("mmhub", am.MMHUB_HWIP), ("osssys", am.OSSSYS_HWIP),
|
||||
("nbio" if self.ip_ver[am.GC_HWIP] < (12,0,0) else "nbif", am.NBIO_HWIP)]
|
||||
|
||||
for prefix, hwip in mods:
|
||||
self.__dict__.update(import_asic_regs(prefix, self.ip_ver[hwip], cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[hwip][0])))
|
||||
self.__dict__.update(import_asic_regs('mp', (11, 0), cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[am.MP1_HWIP][0])))
|
||||
|
||||
for ip, mod in mods:
|
||||
self.__dict__.update(collect_registers(mod, cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[getattr(am, f"{ip}_HWIP")][0])))
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import functools, importlib
|
||||
import functools, importlib, re, urllib
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from tinygrad.helpers import getbits, round_up
|
||||
from tinygrad.helpers import getbits, round_up, fetch
|
||||
from tinygrad.runtime.autogen import pci
|
||||
from tinygrad.runtime.support.usb import ASM24Controller
|
||||
|
||||
@@ -20,16 +20,13 @@ class AMDIP:
|
||||
name:str; version:tuple[int, ...]; bases:tuple[int, ...] # noqa: E702
|
||||
|
||||
@functools.cached_property
|
||||
def module(self): return import_module(self.name, self.version)
|
||||
|
||||
@functools.cached_property
|
||||
def regs(self): return collect_registers(self.module, cls=functools.partial(AMDReg, bases=self.bases))
|
||||
def regs(self): return import_asic_regs(self.name, self.version, cls=functools.partial(AMDReg, bases=self.bases))
|
||||
|
||||
def __getattr__(self, name:str):
|
||||
if name in self.regs: return self.regs[name]
|
||||
|
||||
# NOTE: gfx10 gc registers always start with mm, no reg prefix
|
||||
if (mmname:=name.replace('reg', 'mm')) in self.regs: return self.regs[mmname]
|
||||
return getattr(self.module, name)
|
||||
return self.regs[name.replace('reg', 'mm')]
|
||||
|
||||
def fixup_ip_version(ip:str, version:tuple[int, ...]) -> list[tuple[int, ...]]:
|
||||
# override versions
|
||||
@@ -39,19 +36,9 @@ def fixup_ip_version(ip:str, version:tuple[int, ...]) -> list[tuple[int, ...]]:
|
||||
return version
|
||||
|
||||
if ip in ['nbio', 'nbif']: version = _apply_ovrd({(3,3): (2,3,0)})
|
||||
return [version, version[:2]+(0,), version[:1]+(0, 0)]
|
||||
elif ip == 'mp': version = _apply_ovrd({(14,0,3): (14,0,2)})
|
||||
|
||||
def collect_registers(module, cls=AMDReg) -> dict[str, AMDReg]:
  """Build register descriptors from the constants found in `module.__dict__`.

  Names whose lowercase prefix (everything before the first uppercase character) is `reg` or `mm`
  are treated as registers: `<name>` holds the offset and `<name>_BASE_IDX` the segment index.
  Names of the form `<REG>__<FIELD>_MASK` describe bit fields of register `<REG>`.

  Returns a dict mapping each register name to `cls(name=..., offset=..., segment=..., fields=...)`,
  where `fields` maps the lowercased field name to a `(lowest set bit, highest set bit)` tuple.
  """
  def _split_name(name):
    # ("reg", "FOO_BAR") for "regFOO_BAR"; the whole name is the prefix if there is no uppercase char.
    pos = next((i for i,c in enumerate(name) if c.isupper()), len(name))
    return name[:pos], name[pos:]

  offsets, bases = {}, {}
  for key, val in module.__dict__.items():
    if _split_name(key)[0] not in {'reg', 'mm'}: continue
    if key.endswith('_BASE_IDX'): bases[key[:-len('_BASE_IDX')]] = val
    else: offsets[key] = val

  fields: defaultdict[str, dict[str, tuple[int, int]]] = defaultdict(dict)
  for mask_name, mask in module.__dict__.items():
    if '__' not in mask_name or not mask_name.endswith('_MASK'): continue
    reg_name, reg_field_name = mask_name[:-len('_MASK')].split('__')
    lo_bit = (mask & -mask).bit_length() - 1  # index of the lowest set bit of the mask
    hi_bit = mask.bit_length() - 1            # index of the highest set bit of the mask
    fields[reg_name][reg_field_name.lower()] = (lo_bit, hi_bit)

  # NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them
  regs = {}
  for reg, off in offsets.items():
    if reg not in bases: continue
    regs[reg] = cls(name=reg, offset=off, segment=bases[reg], fields=fields[_split_name(reg)[1]])
  return regs
|
||||
return [version, version[:2]+(0,), version[:1]+(0, 0)]
|
||||
|
||||
def import_module(name:str, version:tuple[int, ...], version_prefix:str=""):
|
||||
for ver in fixup_ip_version(name, version):
|
||||
@@ -59,6 +46,35 @@ def import_module(name:str, version:tuple[int, ...], version_prefix:str=""):
|
||||
except ImportError: pass
|
||||
raise ImportError(f"Failed to load autogen module for {name.upper()} {'.'.join(map(str, version))}")
|
||||
|
||||
def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[str, AMDReg]:
  """Build register descriptors for an IP block from the kernel's `asic_reg` headers.

  For each candidate version returned by `fixup_ip_version(prefix, version)`, the
  `<prefix>_<ver>_offset.h` and `<prefix>_<ver>_sh_mask.h` headers are requested from a pinned
  linux-next commit and their `#define NAME VALUE` lines are parsed. The first candidate for which
  both headers are available is used.

  Args:
    prefix: IP name used in the header file names (e.g. "gc", "mp"); "osssys" headers live in the "oss" directory.
    version: requested IP version tuple.
    cls: factory called as `cls(name=..., offset=..., segment=..., fields=...)` for every register.

  Returns:
    Dict mapping register name to the object built by `cls`. `fields` maps the lowercased field name
    to a `(lowest set bit, highest set bit)` tuple derived from the `<REG>__<FIELD>_MASK` define.

  Raises:
    ImportError: every candidate version answered with HTTP 404.
    urllib.error.HTTPError: any non-404 HTTP error is re-raised unchanged.
  """
  # A bare `import urllib` does not load the `urllib.error` submodule; import it explicitly so the
  # except clause below does not depend on some other module having imported it first.
  import urllib.error

  define_re = re.compile(r'#define\s+(\S+)\s+(0x[\da-fA-F]+|\d+)')

  def _split_name(name):
    # ("reg", "FOO_BAR") for "regFOO_BAR"; the whole name is the prefix if there is no uppercase char.
    pos = next((i for i,c in enumerate(name) if c.isupper()), len(name))
    return name[:pos], name[pos:]

  def _extract_regs(txt):
    # Only lines that start with `#define NAME <hex-or-decimal>` contribute an entry.
    return {m.group(1): int(m.group(2), 0) for line in txt.splitlines() if (m:=define_re.match(line))}

  def _download_file(ver, suff) -> str:
    dir_prefix = {"osssys": "oss"}.get(prefix, prefix)
    # The URL uses the candidate version `ver`, while `name=` uses the originally requested `version`.
    # NOTE(review): presumably `name`/`subdir` select fetch's local cache entry -- confirm against tinygrad.helpers.fetch.
    fetch_name, file_name = f"{prefix}_{'_'.join(map(str, ver))}_{suff}.h", f"{prefix}_{'_'.join(map(str, version))}_{suff}.h"
    url = "https://gitlab.com/linux-kernel/linux-next/-/raw/cf6d949a409e09539477d32dbe7c954e4852e744/drivers/gpu/drm/amd/include/asic_reg"
    return fetch(f"{url}/{dir_prefix}/{fetch_name}", name=file_name, subdir="asic_regs").read_text()

  for ver in fixup_ip_version(prefix, version):
    try: offs, sh_masks = _extract_regs(_download_file(ver, "offset")), _extract_regs(_download_file(ver, "sh_mask"))
    except urllib.error.HTTPError as e:
      # A missing header only means this candidate version does not exist; try the next one.
      if e.code == 404: continue
      raise

    offsets = {k:v for k,v in offs.items() if _split_name(k)[0] in {'reg', 'mm'} and not k.endswith('_BASE_IDX')}
    bases = {k[:-len('_BASE_IDX')]:v for k,v in offs.items() if _split_name(k)[0] in {'reg', 'mm'} and k.endswith('_BASE_IDX')}

    fields: defaultdict[str, dict[str, tuple[int, int]]] = defaultdict(dict)
    for field_name, field_mask in sh_masks.items():
      if not ('__' in field_name and field_name.endswith('_MASK')): continue
      reg_name, reg_field_name = field_name[:-len('_MASK')].split('__')
      # (index of lowest set bit, index of highest set bit) of the mask
      fields[reg_name][reg_field_name.lower()] = ((field_mask & -field_mask).bit_length()-1, field_mask.bit_length()-1)

    # NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them
    return {reg:cls(name=reg, offset=off, segment=bases[reg], fields=fields[_split_name(reg)[1]]) for reg,off in offsets.items() if reg in bases}
  raise ImportError(f"Failed to load ASIC registers for {prefix.upper()} {'.'.join(map(str, version))}")
|
||||
|
||||
def setup_pci_bars(usb:ASM24Controller, gpu_bus:int, mem_base:int, pref_mem_base:int) -> dict[int, tuple[int, int]]:
|
||||
for bus in range(gpu_bus):
|
||||
# All 3 values must be written at the same time.
|
||||
|
||||
Reference in New Issue
Block a user