mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
amd: fix ip offsets (#12867)
This commit is contained in:
@@ -15,7 +15,7 @@ from tinygrad.runtime.autogen.am import am
|
||||
from tinygrad.runtime.support.compiler_amd import HIPCompiler, AMDLLVMCompiler
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager
|
||||
from tinygrad.runtime.support.amd import AMDReg, AMDIP, import_module, import_soc, setup_pci_bars
|
||||
from tinygrad.runtime.support.amd import AMDReg, AMDIP, import_module, import_soc, import_ip_offsets, setup_pci_bars
|
||||
from tinygrad.runtime.support.system import System, PCIIfaceBase, PCIAllocationMeta, MAP_FIXED, MAP_NORESERVE
|
||||
from tinygrad.runtime.support.usb import ASM24Controller, USBMMIOInterface
|
||||
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
|
||||
@@ -559,8 +559,6 @@ class KFDIface:
|
||||
id2ip = {am.GC_HWID: am.GC_HWIP, am.SDMA0_HWID: am.SDMA0_HWIP, am.NBIF_HWID: am.NBIF_HWIP}
|
||||
ip_hw = [(id2ip[int(hwid)], int(hwid)) for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip]
|
||||
self.ip_versions = {ip:tuple(int(FileIOInterface(f'{ip_base}/{hw}/0/{part}').read()) for part in ['major','minor','revision']) for ip,hw in ip_hw}
|
||||
self.ip_offsets = {ip:{int(i):tuple(int(x, 16) for x in FileIOInterface(f'{ip_base}/{hw}/{i}/base_addr').read().splitlines())
|
||||
for i in FileIOInterface(f'{ip_base}/{hw}').listdir()} for ip,hw in ip_hw }
|
||||
self.drm_fd = FileIOInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
|
||||
|
||||
self.kfd_ver = ((ver_st:=kfd.AMDKFD_IOC_GET_VERSION(KFDIface.kfd)).major_version, ver_st.minor_version)
|
||||
@@ -676,7 +674,7 @@ class PCIIface(PCIIfaceBase):
|
||||
|
||||
def _setup_adev(self, name, vram:MMIOInterface, doorbell:MMIOInterface, mmio:MMIOInterface, dma_regions:list[tuple[int, MMIOInterface]]|None=None):
|
||||
self.dev_impl:AMDev = AMDev(name, vram, doorbell, mmio, dma_regions)
|
||||
self.ip_offsets, self.ip_versions = self.dev_impl.regs_offset, self.dev_impl.ip_ver
|
||||
self.ip_versions = self.dev_impl.ip_ver
|
||||
|
||||
gfxver = int(f"{self.dev_impl.ip_ver[am.GC_HWIP][0]:02d}{self.dev_impl.ip_ver[am.GC_HWIP][1]:02d}{self.dev_impl.ip_ver[am.GC_HWIP][2]:02d}")
|
||||
array_count = self.dev_impl.gc_info.gc_num_sa_per_se * self.dev_impl.gc_info.gc_num_se
|
||||
@@ -776,14 +774,15 @@ class AMDDevice(HCQCompiled):
|
||||
debug_memory_size = round_up((self.max_cu_id + 1 if self.target >= (10,1,0) else 1) * (self.max_wave_id + 1) * 32, 64)
|
||||
if self.target[0] == 10: ctl_stack_size = min(ctl_stack_size, 0x7000)
|
||||
|
||||
self.ip_off = import_ip_offsets(self.target)
|
||||
self.soc = import_soc(self.target)
|
||||
self.pm4 = importlib.import_module(f"tinygrad.runtime.autogen.am.pm4_{'nv' if self.target[0] >= 10 else 'soc15'}")
|
||||
self.sdma = import_module('sdma', min(self.iface.ip_versions[am.SDMA0_HWIP], (6, 0, 0)))
|
||||
self.gc = AMDIP('gc', self.iface.ip_versions[am.GC_HWIP], self.iface.ip_offsets[am.GC_HWIP])
|
||||
self.gc = AMDIP('gc', self.iface.ip_versions[am.GC_HWIP],
|
||||
bases={i: tuple(getattr(self.ip_off, f'GC_BASE__INST{i}_SEG{s}', 0) for s in range(6)) for i in range(6)})
|
||||
|
||||
nbio_name = 'nbio' if self.target[0] < 12 else 'nbif'
|
||||
nbio_pad = (0,) if self.target[0] == 9 else ()
|
||||
self.nbio = AMDIP(nbio_name, self.iface.ip_versions[am.NBIF_HWIP], {i:nbio_pad+x for i,x in self.iface.ip_offsets[am.NBIF_HWIP].items()})
|
||||
self.nbio = AMDIP('nbio' if self.target[0] < 12 else 'nbif', self.iface.ip_versions[am.NBIF_HWIP],
|
||||
bases={i: tuple(getattr(self.ip_off, f'NBIO_BASE__INST{i}_SEG{s}', 0) for s in range(9)) for i in range(6)})
|
||||
|
||||
self.is_aql = getenv("AMD_AQL", int(self.xccs > 1))
|
||||
if self.is_aql:
|
||||
|
||||
@@ -49,7 +49,9 @@ def header_download(file, name=None, subdir="defines", url=None) -> str:
|
||||
|
||||
def import_header(path:str, url=None):
  """Parse integer constants out of a C header and return them as a ``{name: value}`` dict.

  The header text comes from ``header_download(path, subdir="defines", url=url)``. Comments are
  stripped first, then two kinds of definitions are collected: ``NAME = <int>`` assignments and
  ``#define NAME <int>`` macros. Values may be decimal or ``0x``-prefixed hex.
  When the same name is matched more than once, the last match wins (``#define`` matches come last).
  """
  # A `//` comment ends at the end of its line, so match `[^\n]*` rather than `.*`: under re.S a plain
  # `.*` would swallow everything after the first `//`. re.S is still needed for multi-line `/* ... */`.
  t = re.sub(r'//[^\n]*|/\*.*?\*/', '', header_download(path, subdir="defines", url=url), flags=re.S)
  # TODO: refactor when clang2py is replaced
  assigns = re.findall(r'\b([A-Za-z_]\w*)\s*=\s*(0x[0-9A-Fa-f]+|\d+)', t)
  defines = re.findall(r'^\s*#\s*define\s+([A-Za-z_0-9]\w*)\s+(0x[0-9A-Fa-f]+|\d+)', t, re.M)
  # base 0 lets int() accept both decimal and 0x-prefixed literals
  return {k:int(v,0) for k,v in assigns + defines}
|
||||
|
||||
def import_module(name:str, version:tuple[int, ...], version_prefix:str=""):
|
||||
for ver in fixup_ip_version(name, version):
|
||||
@@ -62,6 +64,8 @@ def import_soc(ip):
|
||||
url = "https://raw.githubusercontent.com/ROCm/rocm-systems/cccc350dc620e61ae2554978b62ab3532dc10bd9/projects"
|
||||
return type("SOC", (object,), import_header(f"aqlprofile/linux/{({9: 'vega10', 10: 'navi10', 11: 'soc21', 12: 'soc24'}[ip[0]])}_enum.h", url=url))
|
||||
|
||||
def import_ip_offsets(ip):
  """Return an ``IPOFF`` class whose attributes are the constants parsed from an ip_offset header.

  Only ``ip[0]`` is inspected: values above 9 select ``sienna_cichlid_ip_offset.h``, anything else
  selects ``vega20_ip_offset.h``. The header is parsed by ``import_header``.
  """
  family = 'sienna_cichlid' if ip[0] > 9 else 'vega20'
  constants = import_header(f"include/{family}_ip_offset.h")
  # three-argument type() builds the class with the parsed constants as its class attributes
  return type("IPOFF", (object,), constants)
|
||||
|
||||
def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[str, AMDReg]:
|
||||
def _split_name(name): return name[:(pos:=next((i for i,c in enumerate(name) if c.isupper()), len(name)))], name[pos:]
|
||||
def _extract_regs(txt):
|
||||
|
||||
Reference in New Issue
Block a user