diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 010d69c117..c7e3935cbf 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -441,7 +441,7 @@ class PCIIface: iommu_group = HWInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1] except OSError: - if DEBUG >= 1: print("AM: failed to init vfio-pci module (not inserted or no-iommu mode is not supported).") + if DEBUG >= 1: print(f"am {self.pcibus}: failed to init vfio-pci module (not inserted or no-iommu mode is not supported).") PCIIface.vfio = False # Init vfio for the device @@ -464,7 +464,7 @@ class PCIIface: self.pagemap = HWInterface("/proc/self/pagemap", os.O_RDONLY) self.bar_fds = {bar: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{bar}", os.O_RDWR | os.O_SYNC) for bar in [0, 2, 5]} - self.adev = AMDev(self.pcidev, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I')) + self.adev = AMDev(self.pcidev, self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I')) self.doorbell_cpu_addr = mv_address(dbell) libpciaccess.pci_device_cfg_read_u16(self.adev.pcidev, ctypes.byref(val:=ctypes.c_uint16()), libpciaccess.PCI_COMMAND) diff --git a/tinygrad/runtime/support/am/amdev.py b/tinygrad/runtime/support/am/amdev.py index c02d4496ec..a070ba0b99 100644 --- a/tinygrad/runtime/support/am/amdev.py +++ b/tinygrad/runtime/support/am/amdev.py @@ -1,6 +1,6 @@ from __future__ import annotations import ctypes, collections, time, dataclasses, pathlib -from tinygrad.helpers import to_mv, mv_address, getenv, round_up +from tinygrad.helpers import to_mv, mv_address, getenv, round_up, DEBUG from tinygrad.runtime.autogen.am import am, mp_11_0, mp_13_0_0, nbio_4_3_0, mmhub_3_0_0, gc_11_0_0, osssys_6_0_0 from tinygrad.runtime.support.allocator import TLSFAllocator from tinygrad.runtime.support.am.ip import AM_SOC21, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA @@ -251,8 +251,8 @@ class AMMemoryManager: def pfree(self, pm:AMPhysicalMemoryBlock): self.pa_allocator.free(pm.paddr) class AMDev: - def __init__(self, pcidev, vram_bar:memoryview, doorbell_bar:memoryview, mmio_bar:memoryview): - self.pcidev = pcidev + def __init__(self, pcidev, devfmt, vram_bar:memoryview, doorbell_bar:memoryview, mmio_bar:memoryview): + self.pcidev, self.devfmt = pcidev, devfmt self.vram, self.doorbell64, self.mmio = vram_bar, doorbell_bar, mmio_bar self._run_discovery() @@ -267,7 +267,7 @@ class AMDev: # To enable this, AM uses a separate boot memory that is guaranteed not to be overwritten. This physical memory is utilized for # all blocks that are initialized only during the initial AM boot. # To determine if the GPU is in the third state, AM uses regSCRATCH_REG7 as a flag. - self.is_booting = True + self.is_booting = True # During boot only boot memory can be allocated. This flag is to validate this. self.partial_boot = (self.reg("regSCRATCH_REG7").read() == (am_version:=0xA0000001)) and (getenv("AM_RESET", 0) != 1) # Memory manager & firmware @@ -284,20 +284,26 @@ class AMDev: self.sdma:AM_SDMA = AM_SDMA(self) if self.partial_boot and (self.reg("regCP_MEC_RS64_CNTL").read() & gc_11_0_0.CP_MEC_RS64_CNTL__MEC_HALT_MASK == 0): - print("am: MEC is active. Someone might be using the GPU? Issue a full reset.") + print(f"am {self.devfmt}: MEC is active. Someone might be using the GPU? Issue a full reset.") self.partial_boot = False if not self.partial_boot: if self.psp.is_sos_alive(): self.smu.mode1_reset() - for ip in [self.soc21, self.gmc, self.ih, self.psp, self.smu]: ip.init() + for ip in [self.soc21, self.gmc, self.ih, self.psp, self.smu]: + ip.init() + if DEBUG >= 2: print(f"am {self.devfmt}: {ip.__class__.__name__} initialized") # Booting done self.is_booting = False # Re-initialize main blocks - for ip in [self.gfx, self.sdma]: ip.init() + for ip in [self.gfx, self.sdma]: + ip.init() + if DEBUG >= 2: print(f"am {self.devfmt}: {ip.__class__.__name__} initialized") + self.gfx.set_clockgating_state() self.reg("regSCRATCH_REG7").write(am_version) + if DEBUG >= 2: print(f"am {self.devfmt}: boot done") def fini(self): for ip in [self.sdma, self.gfx]: ip.fini() @@ -308,12 +314,12 @@ class AMDev: def rreg(self, reg:int) -> int: val = self.indirect_rreg(reg * 4) if reg > len(self.mmio) else self.mmio[reg] - if AM_DEBUG >= 4 and getattr(self, '_prev_rreg', None) != (reg, val): print(f"Reading register {reg:#x} with value {val:#x}") + if AM_DEBUG >= 4 and getattr(self, '_prev_rreg', None) != (reg, val): print(f"am {self.devfmt}: Reading register {reg:#x} with value {val:#x}") self._prev_rreg = (reg, val) return val def wreg(self, reg:int, val:int): - if AM_DEBUG >= 4: print(f"Writing register {reg:#x} with value {val:#x}") + if AM_DEBUG >= 4: print(f"am {self.devfmt}: Writing register {reg:#x} with value {val:#x}") if reg > len(self.mmio): self.indirect_wreg(reg * 4, val) else: self.mmio[reg] = val diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index 6e436eb084..65cd8fa550 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -1,7 +1,7 @@ import ctypes, time from typing import Literal from tinygrad.runtime.autogen.am import am, smu_v13_0_0 -from tinygrad.helpers import to_mv, data64, lo32, hi32 +from tinygrad.helpers import to_mv, data64, lo32, hi32, DEBUG class AM_IP: def __init__(self, adev): self.adev = adev @@ -107,6 +107,7 @@ class AM_SMU(AM_IP): self._smu_cmn_send_smc_msg_with_param(smu_v13_0_0.PPSMC_MSG_SetSoftMaxByFreq, clck, poll=True) def mode1_reset(self): + if DEBUG >= 2: print(f"am {self.adev.devfmt}: mode1 reset") self._smu_cmn_send_smc_msg_with_param(smu_v13_0_0.PPSMC_MSG_Mode1Reset, 0, poll=True) time.sleep(0.5) # 500ms @@ -394,6 +395,7 @@ class AM_PSP(AM_IP): return cmd def _load_ip_fw_cmd(self, psp_desc): + if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading fw: {am.psp_gfx_fw_type__enumvalues[psp_desc[0]]}") fw_type, fw_bytes = psp_desc self._prep_msg1(fw_bytes)