am: reset mi300 with pm4 (#14727)

This commit is contained in:
nimlgen
2026-02-13 11:22:32 +03:00
committed by GitHub
parent c0de4f75b1
commit ba67425680
2 changed files with 3 additions and 2 deletions

View File

@@ -193,7 +193,7 @@ class AMDev(PCIDevImplBase):
if DEBUG >= 2: print(f"am {self.devfmt}: boot done")
def init_sw(self, smi_dev=False):
self.smi_dev, self.is_err_state = smi_dev, False
self.smi_dev, self.is_err_state, self.has_aql_queue = smi_dev, False, False
# Memory manager & firmware
self.mm = AMMemoryManager(self, self.vram_size - self.reserved_vram_size, boot_size=(32 << 20), pt_t=AMPageTableEntry, va_shifts=[12, 21, 30, 39],
@@ -226,7 +226,7 @@ class AMDev(PCIDevImplBase):
self.reg("regSCRATCH_REG6").write(self.is_err_state) # set finalized state.
def recover(self) -> bool:
if self.is_hive() or not self.is_err_state: return False # TODO: support mi300
if (self.has_aql_queue and self.is_hive()) or not self.is_err_state: return False # TODO: support aql queue recovery on hive
if DEBUG >= 2: print(f"am {self.devfmt}: Start recovery")
self.ih.interrupt_handler()
self.gfx.reset_mec()

View File

@@ -291,6 +291,7 @@ class AM_GFX(AM_IP):
self._enable_mec()
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, eop_addr:int, eop_size:int, idx:int, aql:bool) -> tuple[int, int]:
self.adev.has_aql_queue |= aql
pipe, queue, doorbell = idx // 4, idx % 4, am.AMDGPU_NAVI10_DOORBELL_MEC_RING0
self._grbm_select(me=1, pipe=pipe, queue=queue, inst=0)
restore_queue = aql and self.xccs > 1 and self.adev.partial_boot and (self.adev.regCP_HQD_ACTIVE.read(inst=0) & 1)