From 3f25eb302655109666a5fed3c2c9b2995ddbde49 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Mon, 26 Jan 2026 20:11:04 +0300 Subject: [PATCH] am: ih (#14346) * am: ih * um * fix * line * no trap and fix ring * keep * fix --- tinygrad/runtime/autogen/am/__init__.py | 9 +- tinygrad/runtime/autogen/am/am.py | 127 +++++++++++++++++++++++- tinygrad/runtime/ops_amd.py | 3 +- tinygrad/runtime/support/am/amdev.py | 2 +- tinygrad/runtime/support/am/ip.py | 46 +++++++-- 5 files changed, 168 insertions(+), 19 deletions(-) diff --git a/tinygrad/runtime/autogen/am/__init__.py b/tinygrad/runtime/autogen/am/__init__.py index 0b281a35e9..894df019d4 100644 --- a/tinygrad/runtime/autogen/am/__init__.py +++ b/tinygrad/runtime/autogen/am/__init__.py @@ -1,14 +1,15 @@ from tinygrad.runtime.autogen import load, root am_src="https://github.com/ROCm/ROCK-Kernel-Driver/archive/ceb12c04e2b5b53ec0779362831f5ee40c4921e4.tar.gz" -AMD="{}/drivers/gpu/drm/amd" -inc = ["-include", "stdint.h"] +AMD, AMDINC = "{}/drivers/gpu/drm/amd", "{}/drivers/gpu/drm/amd/include" +inc, kern_rules = ["-include", "stdint.h"], [(r'le32_to_cpu', ''),] def __getattr__(nm): match nm: case "am": return load("am/am", [], [root/f"extra/amdpci/headers/{s}.h" for s in ["v11_structs", "v12_structs", "amdgpu_vm", - "discovery", "amdgpu_ucode", "psp_gfx_if", "amdgpu_psp", "amdgpu_irq", "amdgpu_doorbell"]] + \ - [f"{AMD}/include/{s}.h" for s in ["v9_structs", "soc15_ih_clientid"]], args=inc, tarball=am_src) + "discovery", "amdgpu_ucode", "psp_gfx_if", "amdgpu_psp", "amdgpu_irq", "amdgpu_doorbell"]] + [f"{AMD}/amdkfd/soc15_int.h"] + \ + [f"{AMDINC}/ivsrcid/{s}.h" for s in [f"gfx/irqsrcs_gfx_{x}_0" for x in ('9','11_0','12_0')] + [f"sdma0/irqsrcs_sdma0_{x}_0" for x in (4,5)]] + \ + [f"{AMDINC}/{s}.h" for s in ["v9_structs", "soc15_ih_clientid"]], args=inc, tarball=am_src, rules=kern_rules) case "pm4_soc15": return load("am/pm4_soc15", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/soc15d.h"], tarball=am_src) case "pm4_nv": return load("am/pm4_nv", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/nvd.h"], tarball=am_src) case "sdma_4_0_0": return load("am/sdma_4_0_0", [], [root/"extra/hip_gpu_driver/sdma_registers.h", f"{AMD}/amdgpu/vega10_sdma_pkt_open.h"], diff --git a/tinygrad/runtime/autogen/am/am.py b/tinygrad/runtime/autogen/am/am.py index 94c1a6aa1b..67383c91de 100644 --- a/tinygrad/runtime/autogen/am/am.py +++ b/tinygrad/runtime/autogen/am/am.py @@ -4796,4 +4796,129 @@ AMDGPU_MAX_IRQ_SRC_ID = 0x100 # type: ignore AMDGPU_MAX_IRQ_CLIENT_ID = 0x100 # type: ignore AMDGPU_IRQ_CLIENTID_LEGACY = 0 # type: ignore AMDGPU_IRQ_CLIENTID_MAX = SOC15_IH_CLIENTID_MAX # type: ignore -AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW = 4 # type: ignore \ No newline at end of file +AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW = 4 # type: ignore +SOC15_INTSRC_CP_END_OF_PIPE = 181 # type: ignore +SOC15_INTSRC_CP_BAD_OPCODE = 183 # type: ignore +SOC15_INTSRC_SQ_INTERRUPT_MSG = 239 # type: ignore +SOC15_INTSRC_VMC_FAULT = 0 # type: ignore +SOC15_INTSRC_VMC_UTCL2_POISON = 1 # type: ignore +SOC15_INTSRC_SDMA_TRAP = 224 # type: ignore +SOC15_INTSRC_SDMA_ECC = 220 # type: ignore +SOC21_INTSRC_SDMA_TRAP = 49 # type: ignore +SOC21_INTSRC_SDMA_ECC = 62 # type: ignore +SOC15_CLIENT_ID_FROM_IH_ENTRY = lambda entry: ((entry[0]) & 0xff) # type: ignore +SOC15_SOURCE_ID_FROM_IH_ENTRY = lambda entry: ((entry[0]) >> 8 & 0xff) # type: ignore +SOC15_RING_ID_FROM_IH_ENTRY = lambda entry: ((entry[0]) >> 16 & 0xff) # type: ignore +SOC15_VMID_FROM_IH_ENTRY = lambda entry: ((entry[0]) >> 24 & 0xf) # type: ignore +SOC15_VMID_TYPE_FROM_IH_ENTRY = lambda entry: ((entry[0]) >> 31 & 0x1) # type: ignore +SOC15_PASID_FROM_IH_ENTRY = lambda entry: ((entry[3]) & 0xffff) # type: ignore +SOC15_NODEID_FROM_IH_ENTRY = lambda entry: ((entry[3]) >> 16 & 0xff) # type: ignore +SOC15_CONTEXT_ID0_FROM_IH_ENTRY = lambda entry: ((entry[4])) # type: ignore +SOC15_CONTEXT_ID1_FROM_IH_ENTRY = lambda entry: ((entry[5])) # type: ignore +SOC15_CONTEXT_ID2_FROM_IH_ENTRY = lambda entry: ((entry[6])) # type: ignore +SOC15_CONTEXT_ID3_FROM_IH_ENTRY = lambda entry: ((entry[7])) # type: ignore +GFX_9_0__SRCID__CP_RB_INTERRUPT_PKT = 176 # type: ignore +GFX_9_0__SRCID__CP_IB1_INTERRUPT_PKT = 177 # type: ignore +GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT = 178 # type: ignore +GFX_9_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR = 180 # type: ignore +GFX_9_0__SRCID__CP_EOP_INTERRUPT = 181 # type: ignore +GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR = 183 # type: ignore +GFX_9_0__SRCID__CP_PRIV_REG_FAULT = 184 # type: ignore +GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT = 185 # type: ignore +GFX_9_0__SRCID__CP_WAIT_MEM_SEM_FAULT = 186 # type: ignore +GFX_9_0__SRCID__CP_CTX_EMPTY_INTERRUPT = 187 # type: ignore +GFX_9_0__SRCID__CP_CTX_BUSY_INTERRUPT = 188 # type: ignore +GFX_9_0__SRCID__CP_ME_WAIT_REG_MEM_POLL_TIMEOUT = 192 # type: ignore +GFX_9_0__SRCID__CP_SIG_INCOMPLETE = 193 # type: ignore +GFX_9_0__SRCID__CP_PREEMPT_ACK = 194 # type: ignore +GFX_9_0__SRCID__CP_GPF = 195 # type: ignore +GFX_9_0__SRCID__CP_GDS_ALLOC_ERROR = 196 # type: ignore +GFX_9_0__SRCID__CP_ECC_ERROR = 197 # type: ignore +GFX_9_0__SRCID__CP_COMPUTE_QUERY_STATUS = 199 # type: ignore +GFX_9_0__SRCID__CP_VM_DOORBELL = 200 # type: ignore +GFX_9_0__SRCID__CP_FUE_ERROR = 201 # type: ignore +GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT = 202 # type: ignore +GFX_9_0__SRCID__GRBM_RD_TIMEOUT_ERROR = 232 # type: ignore +GFX_9_0__SRCID__GRBM_REG_GUI_IDLE = 233 # type: ignore +GFX_9_0__SRCID__SQ_INTERRUPT_ID = 239 # type: ignore +GFX_11_0_0__SRCID__UTCL2_FAULT = 0 # type: ignore +GFX_11_0_0__SRCID__UTCL2_DATA_POISONING = 1 # type: ignore +GFX_11_0_0__SRCID__MEM_ACCES_MON = 10 # type: ignore +GFX_11_0_0__SRCID__SDMA_ATOMIC_RTN_DONE = 48 # type: ignore +GFX_11_0_0__SRCID__SDMA_TRAP = 49 # type: ignore +GFX_11_0_0__SRCID__SDMA_SRBMWRITE = 50 # type: ignore +GFX_11_0_0__SRCID__SDMA_CTXEMPTY = 51 # type: ignore +GFX_11_0_0__SRCID__SDMA_PREEMPT = 52 # type: ignore +GFX_11_0_0__SRCID__SDMA_IB_PREEMPT = 53 # type: ignore +GFX_11_0_0__SRCID__SDMA_DOORBELL_INVALID = 54 # type: ignore +GFX_11_0_0__SRCID__SDMA_QUEUE_HANG = 55 # type: ignore +GFX_11_0_0__SRCID__SDMA_ATOMIC_TIMEOUT = 56 # type: ignore +GFX_11_0_0__SRCID__SDMA_POLL_TIMEOUT = 57 # type: ignore +GFX_11_0_0__SRCID__SDMA_PAGE_TIMEOUT = 58 # type: ignore +GFX_11_0_0__SRCID__SDMA_PAGE_NULL = 59 # type: ignore +GFX_11_0_0__SRCID__SDMA_PAGE_FAULT = 60 # type: ignore +GFX_11_0_0__SRCID__SDMA_VM_HOLE = 61 # type: ignore +GFX_11_0_0__SRCID__SDMA_ECC = 62 # type: ignore +GFX_11_0_0__SRCID__SDMA_FROZEN = 63 # type: ignore +GFX_11_0_0__SRCID__SDMA_SRAM_ECC = 64 # type: ignore +GFX_11_0_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT = 65 # type: ignore +GFX_11_0_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT = 66 # type: ignore +GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT = 128 # type: ignore +GFX_11_0_0__SRCID__CP_GENERIC_INT = 177 # type: ignore +GFX_11_0_0__SRCID__CP_PM4_PKT_RSVD_BIT_ERROR = 180 # type: ignore +GFX_11_0_0__SRCID__CP_EOP_INTERRUPT = 181 # type: ignore +GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR = 183 # type: ignore +GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT = 184 # type: ignore +GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT = 185 # type: ignore +GFX_11_0_0__SRCID__CP_WAIT_MEM_SEM_FAULT = 186 # type: ignore +GFX_11_0_0__SRCID__CP_CTX_EMPTY_INTERRUPT = 187 # type: ignore +GFX_11_0_0__SRCID__CP_CTX_BUSY_INTERRUPT = 188 # type: ignore +GFX_11_0_0__SRCID__CP_ME_WAIT_REG_MEM_POLL_TIMEOUT = 192 # type: ignore +GFX_11_0_0__SRCID__CP_SIG_INCOMPLETE = 193 # type: ignore +GFX_11_0_0__SRCID__CP_PREEMPT_ACK = 194 # type: ignore +GFX_11_0_0__SRCID__CP_GPF = 195 # type: ignore +GFX_11_0_0__SRCID__CP_GDS_ALLOC_ERROR = 196 # type: ignore +GFX_11_0_0__SRCID__CP_ECC_ERROR = 197 # type: ignore +GFX_11_0_0__SRCID__CP_COMPUTE_QUERY_STATUS = 199 # type: ignore +GFX_11_0_0__SRCID__CP_VM_DOORBELL = 200 # type: ignore +GFX_11_0_0__SRCID__CP_FUE_ERROR = 201 # type: ignore +GFX_11_0_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT = 202 # type: ignore +GFX_11_0_0__SRCID__GRBM_RD_TIMEOUT_ERROR = 232 # type: ignore +GFX_11_0_0__SRCID__GRBM_REG_GUI_IDLE = 233 # type: ignore +GFX_11_0_0__SRCID__SQ_INTERRUPT_ID = 239 # type: ignore +SDMA0_4_0__SRCID__SDMA_ATOMIC_RTN_DONE = 217 # type: ignore +SDMA0_4_0__SRCID__SDMA_ATOMIC_TIMEOUT = 218 # type: ignore +SDMA0_4_0__SRCID__SDMA_IB_PREEMPT = 219 # type: ignore +SDMA0_4_0__SRCID__SDMA_ECC = 220 # type: ignore +SDMA0_4_0__SRCID__SDMA_PAGE_FAULT = 221 # type: ignore +SDMA0_4_0__SRCID__SDMA_PAGE_NULL = 222 # type: ignore +SDMA0_4_0__SRCID__SDMA_XNACK = 223 # type: ignore +SDMA0_4_0__SRCID__SDMA_TRAP = 224 # type: ignore +SDMA0_4_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT = 225 # type: ignore +SDMA0_4_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT = 226 # type: ignore +SDMA0_4_0__SRCID__SDMA_SRAM_ECC = 228 # type: ignore +SDMA0_4_0__SRCID__SDMA_PREEMPT = 240 # type: ignore +SDMA0_4_0__SRCID__SDMA_VM_HOLE = 242 # type: ignore +SDMA0_4_0__SRCID__SDMA_CTXEMPTY = 243 # type: ignore +SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID = 244 # type: ignore +SDMA0_4_0__SRCID__SDMA_FROZEN = 245 # type: ignore +SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT = 246 # type: ignore +SDMA0_4_0__SRCID__SDMA_SRBMWRITE = 247 # type: ignore +SDMA0_5_0__SRCID__SDMA_ATOMIC_RTN_DONE = 217 # type: ignore +SDMA0_5_0__SRCID__SDMA_ATOMIC_TIMEOUT = 218 # type: ignore +SDMA0_5_0__SRCID__SDMA_IB_PREEMPT = 219 # type: ignore +SDMA0_5_0__SRCID__SDMA_ECC = 220 # type: ignore +SDMA0_5_0__SRCID__SDMA_PAGE_FAULT = 221 # type: ignore +SDMA0_5_0__SRCID__SDMA_PAGE_NULL = 222 # type: ignore +SDMA0_5_0__SRCID__SDMA_XNACK = 223 # type: ignore +SDMA0_5_0__SRCID__SDMA_TRAP = 224 # type: ignore +SDMA0_5_0__SRCID__SDMA_SEM_INCOMPLETE_TIMEOUT = 225 # type: ignore +SDMA0_5_0__SRCID__SDMA_SEM_WAIT_FAIL_TIMEOUT = 226 # type: ignore +SDMA0_5_0__SRCID__SDMA_SRAM_ECC = 228 # type: ignore +SDMA0_5_0__SRCID__SDMA_PREEMPT = 240 # type: ignore +SDMA0_5_0__SRCID__SDMA_VM_HOLE = 242 # type: ignore +SDMA0_5_0__SRCID__SDMA_CTXEMPTY = 243 # type: ignore +SDMA0_5_0__SRCID__SDMA_DOORBELL_INVALID = 244 # type: ignore +SDMA0_5_0__SRCID__SDMA_FROZEN = 245 # type: ignore +SDMA0_5_0__SRCID__SDMA_POLL_TIMEOUT = 246 # type: ignore +SDMA0_5_0__SRCID__SDMA_SRBMWRITE = 247 # type: ignore \ No newline at end of file diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 3e5bdc27e3..48db08cc4e 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -480,7 +480,6 @@ class AMDCopyQueue(HWQueue): if (dev:=signal.owner) is not None and signal.is_timeline and not dev.is_am(): self.q(self.sdma.SDMA_OP_FENCE | fence_flags, *data64_le(dev.queue_event_mailbox_ptr), dev.queue_event.event_id) self.q(self.sdma.SDMA_OP_TRAP, self.sdma.SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(dev.queue_event.event_id)) - elif dev is not None and dev.is_am(): self.q(self.sdma.SDMA_OP_TRAP, self.sdma.SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)) return self @@ -863,7 +862,7 @@ class PCIIface(PCIIfaceBase): def sleep(self, timeout) -> bool: if hasattr(self.pci_dev, 'irq_poller') and self.pci_dev.irq_poller is not None and (events_cnt:=len(self.pci_dev.irq_poller.poll(timeout))): self.pci_dev.irq_fd.read(8 * events_cnt) - self.dev_impl.ih.interrupt_handler() + self.dev_impl.ih.interrupt_handler() return self.dev_impl.gmc.check_fault() is not None def on_device_hang(self): diff --git a/tinygrad/runtime/support/am/amdev.py b/tinygrad/runtime/support/am/amdev.py index 0c61a3f5a6..1ddc1fa668 100644 --- a/tinygrad/runtime/support/am/amdev.py +++ b/tinygrad/runtime/support/am/amdev.py @@ -144,7 +144,7 @@ class AMMemoryManager(MemoryManager): self.dev.gmc.flush_tlb(ip='MM', vmid=0) class AMDev(PCIDevImplBase): - Version = 0xA0000007 + Version = 0xA0000008 def __init__(self, pci_dev:PCIDevice, dma_regions:list[tuple[int, MMIOInterface]]|None=None, reset_mode=False): self.pci_dev, self.devfmt, self.dma_regions = pci_dev, pci_dev.pcibus, dma_regions diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index 9c95a640a2..b4e48a15c0 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -13,7 +13,19 @@ class AM_IP: def set_clockgating_state(self): pass # Set clockgating state for this IP class AM_SOC(AM_IP): - def init_sw(self): self.module = import_soc(self.adev.ip_ver[am.GC_HWIP]) + def init_sw(self): + self.module = import_soc(self.adev.ip_ver[am.GC_HWIP]) + self.ih_clients = am.enum_soc21_ih_clientid if (ih_soc21:=self.adev.ip_ver[am.GC_HWIP][0] >= 11) else am.enum_soc15_ih_clientid + + self.gfx_ih_clients = [am.SOC21_IH_CLIENTID_GRBM_CP, am.SOC21_IH_CLIENTID_GFX] \ + if ih_soc21 else [am.SOC15_IH_CLIENTID_GRBM_CP] + [getattr(am, f'SOC15_IH_CLIENTID_SE{i}SH') for i in range(4)] + self.sdma_ih_clients = [] if ih_soc21 else [getattr(am, f'SOC15_IH_CLIENTID_SDMA{i}') for i in range(8)] + + def _ih_srcs(pref:str, hwip:int) -> dict[int, str]: + return {getattr(am, k): k[off+9:] for k in dir(am) if k.startswith(f'{pref}_{self.adev.ip_ver[hwip][0]}') and (off:=k.find('__SRCID__')) != -1} + + gfx_srcs, sdma_srcs = _ih_srcs('GFX', am.GC_HWIP), _ih_srcs('SDMA0', am.SDMA0_HWIP) + self.ih_scrs_names:dict[int, dict[int, str]] = {**{k: gfx_srcs for k in self.gfx_ih_clients}, **{k: sdma_srcs for k in self.sdma_ih_clients}} def init_hw(self): if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: @@ -325,8 +337,6 @@ class AM_GFX(AM_IP): self.adev.gmc.flush_hdp() self._grbm_select(inst=xcc) - - self.adev.reg(f"regCP_ME1_PIPE{pipe}_INT_CNTL").update(time_stamp_int_enable=1, generic0_int_enable=1, inst=xcc) return restore_ptr // 16, doorbell def set_clockgating_state(self): @@ -373,15 +383,16 @@ class AM_GFX(AM_IP): class AM_IH(AM_IP): def init_sw(self): - self.ring_size = 512 << 10 + self.ring_size = 256 << 10 def _alloc_ring(size): return (self.adev.mm.palloc(size, zero=False, boot=True), self.adev.mm.palloc(0x1000, zero=False, boot=True)) self.rings = [(*_alloc_ring(self.ring_size), "", 0), (*_alloc_ring(self.ring_size), "_RING1", 1)] + self.ring_view = self.adev.vram.view(offset=self.rings[0][0], size=self.ring_size, fmt='I') def init_hw(self): for ring_vm, rwptr_vm, suf, ring_id in self.rings: self.adev.wreg_pair("regIH_RB_BASE", suf, f"_HI{suf}", self.adev.paddr2mc(ring_vm) >> 8) - self.adev.reg(f"regIH_RB_CNTL{suf}").write(mc_space=4, wptr_overflow_clear=1, rb_size=(self.ring_size//4).bit_length(), + self.adev.reg(f"regIH_RB_CNTL{suf}").write(mc_space=4, wptr_overflow_clear=1, rb_size=((self.ring_size//4)-1).bit_length(), mc_snoop=1, mc_ro=0, mc_vmid=0, **({'wptr_overflow_enable': 1, 'rptr_rearm': 1} if ring_id == 0 else {'rb_full_drain_enable': 1})) if ring_id == 0: self.adev.wreg_pair("regIH_RB_WPTR_ADDR", "_LO", "_HI", self.adev.paddr2mc(rwptr_vm)) @@ -404,14 +415,28 @@ class AM_IH(AM_IP): self.adev.soc.doorbell_enable(port=1, awid=0x0, awaddr_31_28_value=0x0, offset=am.AMDGPU_NAVI10_DOORBELL_IH*2, size=2) def interrupt_handler(self): - _, rwptr_vm, suf, _ = self.rings[0] - wptr = self.adev.vram.view(offset=rwptr_vm, size=8, fmt='Q')[0] + _, _, suf, _ = self.rings[0] + wptr = self.adev.reg(f"regIH_RB_WPTR{suf}").read_bitfields() + rptr = self.adev.regIH_RB_RPTR.read() - if self.adev.reg(f"regIH_RB_WPTR{suf}").read_bitfields()['rb_overflow']: + while rptr != wptr['offset']: + entry = [self.ring_view[(rptr + i) % (self.ring_size // 4)] for i in range(8)] + client, src, ring_id, vmid, vmid_type, pasid, node = \ + [getattr(am, f'SOC15_{n}_FROM_IH_ENTRY')(entry) for n in ['CLIENT_ID', 'SOURCE_ID', 'RING_ID', 'VMID', 'VMID_TYPE', 'PASID', 'NODEID']] + ctx = [getattr(am, f'SOC15_CONTEXT_ID{i}_FROM_IH_ENTRY')(entry) for i in range(4)] + + src_name = self.adev.soc.ih_scrs_names.get(client, {}).get(src, '') + print(f"am {self.adev.devfmt}: IH ({rptr:#x}/{wptr['offset']:#x}) client={self.adev.soc.ih_clients.get(client)} src={src_name}({src}) " + f"ring={ring_id} vmid={vmid}({vmid_type}) pasid={pasid} node={node} ctx=[{ctx[0]:#x}, {ctx[1]:#x}, {ctx[2]:#x}, {ctx[3]:#x}]") + + rptr = (rptr + 8) % (self.ring_size // 4) + + if wptr['rb_overflow']: self.adev.reg(f"regIH_RB_WPTR{suf}").update(rb_overflow=0) self.adev.reg(f"regIH_RB_CNTL{suf}").update(wptr_overflow_clear=1) self.adev.reg(f"regIH_RB_CNTL{suf}").update(wptr_overflow_clear=0) - self.adev.regIH_RB_RPTR.write(wptr % self.ring_size) + + self.adev.regIH_RB_RPTR.write(wptr['offset'] % (self.ring_size // 4)) class AM_SDMA(AM_IP): def init_sw(self): self.sdma_reginst, self.sdma_name = [], "F32" if self.adev.ip_ver[am.SDMA0_HWIP] < (7,0,0) else "MCU" @@ -428,8 +453,7 @@ class AM_SDMA(AM_IP): inst=inst) self.adev.reg(f"regSDMA{pipe}_{self.sdma_name}_CNTL").update(halt=0, **{f"{'th1_' if self.sdma_name == 'F32' else ''}reset":0}, inst=inst) - self.adev.reg(f"regSDMA{pipe}_CNTL").update(ctxempty_int_enable=1, trap_enable=1, - **({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {}), inst=inst) + self.adev.reg(f"regSDMA{pipe}_CNTL").update(**({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {}), inst=inst) if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: for aid_id in range(4):