From e41da0c39675b0cb266eca5a8b8fe4988bcd6ebd Mon Sep 17 00:00:00 2001 From: kevvz <92408564+kevvz@users.noreply.github.com> Date: Mon, 16 Feb 2026 11:59:46 -0800 Subject: [PATCH] use relative address for MOCKGPU rdna4 tracing (#14801) * rdna3/4 trace separation * remove comments --- test/mockgpu/amd/amdgpu.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/test/mockgpu/amd/amdgpu.py b/test/mockgpu/amd/amdgpu.py index 8a73fd8824..11bd0f8f39 100644 --- a/test/mockgpu/amd/amdgpu.py +++ b/test/mockgpu/amd/amdgpu.py @@ -17,13 +17,17 @@ regCOMPUTE_USER_DATA_0 = 0x1be0 + amd_gpu.GC_BASE__INST0_SEG0 regCOMPUTE_NUM_THREAD_X = 0x1ba7 + amd_gpu.GC_BASE__INST0_SEG0 regGRBM_GFX_INDEX = 0x2200 + amd_gpu.GC_BASE__INST0_SEG1 regSQ_THREAD_TRACE_BUF0_BASE = 0x39e8 + amd_gpu.GC_BASE__INST0_SEG1 -regSQ_THREAD_TRACE_BUF0_SIZE = 0x39e9 + amd_gpu.GC_BASE__INST0_SEG1 +regSQ_THREAD_TRACE_BUF0_SIZE = {"rdna3": 0x39e9, "rdna4": 0x39e6}[MOCKGPU_ARCH] + amd_gpu.GC_BASE__INST0_SEG1 regSQ_THREAD_TRACE_WPTR = 0x39ef + amd_gpu.GC_BASE__INST0_SEG1 regSQ_THREAD_TRACE_STATUS = 0x39f4 + amd_gpu.GC_BASE__INST0_SEG1 regCP_PERFMON_CNTL = 0x3808 + amd_gpu.GC_BASE__INST0_SEG1 regCPG_PERFCOUNTER1_LO = 0x3000 + amd_gpu.GC_BASE__INST0_SEG1 regGUS_PERFCOUNTER_HI = 0x3643 + amd_gpu.GC_BASE__INST0_SEG1 +# RDNA 4 +regSQ_THREAD_TRACE_BUF0_BASE_LO = 0x39e7 + amd_gpu.GC_BASE__INST0_SEG1 +regSQ_THREAD_TRACE_BUF0_BASE_HI = regSQ_THREAD_TRACE_BUF0_BASE + class SQTT_EVENTS: THREAD_TRACE_FINISH = 0x00000037 @@ -235,14 +239,18 @@ class PM4Executor(AMDQueue): for se in range(self.gpu.regs.n_se): self.gpu.regs.grbm_index = 0b011 << 29 | se << 16 # select se, broadcast sa and instance self.gpu.regs[regSQ_THREAD_TRACE_STATUS] = 1 << 12 # FINISH_PENDING==0 FINISH_DONE==1 BUSY==0 - buf_addr = ((self.gpu.regs[regSQ_THREAD_TRACE_BUF0_SIZE]&0xf)<<32|self.gpu.regs[regSQ_THREAD_TRACE_BUF0_BASE])<<12 - + if MOCKGPU_ARCH == "rdna3": + buf_addr = ((self.gpu.regs[regSQ_THREAD_TRACE_BUF0_SIZE]&0xf)<<32|self.gpu.regs[regSQ_THREAD_TRACE_BUF0_BASE])<<12 + else: + buf_addr = ((self.gpu.regs[regSQ_THREAD_TRACE_BUF0_BASE_HI])<<32|self.gpu.regs[regSQ_THREAD_TRACE_BUF0_BASE_LO])<<12 # Use real trace blob for SE 0 (which has itrace enabled), empty blob for other SEs se_blob = blob if se == 0 else b'' # Write blob to trace buffer if se_blob: ctypes.memmove(buf_addr, se_blob, len(se_blob)) - self.gpu.regs[regSQ_THREAD_TRACE_WPTR] = ((buf_addr + len(se_blob)) // 32) & 0x1FFFFFFF + # RDNA3 has absolute address for wptr, RDNA4 has relative + wptr_val = (((buf_addr if MOCKGPU_ARCH == "rdna3" else 0) + len(se_blob)) // 32) & 0x1FFFFFFF + self.gpu.regs[regSQ_THREAD_TRACE_WPTR] = wptr_val self.gpu.regs.grbm_index = old_idx case _: pass # NOTE: for now most events aren't emulated