am: enable all sdma engines (#13970)

This commit is contained in:
nimlgen
2026-01-02 15:25:15 +03:00
committed by GitHub
parent 5f52266225
commit ebbaad6bfd
2 changed files with 16 additions and 12 deletions

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import ctypes, collections, dataclasses, functools, hashlib, array
from tinygrad.helpers import mv_address, getenv, DEBUG, fetch
from tinygrad.helpers import mv_address, getenv, DEBUG, fetch, lo32, hi32
from tinygrad.runtime.autogen.am import am
from tinygrad.runtime.support.hcq import MMIOInterface
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
@@ -55,7 +55,8 @@ class AMFirmware:
# SDMA firmware
blob, hdr = self.load_fw(f"sdma_{fmt_ver(am.SDMA0_HWIP)}.bin", versioned_header="struct_sdma_firmware_header")
if hdr.header.header_version_major == 1:
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SDMA0)]
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SDMA0,
am.GFX_FW_TYPE_SDMA1, am.GFX_FW_TYPE_SDMA2, am.GFX_FW_TYPE_SDMA3)]
elif hdr.header.header_version_major == 2:
self.descs += [self.desc(blob, hdr.ctl_ucode_offset, hdr.ctl_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH1)]
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ctx_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH0)]
@@ -250,10 +251,11 @@ class AMDev(PCIDevImplBase):
self.reg("regBIF_BX_PF0_RSMU_DATA").write(val)
def indirect_wreg_pcie(self, reg:int, val:int, aid:int=0):
self.reg("regBIF_BX0_PCIE_INDEX2").write(reg * 4 + ((((aid & 0b11) << 32) | (1 << 34)) if aid > 0 else 0))
self.reg("regBIF_BX0_PCIE_INDEX2").read()
reg_addr = reg * 4 + ((((aid & 0b11) << 32) | (1 << 34)) if aid > 0 else 0)
self.reg("regBIF_BX0_PCIE_INDEX2").write(lo32(reg_addr))
if reg_addr >> 32: self.reg("regBIF_BX0_PCIE_INDEX2_HI").write(hi32(reg_addr) & 0xff)
self.reg("regBIF_BX0_PCIE_DATA2").write(val)
self.reg("regBIF_BX0_PCIE_DATA2").read()
if reg_addr >> 32: self.reg("regBIF_BX0_PCIE_INDEX2_HI").write(0)
def _read_vram(self, addr, size) -> bytes:
assert addr % 4 == 0 and size % 4 == 0, f"Invalid address {addr:#x} or size {size:#x}"

View File

@@ -25,12 +25,12 @@ class AM_SOC(AM_IP):
def set_clockgating_state(self):
if self.adev.ip_ver[am.HDP_HWIP] >= (5,2,1): self.adev.regHDP_MEM_POWER_CTRL.update(atomic_mem_power_ctrl_en=1, atomic_mem_power_ds_en=1)
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0):
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0, aid=0):
reg = self.adev.reg(f"{'regGDC_S2A0_S2A' if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else 'regS2A'}_DOORBELL_ENTRY_{port}_CTRL")
val = reg.encode(**{f"s2a_doorbell_port{port}_enable":1, f"s2a_doorbell_port{port}_awid":awid, f"s2a_doorbell_port{port}_range_size":size,
f"s2a_doorbell_port{port}_awaddr_31_28_value":awaddr_31_28_value, f"s2a_doorbell_port{port}_range_offset":offset})
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: self.adev.indirect_wreg_pcie(reg.addr[0], val)
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: self.adev.indirect_wreg_pcie(reg.addr[0], val, aid=aid)
else: reg.write(val)
class AM_GMC(AM_IP):
@@ -432,9 +432,12 @@ class AM_SDMA(AM_IP):
**({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {}), inst=inst)
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}:
for i in range(16): self.adev.reg(f"regDOORBELL0_CTRL_ENTRY_{i+1}").write(**{f"bif_doorbell{i+1}_range_size_entry":4,
f"bif_doorbell{i+1}_range_offset_entry":(am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + i * 0xA) * 2})
self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x1, offset=0xe, size=4)
for aid_id in range(4):
for dev_inst, (port, awid, offset, awaddr) in enumerate([(1, 0xe, 0xe, 0x1), (2, 0x8, 0x8, 0x2), (5, 0x9, 0x9, 0x8), (6, 0xa, 0xa, 0x9)]):
entry = dev_inst + 1 + 4 * aid_id
self.adev.reg(f"regDOORBELL0_CTRL_ENTRY_{entry}").write(**{f"bif_doorbell{entry}_range_size_entry": 20,
f"bif_doorbell{entry}_range_offset_entry": (am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (entry - 1) * 0xA) * 2})
self.adev.soc.doorbell_enable(port=port, awid=awid, awaddr_31_28_value=awaddr, offset=offset, size=4, aid=aid_id)
else: self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4)
def fini_hw(self):
@@ -448,8 +451,7 @@ class AM_SDMA(AM_IP):
self.adev.regGRBM_SOFT_RESET.write(0x0)
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, idx:int) -> tuple[int, int]:
assert idx <= 3, "only 4 SDMA queues supported in am"
pipe, queue = idx // 4, idx % 4
pipe, queue = idx % 4, idx // 4
reg, inst = ("regSDMA_GFX", pipe+queue*4) if self.adev.ip_ver[am.SDMA0_HWIP][:2] == (4,4) else (f"regSDMA{pipe}_QUEUE{queue}", 0)
doorbell = am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (pipe+queue*4) * 0xA
self.sdma_reginst.append((reg, inst))