mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-07 22:23:55 -05:00
am: enable all sdma engines (#13970)
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import ctypes, collections, dataclasses, functools, hashlib, array
|
import ctypes, collections, dataclasses, functools, hashlib, array
|
||||||
from tinygrad.helpers import mv_address, getenv, DEBUG, fetch
|
from tinygrad.helpers import mv_address, getenv, DEBUG, fetch, lo32, hi32
|
||||||
from tinygrad.runtime.autogen.am import am
|
from tinygrad.runtime.autogen.am import am
|
||||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||||
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
|
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
|
||||||
@@ -55,7 +55,8 @@ class AMFirmware:
|
|||||||
# SDMA firmware
|
# SDMA firmware
|
||||||
blob, hdr = self.load_fw(f"sdma_{fmt_ver(am.SDMA0_HWIP)}.bin", versioned_header="struct_sdma_firmware_header")
|
blob, hdr = self.load_fw(f"sdma_{fmt_ver(am.SDMA0_HWIP)}.bin", versioned_header="struct_sdma_firmware_header")
|
||||||
if hdr.header.header_version_major == 1:
|
if hdr.header.header_version_major == 1:
|
||||||
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SDMA0)]
|
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.header.ucode_size_bytes, am.GFX_FW_TYPE_SDMA0,
|
||||||
|
am.GFX_FW_TYPE_SDMA1, am.GFX_FW_TYPE_SDMA2, am.GFX_FW_TYPE_SDMA3)]
|
||||||
elif hdr.header.header_version_major == 2:
|
elif hdr.header.header_version_major == 2:
|
||||||
self.descs += [self.desc(blob, hdr.ctl_ucode_offset, hdr.ctl_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH1)]
|
self.descs += [self.desc(blob, hdr.ctl_ucode_offset, hdr.ctl_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH1)]
|
||||||
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ctx_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH0)]
|
self.descs += [self.desc(blob, hdr.header.ucode_array_offset_bytes, hdr.ctx_ucode_size_bytes, am.GFX_FW_TYPE_SDMA_UCODE_TH0)]
|
||||||
@@ -250,10 +251,11 @@ class AMDev(PCIDevImplBase):
|
|||||||
self.reg("regBIF_BX_PF0_RSMU_DATA").write(val)
|
self.reg("regBIF_BX_PF0_RSMU_DATA").write(val)
|
||||||
|
|
||||||
def indirect_wreg_pcie(self, reg:int, val:int, aid:int=0):
|
def indirect_wreg_pcie(self, reg:int, val:int, aid:int=0):
|
||||||
self.reg("regBIF_BX0_PCIE_INDEX2").write(reg * 4 + ((((aid & 0b11) << 32) | (1 << 34)) if aid > 0 else 0))
|
reg_addr = reg * 4 + ((((aid & 0b11) << 32) | (1 << 34)) if aid > 0 else 0)
|
||||||
self.reg("regBIF_BX0_PCIE_INDEX2").read()
|
self.reg("regBIF_BX0_PCIE_INDEX2").write(lo32(reg_addr))
|
||||||
|
if reg_addr >> 32: self.reg("regBIF_BX0_PCIE_INDEX2_HI").write(hi32(reg_addr) & 0xff)
|
||||||
self.reg("regBIF_BX0_PCIE_DATA2").write(val)
|
self.reg("regBIF_BX0_PCIE_DATA2").write(val)
|
||||||
self.reg("regBIF_BX0_PCIE_DATA2").read()
|
if reg_addr >> 32: self.reg("regBIF_BX0_PCIE_INDEX2_HI").write(0)
|
||||||
|
|
||||||
def _read_vram(self, addr, size) -> bytes:
|
def _read_vram(self, addr, size) -> bytes:
|
||||||
assert addr % 4 == 0 and size % 4 == 0, f"Invalid address {addr:#x} or size {size:#x}"
|
assert addr % 4 == 0 and size % 4 == 0, f"Invalid address {addr:#x} or size {size:#x}"
|
||||||
|
|||||||
@@ -25,12 +25,12 @@ class AM_SOC(AM_IP):
|
|||||||
def set_clockgating_state(self):
|
def set_clockgating_state(self):
|
||||||
if self.adev.ip_ver[am.HDP_HWIP] >= (5,2,1): self.adev.regHDP_MEM_POWER_CTRL.update(atomic_mem_power_ctrl_en=1, atomic_mem_power_ds_en=1)
|
if self.adev.ip_ver[am.HDP_HWIP] >= (5,2,1): self.adev.regHDP_MEM_POWER_CTRL.update(atomic_mem_power_ctrl_en=1, atomic_mem_power_ds_en=1)
|
||||||
|
|
||||||
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0):
|
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0, aid=0):
|
||||||
reg = self.adev.reg(f"{'regGDC_S2A0_S2A' if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else 'regS2A'}_DOORBELL_ENTRY_{port}_CTRL")
|
reg = self.adev.reg(f"{'regGDC_S2A0_S2A' if self.adev.ip_ver[am.GC_HWIP] >= (12,0,0) else 'regS2A'}_DOORBELL_ENTRY_{port}_CTRL")
|
||||||
val = reg.encode(**{f"s2a_doorbell_port{port}_enable":1, f"s2a_doorbell_port{port}_awid":awid, f"s2a_doorbell_port{port}_range_size":size,
|
val = reg.encode(**{f"s2a_doorbell_port{port}_enable":1, f"s2a_doorbell_port{port}_awid":awid, f"s2a_doorbell_port{port}_range_size":size,
|
||||||
f"s2a_doorbell_port{port}_awaddr_31_28_value":awaddr_31_28_value, f"s2a_doorbell_port{port}_range_offset":offset})
|
f"s2a_doorbell_port{port}_awaddr_31_28_value":awaddr_31_28_value, f"s2a_doorbell_port{port}_range_offset":offset})
|
||||||
|
|
||||||
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: self.adev.indirect_wreg_pcie(reg.addr[0], val)
|
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}: self.adev.indirect_wreg_pcie(reg.addr[0], val, aid=aid)
|
||||||
else: reg.write(val)
|
else: reg.write(val)
|
||||||
|
|
||||||
class AM_GMC(AM_IP):
|
class AM_GMC(AM_IP):
|
||||||
@@ -432,9 +432,12 @@ class AM_SDMA(AM_IP):
|
|||||||
**({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {}), inst=inst)
|
**({'utc_l1_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP] <= (5,2,0) else {}), inst=inst)
|
||||||
|
|
||||||
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}:
|
if self.adev.ip_ver[am.NBIO_HWIP] in {(7,9,0), (7,9,1)}:
|
||||||
for i in range(16): self.adev.reg(f"regDOORBELL0_CTRL_ENTRY_{i+1}").write(**{f"bif_doorbell{i+1}_range_size_entry":4,
|
for aid_id in range(4):
|
||||||
f"bif_doorbell{i+1}_range_offset_entry":(am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + i * 0xA) * 2})
|
for dev_inst, (port, awid, offset, awaddr) in enumerate([(1, 0xe, 0xe, 0x1), (2, 0x8, 0x8, 0x2), (5, 0x9, 0x9, 0x8), (6, 0xa, 0xa, 0x9)]):
|
||||||
self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x1, offset=0xe, size=4)
|
entry = dev_inst + 1 + 4 * aid_id
|
||||||
|
self.adev.reg(f"regDOORBELL0_CTRL_ENTRY_{entry}").write(**{f"bif_doorbell{entry}_range_size_entry": 20,
|
||||||
|
f"bif_doorbell{entry}_range_offset_entry": (am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (entry - 1) * 0xA) * 2})
|
||||||
|
self.adev.soc.doorbell_enable(port=port, awid=awid, awaddr_31_28_value=awaddr, offset=offset, size=4, aid=aid_id)
|
||||||
else: self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4)
|
else: self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4)
|
||||||
|
|
||||||
def fini_hw(self):
|
def fini_hw(self):
|
||||||
@@ -448,8 +451,7 @@ class AM_SDMA(AM_IP):
|
|||||||
self.adev.regGRBM_SOFT_RESET.write(0x0)
|
self.adev.regGRBM_SOFT_RESET.write(0x0)
|
||||||
|
|
||||||
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, idx:int) -> tuple[int, int]:
|
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, idx:int) -> tuple[int, int]:
|
||||||
assert idx <= 3, "only 4 SDMA queues supported in am"
|
pipe, queue = idx % 4, idx // 4
|
||||||
pipe, queue = idx // 4, idx % 4
|
|
||||||
reg, inst = ("regSDMA_GFX", pipe+queue*4) if self.adev.ip_ver[am.SDMA0_HWIP][:2] == (4,4) else (f"regSDMA{pipe}_QUEUE{queue}", 0)
|
reg, inst = ("regSDMA_GFX", pipe+queue*4) if self.adev.ip_ver[am.SDMA0_HWIP][:2] == (4,4) else (f"regSDMA{pipe}_QUEUE{queue}", 0)
|
||||||
doorbell = am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (pipe+queue*4) * 0xA
|
doorbell = am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (pipe+queue*4) * 0xA
|
||||||
self.sdma_reginst.append((reg, inst))
|
self.sdma_reginst.append((reg, inst))
|
||||||
|
|||||||
Reference in New Issue
Block a user