system: alloc_sysmem return view (#12724)

* system: alloc_sysmem return view
* e
2026-01-10 23:48:01 -05:00 · 2025-10-16 17:55:01 +08:00
parent b86a33a312
commit e7c057d5dc
3 changed files with 17 additions and 17 deletions
--- a/tinygrad/runtime/support/nv/ip.py
+++ b/tinygrad/runtime/support/nv/ip.py
@@ -139,7 +139,7 @@ class NV_FLCN(NV_IP):

      return System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)

-    self.frts_image_va, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
+    _, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))

  def prep_booter(self):
    image = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "image_prod_data")
@@ -150,7 +150,7 @@ class NV_FLCN(NV_IP):

    patched_image = bytearray(image)
    patched_image[patch_loc:patch_loc+sig_len] = sig[:sig_len]
-    self.booter_image_va, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
+    _, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
    _, _, self.booter_data_off, self.booter_data_sz, _, self.booter_code_off, self.booter_code_sz, _, _ = struct.unpack("9I", header)

  def init_hw(self):
@@ -327,10 +327,10 @@ class NV_GSP(NV_IP):
    # Alloc queues
    pte_cnt = ((queue_pte_cnt:=(queue_size * 2) // 0x1000)) + round_up(queue_pte_cnt * 8, 0x1000) // 0x1000
    pt_size = round_up(pte_cnt * 8, 0x1000)
-    queues_va, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
+    queues_view, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)

    # Fill up ptes
-    for i, sysmem in enumerate(queues_sysmem): to_mv(queues_va + i * 0x8, 0x8).cast('Q')[0] = sysmem
+    for i, sysmem in enumerate(queues_sysmem): queues_view.view(i * 0x8, 0x8, fmt='Q')[0] = sysmem

    # Fill up arguments
    queue_args = nv.MESSAGE_QUEUE_INIT_ARGUMENTS(sharedMemPhysAddr=queues_sysmem[0], pageTableEntryCount=pte_cnt, cmdQueueOffset=pt_size,
@@ -338,7 +338,7 @@ class NV_GSP(NV_IP):
    _, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))

    # Build command queue header
-    self.cmd_q_va, self.stat_q_va = queues_va + pt_size, queues_va + pt_size + queue_size
+    self.cmd_q_va, self.stat_q_va = queues_view.addr + pt_size, queues_view.addr + pt_size + queue_size

    cmd_q_tx = nv.msgqTxHeader(version=0, size=queue_size, entryOff=0x1000, msgSize=0x1000, msgCount=(queue_size - 0x1000) // 0x1000,
      writePtr=0, flags=1, rxHdrOff=ctypes.sizeof(nv.msgqTxHeader))
@@ -348,9 +348,9 @@ class NV_GSP(NV_IP):

  def init_libos_args(self):
    _, logbuf_sysmem = System.alloc_sysmem((2 << 20), contiguous=True)
-    libos_args_va, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
+    libos_args_view, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)

-    libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_va)
+    libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_view.addr)
    for i, name in enumerate(["INIT", "INTR", "RM", "MNOC", "KRNL"]):
      libos_structs[i] = nv.LibosMemoryRegionInitArgument(kind=nv.LIBOS_MEMORY_REGION_CONTIGUOUS, loc=nv.LIBOS_MEMORY_REGION_LOC_SYSMEM, size=0x10000,
        id8=int.from_bytes(bytes(f"LOG{name}", 'utf-8'), 'big'), pa=logbuf_sysmem[0] + 0x10000 * i)
@@ -370,18 +370,18 @@ class NV_GSP(NV_IP):
    for i in range(3, 0, -1): npages[i-1] = ((npages[i] - 1) >> (nv.LIBOS_MEMORY_REGION_RADIX_PAGE_LOG2 - 3)) + 1

    offsets = [sum(npages[:i]) * 0x1000 for i in range(4)]
-    radix_va, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
+    radix_view, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)

    # Copy image
-    to_mv(radix_va + offsets[-1], len(self.gsp_image))[:] = self.gsp_image
+    radix_view.view(offsets[-1], len(self.gsp_image))[:] = self.gsp_image

    # Copy level and image pages.
    for i in range(0, 3):
      cur_offset = sum(npages[:i+1])
-      to_mv(radix_va + offsets[i], npages[i+1] * 8).cast('Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
+      radix_view.view(offsets[i], npages[i+1] * 8, fmt='Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])

    # Copy signature
-    self.gsp_signature_va, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
+    _, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)

  def init_boot_binary_image(self):
    self.booter_image = self.nvdev.extract_fw("kgspBinArchiveGspRmBoot", "ucode_image_prod_data")
--- a/tinygrad/runtime/support/nv/nvdev.py
+++ b/tinygrad/runtime/support/nv/nvdev.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 import ctypes, time, functools, re, gzip, struct
-from tinygrad.helpers import getenv, DEBUG, fetch, getbits, to_mv
+from tinygrad.helpers import getenv, DEBUG, fetch, getbits
 from tinygrad.runtime.support.hcq import MMIOInterface
 from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager
 from tinygrad.runtime.support.nv.ip import NV_FLCN, NV_FLCN_COT, NV_GSP
@@ -137,9 +137,9 @@ class NVDev(PCIDevImplBase):
    self.large_bar = self.vram.nbytes >= self.vram_size

  def _alloc_boot_struct(self, struct:ctypes.Structure) -> tuple[ctypes.Structure, int]:
-    va, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
-    to_mv(va, sz)[:] = bytes(struct)
-    return type(struct).from_address(va), paddrs[0]
+    view, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
+    view[:sz] = bytes(struct)
+    return type(struct).from_address(view.addr), paddrs[0]

  def _download(self, file:str) -> str:
    url = f"https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/8ec351aeb96a93a4bb69ccc12a542bf8a8df2b6f/{file}"
--- a/tinygrad/runtime/support/system.py
+++ b/tinygrad/runtime/support/system.py
@@ -60,13 +60,13 @@ class _System:
    self.pagemap.seek(vaddr // mmap.PAGESIZE * 8)
    return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]

-  def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[int, list[int]]:
+  def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[MMIOInterface, list[int]]:
    assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
    flags = (libc.MAP_HUGETLB if contiguous and (size:=round_up(size, mmap.PAGESIZE)) > 0x1000 else 0) | (MAP_FIXED if vaddr else 0)
    va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS|MAP_POPULATE|MAP_LOCKED|flags, 0)

    if data is not None: to_mv(va, len(data))[:] = data
-    return va, self.system_paddrs(va, size)
+    return MMIOInterface(va, size), self.system_paddrs(va, size)

  def pci_reset(self, gpu): os.system(f"sudo sh -c 'echo 1 > /sys/bus/pci/devices/{gpu}/reset'")
  def pci_scan_bus(self, target_vendor:int, target_devices:list[int]) -> list[str]: