system: alloc_sysmem return view (#12724)

* system: alloc_sysmem return view

* e
This commit is contained in:
nimlgen
2025-10-16 17:55:01 +08:00
committed by GitHub
parent b86a33a312
commit e7c057d5dc
3 changed files with 17 additions and 17 deletions

View File

@@ -139,7 +139,7 @@ class NV_FLCN(NV_IP):
return System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
self.frts_image_va, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
_, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
def prep_booter(self):
image = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "image_prod_data")
@@ -150,7 +150,7 @@ class NV_FLCN(NV_IP):
patched_image = bytearray(image)
patched_image[patch_loc:patch_loc+sig_len] = sig[:sig_len]
self.booter_image_va, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
_, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
_, _, self.booter_data_off, self.booter_data_sz, _, self.booter_code_off, self.booter_code_sz, _, _ = struct.unpack("9I", header)
def init_hw(self):
@@ -327,10 +327,10 @@ class NV_GSP(NV_IP):
# Alloc queues
pte_cnt = ((queue_pte_cnt:=(queue_size * 2) // 0x1000)) + round_up(queue_pte_cnt * 8, 0x1000) // 0x1000
pt_size = round_up(pte_cnt * 8, 0x1000)
queues_va, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
queues_view, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
# Fill up ptes
for i, sysmem in enumerate(queues_sysmem): to_mv(queues_va + i * 0x8, 0x8).cast('Q')[0] = sysmem
for i, sysmem in enumerate(queues_sysmem): queues_view.view(i * 0x8, 0x8, fmt='Q')[0] = sysmem
# Fill up arguments
queue_args = nv.MESSAGE_QUEUE_INIT_ARGUMENTS(sharedMemPhysAddr=queues_sysmem[0], pageTableEntryCount=pte_cnt, cmdQueueOffset=pt_size,
@@ -338,7 +338,7 @@ class NV_GSP(NV_IP):
_, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))
# Build command queue header
self.cmd_q_va, self.stat_q_va = queues_va + pt_size, queues_va + pt_size + queue_size
self.cmd_q_va, self.stat_q_va = queues_view.addr + pt_size, queues_view.addr + pt_size + queue_size
cmd_q_tx = nv.msgqTxHeader(version=0, size=queue_size, entryOff=0x1000, msgSize=0x1000, msgCount=(queue_size - 0x1000) // 0x1000,
writePtr=0, flags=1, rxHdrOff=ctypes.sizeof(nv.msgqTxHeader))
@@ -348,9 +348,9 @@ class NV_GSP(NV_IP):
def init_libos_args(self):
_, logbuf_sysmem = System.alloc_sysmem((2 << 20), contiguous=True)
libos_args_va, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
libos_args_view, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_va)
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_view.addr)
for i, name in enumerate(["INIT", "INTR", "RM", "MNOC", "KRNL"]):
libos_structs[i] = nv.LibosMemoryRegionInitArgument(kind=nv.LIBOS_MEMORY_REGION_CONTIGUOUS, loc=nv.LIBOS_MEMORY_REGION_LOC_SYSMEM, size=0x10000,
id8=int.from_bytes(bytes(f"LOG{name}", 'utf-8'), 'big'), pa=logbuf_sysmem[0] + 0x10000 * i)
@@ -370,18 +370,18 @@ class NV_GSP(NV_IP):
for i in range(3, 0, -1): npages[i-1] = ((npages[i] - 1) >> (nv.LIBOS_MEMORY_REGION_RADIX_PAGE_LOG2 - 3)) + 1
offsets = [sum(npages[:i]) * 0x1000 for i in range(4)]
radix_va, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
radix_view, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
# Copy image
to_mv(radix_va + offsets[-1], len(self.gsp_image))[:] = self.gsp_image
radix_view.view(offsets[-1], len(self.gsp_image))[:] = self.gsp_image
# Copy level and image pages.
for i in range(0, 3):
cur_offset = sum(npages[:i+1])
to_mv(radix_va + offsets[i], npages[i+1] * 8).cast('Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
radix_view.view(offsets[i], npages[i+1] * 8, fmt='Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
# Copy signature
self.gsp_signature_va, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
_, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
def init_boot_binary_image(self):
self.booter_image = self.nvdev.extract_fw("kgspBinArchiveGspRmBoot", "ucode_image_prod_data")

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
import ctypes, time, functools, re, gzip, struct
from tinygrad.helpers import getenv, DEBUG, fetch, getbits, to_mv
from tinygrad.helpers import getenv, DEBUG, fetch, getbits
from tinygrad.runtime.support.hcq import MMIOInterface
from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager
from tinygrad.runtime.support.nv.ip import NV_FLCN, NV_FLCN_COT, NV_GSP
@@ -137,9 +137,9 @@ class NVDev(PCIDevImplBase):
self.large_bar = self.vram.nbytes >= self.vram_size
def _alloc_boot_struct(self, struct:ctypes.Structure) -> tuple[ctypes.Structure, int]:
va, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
to_mv(va, sz)[:] = bytes(struct)
return type(struct).from_address(va), paddrs[0]
view, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
view[:sz] = bytes(struct)
return type(struct).from_address(view.addr), paddrs[0]
def _download(self, file:str) -> str:
url = f"https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/8ec351aeb96a93a4bb69ccc12a542bf8a8df2b6f/{file}"

View File

@@ -60,13 +60,13 @@ class _System:
self.pagemap.seek(vaddr // mmap.PAGESIZE * 8)
return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[int, list[int]]:
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[MMIOInterface, list[int]]:
assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
flags = (libc.MAP_HUGETLB if contiguous and (size:=round_up(size, mmap.PAGESIZE)) > 0x1000 else 0) | (MAP_FIXED if vaddr else 0)
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS|MAP_POPULATE|MAP_LOCKED|flags, 0)
if data is not None: to_mv(va, len(data))[:] = data
return va, self.system_paddrs(va, size)
return MMIOInterface(va, size), self.system_paddrs(va, size)
def pci_reset(self, gpu): os.system(f"sudo sh -c 'echo 1 > /sys/bus/pci/devices/{gpu}/reset'")
def pci_scan_bus(self, target_vendor:int, target_devices:list[int]) -> list[str]: