Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-10 23:48:01 -05:00)
system: alloc_sysmem return view (#12724)
* system: alloc_sysmem return view
* e
This commit is contained in:
@@ -139,7 +139,7 @@ class NV_FLCN(NV_IP):
|
||||
|
||||
return System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
|
||||
self.frts_image_va, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
|
||||
_, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
|
||||
|
||||
def prep_booter(self):
|
||||
image = self.nvdev.extract_fw("kgspBinArchiveBooterLoadUcode", "image_prod_data")
|
||||
@@ -150,7 +150,7 @@ class NV_FLCN(NV_IP):
|
||||
|
||||
patched_image = bytearray(image)
|
||||
patched_image[patch_loc:patch_loc+sig_len] = sig[:sig_len]
|
||||
self.booter_image_va, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
_, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
_, _, self.booter_data_off, self.booter_data_sz, _, self.booter_code_off, self.booter_code_sz, _, _ = struct.unpack("9I", header)
|
||||
|
||||
def init_hw(self):
|
||||
@@ -327,10 +327,10 @@ class NV_GSP(NV_IP):
|
||||
# Alloc queues
|
||||
pte_cnt = ((queue_pte_cnt:=(queue_size * 2) // 0x1000)) + round_up(queue_pte_cnt * 8, 0x1000) // 0x1000
|
||||
pt_size = round_up(pte_cnt * 8, 0x1000)
|
||||
queues_va, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
|
||||
queues_view, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
|
||||
|
||||
# Fill up ptes
|
||||
for i, sysmem in enumerate(queues_sysmem): to_mv(queues_va + i * 0x8, 0x8).cast('Q')[0] = sysmem
|
||||
for i, sysmem in enumerate(queues_sysmem): queues_view.view(i * 0x8, 0x8, fmt='Q')[0] = sysmem
|
||||
|
||||
# Fill up arguments
|
||||
queue_args = nv.MESSAGE_QUEUE_INIT_ARGUMENTS(sharedMemPhysAddr=queues_sysmem[0], pageTableEntryCount=pte_cnt, cmdQueueOffset=pt_size,
|
||||
@@ -338,7 +338,7 @@ class NV_GSP(NV_IP):
|
||||
_, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))
|
||||
|
||||
# Build command queue header
|
||||
self.cmd_q_va, self.stat_q_va = queues_va + pt_size, queues_va + pt_size + queue_size
|
||||
self.cmd_q_va, self.stat_q_va = queues_view.addr + pt_size, queues_view.addr + pt_size + queue_size
|
||||
|
||||
cmd_q_tx = nv.msgqTxHeader(version=0, size=queue_size, entryOff=0x1000, msgSize=0x1000, msgCount=(queue_size - 0x1000) // 0x1000,
|
||||
writePtr=0, flags=1, rxHdrOff=ctypes.sizeof(nv.msgqTxHeader))
|
||||
@@ -348,9 +348,9 @@ class NV_GSP(NV_IP):
|
||||
|
||||
def init_libos_args(self):
|
||||
_, logbuf_sysmem = System.alloc_sysmem((2 << 20), contiguous=True)
|
||||
libos_args_va, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
|
||||
libos_args_view, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
|
||||
|
||||
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_va)
|
||||
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_view.addr)
|
||||
for i, name in enumerate(["INIT", "INTR", "RM", "MNOC", "KRNL"]):
|
||||
libos_structs[i] = nv.LibosMemoryRegionInitArgument(kind=nv.LIBOS_MEMORY_REGION_CONTIGUOUS, loc=nv.LIBOS_MEMORY_REGION_LOC_SYSMEM, size=0x10000,
|
||||
id8=int.from_bytes(bytes(f"LOG{name}", 'utf-8'), 'big'), pa=logbuf_sysmem[0] + 0x10000 * i)
|
||||
@@ -370,18 +370,18 @@ class NV_GSP(NV_IP):
|
||||
for i in range(3, 0, -1): npages[i-1] = ((npages[i] - 1) >> (nv.LIBOS_MEMORY_REGION_RADIX_PAGE_LOG2 - 3)) + 1
|
||||
|
||||
offsets = [sum(npages[:i]) * 0x1000 for i in range(4)]
|
||||
radix_va, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
|
||||
radix_view, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
|
||||
|
||||
# Copy image
|
||||
to_mv(radix_va + offsets[-1], len(self.gsp_image))[:] = self.gsp_image
|
||||
radix_view.view(offsets[-1], len(self.gsp_image))[:] = self.gsp_image
|
||||
|
||||
# Copy level and image pages.
|
||||
for i in range(0, 3):
|
||||
cur_offset = sum(npages[:i+1])
|
||||
to_mv(radix_va + offsets[i], npages[i+1] * 8).cast('Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
|
||||
radix_view.view(offsets[i], npages[i+1] * 8, fmt='Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
|
||||
|
||||
# Copy signature
|
||||
self.gsp_signature_va, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
|
||||
_, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
|
||||
|
||||
def init_boot_binary_image(self):
|
||||
self.booter_image = self.nvdev.extract_fw("kgspBinArchiveGspRmBoot", "ucode_image_prod_data")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import ctypes, time, functools, re, gzip, struct
|
||||
from tinygrad.helpers import getenv, DEBUG, fetch, getbits, to_mv
|
||||
from tinygrad.helpers import getenv, DEBUG, fetch, getbits
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager
|
||||
from tinygrad.runtime.support.nv.ip import NV_FLCN, NV_FLCN_COT, NV_GSP
|
||||
@@ -137,9 +137,9 @@ class NVDev(PCIDevImplBase):
|
||||
self.large_bar = self.vram.nbytes >= self.vram_size
|
||||
|
||||
def _alloc_boot_struct(self, struct:ctypes.Structure) -> tuple[ctypes.Structure, int]:
|
||||
va, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
|
||||
to_mv(va, sz)[:] = bytes(struct)
|
||||
return type(struct).from_address(va), paddrs[0]
|
||||
view, paddrs = System.alloc_sysmem(sz:=ctypes.sizeof(type(struct)), contiguous=True)
|
||||
view[:sz] = bytes(struct)
|
||||
return type(struct).from_address(view.addr), paddrs[0]
|
||||
|
||||
def _download(self, file:str) -> str:
|
||||
url = f"https://raw.githubusercontent.com/NVIDIA/open-gpu-kernel-modules/8ec351aeb96a93a4bb69ccc12a542bf8a8df2b6f/{file}"
|
||||
|
||||
@@ -60,13 +60,13 @@ class _System:
|
||||
self.pagemap.seek(vaddr // mmap.PAGESIZE * 8)
|
||||
return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]
|
||||
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[int, list[int]]:
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[MMIOInterface, list[int]]:
|
||||
assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
|
||||
flags = (libc.MAP_HUGETLB if contiguous and (size:=round_up(size, mmap.PAGESIZE)) > 0x1000 else 0) | (MAP_FIXED if vaddr else 0)
|
||||
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS|MAP_POPULATE|MAP_LOCKED|flags, 0)
|
||||
|
||||
if data is not None: to_mv(va, len(data))[:] = data
|
||||
return va, self.system_paddrs(va, size)
|
||||
return MMIOInterface(va, size), self.system_paddrs(va, size)
|
||||
|
||||
def pci_reset(self, gpu): os.system(f"sudo sh -c 'echo 1 > /sys/bus/pci/devices/{gpu}/reset'")
|
||||
def pci_scan_bus(self, target_vendor:int, target_devices:list[int]) -> list[str]:
|
||||
|
||||
Reference in New Issue
Block a user