mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
system: alloc_sysmem is part of interface (#14226)
This commit is contained in:
@@ -150,7 +150,7 @@ class NV_FLCN(NV_IP):
|
||||
patched_image[(cmd_off:=self.desc_v3.IMEMLoadSize+dmem.cmd_in_buffer_offset) : cmd_off+len(cmd)] = cmd
|
||||
patched_image[(sig_off:=self.desc_v3.IMEMLoadSize+self.desc_v3.PKCDataOffset) : sig_off+0x180] = signature[-0x180:]
|
||||
|
||||
return System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
return self.nvdev._alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
|
||||
_, self.frts_image_sysmem = __patch(0x15, bytes(frts_cmd))
|
||||
|
||||
@@ -163,7 +163,7 @@ class NV_FLCN(NV_IP):
|
||||
|
||||
patched_image = bytearray(image)
|
||||
patched_image[patch_loc:patch_loc+sig_len] = sig[:sig_len]
|
||||
_, self.booter_image_sysmem = System.alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
_, self.booter_image_sysmem = self.nvdev._alloc_sysmem(len(patched_image), contiguous=True, data=patched_image)
|
||||
_, _, self.booter_data_off, self.booter_data_sz, _, self.booter_code_off, self.booter_code_sz, _, _ = struct.unpack("9I", header)
|
||||
|
||||
def init_hw(self):
|
||||
@@ -287,7 +287,7 @@ class NV_FLCN_COT(NV_IP):
|
||||
self.fmc_booter_hash = memoryview(self.nvdev.extract_fw("kgspBinArchiveGspRmFmcGfwProdSigned", "ucode_hash_data")).cast('I')
|
||||
self.fmc_booter_sig = memoryview(self.nvdev.extract_fw("kgspBinArchiveGspRmFmcGfwProdSigned", "ucode_sig_data")).cast('I')
|
||||
self.fmc_booter_pkey = memoryview(self.nvdev.extract_fw("kgspBinArchiveGspRmFmcGfwProdSigned", "ucode_pkey_data") + b'\x00\x00\x00').cast('I')
|
||||
_, self.fmc_booter_sysmem = System.alloc_sysmem(len(self.fmc_booter_image), contiguous=True, data=self.fmc_booter_image)
|
||||
_, self.fmc_booter_sysmem = self.nvdev._alloc_sysmem(len(self.fmc_booter_image), contiguous=True, data=self.fmc_booter_image)
|
||||
|
||||
def init_hw(self):
|
||||
self.falcon = 0x00110000
|
||||
@@ -344,7 +344,7 @@ class NV_GSP(NV_IP):
|
||||
# Alloc queues
|
||||
pte_cnt = ((queue_pte_cnt:=(queue_size * 2) // 0x1000)) + round_up(queue_pte_cnt * 8, 0x1000) // 0x1000
|
||||
pt_size = round_up(pte_cnt * 8, 0x1000)
|
||||
queues_view, queues_sysmem = System.alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
|
||||
queues_view, queues_sysmem = self.nvdev._alloc_sysmem(pt_size + queue_size * 2, contiguous=False)
|
||||
|
||||
# Fill up ptes
|
||||
for i, sysmem in enumerate(queues_sysmem): queues_view.view(i * 0x8, 0x8, fmt='Q')[0] = sysmem
|
||||
@@ -364,8 +364,8 @@ class NV_GSP(NV_IP):
|
||||
self.cmd_q = NVRpcQueue(self, self.cmd_q_va, None)
|
||||
|
||||
def init_libos_args(self):
|
||||
_, logbuf_sysmem = System.alloc_sysmem((2 << 20), contiguous=True)
|
||||
libos_args_view, self.libos_args_sysmem = System.alloc_sysmem(0x1000, contiguous=True)
|
||||
_, logbuf_sysmem = self.nvdev._alloc_sysmem((2 << 20), contiguous=True)
|
||||
libos_args_view, self.libos_args_sysmem = self.nvdev._alloc_sysmem(0x1000, contiguous=True)
|
||||
|
||||
libos_structs = (nv.LibosMemoryRegionInitArgument * 6).from_address(libos_args_view.addr)
|
||||
for i, name in enumerate(["INIT", "INTR", "RM", "MNOC", "KRNL"]):
|
||||
@@ -387,7 +387,7 @@ class NV_GSP(NV_IP):
|
||||
for i in range(3, 0, -1): npages[i-1] = ((npages[i] - 1) >> (nv.LIBOS_MEMORY_REGION_RADIX_PAGE_LOG2 - 3)) + 1
|
||||
|
||||
offsets = [sum(npages[:i]) * 0x1000 for i in range(4)]
|
||||
radix_view, self.gsp_radix3_sysmem = System.alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
|
||||
radix_view, self.gsp_radix3_sysmem = self.nvdev._alloc_sysmem(offsets[-1] + len(self.gsp_image), contiguous=False)
|
||||
|
||||
# Copy image
|
||||
radix_view.view(offsets[-1], len(self.gsp_image))[:] = self.gsp_image
|
||||
@@ -398,12 +398,12 @@ class NV_GSP(NV_IP):
|
||||
radix_view.view(offsets[i], npages[i+1] * 8, fmt='Q')[:] = array.array('Q', self.gsp_radix3_sysmem[cur_offset:cur_offset+npages[i+1]])
|
||||
|
||||
# Copy signature
|
||||
_, self.gsp_signature_sysmem = System.alloc_sysmem(len(signature), contiguous=True, data=signature)
|
||||
_, self.gsp_signature_sysmem = self.nvdev._alloc_sysmem(len(signature), contiguous=True, data=signature)
|
||||
|
||||
def init_boot_binary_image(self):
  """Extract the boot ucode image and its descriptor, then stage the image in contiguous system memory.

  Sets `self.booter_image` (raw `ucode_image_prod_data`), `self.booter_desc` (an `nv.RM_RISCV_UCODE_DESC` copied from
  `ucode_desc_prod_data`) and `self.booter_sysmem` (the address list returned for the staged image).
  """
  archive = "kgspBinArchiveGspRmBoot"
  self.booter_image = self.nvdev.extract_fw(archive, "ucode_image_prod_data")
  self.booter_desc = nv.RM_RISCV_UCODE_DESC.from_buffer_copy(self.nvdev.extract_fw(archive, "ucode_desc_prod_data"))
  # Allocate exactly once, through the device-level helper; a second allocation here would only be discarded.
  _, self.booter_sysmem = self.nvdev._alloc_sysmem(len(self.booter_image), contiguous=True, data=self.booter_image)
|
||||
|
||||
def init_wpr_meta(self):
|
||||
self.init_gsp_image()
|
||||
@@ -499,7 +499,7 @@ class NV_GSP(NV_IP):
|
||||
params.ramfcMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x200, addressSpace=2, cacheAttrib=0)
|
||||
params.instanceMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x1000, addressSpace=2, cacheAttrib=0)
|
||||
|
||||
_, method_sysmem = System.alloc_sysmem(0x5000, contiguous=True)
|
||||
_, method_sysmem = self.nvdev._alloc_sysmem(0x5000, contiguous=True)
|
||||
params.mthdbufMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=method_sysmem[0], size=0x5000, addressSpace=1, cacheAttrib=0)
|
||||
|
||||
if client is not None and client != self.priv_root and params.hObjectError != 0:
|
||||
|
||||
@@ -3,7 +3,7 @@ import ctypes, time, functools, re, gzip, struct
|
||||
from tinygrad.helpers import getenv, DEBUG, fetch, getbits
|
||||
from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager, AddrSpace
|
||||
from tinygrad.runtime.support.nv.ip import NV_FLCN, NV_FLCN_COT, NV_GSP
|
||||
from tinygrad.runtime.support.system import System, PCIDevice, PCIDevImplBase
|
||||
from tinygrad.runtime.support.system import PCIDevice, PCIDevImplBase, MMIOInterface
|
||||
|
||||
NV_DEBUG = getenv("NV_DEBUG", 0)
|
||||
|
||||
@@ -140,8 +140,13 @@ class NVDev(PCIDevImplBase):
|
||||
self.mm = NVMemoryManager(self, self.vram_size, boot_size=(2 << 20), pt_t=NVPageTableEntry, va_bits=bits, va_shifts=shifts, va_base=0,
|
||||
palloc_ranges=[(x, x) for x in [512 << 20, 2 << 20, 4 << 10]], reserve_ptable=not self.large_bar)
|
||||
|
||||
def _alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[MMIOInterface, list[int]]:
  """Allocate system memory through this device's PCI backend and optionally pre-fill it.

  `size`, `vaddr` and `contiguous` are forwarded to `self.pci_dev.alloc_sysmem`. When `data` is given it is copied to
  the start of the returned view. The `(view, paddrs)` pair from the backend is returned unchanged.
  """
  view, paddrs = self.pci_dev.alloc_sysmem(size, vaddr, contiguous=contiguous)
  # Copy exactly len(data) bytes: slicing by `size` only matches the payload when the caller passes size == len(data).
  if data is not None: view[:len(data)] = data
  return view, paddrs
|
||||
|
||||
def _alloc_boot_struct(self, struct:ctypes.Structure) -> tuple[ctypes.Structure, int]:
  """Copy a ctypes structure into freshly allocated contiguous system memory.

  Returns a new instance of the same structure type that aliases the allocated memory (built with `from_address` on
  the view's `addr`) together with the first address of the allocation's address list.
  """
  # NOTE: the `struct` parameter shadows the `struct` module inside this method.
  struct_t = type(struct)
  sz = ctypes.sizeof(struct_t)
  # Allocate exactly once, through the device-level helper; a second allocation here would only be discarded.
  view, paddrs = self._alloc_sysmem(sz, contiguous=True)
  view[:sz] = bytes(struct)
  return struct_t.from_address(view.addr), paddrs[0]
|
||||
|
||||
|
||||
@@ -79,20 +79,6 @@ class _System:
|
||||
self.pagemap.seek(vaddr // mmap.PAGESIZE * 8)
|
||||
return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]
|
||||
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[MMIOInterface, list[int]]:
  """Allocate system memory and return `(view, addresses)`, with one address per 0x1000-byte step.

  On OSX the buffer comes from `System.iokit_pci_memmap`; its leading 64-bit words are read as `(p, sz)` pairs up to
  the first pair whose `sz` is zero. Otherwise an anonymous, shared, populated and locked mapping is created and its
  page addresses are looked up with `self.system_paddrs`. If `data` is given it is copied to the start of the view.
  """
  if OSX:
    sysmem_view = System.iokit_pci_memmap(round_up(size, mmap.PAGESIZE))
    bases, lengths = sysmem_view.view(fmt='Q')[0::2], sysmem_view.view(fmt='Q')[1::2]
    paddrs = []
    for seg in zip(bases, lengths):
      if seg[1] == 0: break
      paddrs.append(seg)
    assert not contiguous or len(paddrs) == 1, "not contiguous, but required"
  else:
    assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
    flags = MAP_FIXED if vaddr else 0
    # size is page-rounded only on the contiguous path; huge pages are requested when it exceeds one page
    if contiguous:
      size = round_up(size, mmap.PAGESIZE)
      if size > mmap.PAGESIZE: flags |= libc.MAP_HUGETLB
    prot = mmap.PROT_READ|mmap.PROT_WRITE
    va = FileIOInterface.anon_mmap(vaddr, size, prot, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS|MAP_POPULATE|MAP_LOCKED|flags, 0)
    sysmem_view = MMIOInterface(va, size)
    paddrs = [(x, mmap.PAGESIZE) for x in self.system_paddrs(va, size)]

  if data is not None: sysmem_view[:len(data)] = data

  # expand every (p, sz) segment into 0x1000-spaced addresses, then trim to the number of steps covering `size`
  expanded = [base + off for base, seg_sz in paddrs for off in range(0, seg_sz, 0x1000)]
  return sysmem_view, expanded[:ceildiv(size, 0x1000)]
|
||||
|
||||
def pci_scan_bus(self, target_vendor:int, target_devices:list[tuple[int, list[int]]], base_class:int|None=None) -> list[str]:
|
||||
result = []
|
||||
for pcibus in FileIOInterface("/sys/bus/pci/devices").listdir():
|
||||
@@ -210,6 +196,12 @@ class PCIDevice:
|
||||
res = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
|
||||
self.bar_info = {j:PCIBarInfo(int(s,16), int(e,16)-int(s,16)+1) for j,(s,e,_) in enumerate(l.split() for l in res)}
|
||||
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False) -> tuple[MMIOInterface, list[int]]:
  """Map anonymous, shared, populated and locked memory and return `(view, addresses)`.

  A non-zero `vaddr` adds `MAP_FIXED`. Contiguous requests are limited to 2MB, page-rounded, and use `MAP_HUGETLB`
  when larger than one page. Addresses come from `System.system_paddrs`, expanded to one entry per 0x1000-byte step.
  """
  assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
  flags = MAP_FIXED if vaddr else 0
  # NOTE(review): size is page-rounded only on the contiguous path; confirm other callers pass page-multiple sizes
  if contiguous:
    size = round_up(size, mmap.PAGESIZE)
    if size > mmap.PAGESIZE: flags |= libc.MAP_HUGETLB
  prot = mmap.PROT_READ|mmap.PROT_WRITE
  va = FileIOInterface.anon_mmap(vaddr, size, prot, mmap.MAP_SHARED|mmap.MAP_ANONYMOUS|MAP_POPULATE|MAP_LOCKED|flags, 0)
  sysmem_view = MMIOInterface(va, size)
  paddrs = [(x, mmap.PAGESIZE) for x in System.system_paddrs(va, size)]
  # expand every (p, sz) segment into 0x1000-spaced addresses, then trim to the number of steps covering `size`
  expanded = [base + off for base, seg_sz in paddrs for off in range(0, seg_sz, 0x1000)]
  return sysmem_view, expanded[:ceildiv(size, 0x1000)]
|
||||
def read_config(self, offset:int, size:int):
  """Read `size` bytes at `offset` from `self.cfg_fd` and decode them as a little-endian integer."""
  raw = self.cfg_fd.read(size, binary=True, offset=offset)
  return int.from_bytes(raw, byteorder='little')
|
||||
def write_config(self, offset:int, value:int, size:int):
  """Encode `value` as `size` little-endian bytes and write them to `self.cfg_fd` at `offset`."""
  payload = value.to_bytes(size, byteorder='little')
  self.cfg_fd.write(payload, binary=True, offset=offset)
|
||||
def map_bar(self, bar:int, off:int=0, addr:int=0, size:int|None=None, fmt='B') -> MMIOInterface:
|
||||
@@ -223,6 +215,11 @@ class APLPCIDevice(PCIDevice):
|
||||
self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
|
||||
self.pcibus, self.bars = pcibus, {b: System.iokit_pci_memmap(b) for b in bars}
|
||||
self.bar_info = {b:PCIBarInfo(0, self.bars[b].nbytes-1 if b in self.bars else 0) for b in range(6)} # NOTE: fake bar info for nv.
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False) -> tuple[MMIOInterface, list[int]]:
  """Allocate system memory via `System.iokit_pci_memmap` and return `(view, addresses)`.

  The page-rounded buffer's leading 64-bit words are read as `(p, sz)` pairs up to the first pair whose `sz` is zero.
  `contiguous=True` asserts that exactly one such pair was found. `vaddr` is accepted but not used here.
  """
  sysmem_view = System.iokit_pci_memmap(round_up(size, mmap.PAGESIZE))
  bases, lengths = sysmem_view.view(fmt='Q')[0::2], sysmem_view.view(fmt='Q')[1::2]
  paddrs = []
  for seg in zip(bases, lengths):
    if seg[1] == 0: break
    paddrs.append(seg)
  assert not contiguous or len(paddrs) == 1, "not contiguous, but required"
  # expand every (p, sz) segment into 0x1000-spaced addresses, then trim to the number of steps covering `size`
  expanded = [base + off for base, seg_sz in paddrs for off in range(0, seg_sz, 0x1000)]
  return sysmem_view, expanded[:ceildiv(size, 0x1000)]
|
||||
def map_bar(self, bar:int, off:int=0, addr:int=0, size:int|None=None, fmt='B') -> MMIOInterface:
  """Return a sub-view of the already mapped BAR `bar`, built from `off`, `size` and `fmt`. `addr` is not used."""
  bar_view = self.bars[bar]
  return bar_view.view(off, size, fmt)
|
||||
def read_config(self, offset:int, size:int):
  """Send RPC id 0 (TinyGPURPCReadCfg) with `offset` and `size`; return the first element of the reply."""
  rpc_read_cfg = 0
  reply = System.iokit_pci_rpc(rpc_read_cfg, offset, size)
  return reply[0]
|
||||
def write_config(self, offset:int, value:int, size:int):
  """Send RPC id 1 (TinyGPURPCWriteCfg) with `offset`, `size` and `value`; the reply is ignored."""
  rpc_write_cfg = 1
  System.iokit_pci_rpc(rpc_write_cfg, offset, size, value)
|
||||
@@ -261,7 +258,7 @@ class LNXPCIIfaceBase:
|
||||
should_use_sysmem = host or ((cpu_access if OSX else (uncached and cpu_access)) and not force_devmem)
|
||||
if should_use_sysmem:
|
||||
vaddr = self.dev_impl.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
||||
memview, paddrs = System.alloc_sysmem(size, vaddr=vaddr, contiguous=contiguous)
|
||||
memview, paddrs = self.pci_dev.alloc_sysmem(size, vaddr=vaddr, contiguous=contiguous)
|
||||
mapping = self.dev_impl.mm.map_range(vaddr, size, [(paddr, 0x1000) for paddr in paddrs], aspace=AddrSpace.SYS, snooped=True, uncached=True)
|
||||
return HCQBuffer(vaddr, size, meta=PCIAllocationMeta(mapping, has_cpu_mapping=True, hMemory=paddrs[0]), view=memview, owner=self.dev)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user