nv: rename iface mem functions (#10931)

Author: nimlgen
Date: 2025-06-23 16:34:51 +03:00
Committed by: GitHub
Parent: 4e864bd304
Commit: eceb7a00d2
2 changed files with 10 additions and 10 deletions


@@ -276,15 +276,15 @@ class NVProgram(HCQProgram):
 class NVAllocator(HCQAllocator['NVDevice']):
   def _alloc(self, size:int, options:BufferSpec) -> HCQBuffer:
-    return self.dev.iface._gpu_alloc(size, cpu_access=options.cpu_access, host=options.host)
+    return self.dev.iface.alloc(size, cpu_access=options.cpu_access, host=options.host)
   def _free(self, opaque:HCQBuffer, options:BufferSpec):
     try:
       self.dev.synchronize()
-      self.dev.iface._gpu_free(opaque)
+      self.dev.iface.free(opaque)
     except AttributeError: pass
-  def map(self, buf:HCQBuffer): self.dev.iface._gpu_map(buf._base if buf._base is not None else buf)
+  def map(self, buf:HCQBuffer): self.dev.iface.map(buf._base if buf._base is not None else buf)
 
 @dataclass
 class GPFifo:
@@ -385,7 +385,7 @@ class NVKIface:
     if made.params.status != 0: raise RuntimeError(f"_gpu_map_to_cpu returned {get_error_str(made.params.status)}")
     return fd_dev.mmap(target, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED | (MAP_FIXED if target is not None else 0), 0)
 
-  def _gpu_alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0) -> HCQBuffer:
+  def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0) -> HCQBuffer:
     # Uncached memory is "system". Use huge pages only for gpu memory.
     page_size = (4 << (12 if OSX else 10)) if uncached or host else ((2 << 20) if size >= (8 << 20) else (4 << (12 if OSX else 10)))
     size = round_up(size, page_size)
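
The page-size expression in the renamed `alloc` is worth unpacking: uncached or host ("system") allocations always use the base page size (16 KiB on OSX, 4 KiB elsewhere), while GPU memory switches to 2 MiB huge pages once the request reaches 8 MiB, and the size is then rounded up to a page multiple. A standalone sketch of the same arithmetic, with `OSX` and `round_up` stubbed in for illustration:

OSX = False  # assumption for this sketch: non-OSX host

def round_up(x: int, align: int) -> int:
  # round x up to the next multiple of align
  return (x + align - 1) // align * align

def pick_page_size(size: int, host: bool = False, uncached: bool = False) -> int:
  base = 4 << (12 if OSX else 10)                  # 16 KiB on OSX, 4 KiB elsewhere
  if uncached or host: return base                 # "system" memory: base pages only
  return (2 << 20) if size >= (8 << 20) else base  # huge pages for large gpu buffers

assert pick_page_size(1 << 20) == 4 << 10              # 1 MiB gpu alloc -> 4 KiB pages
assert pick_page_size(16 << 20) == 2 << 20             # 16 MiB gpu alloc -> 2 MiB pages
assert pick_page_size(16 << 20, host=True) == 4 << 10  # host alloc stays on base pages
assert round_up(9 << 20, 2 << 20) == 10 << 20          # rounded up to a huge-page multiple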
@@ -423,7 +423,7 @@ class NVKIface:
     return self._gpu_uvm_map(va_addr, size, mem_handle, has_cpu_mapping=cpu_access or host)
 
-  def _gpu_free(self, mem:HCQBuffer):
+  def free(self, mem:HCQBuffer):
     if mem.meta.hMemory > NVKIface.host_object_enumerator: # not a host object, clear phys mem.
       made = nv_gpu.NVOS00_PARAMETERS(hRoot=self.root, hObjectParent=self.dev.nvdevice, hObjectOld=mem.meta.hMemory)
       nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_FREE, made)
@@ -442,7 +442,7 @@ class NVKIface:
                      mapped_gpu_ids=[self.gpu_uuid], has_cpu_mapping=has_cpu_mapping),
                      view=MMIOInterface(va_base, size, fmt='B') if has_cpu_mapping else None)
 
-  def _gpu_map(self, mem:HCQBuffer):
+  def map(self, mem:HCQBuffer):
     if self.gpu_uuid in mem.meta.mapped_gpu_ids: return
     mem.meta.mapped_gpu_ids.append(self.gpu_uuid)
     self._gpu_uvm_map(mem.va_addr, mem.size, mem.meta.hMemory, create_range=False)
@@ -478,7 +478,7 @@ class NVDevice(HCQCompiled[NVSignal]):
     channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS)
     channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params)
 
-    gpfifo_area = self.iface._gpu_alloc(0x200000, contiguous=True, cpu_access=True, map_flags=0x10d0000)
+    gpfifo_area = self.iface.alloc(0x200000, contiguous=True, cpu_access=True, map_flags=0x10d0000)
 
     ctxshare_params = nv_gpu.NV_CTXSHARE_ALLOCATION_PARAMETERS(hVASpace=vaspace, flags=nv_gpu.NV_CTXSHARE_ALLOCATION_FLAGS_SUBCONTEXT_ASYNC)
     ctxshare = self.iface.rm_alloc(channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params)
@@ -487,7 +487,7 @@ class NVDevice(HCQCompiled[NVSignal]):
     self.dma_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False)
     self.iface.rm_control(channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1))
 
-    self.cmdq_page:HCQBuffer = self.iface._gpu_alloc(0x200000, cpu_access=True)
+    self.cmdq_page:HCQBuffer = self.iface.alloc(0x200000, cpu_access=True)
     self.cmdq_allocator = BumpAllocator(size=self.cmdq_page.size, base=cast(int, self.cmdq_page.va_addr), wrap=True)
     self.cmdq = MMIOInterface(cast(int, self.cmdq_page.va_addr), 0x200000, fmt='I')
@@ -505,7 +505,7 @@ class NVDevice(HCQCompiled[NVSignal]):
     self._setup_gpfifos()
 
   def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False) -> GPFifo:
-    notifier = self.iface._gpu_alloc(48 << 20, uncached=True)
+    notifier = self.iface.alloc(48 << 20, uncached=True)
     params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(hObjectError=notifier.meta.hMemory, hObjectBuffer=gpfifo_area.meta.hMemory,
       gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare,
       hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset))
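
Taken together, the renames leave the iface exposing its memory management as plain `alloc`/`free`/`map`, which is exactly what `NVAllocator` now calls. A minimal sketch of that surface as a `typing.Protocol` (the Protocol and its name are illustrative, not part of the commit; the signatures are copied from the diff, and `HCQBuffer` is tinygrad's buffer type):

from typing import Protocol

class MemIface(Protocol):  # hypothetical name, for illustration only
  def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0) -> 'HCQBuffer': ...
  def free(self, mem:'HCQBuffer'): ...
  def map(self, mem:'HCQBuffer'): ...

The second changed file, below, carries a one-line fix rather than a rename.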


@@ -105,7 +105,7 @@ class VirtMapping: va_addr:int; size:int; paddrs:list[tuple[int, int]]; uncached
 class PageTableTraverseContext:
   def __init__(self, dev, pt, vaddr, create_pts=False, free_pts=False, boot=False):
     self.dev, self.vaddr, self.create_pts, self.free_pts, self.boot = dev, vaddr - dev.mm.va_base, create_pts, free_pts, boot
-    self.pt_stack:list[tuple[Any, int, int]] = [(pt, self._pt_pte_idx(pt, vaddr), self._pt_pte_size(pt))]
+    self.pt_stack:list[tuple[Any, int, int]] = [(pt, self._pt_pte_idx(pt, self.vaddr), self._pt_pte_size(pt))]
 
   def _pt_pte_cnt(self, lv): return self.dev.mm.pte_cnt[lv]
   def _pt_pte_size(self, pt): return self.dev.mm.pte_covers[pt.lv]
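
The change in this file is a fix: `__init__` stores the address rebased against the device's VA window (`self.vaddr = vaddr - dev.mm.va_base`), but the initial `pt_stack` entry was still indexed with the raw `vaddr`, off by `va_base`. A small worked sketch of why that matters, with illustrative numbers and assuming an index of the common `(va // pte_size) % pte_cnt` shape (the real `_pt_pte_idx` internals are not shown in this diff):

va_base  = 0x1000000000   # hypothetical dev.mm.va_base (64 GiB)
pte_size = 1 << 30        # hypothetical coverage of one top-level entry (1 GiB)
pte_cnt  = 512            # hypothetical entries per table

def pte_idx(va: int) -> int:
  # assumed shape of _pt_pte_idx: which entry of this table covers va
  return (va // pte_size) % pte_cnt

vaddr   = va_base + (5 << 30)   # an address 5 entries into the window
rebased = vaddr - va_base       # what __init__ stores as self.vaddr

assert pte_idx(vaddr)   == 69   # raw address: index shifted by va_base
assert pte_idx(rebased) == 5    # rebased address: the entry the traversal needs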