mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-24 14:28:09 -05:00
nv: rename iface mem functions (#10931)
This commit is contained in:
@@ -276,15 +276,15 @@ class NVProgram(HCQProgram):
|
||||
|
||||
class NVAllocator(HCQAllocator['NVDevice']):
|
||||
def _alloc(self, size:int, options:BufferSpec) -> HCQBuffer:
|
||||
return self.dev.iface._gpu_alloc(size, cpu_access=options.cpu_access, host=options.host)
|
||||
return self.dev.iface.alloc(size, cpu_access=options.cpu_access, host=options.host)
|
||||
|
||||
def _free(self, opaque:HCQBuffer, options:BufferSpec):
|
||||
try:
|
||||
self.dev.synchronize()
|
||||
self.dev.iface._gpu_free(opaque)
|
||||
self.dev.iface.free(opaque)
|
||||
except AttributeError: pass
|
||||
|
||||
def map(self, buf:HCQBuffer): self.dev.iface._gpu_map(buf._base if buf._base is not None else buf)
|
||||
def map(self, buf:HCQBuffer): self.dev.iface.map(buf._base if buf._base is not None else buf)
|
||||
|
||||
@dataclass
|
||||
class GPFifo:
|
||||
@@ -385,7 +385,7 @@ class NVKIface:
|
||||
if made.params.status != 0: raise RuntimeError(f"_gpu_map_to_cpu returned {get_error_str(made.params.status)}")
|
||||
return fd_dev.mmap(target, size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED | (MAP_FIXED if target is not None else 0), 0)
|
||||
|
||||
def _gpu_alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0) -> HCQBuffer:
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0) -> HCQBuffer:
|
||||
# Uncached memory is "system". Use huge pages only for gpu memory.
|
||||
page_size = (4 << (12 if OSX else 10)) if uncached or host else ((2 << 20) if size >= (8 << 20) else (4 << (12 if OSX else 10)))
|
||||
size = round_up(size, page_size)
|
||||
@@ -423,7 +423,7 @@ class NVKIface:
|
||||
|
||||
return self._gpu_uvm_map(va_addr, size, mem_handle, has_cpu_mapping=cpu_access or host)
|
||||
|
||||
def _gpu_free(self, mem:HCQBuffer):
|
||||
def free(self, mem:HCQBuffer):
|
||||
if mem.meta.hMemory > NVKIface.host_object_enumerator: # not a host object, clear phys mem.
|
||||
made = nv_gpu.NVOS00_PARAMETERS(hRoot=self.root, hObjectParent=self.dev.nvdevice, hObjectOld=mem.meta.hMemory)
|
||||
nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_FREE, made)
|
||||
@@ -442,7 +442,7 @@ class NVKIface:
|
||||
mapped_gpu_ids=[self.gpu_uuid], has_cpu_mapping=has_cpu_mapping),
|
||||
view=MMIOInterface(va_base, size, fmt='B') if has_cpu_mapping else None)
|
||||
|
||||
def _gpu_map(self, mem:HCQBuffer):
|
||||
def map(self, mem:HCQBuffer):
|
||||
if self.gpu_uuid in mem.meta.mapped_gpu_ids: return
|
||||
mem.meta.mapped_gpu_ids.append(self.gpu_uuid)
|
||||
self._gpu_uvm_map(mem.va_addr, mem.size, mem.meta.hMemory, create_range=False)
|
||||
@@ -478,7 +478,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS)
|
||||
channel_group = self.iface.rm_alloc(self.nvdevice, nv_gpu.KEPLER_CHANNEL_GROUP_A, channel_params)
|
||||
|
||||
gpfifo_area = self.iface._gpu_alloc(0x200000, contiguous=True, cpu_access=True, map_flags=0x10d0000)
|
||||
gpfifo_area = self.iface.alloc(0x200000, contiguous=True, cpu_access=True, map_flags=0x10d0000)
|
||||
|
||||
ctxshare_params = nv_gpu.NV_CTXSHARE_ALLOCATION_PARAMETERS(hVASpace=vaspace, flags=nv_gpu.NV_CTXSHARE_ALLOCATION_FLAGS_SUBCONTEXT_ASYNC)
|
||||
ctxshare = self.iface.rm_alloc(channel_group, nv_gpu.FERMI_CONTEXT_SHARE_A, ctxshare_params)
|
||||
@@ -487,7 +487,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
self.dma_gpfifo = self._new_gpu_fifo(gpfifo_area, ctxshare, channel_group, offset=0x100000, entries=0x10000, compute=False)
|
||||
self.iface.rm_control(channel_group, nv_gpu.NVA06C_CTRL_CMD_GPFIFO_SCHEDULE, nv_gpu.NVA06C_CTRL_GPFIFO_SCHEDULE_PARAMS(bEnable=1))
|
||||
|
||||
self.cmdq_page:HCQBuffer = self.iface._gpu_alloc(0x200000, cpu_access=True)
|
||||
self.cmdq_page:HCQBuffer = self.iface.alloc(0x200000, cpu_access=True)
|
||||
self.cmdq_allocator = BumpAllocator(size=self.cmdq_page.size, base=cast(int, self.cmdq_page.va_addr), wrap=True)
|
||||
self.cmdq = MMIOInterface(cast(int, self.cmdq_page.va_addr), 0x200000, fmt='I')
|
||||
|
||||
@@ -505,7 +505,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
self._setup_gpfifos()
|
||||
|
||||
def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400, compute=False) -> GPFifo:
|
||||
notifier = self.iface._gpu_alloc(48 << 20, uncached=True)
|
||||
notifier = self.iface.alloc(48 << 20, uncached=True)
|
||||
params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(hObjectError=notifier.meta.hMemory, hObjectBuffer=gpfifo_area.meta.hMemory,
|
||||
gpFifoOffset=gpfifo_area.va_addr+offset, gpFifoEntries=entries, hContextShare=ctxshare,
|
||||
hUserdMemory=(ctypes.c_uint32*8)(gpfifo_area.meta.hMemory), userdOffset=(ctypes.c_uint64*8)(entries*8+offset))
|
||||
|
||||
@@ -105,7 +105,7 @@ class VirtMapping: va_addr:int; size:int; paddrs:list[tuple[int, int]]; uncached
|
||||
class PageTableTraverseContext:
|
||||
def __init__(self, dev, pt, vaddr, create_pts=False, free_pts=False, boot=False):
|
||||
self.dev, self.vaddr, self.create_pts, self.free_pts, self.boot = dev, vaddr - dev.mm.va_base, create_pts, free_pts, boot
|
||||
self.pt_stack:list[tuple[Any, int, int]] = [(pt, self._pt_pte_idx(pt, vaddr), self._pt_pte_size(pt))]
|
||||
self.pt_stack:list[tuple[Any, int, int]] = [(pt, self._pt_pte_idx(pt, self.vaddr), self._pt_pte_size(pt))]
|
||||
|
||||
def _pt_pte_cnt(self, lv): return self.dev.mm.pte_cnt[lv]
|
||||
def _pt_pte_size(self, pt): return self.dev.mm.pte_covers[pt.lv]
|
||||
|
||||
Reference in New Issue
Block a user