_gpu_alloc -> allocator.alloc (#6189)

* _gpu_alloc -> allocator.alloc

* remove unneeded import

* pylint
This commit is contained in:
nimlgen
2024-08-19 23:34:22 +03:00
committed by GitHub
parent 96d502d8b7
commit bc44e6501b
2 changed files with 6 additions and 8 deletions

View File

@@ -1,5 +1,5 @@
from __future__ import annotations
from typing import Tuple, List, Any, cast
from typing import Tuple, List, Any
import os, ctypes, ctypes.util, functools, pathlib, mmap, errno, time, array, contextlib, decimal
from dataclasses import dataclass
from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWComputeQueue, HWCopyQueue, HCQArgsState, \
@@ -61,8 +61,7 @@ class AMDComputeQueue(HWComputeQueue):
def __del__(self):
if self.binded_device is not None:
self.binded_device.synchronize()
self.binded_device._gpu_free(self.hw_page)
self.binded_device.allocator.free(self.hw_page, self.hw_page.size, BufferOptions(cpu_access=True, nolru=True, uncached=True))
def _acquire_mem(self, addr=0x0, sz=(1 << 64)-1, gli=1, glm=1, glk=1, glv=1, gl1=1, gl2=1):
self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_ACQUIRE_MEM, 6), 0, *data64_le(sz), *data64_le(addr), 0,
@@ -161,7 +160,7 @@ class AMDComputeQueue(HWComputeQueue):
def bind(self, device):
self.binded_device = device
self.hw_page = cast(AMDDevice, device)._gpu_alloc(len(self.q) * 4, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
self.hw_page = device.allocator.alloc(len(self.q) * 4, BufferOptions(cpu_access=True, nolru=True, uncached=True))
hw_view = to_mv(self.hw_page.va_addr, self.hw_page.size).cast("I")
for i, value in enumerate(self.q): hw_view[i] = value
@@ -303,6 +302,7 @@ class AMDAllocator(HCQAllocator):
def _alloc(self, size:int, options:BufferOptions) -> HCQBuffer:
if options.host: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, public=True)
if options.cpu_access and options.uncached: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM, public=options.cpu_access)
def _free(self, opaque, options:BufferOptions):

View File

@@ -84,9 +84,7 @@ class NVSignal(HCQSignal):
class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method
def __del__(self):
if self.binded_device is not None:
self.binded_device.synchronize() # Synchronize to ensure the buffer is no longer in use.
self.binded_device._gpu_free(self.hw_page)
if self.binded_device is not None: self.binded_device.allocator.free(self.hw_page, self.hw_page.size, BufferOptions(cpu_access=True, nolru=True))
@hcq_command
def setup(self, compute_class=None, copy_class=None, local_mem_window=None, shared_mem_window=None, local_mem=None, local_mem_tpc_bytes=None):
@@ -109,7 +107,7 @@ class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method
def bind(self, device):
self.binded_device = device
self.hw_page = cast(NVDevice, device)._gpu_alloc(len(self.q) * 4, map_to_cpu=True)
self.hw_page = device.allocator.alloc(len(self.q) * 4, BufferOptions(cpu_access=True, nolru=True))
hw_view = to_mv(self.hw_page.va_addr, self.hw_page.size).cast("I")
for i, value in enumerate(self.q): hw_view[i] = value