Mirror of https://github.com/tinygrad/tinygrad.git
_gpu_alloc -> allocator.alloc (#6189)
* _gpu_alloc -> allocator.alloc
* remove no longer needed import
* pylint
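The change swaps the drivers' private _gpu_alloc/_gpu_free helpers for the public HCQAllocator interface, with the old allocation flags (GTT heap, uncached, CPU-mapped) expressed through BufferOptions. A minimal sketch of the new call pattern, assuming AMD (or NV) hardware is actually available; Device, BufferOptions, allocator.alloc and allocator.free are real tinygrad APIs, but the size and the snippet itself are illustrative, not code from this commit:

from tinygrad.device import Device, BufferOptions

dev = Device["AMD"]                                               # needs real hardware; "NV" follows the same pattern
opts = BufferOptions(cpu_access=True, nolru=True, uncached=True)  # replaces _gpu_alloc(sz, KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
page = dev.allocator.alloc(0x1000, opts)                          # returns an HCQBuffer with va_addr and size
# ... use the CPU-visible page ...
dev.allocator.free(page, 0x1000, opts)                            # replaces _gpu_free(page)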
@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import Tuple, List, Any, cast
+from typing import Tuple, List, Any
 import os, ctypes, ctypes.util, functools, pathlib, mmap, errno, time, array, contextlib, decimal
 from dataclasses import dataclass
 from tinygrad.device import HCQCompiled, HCQAllocator, HCQBuffer, HWComputeQueue, HWCopyQueue, HCQArgsState, \
@@ -61,8 +61,7 @@ class AMDComputeQueue(HWComputeQueue):
 
   def __del__(self):
     if self.binded_device is not None:
-      self.binded_device.synchronize()
-      self.binded_device._gpu_free(self.hw_page)
+      self.binded_device.allocator.free(self.hw_page, self.hw_page.size, BufferOptions(cpu_access=True, nolru=True, uncached=True))
 
   def _acquire_mem(self, addr=0x0, sz=(1 << 64)-1, gli=1, glm=1, glk=1, glv=1, gl1=1, gl2=1):
     self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_ACQUIRE_MEM, 6), 0, *data64_le(sz), *data64_le(addr), 0,
@@ -161,7 +160,7 @@ class AMDComputeQueue(HWComputeQueue):
 
   def bind(self, device):
     self.binded_device = device
-    self.hw_page = cast(AMDDevice, device)._gpu_alloc(len(self.q) * 4, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
+    self.hw_page = device.allocator.alloc(len(self.q) * 4, BufferOptions(cpu_access=True, nolru=True, uncached=True))
     hw_view = to_mv(self.hw_page.va_addr, self.hw_page.size).cast("I")
     for i, value in enumerate(self.q): hw_view[i] = value
@@ -303,6 +302,7 @@ class AMDAllocator(HCQAllocator):
 
   def _alloc(self, size:int, options:BufferOptions) -> HCQBuffer:
     if options.host: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, public=True)
+    if options.cpu_access and options.uncached: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
     return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM, public=options.cpu_access)
 
   def _free(self, opaque, options:BufferOptions):
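The branch added above is what makes the switch possible on AMD: when the queue code asks the allocator for a CPU-accessible, uncached buffer, _alloc has to place it in GTT, exactly where the old direct _gpu_alloc(..., KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True) call put the hardware queue page. A standalone sketch of the resulting heap selection, restating the diff for illustration only (Opts and pick_heap are hypothetical names, not tinygrad code):

from dataclasses import dataclass

@dataclass
class Opts:  # stand-in for the BufferOptions fields used by _alloc
  host: bool = False
  cpu_access: bool = False
  uncached: bool = False

def pick_heap(o: Opts) -> str:
  if o.host: return "USERPTR"                    # pinned host memory
  if o.cpu_access and o.uncached: return "GTT"   # new path: CPU-mappable, uncached system memory
  return "VRAM"                                  # device-local; CPU-mapped only when cpu_access is set

assert pick_heap(Opts(cpu_access=True, uncached=True)) == "GTT"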
@@ -84,9 +84,7 @@ class NVSignal(HCQSignal):
 
 class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method
   def __del__(self):
-    if self.binded_device is not None:
-      self.binded_device.synchronize() # Synchronize to ensure the buffer is no longer in use.
-      self.binded_device._gpu_free(self.hw_page)
+    if self.binded_device is not None: self.binded_device.allocator.free(self.hw_page, self.hw_page.size, BufferOptions(cpu_access=True, nolru=True))
 
   @hcq_command
   def setup(self, compute_class=None, copy_class=None, local_mem_window=None, shared_mem_window=None, local_mem=None, local_mem_tpc_bytes=None):
@@ -109,7 +107,7 @@ class NVCommandQueue(HWCommandQueue): # pylint: disable=abstract-method
 
   def bind(self, device):
     self.binded_device = device
-    self.hw_page = cast(NVDevice, device)._gpu_alloc(len(self.q) * 4, map_to_cpu=True)
+    self.hw_page = device.allocator.alloc(len(self.q) * 4, BufferOptions(cpu_access=True, nolru=True))
     hw_view = to_mv(self.hw_page.va_addr, self.hw_page.size).cast("I")
     for i, value in enumerate(self.q): hw_view[i] = value
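In both bind() hunks the follow-up code is unchanged: because the page is allocated with cpu_access=True, its va_addr stays CPU-mapped and the queued command words are copied in through a memoryview. A minimal sketch of that fill pattern, assuming a device is present; to_mv is tinygrad's real pointer-to-memoryview helper, while the 16-word size here is made up:

from tinygrad.device import Device, BufferOptions
from tinygrad.helpers import to_mv

dev = Device["AMD"]                                  # "NV" is analogous
opts = BufferOptions(cpu_access=True, nolru=True, uncached=True)
page = dev.allocator.alloc(16 * 4, opts)             # room for 16 32-bit command words
words = to_mv(page.va_addr, page.size).cast("I")     # uint32 view over the mapped page
for i in range(16): words[i] = i                     # write a dummy command stream from the CPU
dev.allocator.free(page, 16 * 4, opts)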