Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-04-29 03:00:14 -04:00)
device: call free for external_ptr (#14448)
* device: call free for external_ptr
* lin
tinygrad/device.py
@@ -147,8 +147,9 @@ class Buffer:
   def deallocate(self):
     assert hasattr(self, '_buf'), "buffer must be allocated to deallocate"
     if DEBUG is not None and DEBUG >= 7: print(f"buffer: deallocate {self.nbytes} bytes on {self.device}")
-    if self._base is None and (self.options is None or self.options.external_ptr is None):
-      if GlobalCounters is not None and not self.device.startswith("DISK"): GlobalCounters.mem_used -= self.nbytes
+    if self._base is None:
+      if GlobalCounters is not None and not self.device.startswith("DISK") and (self.options is None or self.options.external_ptr is None):
+        GlobalCounters.mem_used -= self.nbytes
       if PROFILE: Buffer.profile_events.append(ProfilePointEvent(self.device, "free", self.trace_num))
       self.allocator.free(self._buf, self.nbytes, self.options)
     elif self._base is not None: self._base.allocated_views -= 1
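The point of the reshuffled condition: allocator.free() now runs for every base buffer, including ones created around an external_ptr, while GlobalCounters.mem_used is only decremented for memory tinygrad allocated itself (it was never incremented for external pointers). A standalone sketch of the accounting rule, with toy names rather than tinygrad's classes:

from dataclasses import dataclass

@dataclass(frozen=True)
class Spec:                      # toy stand-in for tinygrad's BufferSpec
    external_ptr: int | None = None

class Counters:                  # toy stand-in for GlobalCounters
    mem_used = 0

def deallocate(nbytes: int, spec: Spec | None, free) -> None:
    # only memory we allocated ourselves (no external_ptr) was ever counted,
    # so only that is subtracted from the running total
    if spec is None or spec.external_ptr is None:
        Counters.mem_used -= nbytes
    free()  # after this commit: also called for external_ptr buffers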
@@ -263,7 +264,7 @@ class LRUAllocator(Allocator, Generic[DeviceType]):
     for opaque in opaques: super().free(opaque, sz, options)
     opaques.clear()
   def free(self, opaque:Any, size:int, options:BufferSpec|None=None):
-    if LRU and (options is None or not options.nolru): self.cache[(size, options)].append(opaque)
+    if LRU and (options is None or (not options.nolru and options.external_ptr is None)): self.cache[(size, options)].append(opaque)
     else: super().free(opaque, size, options)

 # **************** for Compiled Devices ****************
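The LRUAllocator change is the other half of the same contract: an opaque backed by an external pointer must never enter the reuse cache, or a later alloc() could hand out memory the allocator doesn't own. A standalone sketch of the rule (a plain dict stands in for the allocator's cache, and the LRU constant mirrors the env toggle):

from collections import defaultdict
from dataclasses import dataclass

@dataclass(frozen=True)
class Spec:                      # toy BufferSpec
    nolru: bool = False
    external_ptr: int | None = None

LRU = True                       # stands in for the LRU env toggle
cache = defaultdict(list)

def lru_free(opaque, size: int, options: Spec | None, real_free) -> None:
    if LRU and (options is None or (not options.nolru and options.external_ptr is None)):
        cache[(size, options)].append(opaque)  # eligible for reuse
    else:
        real_free(opaque)  # nolru or external memory bypasses the cache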
tinygrad/runtime/ops_cuda.py
@@ -69,11 +69,11 @@ class CUDAAllocator(LRUAllocator['CUDADevice']):
     if options.external_ptr: return cuda.CUdeviceptr_v2(options.external_ptr)
     if options.host: return init_c_var(ctypes.c_void_p, lambda x: check(cuda.cuMemHostAlloc(ctypes.byref(x), size, 0x01)))
     return init_c_var(cuda.CUdeviceptr, lambda x: check(cuda.cuMemAlloc_v2(ctypes.byref(x), size)))
+  @suppress_finalizing
   def _free(self, opaque, options:BufferSpec):
-    try:
-      if options.host: check(cuda.cuMemFreeHost(opaque))
-      else: check(cuda.cuMemFree_v2(opaque))
-    except (TypeError, AttributeError): pass
+    if options.external_ptr: return
+    if options.host: check(cuda.cuMemFreeHost(opaque))
+    else: check(cuda.cuMemFree_v2(opaque))
   def _copyin(self, dest, src:memoryview):
     check(cuda.cuCtxSetCurrent(self.dev.context))
     host_mem = self.alloc(len(src), BufferSpec(host=True))
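Instead of a try/except around the ctypes calls, the free path is now wrapped in @suppress_finalizing (newly imported from tinygrad.helpers in the Metal hunk below), plus an early return so device memory handed in via external_ptr is never passed to cuMemFree_v2. As a rough illustration only, and an assumed shape rather than tinygrad's actual helper, such a decorator could look like:

import functools, sys

def suppress_finalizing(fn):
    # during interpreter shutdown, module globals and ctypes bindings may
    # already be torn down; swallow errors then, re-raise otherwise
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception:
            if not sys.is_finalizing(): raise
    return wrapper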
tinygrad/runtime/ops_metal.py
@@ -1,5 +1,5 @@
-import subprocess, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform, sys
-from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, PROFILE, ProfileRangeEvent, cpu_profile, unwrap
+import subprocess, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform
+from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, PROFILE, ProfileRangeEvent, cpu_profile, unwrap, suppress_finalizing
 import tinygrad.runtime.support.objc as objc
 from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, ProfileDeviceEvent, CompilerSet, CompilerPair
 from tinygrad.renderer.cstyle import MetalRenderer
@@ -167,8 +167,9 @@ class MetalAllocator(LRUAllocator[MetalDevice]):
     ret.retain = False
     if ret.value is None: raise MemoryError(f"Metal OOM while allocating {size=}")
     return MetalBuffer(ret, size)
+  @suppress_finalizing
   def _free(self, opaque:MetalBuffer, options):
-    if not sys.is_finalizing(): opaque.buf.release
+    if not options.external_ptr: opaque.buf.release
   def _transfer(self, dest:MetalBuffer, src:MetalBuffer, sz:int, src_dev:MetalDevice, dest_dev:MetalDevice):
     dest_dev.synchronize()
     src_command_buffer = src_dev.mtl_queue.commandBuffer().retained()
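Same pattern on Metal: the explicit sys.is_finalizing() guard (and the sys import removed above) gives way to the shared decorator, and the release is instead gated on ownership. A toy sketch of that ownership rule, with a hypothetical FakeBuf in place of tinygrad's MetalBuffer:

class FakeBuf:                   # hypothetical refcounted handle
    def __init__(self, external: bool):
        self.external, self.refs = external, 1
    def release(self):
        self.refs -= 1

def metal_free(buf: FakeBuf) -> None:
    # only drop references tinygrad took itself; a buffer wrapped around an
    # external_ptr belongs to whoever created it
    if not buf.external:
        buf.release()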
tinygrad/runtime/ops_qcom.py
@@ -399,9 +399,11 @@ class QCOMDevice(HCQCompiled):
       raise RuntimeError("Failed to map external pointer to GPU memory") from e

   def _gpu_free(self, mem:HCQBuffer):
-    if mem.meta[0] is None: return
-    kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta[0].id)
-    if mem.meta[1]: FileIOInterface.munmap(mem.va_addr, mem.meta[0].mmapsize)
+    if mem.meta[0] is None: return # external (gpu) ptr
+    if not mem.meta[1]: kgsl.IOCTL_KGSL_SHAREDMEM_FREE(self.fd, gpuaddr=mem.meta[0].gpuaddr) # external (cpu) ptr
+    else:
+      kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta[0].id)
+      FileIOInterface.munmap(mem.va_addr, mem.meta[0].mmapsize)

   def _ensure_stack_size(self, sz):
     if not hasattr(self, '_stack'): self._stack = self._gpu_alloc(sz)
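On QCOM the meta tuple now encodes three ownership cases; the reading below is inferred from the diff and expressed as a toy dispatcher for illustration, not tinygrad code:

def qcom_free_case(meta0, meta1) -> str:
    # mirrors the branches of _gpu_free above
    if meta0 is None:
        return "external GPU ptr: no kernel object of ours, nothing to free"
    if not meta1:
        return "external CPU ptr: free the GPU-side mapping, but skip munmap"
    return "owned allocation: free the GPU object and munmap our CPU view"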