remove del spam from CI (#10699)

* remove del spam from CI

* more

* preconstruct default buffer spec

* ignore those errors

* check exception

* more exception check

* skip stuff
This commit is contained in:
George Hotz
2025-06-08 10:14:30 -07:00
committed by GitHub
parent 32141ec867
commit 67a1c92fc0
9 changed files with 33 additions and 19 deletions

View File

@@ -28,7 +28,6 @@ lint.select = [
"RET506", # superfluous-else-raise
"RET507", # superfluous-else-continue
"A", # builtin-variable-shadowing, builtin-argument-shadowing, builtin-attribute-shadowing
"SIM105", # suppressible-exception
"FURB110",# if-exp-instead-of-or-operator
"RUF018", # assignment-in-assert
]

View File

@@ -149,9 +149,9 @@ class Buffer:
return self
def deallocate(self):
assert self.is_allocated(), "buffer must be allocated to deallocate"
- if DEBUG >= 7: print(f"buffer: deallocate {self.nbytes} bytes on {self.device}")
+ if DEBUG is not None and DEBUG >= 7: print(f"buffer: deallocate {self.nbytes} bytes on {self.device}")
if self._base is None and (self.options is None or self.options.external_ptr is None):
if not self.device.startswith("DISK"): GlobalCounters.mem_used -= self.nbytes
if GlobalCounters is not None and not self.device.startswith("DISK"): GlobalCounters.mem_used -= self.nbytes
self.allocator.free(self._buf, self.nbytes, self.options)
elif self._base is not None: self._base.allocated_views -= 1
del self._buf
@@ -205,12 +205,15 @@ DeviceType = TypeVar('DeviceType', bound='Compiled')
# TODO: size, dest, src are the same type. can we enforce this?
class Allocator(Generic[DeviceType]):
- def __init__(self, dev:DeviceType): self.dev: DeviceType = dev
+ def __init__(self, dev:DeviceType):
+   self.dev: DeviceType = dev
+   self.default_buffer_spec: BufferSpec = BufferSpec()
# overridden in LRUAllocator
def alloc(self, size:int, options:Optional[BufferSpec]=None):
assert size > 0, f"alloc size must be positive, getting {size}"
- return self._alloc(size, options if options is not None else BufferSpec())
- def free(self, opaque, size:int, options:Optional[BufferSpec]=None): self._free(opaque, options if options is not None else BufferSpec())
+ return self._alloc(size, options if options is not None else self.default_buffer_spec)
+ def free(self, opaque, size:int, options:Optional[BufferSpec]=None):
+   self._free(opaque, options if options is not None else self.default_buffer_spec)
# implemented by the runtime
def _alloc(self, size:int, options:BufferSpec): raise NotImplementedError("need alloc")

View File

@@ -471,8 +471,10 @@ class AMDAllocator(HCQAllocator['AMDDevice']):
return self.dev.dev_iface.alloc(size, host=options.host, uncached=options.uncached, cpu_access=options.cpu_access)
def _free(self, opaque, options:BufferSpec):
- self.dev.synchronize()
- self.dev.dev_iface.free(opaque)
+ try:
+   self.dev.synchronize()
+   self.dev.dev_iface.free(opaque)
+ except AttributeError: pass
def map(self, buf:HCQBuffer): self.dev.dev_iface.map(buf._base if buf._base is not None else buf)

View File

@@ -46,7 +46,8 @@ class CUDAProgram:
if self.smem > 0: check(cuda.cuFuncSetAttribute(self.prg, cuda.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, self.smem))
def __del__(self):
- if hasattr(self, 'module'): check(cuda.cuModuleUnload(self.module))
+ try: check(cuda.cuModuleUnload(self.module))
+ except AttributeError: pass
def __call__(self, *args, global_size:tuple[int,int,int]=(1,1,1), local_size:tuple[int,int,int]=(1,1,1), vals:tuple[int, ...]=(), wait=False):
check(cuda.cuCtxSetCurrent(self.dev.context))
@@ -67,8 +68,10 @@ class CUDAAllocator(LRUAllocator['CUDADevice']):
if options.host: return init_c_var(ctypes.c_void_p(), lambda x: check(cuda.cuMemHostAlloc(ctypes.byref(x), size, 0x01)))
return init_c_var(cuda.CUdeviceptr(), lambda x: check(cuda.cuMemAlloc_v2(ctypes.byref(x), size)))
def _free(self, opaque, options:BufferSpec):
- if options.host: check(cuda.cuMemFreeHost(opaque))
- else: check(cuda.cuMemFree_v2(opaque))
+ try:
+   if options.host: check(cuda.cuMemFreeHost(opaque))
+   else: check(cuda.cuMemFree_v2(opaque))
+ except (TypeError, AttributeError): pass
def _copyin(self, dest, src:memoryview):
check(cuda.cuCtxSetCurrent(self.dev.context))
host_mem = self.alloc(len(src), BufferSpec(host=True))

View File

@@ -72,7 +72,7 @@ class DiskBuffer:
MAP_LOCKED, MAP_POPULATE = 0 if OSX else 0x2000, getattr(mmap, "MAP_POPULATE", 0 if OSX else 0x008000)
class DiskAllocator(Allocator):
- def __init__(self, dev:DiskDevice): self.dev = dev
+ def __init__(self, dev:DiskDevice): super().__init__(dev)
def _alloc(self, size:int, options):
self.dev._might_open(size)
return DiskBuffer(self.dev, size)

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Optional, cast
- import ctypes, functools, hashlib, contextlib
+ import ctypes, functools, hashlib
from tinygrad.runtime.autogen import opencl as cl
from tinygrad.helpers import init_c_var, to_char_p_p, from_mv, OSX, DEBUG, getenv, mv_address
from tinygrad.renderer.cstyle import OpenCLRenderer, IntelRenderer
@@ -41,8 +41,10 @@ class CLProgram:
self.kernel = checked(cl.clCreateKernel(self.program, name.encode(), status := ctypes.c_int32()), status)
def __del__(self):
- with contextlib.suppress(TypeError, AttributeError): check(cl.clReleaseKernel(self.kernel))
- with contextlib.suppress(TypeError, AttributeError): check(cl.clReleaseProgram(self.program))
+ try: check(cl.clReleaseKernel(self.kernel))
+ except (TypeError, AttributeError): pass
+ try: check(cl.clReleaseProgram(self.program))
+ except (TypeError, AttributeError): pass
def __call__(self, *bufs:tuple[ctypes._CData, BufferSpec], global_size:tuple[int,int,int]=(1,1,1), local_size:Optional[tuple[int,int,int]]=None, vals:tuple[int, ...]=(), wait=False) -> Optional[float]: # noqa: E501
for i,(b,_) in enumerate(bufs): cl.clSetKernelArg(self.kernel, i, ctypes.sizeof(b), ctypes.byref(b))

View File

@@ -302,8 +302,10 @@ class NVAllocator(HCQAllocator['NVDevice']):
return self.dev._gpu_alloc(size, cpu_access=options.cpu_access, tag=f"user memory ({options})")
def _free(self, opaque:HCQBuffer, options:BufferSpec):
- self.dev.synchronize()
- self.dev._gpu_free(opaque)
+ try:
+   self.dev.synchronize()
+   self.dev._gpu_free(opaque)
+ except AttributeError: pass
def map(self, buf:HCQBuffer): self.dev._gpu_map(buf._base if buf._base is not None else buf)

View File

@@ -246,7 +246,9 @@ class RemoteAllocator(Allocator['RemoteDevice']):
self.dev.q(BufferAlloc(buffer_num:=next(self.dev.buffer_num), size, options))
return buffer_num
# TODO: options should not be here in any Allocator
- def _free(self, opaque:int, options): self.dev.q(BufferFree(opaque))
+ def _free(self, opaque:int, options):
+   try: self.dev.q(BufferFree(opaque))
+   except (TypeError, AttributeError): pass
def _copyin(self, dest:int, src:memoryview): self.dev.q(CopyIn(dest, self.dev.conn.req.h(src)))
def _copyout(self, dest:memoryview, src:int):
resp = self.dev.q(CopyOut(src), wait=True)

View File

@@ -189,7 +189,8 @@ class WebGpuAllocator(Allocator['WGPUDevPtr']):
buffer_data = read_buffer(self.dev, src)
dest[:] = buffer_data[:dest.nbytes] if webgpu.wgpuBufferGetSize(src) > dest.nbytes else buffer_data
def _free(self, opaque:WGPUBufPtr, options:BufferSpec):
- webgpu.wgpuBufferDestroy(opaque)
+ try: webgpu.wgpuBufferDestroy(opaque)
+ except AttributeError: pass
class WebGpuDevice(Compiled):
def __init__(self, device:str):