No dtype alloc (#2570)

* fix all allocs

* improve docs

* ugh fix fake alloc
This commit is contained in:
George Hotz
2023-12-02 13:29:40 -08:00
committed by GitHub
parent c8774713c5
commit d6b404ac11
14 changed files with 157 additions and 123 deletions

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from typing import Tuple, Optional, Union, List, cast
import ctypes, functools
import gpuctypes.opencl as cl
from tinygrad.helpers import to_char_p_p, from_mv, diskcache, OSX, DType, ImageDType
from tinygrad.helpers import to_char_p_p, from_mv, diskcache, OSX, ImageDType
from tinygrad.codegen.kernel import LinearizerOptions
from tinygrad.renderer.opencl import OpenCLRenderer
from tinygrad.device import Compiled, LRUAllocator
@@ -66,14 +66,15 @@ class CLAllocator(LRUAllocator):
def __init__(self, device:CLDevice):
    """Bind this allocator to `device`, then run the base-class initializer.

    Args:
        device: stored on `self.device`; the allocation methods read its
            `context`, and `copyin` reads its `queue` and `pending_copyin`.
    """
    # The device is assigned before delegating, so it is already set while the base initializer runs.
    self.device = device
    super().__init__()
def _alloc(self, size:int, dtype:DType):
    """Allocate device memory for `size` elements of `dtype`.

    NOTE(review): this looks like the pre-change, dtype-aware variant shown in a
    diff; a `_alloc(self, size)` definition follows it directly.

    Args:
        size: element count; only used on the plain-buffer path, where it is
            multiplied by `dtype.itemsize`.
        dtype: an `ImageDType` selects a 2D RGBA image sized from `dtype.shape`
            (width = shape[1], height = shape[0]); anything else selects a buffer.

    Returns:
        Whatever `checked` returns for the created object and status
        (presumably the validated handle; `checked` is defined outside this view).

    Raises:
        KeyError: on the image path when `dtype.itemsize` is neither 2 nor 4.
    """
    status = ctypes.c_int32()

    # Plain buffers are the common case: byte size is element count times item size.
    if not isinstance(dtype, ImageDType):
        nbytes = size*dtype.itemsize
        buf = cl.clCreateBuffer(self.device.context, cl.CL_MEM_READ_WRITE, nbytes, None, ctypes.byref(status))
        return checked(buf, status)

    # Image path: itemsize 2 maps to half-float channels, itemsize 4 to float channels.
    channel_type = {2: cl.CL_HALF_FLOAT, 4: cl.CL_FLOAT}[dtype.itemsize]
    image_format = cl.cl_image_format(cl.CL_RGBA, channel_type)
    image = cl.clCreateImage2D(self.device.context, cl.CL_MEM_READ_WRITE, image_format,
                               dtype.shape[1], dtype.shape[0], 0, None, ctypes.byref(status))
    return checked(image, status)
def _alloc(self, size:int) -> cl.cl_mem:
    """Create a read/write OpenCL buffer on this allocator's device context.

    Args:
        size: passed unchanged as clCreateBuffer's size argument (a byte count
            per the OpenCL API); no dtype scaling happens here.

    Returns:
        Whatever `checked` returns for the new buffer and its status
        (presumably the validated handle; `checked` is defined outside this view).
    """
    # clCreateBuffer reports its error code through this out-parameter.
    status = ctypes.c_int32()
    buf = cl.clCreateBuffer(self.device.context, cl.CL_MEM_READ_WRITE, size, None, ctypes.byref(status))
    return checked(buf, status)
def _free(self, buf:cl.cl_mem):
    """Release the OpenCL memory object `buf`.

    The return code of clReleaseMemObject is handed to `check`
    (defined outside this view; presumably raises on a non-success code).
    """
    release_status = cl.clReleaseMemObject(buf)
    check(release_status)
def _cast_image(self, buf:cl.cl_mem, dtype:ImageDType, row_pitch:int) -> cl.cl_mem:
    """Create a 2D RGBA image object whose descriptor references the existing `buf`.

    Args:
        buf: memory object attached to the image descriptor as `mem_object`.
        dtype: supplies the image size (width = shape[1], height = shape[0]) and,
            via `itemsize`, the channel type (2 -> half float, 4 -> float).
        row_pitch: passed through as the descriptor's `image_row_pitch`.

    Returns:
        Whatever `checked` returns for the new image and its status
        (presumably the validated handle; `checked` is defined outside this view).

    Raises:
        KeyError: when `dtype.itemsize` is neither 2 nor 4.
    """
    width, height = dtype.shape[1], dtype.shape[0]
    desc = cl.cl_image_desc(image_type=cl.CL_MEM_OBJECT_IMAGE2D, image_width=width,
                            image_height=height, image_row_pitch=row_pitch)
    # mem_object is reached through the `_0` member of the descriptor struct.
    # NOTE(review): per the OpenCL clCreateImage docs, a 2D image built over a buffer mem_object
    # uses that buffer's storage; confirm for the targeted OpenCL version.
    desc._0.mem_object = buf

    channel_type = {2: cl.CL_HALF_FLOAT, 4: cl.CL_FLOAT}[dtype.itemsize]
    image_format = cl.cl_image_format(cl.CL_RGBA, channel_type)

    # clCreateImage reports its error code through this out-parameter.
    status = ctypes.c_int32()
    image = cl.clCreateImage(self.device.context, cl.CL_MEM_READ_WRITE, image_format, desc, None, ctypes.byref(status))
    return checked(image, status)
# Enqueue a write of the host data in `src` into the OpenCL buffer `dest` on this device's queue.
# The write starts at offset 0 and covers len(src)*src.itemsize bytes. The third argument (False)
# is clEnqueueWriteBuffer's blocking_write flag, so the call returns without waiting for the copy.
# The call's return code is passed to `check` (defined outside this view).
# Because the write is non-blocking, `src` is appended to device.pending_copyin to keep a
# reference to it after this method returns.
# NOTE(review): these are the last visible lines of the hunk; confirm the method does not continue past them.
def copyin(self, dest:cl.cl_mem, src:memoryview):
check(cl.clEnqueueWriteBuffer(self.device.queue, dest, False, 0, len(src)*src.itemsize, from_mv(src), 0, None, None))
self.device.pending_copyin.append(src) # NOTE: these can't be freed until the GPU actually executes this command