mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
No dtype alloc (#2570)
* fix all allocs * improve docs * ugh fix fake alloc
This commit is contained in:
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
from typing import Tuple, Optional, Union, List, cast
|
||||
import ctypes, functools
|
||||
import gpuctypes.opencl as cl
|
||||
from tinygrad.helpers import to_char_p_p, from_mv, diskcache, OSX, DType, ImageDType
|
||||
from tinygrad.helpers import to_char_p_p, from_mv, diskcache, OSX, ImageDType
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.renderer.opencl import OpenCLRenderer
|
||||
from tinygrad.device import Compiled, LRUAllocator
|
||||
@@ -66,14 +66,15 @@ class CLAllocator(LRUAllocator):
|
||||
def __init__(self, device:CLDevice):
|
||||
self.device = device
|
||||
super().__init__()
|
||||
def _alloc(self, size:int, dtype:DType):
|
||||
if isinstance(dtype, ImageDType):
|
||||
return checked(cl.clCreateImage2D(self.device.context, cl.CL_MEM_READ_WRITE,
|
||||
cl.cl_image_format(cl.CL_RGBA, {2: cl.CL_HALF_FLOAT, 4: cl.CL_FLOAT}[dtype.itemsize]), dtype.shape[1], dtype.shape[0],
|
||||
0, None, ctypes.byref(status := ctypes.c_int32())), status)
|
||||
else:
|
||||
return checked(cl.clCreateBuffer(self.device.context, cl.CL_MEM_READ_WRITE, size*dtype.itemsize, None, ctypes.byref(status := ctypes.c_int32())), status)
|
||||
def _alloc(self, size:int) -> cl.cl_mem:
|
||||
return checked(cl.clCreateBuffer(self.device.context, cl.CL_MEM_READ_WRITE, size, None, ctypes.byref(status := ctypes.c_int32())), status)
|
||||
def _free(self, buf:cl.cl_mem): check(cl.clReleaseMemObject(buf))
|
||||
def _cast_image(self, buf:cl.cl_mem, dtype:ImageDType, row_pitch:int) -> cl.cl_mem:
|
||||
desc = cl.cl_image_desc(image_type=cl.CL_MEM_OBJECT_IMAGE2D, image_width=dtype.shape[1], image_height=dtype.shape[0], image_row_pitch=row_pitch)
|
||||
desc._0.mem_object = buf
|
||||
return checked(cl.clCreateImage(self.device.context, cl.CL_MEM_READ_WRITE,
|
||||
cl.cl_image_format(cl.CL_RGBA, {2: cl.CL_HALF_FLOAT, 4: cl.CL_FLOAT}[dtype.itemsize]),
|
||||
desc, None, ctypes.byref(status := ctypes.c_int32())), status)
|
||||
def copyin(self, dest:cl.cl_mem, src:memoryview):
|
||||
check(cl.clEnqueueWriteBuffer(self.device.queue, dest, False, 0, len(src)*src.itemsize, from_mv(src), 0, None, None))
|
||||
self.device.pending_copyin.append(src) # NOTE: these can't be freed until the GPU actually executes this command
|
||||
|
||||
Reference in New Issue
Block a user