mirror of https://github.com/tinygrad/tinygrad.git
synced 2026-01-23 22:08:08 -05:00

might fix tests

@@ -141,6 +141,7 @@ class OpenCLBuffer(GPUBuffer):
   }
   def __init__(self, shape, hostbuf:Optional[OpenCLBuffer]=None, backing:Optional[np.ndarray]=None):
     self._image = hostbuf._image if hostbuf is not None else None
+    self.copied_backing = False
     super().__init__(shape, hostbuf, backing)
     assert not (self._image and self._buf)

@@ -152,18 +153,18 @@ class OpenCLBuffer(GPUBuffer):
   @property
   def cl(self):
     if self._buf is None:
-      if self._backing is not None:
+      if self._backing is not None and not self.copied_backing:
         self._buf = CLBuffer(4*roundup(prod(self._backing.shape)))
         CL.enqueue_copy(self._buf.cl, self._backing, is_blocking=False)
-        self._backing = None
+        self.copied_backing = True
       elif self.st.contiguous:
         self._buf = CLBuffer(4*roundup(prod(self.shape)))

     if self._image is not None:
       self._buf = CLBuffer(4*roundup(prod(self._image.shape)*4))
-      if self._backing is not None:
+      if self._backing is not None and not self.copied_backing:
         CL.enqueue_copy(self._buf.cl, self._backing, is_blocking=False)
-        self._backing = None
+        self.copied_backing = True
       #print(f"converting {self.shape} back to buffer, image shape is {self._image.shape}")
       CLProgram("from_image", f"""
         __kernel void from_image(
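
Worth spelling out the bookkeeping change in the two hunks above: previously `self._backing = None` discarded the host array as soon as it was copied to the device, so a later allocation (the image path) could no longer see it; now `copied_backing` records that the upload already happened while `_backing` stays around, and it plausibly also keeps the numpy array alive while the non-blocking `enqueue_copy` is still in flight. A minimal sketch of that guard, with numpy standing in for `CLBuffer`/`CL.enqueue_copy` (the class and names here are illustrative, not the real `OpenCLBuffer`):

import numpy as np

class CopyOnceSketch:
  def __init__(self, backing):
    self._backing = backing        # host data is kept, not dropped after upload
    self._buf = None               # device allocation (numpy stands in for CLBuffer)
    self.copied_backing = False    # has the host data been uploaded already?

  @property
  def cl(self):
    if self._buf is None:
      self._buf = np.empty_like(self._backing)
      if self._backing is not None and not self.copied_backing:
        np.copyto(self._buf, self._backing)   # stand-in for CL.enqueue_copy
        self.copied_backing = True            # previously: self._backing = None
    return self._buf

buf = CopyOnceSketch(np.arange(4, dtype=np.float32))
print(buf.cl, buf._backing)        # the backing is still there after first access
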
@@ -187,6 +188,7 @@ class OpenCLBuffer(GPUBuffer):
   def image(self):
     if self._image is None:
       assert len(self.shape) == 3 and self.shape[2] == 4, f"bad shape for image {self.shape}"
+      assert self.st.contiguous, f"{self} is not contiguous"
       self._image = CLImage(shape=(self.shape[1], self.shape[0]))
       if self._buf is not None:
         assert prod(self.shape) <= prod(self._image.cl.shape)*4
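
The added assert makes the precondition explicit: the image is filled straight from the flat buffer, so a non-contiguous ShapeTracker could read elements in the wrong order; better to fail loudly here. A tiny illustration of the two checks (hypothetical helper, not part of tinygrad):

def image_shape_for(shape, contiguous):
  # mirrors the two preconditions image() now enforces before allocating a CLImage:
  # the data must be laid out as (H, W, 4) and the view must be contiguous
  assert len(shape) == 3 and shape[2] == 4, f"bad shape for image {shape}"
  assert contiguous, "buffer is not contiguous"
  return (shape[1], shape[0])   # the CLImage is allocated as (width, height)

print(image_shape_for((32, 8, 4), contiguous=True))   # (8, 32)
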
@@ -57,8 +57,8 @@ def preprocessing_op(x,w,C):
   #x = contiguous(ctx, x, x.shapetracker) if not x.shapetracker.contiguous else x
   #w = contiguous(ctx, w, w.shapetracker) if not w.shapetracker.contiguous else w

-  # okay but unneeded
-  #x = x.contiguous_op()
+  # contiguous before image, always
+  x = x.contiguous_op()
   w = w.contiguous_op()

   # early realize on the weights
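
The comment change says it all: what was previously hedged as "okay but unneeded" is now a hard rule, because image() asserts contiguity. In numpy terms (only an analogy, the real buffers live on the GPU), the situation looks like this:

import numpy as np

# toy numpy illustration of "contiguous before image, always": a permuted view
# is not contiguous, so it must be materialized before anything that assumes a
# flat (H, W, 4) layout reads it
x = np.zeros((8, 32, 4), dtype=np.float32).transpose(1, 0, 2)
print(x.flags["C_CONTIGUOUS"])   # False: the transpose only changed strides
x = np.ascontiguousarray(x)      # stand-in for x.contiguous_op()
print(x.flags["C_CONTIGUOUS"])   # True: now safe for the image path
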
@@ -125,7 +125,7 @@ class GPUBuffer:

   def unary_op(x, op:UnaryOps): return type(x)(x.shape)._processing_op([("A", x)], GPUBuffer.code_for_op[op])
   def binary_op(x, op:BinaryOps, y:GPUBuffer): return type(x)(x.shape)._processing_op([("A", x), ("B", y)], GPUBuffer.code_for_op[op])
-  def contiguous_op(x): return type(x)(x.shape, x, x._backing) if x.st.contiguous else x.unary_op(UnaryOps.NOOP)
+  def contiguous_op(x): return x if x.st.contiguous else x.unary_op(UnaryOps.NOOP)
   def movement_op(x, op:MovementOps, arg) -> GPUBuffer: return type(x)(ShapeTracker(x.st).movement_op(op, arg), x)
   def reduce_op(x, op:ReduceOps, new_shape:Tuple[int, ...]): return type(x)(new_shape)._processing_op([("A", x)], code="acc", earlycode=GPUBuffer.code_for_op[op], earlybufs=set("A"), op=op)
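
This contiguous_op change is subtle: the old version wrapped an already-contiguous buffer in a fresh type(x)(x.shape, x, x._backing), so the result was always a new object; now it returns x itself, which is what lets the CONTIGUOUS load op below detect that nothing needed to be done. A minimal sketch (illustrative names, not the real GPUBuffer):

class BufSketch:
  # simplified stand-in for contiguous_op() after this change: already-contiguous
  # buffers are returned as-is, everything else goes through a NOOP copy kernel
  def __init__(self, data, contiguous=True):
    self.data, self.contiguous = data, contiguous
  def noop_copy(self):                       # stand-in for unary_op(UnaryOps.NOOP)
    return BufSketch(list(self.data), contiguous=True)
  def contiguous_op(self):
    return self if self.contiguous else self.noop_copy()

a = BufSketch([1, 2, 3])
assert a.contiguous_op() is a                # no new buffer, no kernel launch
b = BufSketch([1, 2, 3], contiguous=False)
assert b.contiguous_op() is not b            # a real copy happened
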
@@ -124,7 +124,8 @@ def _realize_loadops(self:LazyBuffer) -> Tuple[DeviceBuffer, List[DeviceBuffer],
     return Device._buffers[self.device].fromCPU(self.op.arg), [], LoadOps
   elif self.op.op == LoadOps.CONTIGUOUS:
     real_src = self.op.src[0].realize(self.device)
-    return real_src.contiguous_op(), [real_src], LoadOps
+    ret = real_src.contiguous_op()
+    return ret, [real_src], LoadOps if ret != real_src else None
   else:
     assert NotImplementedError(f"unknown LoadOp {self.op.op}")
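
Because contiguous_op can now return its input unchanged, the CONTIGUOUS branch compares the result against the source and reports None as the op type when nothing was actually realized. A compact sketch of just that control flow (the Src class and the string stand in for real device buffers and the LoadOps enum):

class Src:
  def __init__(self, contiguous): self.contiguous = contiguous
  def contiguous_op(self): return self if self.contiguous else Src(True)

def realize_contiguous(real_src):
  # mirrors the new branch: only report an op type when a copy actually ran,
  # None signals "this realize was a no-op"
  ret = real_src.contiguous_op()
  return ret, [real_src], "LoadOps" if ret != real_src else None

print(realize_contiguous(Src(True))[2])    # None: source was already contiguous
print(realize_contiguous(Src(False))[2])   # LoadOps: a copy kernel was launched
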
@@ -260,7 +261,8 @@ class LazyBuffer:
     # we haven't realized the Buffer yet
     self.realized, real_srcs, real_type = _realize[self.optype](self)
     # in lazy mode, we don't log until we realize
-    log_op(real_type, [x.op for x in get_lazyops(self.op)], self.realized, real_srcs)
+    if real_type is not None:
+      log_op(real_type, [x.op for x in get_lazyops(self.op)], self.realized, real_srcs)
     # no need to keep the op after realization
     del self.op
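
And the consumer side of that None: log_op is only called when the realize produced actual work, so no-op CONTIGUOUS realizations no longer show up in the graph log. A toy version of the guard (hypothetical helper, not the real log_op):

def maybe_log_op(real_type, ops, out, srcs, trace):
  # realizations that turned out to be no-ops (real_type is None) leave no trace
  if real_type is not None:
    trace.append((real_type, ops, out, srcs))

trace = []
maybe_log_op(None, [], "out0", [], trace)        # skipped: nothing was realized
maybe_log_op("LoadOps", [], "out1", [], trace)   # logged: real work happened
print(len(trace))                                # 1
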