might fix tests

This commit is contained in:
George Hotz
2022-10-20 16:27:11 -07:00
parent fd6ba8e7ac
commit 9f8c414589
4 changed files with 13 additions and 9 deletions

View File

@@ -141,6 +141,7 @@ class OpenCLBuffer(GPUBuffer):
}
def __init__(self, shape, hostbuf:Optional[OpenCLBuffer]=None, backing:Optional[np.ndarray]=None):
self._image = hostbuf._image if hostbuf is not None else None
self.copied_backing = False
super().__init__(shape, hostbuf, backing)
assert not (self._image and self._buf)
@@ -152,18 +153,18 @@ class OpenCLBuffer(GPUBuffer):
@property
def cl(self):
if self._buf is None:
if self._backing is not None:
if self._backing is not None and not self.copied_backing:
self._buf = CLBuffer(4*roundup(prod(self._backing.shape)))
CL.enqueue_copy(self._buf.cl, self._backing, is_blocking=False)
self._backing = None
self.copied_backing = True
elif self.st.contiguous:
self._buf = CLBuffer(4*roundup(prod(self.shape)))
if self._image is not None:
self._buf = CLBuffer(4*roundup(prod(self._image.shape)*4))
if self._backing is not None:
if self._backing is not None and not self.copied_backing:
CL.enqueue_copy(self._buf.cl, self._backing, is_blocking=False)
self._backing = None
self.copied_backing = True
#print(f"converting {self.shape} back to buffer, image shape is {self._image.shape}")
CLProgram("from_image", f"""
__kernel void from_image(
@@ -187,6 +188,7 @@ class OpenCLBuffer(GPUBuffer):
def image(self):
if self._image is None:
assert len(self.shape) == 3 and self.shape[2] == 4, f"bad shape for image {self.shape}"
assert self.st.contiguous, f"{self} is not contiguous"
self._image = CLImage(shape=(self.shape[1], self.shape[0]))
if self._buf is not None:
assert prod(self.shape) <= prod(self._image.cl.shape)*4

View File

@@ -57,8 +57,8 @@ def preprocessing_op(x,w,C):
#x = contiguous(ctx, x, x.shapetracker) if not x.shapetracker.contiguous else x
#w = contiguous(ctx, w, w.shapetracker) if not w.shapetracker.contiguous else w
# okay but unneeded
#x = x.contiguous_op()
# contiguous before image, always
x = x.contiguous_op()
w = w.contiguous_op()
# early realize on the weights

View File

@@ -125,7 +125,7 @@ class GPUBuffer:
def unary_op(x, op:UnaryOps): return type(x)(x.shape)._processing_op([("A", x)], GPUBuffer.code_for_op[op])
def binary_op(x, op:BinaryOps, y:GPUBuffer): return type(x)(x.shape)._processing_op([("A", x), ("B", y)], GPUBuffer.code_for_op[op])
def contiguous_op(x): return type(x)(x.shape, x, x._backing) if x.st.contiguous else x.unary_op(UnaryOps.NOOP)
def contiguous_op(x): return x if x.st.contiguous else x.unary_op(UnaryOps.NOOP)
def movement_op(x, op:MovementOps, arg) -> GPUBuffer: return type(x)(ShapeTracker(x.st).movement_op(op, arg), x)
def reduce_op(x, op:ReduceOps, new_shape:Tuple[int, ...]): return type(x)(new_shape)._processing_op([("A", x)], code="acc", earlycode=GPUBuffer.code_for_op[op], earlybufs=set("A"), op=op)

View File

@@ -124,7 +124,8 @@ def _realize_loadops(self:LazyBuffer) -> Tuple[DeviceBuffer, List[DeviceBuffer],
return Device._buffers[self.device].fromCPU(self.op.arg), [], LoadOps
elif self.op.op == LoadOps.CONTIGUOUS:
real_src = self.op.src[0].realize(self.device)
return real_src.contiguous_op(), [real_src], LoadOps
ret = real_src.contiguous_op()
return ret, [real_src], LoadOps if ret != real_src else None
else:
assert NotImplementedError(f"unknown LoadOp {self.op.op}")
@@ -260,7 +261,8 @@ class LazyBuffer:
# we haven't realized the Buffer yet
self.realized, real_srcs, real_type = _realize[self.optype](self)
# in lazy mode, we don't log until we realize
log_op(real_type, [x.op for x in get_lazyops(self.op)], self.realized, real_srcs)
if real_type is not None:
log_op(real_type, [x.op for x in get_lazyops(self.op)], self.realized, real_srcs)
# no need to keep the op after realization
del self.op