FOLD_CONSTANTS_INTO_KERNELS and shapetracker OOB tweak

This commit is contained in:
George Hotz
2022-06-21 19:47:15 -07:00
parent 83d50e2687
commit 0b820f7966
2 changed files with 3 additions and 2 deletions

View File

@@ -17,6 +17,7 @@ SHUFFLE_MOVEMENT_OPS = True # this breaks maxpool
REMOVE_MOVEMENT_NOPS = True
MERGE_ELEMENTWISE_OPS = True
MERGE_ELEMENTWISE_INTO_CONV_OUTPUT = True
+ FOLD_CONSTANTS_INTO_KERNELS = True
class LazyOp(NamedTuple):
op: Op
@@ -174,7 +175,7 @@ def _realize_binary_op(self:LazyBuffer) -> Tuple[gops.GPUBuffer, List[gops.GPUBu
for s in lazy_srcs:
if s.optype == MovementOps and s.realized is None:
root = get_root(s.op)
- if root.realized is None and root.optype == LoadOps and root.op.op == LoadOps.FROMCPU and root.shape == (1,):
+ if FOLD_CONSTANTS_INTO_KERNELS and root.realized is None and root.optype == LoadOps and root.op.op == LoadOps.FROMCPU and root.shape == (1,):
if not s.st.needs_valid():
real_dict[s] = f"({root.op.arg[0]}f)"
else:

View File

@@ -36,7 +36,7 @@ class View:
acc = 1
for i,(d,s) in enumerate(self.shape_strides[::-1]):
if d != 1 and s != 0:
- lr = divmodidx(acc, d, i != len(self.shape_strides)-1)
+ lr = divmodidx(acc, d, i != len(self.shape_strides)-1 and d != prod(self.shape))
lr = f"({lr}*{s})" if s != 1 else lr
ret.append(lr)
acc *= d