mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
FOLD_CONSTANTS_INTO_KERNELS and shapetracker OOB tweak
This commit is contained in:
@@ -17,6 +17,7 @@ SHUFFLE_MOVEMENT_OPS = True # this breaks maxpool
|
||||
REMOVE_MOVEMENT_NOPS = True
|
||||
MERGE_ELEMENTWISE_OPS = True
|
||||
MERGE_ELEMENTWISE_INTO_CONV_OUTPUT = True
|
||||
FOLD_CONSTANTS_INTO_KERNELS = True
|
||||
|
||||
class LazyOp(NamedTuple):
|
||||
op: Op
|
||||
@@ -174,7 +175,7 @@ def _realize_binary_op(self:LazyBuffer) -> Tuple[gops.GPUBuffer, List[gops.GPUBu
|
||||
for s in lazy_srcs:
|
||||
if s.optype == MovementOps and s.realized is None:
|
||||
root = get_root(s.op)
|
||||
if root.realized is None and root.optype == LoadOps and root.op.op == LoadOps.FROMCPU and root.shape == (1,):
|
||||
if FOLD_CONSTANTS_INTO_KERNELS and root.realized is None and root.optype == LoadOps and root.op.op == LoadOps.FROMCPU and root.shape == (1,):
|
||||
if not s.st.needs_valid():
|
||||
real_dict[s] = f"({root.op.arg[0]}f)"
|
||||
else:
|
||||
|
||||
@@ -36,7 +36,7 @@ class View:
|
||||
acc = 1
|
||||
for i,(d,s) in enumerate(self.shape_strides[::-1]):
|
||||
if d != 1 and s != 0:
|
||||
lr = divmodidx(acc, d, i != len(self.shape_strides)-1)
|
||||
lr = divmodidx(acc, d, i != len(self.shape_strides)-1 and d != prod(self.shape))
|
||||
lr = f"({lr}*{s})" if s != 1 else lr
|
||||
ret.append(lr)
|
||||
acc *= d
|
||||
|
||||
Reference in New Issue
Block a user