default opt level 2

Author: George Hotz
Date: 2022-07-17 14:54:40 -07:00
Commit: eda6f071b2
Parent: 73b0471b25
3 changed files with 10 additions and 6 deletions


@@ -31,9 +31,6 @@ def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categoric
     loss.backward()
     optim.step()
-
-    # TODO: corealize
-    for p in optim.params: p.realize()
 
     # printing
     if not noloss:
       cat = np.argmax(out.cpu().data, axis=-1)
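
With realization moved into optim.step() (see the optimizer diff below), the training loop no longer forces each parameter by hand. A minimal, self-contained sketch of the resulting loop shape; MockLoss and MockOptim are illustrative stand-ins, not tinygrad's real classes:

class MockLoss:
  def backward(self): print("backward: grads populated")

class MockOptim:
  def step(self): print("step: update applied, params realized inside step()")

loss, optim = MockLoss(), MockOptim()
loss.backward()
optim.step()  # no trailing "for p in optim.params: p.realize()" needed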


@@ -22,12 +22,12 @@ OpType = Union[Type[UnaryOps], Type[BinaryOps], Type[ReduceOps], Type[MovementOp
 DEBUG = int(os.getenv("DEBUG", "0"))
 GRAPH = int(os.getenv("GRAPH", "0"))
-OPT = int(os.getenv("OPT", "1"))
+OPT = int(os.getenv("OPT", "2"))
 NOCONV = int(os.getenv("NOCONV", "0"))
 # TODO: movement ops that only change shape are really nops. treat them as such
 REMOVE_MOVEMENT_NOPS, MERGE_UNARY_OPS, MERGE_ELEMENTWISE_INTO_REDUCE = OPT>=1, OPT>=1, OPT>=1
-MERGE_ELEMENTWISE_OPS, MERGE_ONE_CONV_INTO_ELEMENTWISE, SHUFFLE_RESHAPE_OPS = OPT>=2, OPT>=2, OPT>=2
+MERGE_ELEMENTWISE_OPS, MERGE_ONE_CONV_INTO_ELEMENTWISE = OPT>=2, OPT>=2
 SHUFFLE_MOVEMENT_OPS = OPT>=3
 SHUFFLE_PAD_OPS = OPT>=4 # NOTE: 0/0 is NaN if you pad, so this can change the output
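
This flag block is the headline change: a single integer environment variable gates successive tiers of graph rewrites, and the default tier moves from 1 to 2, so elementwise merging and one-conv fusion are now on out of the box. Dropping SHUFFLE_RESHAPE_OPS also means reshapes are no longer shuffled at tier 2; every movement-op shuffle now waits for OPT>=3. A runnable sketch of the same gating pattern (flags_demo.py is a hypothetical filename):

import os

# one env var, successive tiers of rewrites; tier 2 is the new default
OPT = int(os.getenv("OPT", "2"))
MERGE_ELEMENTWISE_OPS = OPT >= 2  # on by default after this commit
SHUFFLE_MOVEMENT_OPS = OPT >= 3   # still opt-in
SHUFFLE_PAD_OPS = OPT >= 4        # opt-in: padding can turn 0/0 into NaN
print(f"OPT={OPT} merge={MERGE_ELEMENTWISE_OPS} shuffle={SHUFFLE_MOVEMENT_OPS} pad={SHUFFLE_PAD_OPS}")

Running it as OPT=3 python flags_demo.py flips the next tier on, the same way you would raise the level for tinygrad itself.
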
@@ -251,7 +251,7 @@ class LazyBuffer:
     # some permutes are actually just reshapes
     if op == MovementOps.PERMUTE and ShapeTracker(x.shape).movement_op(op, arg).contiguous: return x.movement_op(MovementOps.RESHAPE, tuple(x.shape[i] for i in arg))
-    if (SHUFFLE_MOVEMENT_OPS or (SHUFFLE_RESHAPE_OPS and op == MovementOps.RESHAPE)) and x.optype == BinaryOps and x.realized is None and (SHUFFLE_PAD_OPS or op != MovementOps.PAD) and op != MovementOps.STRIDED:
+    if SHUFFLE_MOVEMENT_OPS and x.optype == BinaryOps and x.realized is None and (SHUFFLE_PAD_OPS or op != MovementOps.PAD) and op != MovementOps.STRIDED:
       # if this MovementOp is being applied to a BinaryOp, apply the MovementOp to all the BinaryOp inputs instead
       def replace_with_movement_op(y:Union[LazyOp, LazyBuffer]) -> LazyBuffer:
         if isinstance(y, LazyBuffer): return y.movement_op(op, arg)
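
The hunk above is the consumer of the deleted flag: with SHUFFLE_RESHAPE_OPS gone, the guard simplifies to SHUFFLE_MOVEMENT_OPS alone. The rewrite it guards is sound for elementwise BinaryOps because applying a movement op to the output is the same as applying it to each input first; a quick check of that identity (numpy used purely for illustration):

import numpy as np

a = np.arange(6.0).reshape(2, 3)
b = np.full((2, 3), 2.0)

move_after = (a + b).reshape(3, 2)               # movement op on the BinaryOp output
move_before = a.reshape(3, 2) + b.reshape(3, 2)  # movement op pushed onto the inputs
assert np.array_equal(move_after, move_before)

PAD is the documented exception: it fills with zeros, and for a division 0/0 is NaN, which is why SHUFFLE_PAD_OPS stays behind OPT>=4.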


@@ -9,6 +9,10 @@ class Optimizer:
     for param in self.params:
       param.grad = None
 
+  def realize(self, extra=[]):
+    # TODO: corealize
+    for p in self.params + extra: p.realize()
+
 class SGD(Optimizer):
   def __init__(self, params, lr=0.001):
     super().__init__(params)
@@ -17,6 +21,7 @@ class SGD(Optimizer):
   def step(self):
     for t in self.params:
       t -= t.grad * self.lr
+    self.realize()
 
 class RMSprop(Optimizer):
   def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
@@ -29,6 +34,7 @@ class RMSprop(Optimizer):
     for i, t in enumerate(self.params):
       self.v[i] = self.decay * self.v[i] + (1.0 - self.decay) * (t.grad * t.grad)
       t -= (t.grad * self.lr).div(self.v[i].sqrt() + self.eps)
+    self.realize(self.v)
 
 class Adam(Optimizer):
   def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=1e-8):
@@ -45,3 +51,4 @@ class Adam(Optimizer):
       self.m[i] = self.b1 * self.m[i] + (1.0 - self.b1) * t.grad
       self.v[i] = self.b2 * self.v[i] + (1.0 - self.b2) * (t.grad * t.grad)
       t -= a * self.m[i].div(self.v[i].sqrt() + self.eps)
+    self.realize(self.m + self.v)
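
All three optimizers now funnel through the shared realize() helper, passing their state buffers in via extra so the momentum and variance tensors get forced together with the params. A minimal, self-contained mock of the pattern; Buf is a stand-in for a lazy tensor, not tinygrad's API:

class Buf:
  def __init__(self): self.realized = None
  def realize(self):
    self.realized = True  # stand-in for executing the lazy graph
    return self

class Optimizer:
  def __init__(self, params): self.params = params
  def realize(self, extra=[]):
    # still one realize per buffer; the upstream TODO is to "corealize" them in one pass
    for p in self.params + extra: p.realize()

class RMSprop(Optimizer):
  def __init__(self, params):
    super().__init__(params)
    self.v = [Buf() for _ in params]  # running average of squared grads
  def step(self):
    # ...parameter update math elided...
    self.realize(self.v)  # params plus optimizer state, all forced here

opt = RMSprop([Buf(), Buf()])
opt.step()
assert all(b.realized for b in opt.params + opt.v)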