mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
default opt level 2
@@ -31,9 +31,6 @@ def train(model, X_train, Y_train, optim, steps, BS=128, lossfn=sparse_categoric
     loss.backward()
     optim.step()
 
-    # TODO: corealize
-    for p in optim.params: p.realize()
-
     # printing
     if not noloss:
       cat = np.argmax(out.cpu().data, axis=-1)
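
With realization folded into the optimizer (see the optim.py hunks below), the training loop only calls backward() and step(). A minimal, dependency-free sketch of the calling pattern, using toy stand-in classes rather than tinygrad's actual Tensor/Optimizer:

# Toy stand-ins for the pattern only; not tinygrad's actual classes.
class ToyParam:
  def __init__(self, value): self.value, self.grad = value, 0.0
  def realize(self): pass  # stand-in for forcing the lazy graph to compute

class ToySGD:
  def __init__(self, params, lr=0.01): self.params, self.lr = params, lr
  def step(self):
    for p in self.params: p.value -= self.lr * p.grad
    # post-commit behavior: step() realizes its own params, so the caller's
    # "for p in optim.params: p.realize()" loop (deleted above) is redundant
    for p in self.params: p.realize()

params = [ToyParam(1.0), ToyParam(2.0)]
optim = ToySGD(params)
for p in params: p.grad = 0.5   # pretend loss.backward() filled in gradients
optim.step()                    # update + realize in one call
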
@@ -22,12 +22,12 @@ OpType = Union[Type[UnaryOps], Type[BinaryOps], Type[ReduceOps], Type[MovementOp
 
 DEBUG = int(os.getenv("DEBUG", "0"))
 GRAPH = int(os.getenv("GRAPH", "0"))
-OPT = int(os.getenv("OPT", "1"))
+OPT = int(os.getenv("OPT", "2"))
 NOCONV = int(os.getenv("NOCONV", "0"))
 
 # TODO: movement ops that only change shape are really nops. treat them as such
 REMOVE_MOVEMENT_NOPS, MERGE_UNARY_OPS, MERGE_ELEMENTWISE_INTO_REDUCE = OPT>=1, OPT>=1, OPT>=1
-MERGE_ELEMENTWISE_OPS, MERGE_ONE_CONV_INTO_ELEMENTWISE, SHUFFLE_RESHAPE_OPS = OPT>=2, OPT>=2, OPT>=2
+MERGE_ELEMENTWISE_OPS, MERGE_ONE_CONV_INTO_ELEMENTWISE = OPT>=2, OPT>=2
 SHUFFLE_MOVEMENT_OPS = OPT>=3
 SHUFFLE_PAD_OPS = OPT>=4 # NOTE: 0/0 is NaN if you pad, so this can change the output
 
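
The NOTE on SHUFFLE_PAD_OPS is worth unpacking: if a division is shuffled past a zero pad, the padded region ends up computing 0/0 and produces NaN, so reordering pads can change the output; that is why it is gated behind OPT>=4. A small numpy illustration of the effect (numpy stands in for the lazy ops here; this is not tinygrad code):

import numpy as np

a = np.array([1.0, 2.0])
b = np.array([2.0, 4.0])

# divide first, then pad: the padded region is exactly 0
out1 = np.pad(a / b, (0, 2))                     # -> [0.5, 0.5, 0. , 0. ]

# pad first, then divide: the padded region computes 0/0 = NaN
with np.errstate(invalid="ignore"):
  out2 = np.pad(a, (0, 2)) / np.pad(b, (0, 2))   # -> [0.5, 0.5, nan, nan]

print(out1, out2)
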
@@ -251,7 +251,7 @@ class LazyBuffer:
     # some permutes are actually just reshapes
     if op == MovementOps.PERMUTE and ShapeTracker(x.shape).movement_op(op, arg).contiguous: return x.movement_op(MovementOps.RESHAPE, tuple(x.shape[i] for i in arg))
 
-    if (SHUFFLE_MOVEMENT_OPS or (SHUFFLE_RESHAPE_OPS and op == MovementOps.RESHAPE)) and x.optype == BinaryOps and x.realized is None and (SHUFFLE_PAD_OPS or op != MovementOps.PAD) and op != MovementOps.STRIDED:
+    if SHUFFLE_MOVEMENT_OPS and x.optype == BinaryOps and x.realized is None and (SHUFFLE_PAD_OPS or op != MovementOps.PAD) and op != MovementOps.STRIDED:
       # if this MovementOp is being applied to a BinaryOp, apply the MovementOp to all the BinaryOp inputs instead
       def replace_with_movement_op(y:Union[LazyOp, LazyBuffer]) -> LazyBuffer:
         if isinstance(y, LazyBuffer): return y.movement_op(op, arg)
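
Two facts this hunk relies on can be checked directly: a permute whose result is contiguous is equivalent to a reshape, and movement ops commute with elementwise ops, so a MovementOp applied to a BinaryOp's output can instead be applied to each of its inputs. A numpy sanity check of both (illustrative only, not the LazyBuffer machinery):

import numpy as np

# 1) "some permutes are actually just reshapes": permuting around a size-1 axis
#    leaves the data contiguous, so it matches a plain reshape
x = np.arange(6).reshape(1, 2, 3)
assert np.array_equal(x.transpose(1, 2, 0), x.reshape(2, 3, 1))

# 2) movement ops commute with elementwise ops: reshaping the result of a+b
#    equals adding the reshaped inputs, which is what the shuffle exploits
a = np.arange(12).reshape(3, 4).astype(np.float32)
b = np.ones((3, 4), dtype=np.float32)
assert np.array_equal((a + b).reshape(4, 3), a.reshape(4, 3) + b.reshape(4, 3))
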
@@ -9,6 +9,10 @@ class Optimizer:
     for param in self.params:
       param.grad = None
 
+  def realize(self, extra=[]):
+    # TODO: corealize
+    for p in self.params + extra: p.realize()
+
 class SGD(Optimizer):
   def __init__(self, params, lr=0.001):
     super().__init__(params)
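
The new realize(self, extra=[]) forces every parameter, plus any optimizer state passed as extra, to be computed; self.params + extra is plain list concatenation, so realize(self.m + self.v) further down realizes the params together with both moment buffers. The mutable default extra=[] is safe here because the list is only read, never mutated. A toy sketch of the call shape (hypothetical stand-ins, not tinygrad's classes):

# Hypothetical stand-ins to show what realize(self.params + extra) iterates over.
class FakeBuf:
  def __init__(self, name): self.name = name
  def realize(self): print("realizing", self.name)

params = [FakeBuf("w1"), FakeBuf("w2")]
m = [FakeBuf("m1"), FakeBuf("m2")]
v = [FakeBuf("v1"), FakeBuf("v2")]

def realize(params, extra=[]):
  # same idea as the diff: one realize() call per buffer for now ("TODO: corealize")
  for p in params + extra: p.realize()

realize(params)          # SGD: just the params
realize(params, m + v)   # Adam: params, first moments, and second moments
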
@@ -17,6 +21,7 @@ class SGD(Optimizer):
   def step(self):
     for t in self.params:
       t -= t.grad * self.lr
+    self.realize()
 
 class RMSprop(Optimizer):
   def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
@@ -29,6 +34,7 @@ class RMSprop(Optimizer):
     for i, t in enumerate(self.params):
       self.v[i] = self.decay * self.v[i] + (1.0 - self.decay) * (t.grad * t.grad)
       t -= (t.grad * self.lr).div(self.v[i].sqrt() + self.eps)
+    self.realize(self.v)
 
 class Adam(Optimizer):
   def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=1e-8):
@@ -45,3 +51,4 @@ class Adam(Optimizer):
       self.m[i] = self.b1 * self.m[i] + (1.0 - self.b1) * t.grad
       self.v[i] = self.b2 * self.v[i] + (1.0 - self.b2) * (t.grad * t.grad)
       t -= a * self.m[i].div(self.v[i].sqrt() + self.eps)
+    self.realize(self.m + self.v)
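
For reference, the update applied in this hunk is the standard Adam rule; `a` (defined earlier in the class, not shown here) plays the role of the bias-corrected step size. A plain numpy version of one step, given as the textbook formula rather than a line-for-line copy of this file:

import numpy as np

def adam_step(w, grad, m, v, t, lr=0.001, b1=0.9, b2=0.999, eps=1e-8):
  # first and second moment estimates, as in the diff above
  m = b1 * m + (1.0 - b1) * grad
  v = b2 * v + (1.0 - b2) * (grad * grad)
  # bias-corrected step size; this is the role the precomputed `a` plays
  a = lr * np.sqrt(1.0 - b2 ** t) / (1.0 - b1 ** t)
  w = w - a * m / (np.sqrt(v) + eps)
  return w, m, v

w = np.array([1.0, -2.0]); m = np.zeros(2); v = np.zeros(2)
w, m, v = adam_step(w, grad=np.array([0.1, -0.3]), m=m, v=v, t=1)
print(w)  # parameters nudged opposite the gradient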