CosineAnnealingLRWithWarmup (#10981)
examples/mlperf/lr_schedulers.py
@@ -1,3 +1,4 @@
+import math
 from tinygrad import dtypes
 from tinygrad.nn.optim import Optimizer
 
@@ -20,3 +21,19 @@ class PolynomialDecayWithWarmup(LR_Scheduler):
     warmup_lr = (self.epoch_counter * (1.0 / self.warmup)) * self.initial_lr
     x = (1 - (self.epoch_counter - self.warmup) / (self.epochs - self.warmup + 1))
     return (self.epoch_counter <= self.warmup).where(warmup_lr, (self.initial_lr - self.end_lr) * x ** self.power + self.end_lr).cast(self.optimizer.lr.dtype)
+
+class CosineAnnealingLRWithWarmup(LR_Scheduler):
+  def __init__(self, optimizer:Optimizer, base_lr, end_lr, warmup_steps:int, decay_steps:int):
+    assert warmup_steps > 0 and decay_steps > 0
+    super().__init__(optimizer)
+    self.base_lr = base_lr
+    self.end_lr = end_lr
+    self.warmup_steps = warmup_steps
+    self.decay_steps = decay_steps
+    # set lr for first warmup step
+    self.optimizer.lr.assign(self.get_lr()).realize()
+
+  def get_lr(self):
+    warmup_lr = ((self.epoch_counter+1) / self.warmup_steps) * self.base_lr
+    decay_lr = self.end_lr + 0.5 * (self.base_lr-self.end_lr) * (1 + (((self.epoch_counter+1-self.warmup_steps)/self.decay_steps) * math.pi).cos())
+    return (self.epoch_counter < self.warmup_steps).where(warmup_lr, decay_lr).cast(self.optimizer.lr.dtype)
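The new class does a linear warmup over the first warmup_steps optimizer steps, then a cosine decay from base_lr down to end_lr over the next decay_steps steps. A minimal plain-Python sketch of the same formula (the function name expected_lr and the 0-based step argument are ours for illustration; step plays the role of epoch_counter above):

import math

def expected_lr(step:int, base_lr:float, end_lr:float, warmup_steps:int, decay_steps:int) -> float:
  # linear warmup: ramps from base_lr/warmup_steps at step 0 up to base_lr at step warmup_steps-1
  if step < warmup_steps: return (step+1) / warmup_steps * base_lr
  # cosine decay: t runs from just above 0 to exactly 1 over the decay phase
  t = (step+1-warmup_steps) / decay_steps
  return end_lr + 0.5 * (base_lr-end_lr) * (1 + math.cos(t * math.pi))

On the last decay step t is exactly 1, so cos(pi) = -1 and the schedule lands on end_lr; the tests below compare this closed form against the tensor implementation step by step.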
test/external/external_test_optim.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-import unittest
+import unittest, math
 import numpy as np
 import tensorflow as tf
 import tensorflow_addons as tfa
@@ -7,11 +7,11 @@ from tensorflow.python.ops import math_ops
 from extra.lr_scheduler import LRSchedulerGroup
 
 from tinygrad.tensor import Tensor
-from tinygrad.nn.optim import LAMB, LARS, SGD, OptimizerGroup
+from tinygrad.nn.optim import LAMB, LARS, SGD, OptimizerGroup, AdamW
 
 from test.external.mlperf_resnet.lars_optimizer import LARSOptimizer
 
-from examples.mlperf.lr_schedulers import PolynomialDecayWithWarmup
+from examples.mlperf.lr_schedulers import PolynomialDecayWithWarmup, CosineAnnealingLRWithWarmup
 from test.external.mlperf_resnet.lars_util import PolynomialDecayWithWarmup as PolynomialDecayWithWarmup_tf
 
 np.random.seed(1337)
@@ -171,5 +171,26 @@ class ExternalTestOptim(unittest.TestCase):
       'warmup': steps_per_epoch * warmup_epochs,
     }, 1e-5, 1e-5, do_optim=False)
 
+
+class TestCosineAnnealingLRWithWarmup(unittest.TestCase):
+  # only tests the lr
+  def _test_lr(self, base_lr, end_lr, warmup_steps, decay_steps):
+    net = TinyNet()
+    optim = AdamW([net.W], lr=0.0)
+    tiny_lr = CosineAnnealingLRWithWarmup(optim, base_lr, end_lr, warmup_steps, decay_steps)
+    lr = []
+    for _ in range(warmup_steps+decay_steps):
+      lr.append(optim.lr.item())
+      tiny_lr.step()
+    # reimplemented in python
+    expected = []
+    for i in range(warmup_steps): expected.append((i+1)/warmup_steps*base_lr)
+    for i in range(decay_steps): expected.append(end_lr+(base_lr-end_lr)*(1+math.cos((i+1)/decay_steps*math.pi))/2)
+    np.testing.assert_allclose(lr, expected, rtol=1e-5)
+
+  def test_lr_0(self): self._test_lr(3e-4, 8e-5, 3, 5)
+  def test_lr_1(self): self._test_lr(3e-4, 8e-5, 10, 20)
+  def test_lr_llama3(self): self._test_lr(8e-5, 8e-7, 20, 100)
+
 if __name__ == '__main__':
   unittest.main()
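For context, here is a minimal sketch of driving the scheduler from a training loop; the model, loss, and batch shapes are placeholders (the 8e-5 / 8e-7 / 20 / 100 values just mirror the llama3 test case above), not something this commit prescribes:

from tinygrad import Tensor, nn
from tinygrad.nn.state import get_parameters
from tinygrad.nn.optim import AdamW
from examples.mlperf.lr_schedulers import CosineAnnealingLRWithWarmup

Tensor.training = True                    # optimizer.step() requires training mode
model = nn.Linear(16, 4)                  # placeholder model
opt = AdamW(get_parameters(model), lr=0.0)  # lr is immediately overwritten by the scheduler
sched = CosineAnnealingLRWithWarmup(opt, base_lr=8e-5, end_lr=8e-7, warmup_steps=20, decay_steps=100)

for _ in range(20 + 100):                 # warmup_steps + decay_steps
  x = Tensor.randn(8, 16)
  loss = model(x).square().mean()         # placeholder loss
  opt.zero_grad()
  loss.backward()
  opt.step()
  sched.step()                            # set the lr for the next optimizer step

Since the constructor already assigns the first warmup lr, sched.step() is called once after each optimizer step, which is the same pattern the test above uses.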