CosineAnnealingLRWithWarmup (#10981)

chenyu
2025-06-25 17:45:21 -04:00
committed by GitHub
parent 21f1c4cc09
commit 8751d47985
2 changed files with 41 additions and 3 deletions

examples/mlperf/lr_schedulers.py

@@ -1,3 +1,4 @@
+import math
 from tinygrad import dtypes
 from tinygrad.nn.optim import Optimizer

@@ -20,3 +21,19 @@ class PolynomialDecayWithWarmup(LR_Scheduler):
     warmup_lr = (self.epoch_counter * (1.0 / self.warmup)) * self.initial_lr
     x = (1 - (self.epoch_counter - self.warmup) / (self.epochs - self.warmup + 1))
     return (self.epoch_counter <= self.warmup).where(warmup_lr, (self.initial_lr - self.end_lr) * x ** self.power + self.end_lr).cast(self.optimizer.lr.dtype)
+
+class CosineAnnealingLRWithWarmup(LR_Scheduler):
+  def __init__(self, optimizer:Optimizer, base_lr, end_lr, warmup_steps:int, decay_steps:int):
+    assert warmup_steps > 0 and decay_steps > 0
+    super().__init__(optimizer)
+    self.base_lr = base_lr
+    self.end_lr = end_lr
+    self.warmup_steps = warmup_steps
+    self.decay_steps = decay_steps
+    # set lr for the first warmup step
+    self.optimizer.lr.assign(self.get_lr()).realize()
+
+  def get_lr(self):
+    warmup_lr = ((self.epoch_counter+1) / self.warmup_steps) * self.base_lr
+    decay_lr = self.end_lr + 0.5 * (self.base_lr-self.end_lr) * (1 + (((self.epoch_counter+1-self.warmup_steps)/self.decay_steps) * math.pi).cos())
+    return (self.epoch_counter < self.warmup_steps).where(warmup_lr, decay_lr).cast(self.optimizer.lr.dtype)
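
In closed form: for 0-indexed step t, the lr is (t+1)/warmup_steps * base_lr while t < warmup_steps, then end_lr + (base_lr-end_lr) * (1 + cos((t+1-warmup_steps)/decay_steps * pi)) / 2 for the decay_steps that follow, so warmup ends exactly at base_lr and decay ends exactly at end_lr. A minimal pure-python sketch of the same curve (the schedule helper is hypothetical, not part of the commit; it mirrors the reference loop in the test below):

import math

def schedule(t:int, base_lr:float, end_lr:float, warmup_steps:int, decay_steps:int) -> float:
  # linear warmup: step 0 gives base_lr/warmup_steps, step warmup_steps-1 reaches base_lr
  if t < warmup_steps: return (t+1) / warmup_steps * base_lr
  # cosine decay: half a cosine from base_lr down to end_lr over decay_steps steps
  return end_lr + 0.5 * (base_lr - end_lr) * (1 + math.cos((t+1-warmup_steps) / decay_steps * math.pi))

# e.g. the test_lr_llama3 configuration: warmup_steps=20, decay_steps=100
lrs = [schedule(t, 8e-5, 8e-7, 20, 100) for t in range(120)]
assert lrs[19] == 8e-5   # warmup lands exactly on base_lr
assert lrs[119] == 8e-7  # last decay step lands exactly on end_lr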

test/external/external_test_optim.py

@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-import unittest
+import unittest, math
 import numpy as np
 import tensorflow as tf
 import tensorflow_addons as tfa
@@ -7,11 +7,11 @@ from tensorflow.python.ops import math_ops
 from extra.lr_scheduler import LRSchedulerGroup
 from tinygrad.tensor import Tensor
-from tinygrad.nn.optim import LAMB, LARS, SGD, OptimizerGroup
+from tinygrad.nn.optim import LAMB, LARS, SGD, OptimizerGroup, AdamW
 from test.external.mlperf_resnet.lars_optimizer import LARSOptimizer
-from examples.mlperf.lr_schedulers import PolynomialDecayWithWarmup
+from examples.mlperf.lr_schedulers import PolynomialDecayWithWarmup, CosineAnnealingLRWithWarmup
 from test.external.mlperf_resnet.lars_util import PolynomialDecayWithWarmup as PolynomialDecayWithWarmup_tf

 np.random.seed(1337)
@@ -171,5 +171,26 @@ class ExternalTestOptim(unittest.TestCase):
       'warmup': steps_per_epoch * warmup_epochs,
     }, 1e-5, 1e-5, do_optim=False)
+
+class TestCosineAnnealingLRWithWarmup(unittest.TestCase):
+  # only tests the lr schedule, not the optimizer updates
+  def _test_lr(self, base_lr, end_lr, warmup_steps, decay_steps):
+    net = TinyNet()
+    optim = AdamW([net.W], lr=0.0)
+    tiny_lr = CosineAnnealingLRWithWarmup(optim, base_lr, end_lr, warmup_steps, decay_steps)
+    lr = []
+    for _ in range(warmup_steps+decay_steps):
+      lr.append(optim.lr.item())
+      tiny_lr.step()
+    # the same schedule reimplemented in plain python
+    expected = []
+    for i in range(warmup_steps): expected.append((i+1)/warmup_steps*base_lr)
+    for i in range(decay_steps): expected.append(end_lr+(base_lr-end_lr)*(1+math.cos((i+1)/decay_steps*math.pi))/2)
+    np.testing.assert_allclose(lr, expected, rtol=1e-5)
+
+  def test_lr_0(self): self._test_lr(3e-4, 8e-5, 3, 5)
+  def test_lr_1(self): self._test_lr(3e-4, 8e-5, 10, 20)
+  def test_lr_llama3(self): self._test_lr(8e-5, 8e-7, 20, 100)
+
 if __name__ == '__main__':
   unittest.main()
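
For context, a minimal sketch of how the new scheduler slots into a training loop (the toy parameter W and the squared-norm loss are hypothetical, not from this commit). The constructor already assigns the lr for the first warmup step, so step() is called once per iteration after optimizer.step():

from tinygrad import Tensor
from tinygrad.nn.optim import AdamW
from examples.mlperf.lr_schedulers import CosineAnnealingLRWithWarmup

W = Tensor.randn(4, 4, requires_grad=True)  # toy parameter
optim = AdamW([W], lr=0.0)  # lr=0.0 is immediately overwritten by the scheduler
sched = CosineAnnealingLRWithWarmup(optim, base_lr=3e-4, end_lr=8e-5, warmup_steps=3, decay_steps=5)

with Tensor.train():
  for step in range(3 + 5):
    loss = (W * W).mean()
    optim.zero_grad()
    loss.backward()
    optim.step()
    print(step, optim.lr.item())  # the lr that was used for this step
    sched.step()  # advances epoch_counter and assigns the lr for the next step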