use at least float32 for optim.lr (#4297)

* use at least float32 for optim.lr

When doing mixed-precision training (float32 weights, default_float=half), still use float32 to store lr.
It would have been upcast later in the actual weight update anyway, but by then precision would already have been lost.
This improved ResNet convergence significantly.

* undo type annotation
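
The optimizer-side change (in tinygrad/nn/optim.py, one of the three changed files) is not reproduced in the excerpt below. A minimal sketch of the idea, assuming a hypothetical helper make_lr_tensor and a simple itemsize check rather than tinygrad's actual implementation:

from tinygrad import Tensor, dtypes

def make_lr_tensor(lr: float, device=None) -> Tensor:
  # illustrative sketch only: store lr in at least float32 even when
  # dtypes.default_float is half, so small lr values keep their precision
  lr_dtype = dtypes.default_float if dtypes.default_float.itemsize >= 4 else dtypes.float32
  return Tensor([lr], requires_grad=False, device=device, dtype=lr_dtype)

The weights can stay in half; only the lr value needs the wider dtype, since it is upcast into the weight update anyway and would otherwise carry the precision it lost when rounded to half.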
Author: chenyu
Date: 2024-04-25 14:42:28 -04:00
Committed by: GitHub
Parent: 6f792b727b
Commit: 5ae252ae83

3 changed files with 20 additions and 6 deletions

@@ -1,9 +1,10 @@
 import numpy as np
 import torch
 import unittest
-from tinygrad import Tensor, Device
+from tinygrad import Tensor, Device, dtypes
 from tinygrad.nn.optim import Adam, SGD, AdamW
 from tinygrad.helpers import CI
+from test.helpers import is_dtype_supported
 
 np.random.seed(1337)
 x_init = np.random.randn(1,4).astype(np.float32)
@@ -105,5 +106,14 @@ class TestOptim(unittest.TestCase):
     np.testing.assert_allclose(losses[0], losses[1], atol=1e-4, rtol=0)
 
+  @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
+  def test_mixed_precision(self):
+    old_default_float, dtypes.default_float = dtypes.default_float, dtypes.half
+    # weight update would overflow without upcasting
+    self._test_sgd(10, {'lr': 1e10}, 1e-6, 3e-4)
+    self._test_adam(1, {'lr': 1e10}, 1e-4, 1e-4)
+    self._test_adamw(1, {'lr': 1e10}, 1e-4, 1e-4)
+    dtypes.default_float = old_default_float
+
 if __name__ == '__main__':
   unittest.main()
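
For context on the test's lr=1e10 and the "weight update would overflow without upcasting" comment, a small NumPy illustration (not part of the commit): float16 saturates at about 65504 and also loses precision on small magnitudes, which is exactly what keeping lr in float32 avoids.

import numpy as np

print(np.float16(1e10))   # inf: float16 overflows far below 1e10 (max is ~65504)
print(np.float32(1e10))   # 1e+10
print(np.float16(1e-5))   # 1.001e-05: subnormal, only ~3 significant digits survive
print(np.float32(1e-5))   # 1e-05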