use at least float32 for optim.lr (#4297)
* use at least float32 for optim.lr: when doing mixed precision training (float32 weights, default_float=half), still store lr in float32. It would have been upcast later in the actual weight update anyway, but by then the precision would already have been lost. This improved ResNet convergence significantly.
* undo type annotation
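The optimizer change itself is not part of this excerpt, which only shows the new test. As a minimal sketch of the idea only, assuming tinygrad's public Tensor and dtypes API and using a hypothetical helper name make_lr_tensor, the fix amounts to keeping lr in a dtype of at least float32 even when dtypes.default_float is half:

# Minimal sketch of the idea only, not the actual tinygrad/nn/optim.py change
# (that file's diff is not shown in this excerpt); make_lr_tensor is a
# hypothetical helper name.
from tinygrad import Tensor, dtypes

def make_lr_tensor(lr: float) -> Tensor:
  # keep lr in at least float32: use the default float only if it is already
  # float32 or wider, otherwise upcast so half cannot round the lr value away
  dtype = dtypes.default_float if dtypes.default_float in (dtypes.float32, dtypes.float64) else dtypes.float32
  return Tensor([lr], requires_grad=False, dtype=dtype)

# mirrors the test below: with default_float=half the lr tensor stays float32
old_default_float, dtypes.default_float = dtypes.default_float, dtypes.half
assert make_lr_tensor(3e-4).dtype == dtypes.float32
dtypes.default_float = old_default_float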
@@ -1,9 +1,10 @@
 import numpy as np
 import torch
 import unittest
-from tinygrad import Tensor, Device
+from tinygrad import Tensor, Device, dtypes
 from tinygrad.nn.optim import Adam, SGD, AdamW
 from tinygrad.helpers import CI
+from test.helpers import is_dtype_supported
 
 np.random.seed(1337)
 x_init = np.random.randn(1,4).astype(np.float32)
@@ -105,5 +106,14 @@ class TestOptim(unittest.TestCase):
 
     np.testing.assert_allclose(losses[0], losses[1], atol=1e-4, rtol=0)
 
+  @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
+  def test_mixed_precision(self):
+    old_default_float, dtypes.default_float = dtypes.default_float, dtypes.half
+    # weight update would overflow without upcasting
+    self._test_sgd(10, {'lr': 1e10}, 1e-6, 3e-4)
+    self._test_adam(1, {'lr': 1e10}, 1e-4, 1e-4)
+    self._test_adamw(1, {'lr': 1e10}, 1e-4, 1e-4)
+    dtypes.default_float = old_default_float
+
 if __name__ == '__main__':
   unittest.main()
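The lr=1e10 values in the new test are deliberately extreme: the largest finite half value is 65504, so any update term scaled by 1e10 overflows to inf unless it is computed in at least float32. A quick numpy check (not part of the commit) illustrates the difference:

import numpy as np

print(np.finfo(np.float16).max)  # 65504.0, the largest finite half value
print(np.float16(1e10))          # inf: overflows in half precision
print(np.float32(1e10))          # 1e+10: representable (approximately) in float32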