diff --git a/test/test_tensor.py b/test/test_tensor.py
index 2515d03cc0..8979a4cb12 100644
--- a/test/test_tensor.py
+++ b/test/test_tensor.py
@@ -145,5 +145,14 @@ class TestTinygrad(unittest.TestCase):
     # coarse approx. since a "big" eps and the non-linearities of the model
     self.assertFalse(gradcheck(tiny_func, tiny_x, eps = 0.1))
 
+  def test_random_fns_are_deterministic_with_seed(self):
+    for random_fn in [Tensor.randn, Tensor.uniform, Tensor.scaled_uniform, Tensor.glorot_uniform]:
+      with self.subTest(msg=f"Tensor.{random_fn.__name__}"):
+        Tensor.manual_seed(1337)
+        a = random_fn(10,10)
+        Tensor.manual_seed(1337)
+        b = random_fn(10,10)
+        np.testing.assert_allclose(a.numpy(), b.numpy())
+
 if __name__ == '__main__':
   unittest.main()
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 96198300ad..3e1cd9a507 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -35,6 +35,7 @@ import tinygrad.mlops as mlops
 
 class Tensor:
   __deletable__ = ('_ctx',)
+  _rng : ClassVar[np.random.Generator] = np.random.default_rng()
   training : ClassVar[bool] = False
   no_grad : ClassVar[bool] = False
 
@@ -76,6 +77,10 @@ class Tensor:
   @property
   def device(self) -> str: return self.lazydata.device
 
+  @staticmethod
+  def manual_seed(seed=None):
+    Tensor._rng = np.random.default_rng(seed=seed)
+
   # ***** data handlers ****
 
   def realize(self) -> Tensor:
@@ -124,7 +129,7 @@ class Tensor:
   def empty(cls, *shape, **kwargs): return cls(np.empty(shape, dtype=np.float32), **kwargs)
 
   @classmethod
-  def randn(cls, *shape, **kwargs): return cls(np.random.default_rng().standard_normal(size=shape, dtype=np.float32), **kwargs)
+  def randn(cls, *shape, **kwargs): return cls(Tensor._rng.standard_normal(size=shape, dtype=np.float32), **kwargs)
 
   @classmethod
   def arange(cls, stop, start=0, **kwargs): return cls(np.arange(start=start, stop=stop, dtype=np.float32), **kwargs)
@@ -133,14 +138,14 @@ class Tensor:
   # Return random number between -1 and 1
   # NOTE: this behavior changed from depending on the shape to not
   @classmethod
-  def uniform(cls, *shape, **kwargs): return cls((np.random.default_rng().random(size=shape, dtype=np.float32) * 2 - 1), **kwargs)
+  def uniform(cls, *shape, **kwargs): return cls((Tensor._rng.random(size=shape, dtype=np.float32) * 2 - 1), **kwargs)
 
   @classmethod
-  def scaled_uniform(cls, *shape, **kwargs): return cls((np.random.default_rng().random(size=shape, dtype=np.float32) * 2 - 1) * (prod(shape)**-0.5), **kwargs)
+  def scaled_uniform(cls, *shape, **kwargs): return cls((Tensor._rng.random(size=shape, dtype=np.float32) * 2 - 1) * (prod(shape)**-0.5), **kwargs)
 
   @classmethod
   # https://www.tensorflow.org/api_docs/python/tf/keras/initializers/GlorotUniform
-  def glorot_uniform(cls, *shape, **kwargs): return cls((np.random.default_rng().random(size=shape, dtype=np.float32) * 2 - 1) * ((6/(shape[0]+prod(shape[1:])))**0.5), **kwargs)
+  def glorot_uniform(cls, *shape, **kwargs): return cls((Tensor._rng.random(size=shape, dtype=np.float32) * 2 - 1) * ((6/(shape[0]+prod(shape[1:])))**0.5), **kwargs)
 
   @classmethod
   def eye(cls, dim, **kwargs): return cls(np.eye(dim, dtype=np.float32), **kwargs)
@@ -282,7 +287,7 @@ class Tensor:
 
   def dropout(self, p=0.5) -> Tensor:
     if not Tensor.training: return self
-    _mask : np.ndarray = np.asarray(np.random.binomial(1, 1.0-p, size=self.shape), dtype=self.dtype)
+    _mask : np.ndarray = np.asarray(Tensor._rng.binomial(1, 1.0-p, size=self.shape), dtype=self.dtype)
     return self * Tensor(_mask, requires_grad=False, device=self.device) * (1/(1.0 - p))
 
   # TODO: support arbitrary strides