mirror of https://github.com/tinygrad/tinygrad.git
Clip model updates for Stable Diffusion mlperf training (#12313)
* stable diffusion mlperf clip changes
* add clip tests
* set gelu as attribute
* add more tests
* factor out GPUS
* rerun CI
* add imports to if blocks
* remove unneeded axis
* add clip tests to CI
* move clip tests
* add deps, disable max buf size
@@ -17,6 +17,10 @@ def he_normal(*shape, a: float = 0.00, **kwargs) -> Tensor:
   std = math.sqrt(2.0 / (1 + a ** 2)) / math.sqrt(prod(argfix(*shape)[1:])) / 0.87962566103423978
   return std * rand_truncn(*shape, **kwargs)

+# Stable Diffusion v2 training uses default torch gelu, which doesn't use tanh approximation
+def gelu_erf(x:Tensor) -> Tensor:
+  return 0.5 * x * (1.0 + (x / 1.4142135623730951).erf())
+
 class Conv2dHeNormal(nn.Conv2d):
   def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
     super().__init__(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
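Note (not part of the diff): the constant 0.87962566103423978 in he_normal is the standard deviation of a unit normal truncated to [-2, 2], so dividing by it compensates for the variance lost when rand_truncn clips its samples, presumably at about two standard deviations. A quick sanity check, assuming scipy is available (it is not a tinygrad dependency):

  # hypothetical check, not from the commit: std of N(0, 1) truncated to [-2, 2]
  from scipy.stats import truncnorm
  print(truncnorm.std(-2, 2))  # ~0.8796256610342398, matching the hard-coded constant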
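Note (not part of the diff): gelu_erf computes the exact GELU, 0.5 * x * (1 + erf(x / sqrt(2))), which is what default torch gelu uses, as opposed to the common tanh approximation. A minimal sketch of the difference using plain Python floats (the helper names here are illustrative only, not from the commit):

  import math

  def gelu_exact(x: float) -> float:
    # exact GELU, same formula as gelu_erf above
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

  def gelu_tanh_approx(x: float) -> float:
    # common tanh approximation used by many gelu implementations
    return 0.5 * x * (1.0 + math.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

  for v in (-2.0, -0.5, 0.5, 2.0):
    print(v, gelu_exact(v) - gelu_tanh_approx(v))  # small pointwise differences between the two forms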