Merge branch 'master' into retinanet_mlperf

This commit is contained in:
Francis Lata
2024-11-18 04:42:57 -08:00
57 changed files with 1197 additions and 1047 deletions

View File

@@ -114,7 +114,7 @@ class SpeedyResNet:
def __call__(self, x, training=True):
# pad to 32x32 because whitening conv creates 31x31 images that are awfully slow to compute with
# TODO: remove the pad but instead let the kernel optimize itself
forward = lambda x: x.conv2d(self.whitening).pad2d((1,0,0,1)).sequential(self.net)
forward = lambda x: x.conv2d(self.whitening).pad((1,0,0,1)).sequential(self.net)
return forward(x) if training else (forward(x) + forward(x[..., ::-1])) / 2.
# hyper-parameters were exactly the same as the original repo

View File

@@ -8,7 +8,7 @@ from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GlobalCou
from tinygrad.engine.schedule import create_schedule
from tinygrad.engine.realize import get_kernel, run_schedule
from tinygrad.engine.memory import memory_planner
from tinygrad.ops import MetaOps, Ops
from tinygrad.ops import Ops
TIMING = getenv("TIMING")

View File

@@ -45,7 +45,7 @@ class ConvBlock:
def __call__(self, input):
x = input.reshape(shape=(-1, self.inp, self.w, self.h))
for cweight, cbias in zip(self.cweights, self.cbiases):
x = x.pad2d(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
x = x.pad(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
x = self._bn(x)
x = self._seb(x)
return x

View File

@@ -361,21 +361,21 @@ class SineGen:
self.dim = self.harmonic_num + 1
def _f02uv(self, f0): return (f0 > self.voiced_threshold).float() #generate uv signal
def _f02sine(self, f0_values):
def padDiff(x : Tensor): return (x.pad2d((0,0,-1,1)) - x).pad2d((0,0,0,-1))
def padDiff(x : Tensor): return (x.pad((0,0,-1,1)) - x).pad((0,0,0,-1))
def mod(x: Tensor, n: int) -> Tensor: return x - n * x.div(n).floor() # this is what the % operator does in pytorch.
rad_values = mod((f0_values / self.sampling_rate) , 1) # convert to F0 in rad
rand_ini = Tensor.rand(f0_values.shape[0], f0_values.shape[2], device=f0_values.device) # initial phase noise
#rand_ini[:, 0] = 0
m = Tensor.ones(f0_values.shape[0]).unsqueeze(1).pad2d((0,f0_values.shape[2]-1,0,0)).cast(dtypes.bool)
m = Tensor.ones(f0_values.shape[0]).unsqueeze(1).pad((0,f0_values.shape[2]-1,0,0)).cast(dtypes.bool)
m = tilde(m)
rand_ini = m.where(rand_ini, 0)
#rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
tmp = rad_values[:, 0, :] + rand_ini
m = Tensor.ones(tmp.shape).pad2d((0,0,0,rad_values.shape[1]-1,0)).cast(dtypes.bool)
m = Tensor.ones(tmp.shape).pad((0,0,0,rad_values.shape[1]-1,0)).cast(dtypes.bool)
m = tilde(m)
tmp = tmp.unsqueeze(1).pad2d((0,0,0,rad_values.shape[1]-1,0))
tmp = tmp.unsqueeze(1).pad((0,0,0,rad_values.shape[1]-1,0))
rad_values = m.where(rad_values, tmp)
tmp_over_one = mod(rad_values.cumsum(1), 1)
@@ -383,7 +383,7 @@ class SineGen:
cumsum_shift = Tensor.zeros_like(rad_values)
#cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
tmp_over_one_idx = (tmp_over_one_idx * -1.0).pad2d((0,0,1,0))
tmp_over_one_idx = (tmp_over_one_idx * -1.0).pad((0,0,1,0))
cumsum_shift = tmp_over_one_idx
sines = ((rad_values + cumsum_shift).cumsum(1) * 2 * np.pi).sin()

View File

@@ -137,7 +137,7 @@ class Resample:
waveform = waveform.reshape(-1, shape[-1]) # pack batch
num_wavs, length = waveform.shape
target_length = int(math.ceil(new_freq * length / orig_freq))
waveform = waveform.pad2d((self.width, self.width + orig_freq))
waveform = waveform.pad((self.width, self.width + orig_freq))
resampled = waveform[:, None].conv2d(self.kernel, stride=orig_freq)
resampled = resampled.transpose(1, 2).reshape(num_wavs, -1)
resampled = resampled[..., :target_length]

View File

@@ -282,7 +282,7 @@ class SPPF:
self.cv2 = Conv_Block(c_ * 4, c2, 1, 1, padding=None)
# TODO: this pads with 0s, whereas torch function pads with -infinity. This results in a < 2% difference in prediction which does not make a difference visually.
self.maxpool = lambda x : x.pad2d((k // 2, k // 2, k // 2, k // 2)).max_pool2d(kernel_size=k, stride=1)
self.maxpool = lambda x : x.pad((k // 2, k // 2, k // 2, k // 2)).max_pool2d(kernel_size=k, stride=1)
def __call__(self, x):
x = self.cv1(x)