Merge branch 'master' into retinanet_mlperf

2026-04-29 03:00:14 -04:00 · 2024-11-18 04:42:57 -08:00
parent a0c0a77f54 6ea4a173e7
commit 99efa2cfde
57 changed files with 1197 additions and 1047 deletions
--- a/examples/hlb_cifar10.py
+++ b/examples/hlb_cifar10.py
@@ -114,7 +114,7 @@ class SpeedyResNet:
  def __call__(self, x, training=True):
    # pad to 32x32 because whitening conv creates 31x31 images that are awfully slow to compute with
    # TODO: remove the pad but instead let the kernel optimize itself
-    forward = lambda x: x.conv2d(self.whitening).pad2d((1,0,0,1)).sequential(self.net)
+    forward = lambda x: x.conv2d(self.whitening).pad((1,0,0,1)).sequential(self.net)
    return forward(x) if training else (forward(x) + forward(x[..., ::-1])) / 2.

 # hyper-parameters were exactly the same as the original repo
--- a/examples/llm.c/export.py
+++ b/examples/llm.c/export.py
@@ -8,7 +8,7 @@ from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GlobalCou
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.engine.realize import get_kernel, run_schedule
 from tinygrad.engine.memory import memory_planner
-from tinygrad.ops import MetaOps, Ops
+from tinygrad.ops import Ops

 TIMING = getenv("TIMING")

--- a/examples/serious_mnist.py
+++ b/examples/serious_mnist.py
@@ -45,7 +45,7 @@ class ConvBlock:
  def __call__(self, input):
    x = input.reshape(shape=(-1, self.inp, self.w, self.h))
    for cweight, cbias in zip(self.cweights, self.cbiases):
-      x = x.pad2d(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
+      x = x.pad(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
    x = self._bn(x)
    x = self._seb(x)
    return x
--- a/examples/so_vits_svc.py
+++ b/examples/so_vits_svc.py
@@ -361,21 +361,21 @@ class SineGen:
    self.dim = self.harmonic_num + 1
  def _f02uv(self, f0): return (f0 > self.voiced_threshold).float()  #generate uv signal
  def _f02sine(self, f0_values):
-    def padDiff(x : Tensor): return (x.pad2d((0,0,-1,1)) - x).pad2d((0,0,0,-1))
+    def padDiff(x : Tensor): return (x.pad((0,0,-1,1)) - x).pad((0,0,0,-1))
    def mod(x: Tensor, n: int) -> Tensor: return x - n * x.div(n).floor()  # this is what the % operator does in pytorch.
    rad_values = mod((f0_values / self.sampling_rate) , 1)  # convert to F0 in rad
    rand_ini = Tensor.rand(f0_values.shape[0], f0_values.shape[2], device=f0_values.device)  # initial phase noise

    #rand_ini[:, 0] = 0
-    m = Tensor.ones(f0_values.shape[0]).unsqueeze(1).pad2d((0,f0_values.shape[2]-1,0,0)).cast(dtypes.bool)
+    m = Tensor.ones(f0_values.shape[0]).unsqueeze(1).pad((0,f0_values.shape[2]-1,0,0)).cast(dtypes.bool)
    m = tilde(m)
    rand_ini = m.where(rand_ini, 0)

    #rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
    tmp = rad_values[:, 0, :] + rand_ini
-    m = Tensor.ones(tmp.shape).pad2d((0,0,0,rad_values.shape[1]-1,0)).cast(dtypes.bool)
+    m = Tensor.ones(tmp.shape).pad((0,0,0,rad_values.shape[1]-1,0)).cast(dtypes.bool)
    m = tilde(m)
-    tmp = tmp.unsqueeze(1).pad2d((0,0,0,rad_values.shape[1]-1,0))
+    tmp = tmp.unsqueeze(1).pad((0,0,0,rad_values.shape[1]-1,0))
    rad_values = m.where(rad_values, tmp)

    tmp_over_one = mod(rad_values.cumsum(1), 1)
@@ -383,7 +383,7 @@ class SineGen:
    cumsum_shift = Tensor.zeros_like(rad_values)

    #cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0
-    tmp_over_one_idx = (tmp_over_one_idx * -1.0).pad2d((0,0,1,0))
+    tmp_over_one_idx = (tmp_over_one_idx * -1.0).pad((0,0,1,0))
    cumsum_shift = tmp_over_one_idx

    sines = ((rad_values + cumsum_shift).cumsum(1) * 2 * np.pi).sin()
--- a/examples/sovits_helpers/preprocess.py
+++ b/examples/sovits_helpers/preprocess.py
@@ -137,7 +137,7 @@ class Resample:
    waveform = waveform.reshape(-1, shape[-1])  # pack batch
    num_wavs, length = waveform.shape
    target_length = int(math.ceil(new_freq * length / orig_freq))
-    waveform = waveform.pad2d((self.width, self.width + orig_freq))
+    waveform = waveform.pad((self.width, self.width + orig_freq))
    resampled = waveform[:, None].conv2d(self.kernel, stride=orig_freq)
    resampled = resampled.transpose(1, 2).reshape(num_wavs, -1)
    resampled = resampled[..., :target_length]
--- a/examples/yolov8.py
+++ b/examples/yolov8.py
@@ -282,7 +282,7 @@ class SPPF:
    self.cv2 = Conv_Block(c_ * 4, c2, 1, 1, padding=None)

    # TODO: this pads with 0s, whereas torch function pads with -infinity. This results in a < 2% difference in prediction which does not make a difference visually.
-    self.maxpool = lambda x : x.pad2d((k // 2, k // 2, k // 2, k // 2)).max_pool2d(kernel_size=k, stride=1)
+    self.maxpool = lambda x : x.pad((k // 2, k // 2, k // 2, k // 2)).max_pool2d(kernel_size=k, stride=1)

  def __call__(self, x):
    x = self.cv1(x)