From f8f026e8bb570d92c544cbef7b260febf1ff30e7 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Fri, 24 Feb 2023 21:48:47 -0800 Subject: [PATCH] oversized expand for HLOP convs --- test/test_ops.py | 1 + test/test_symbolic.py | 4 ++++ tinygrad/tensor.py | 7 +++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index fb53063edf..f219b4c9f9 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -91,6 +91,7 @@ class TestOps(unittest.TestCase): helper_test_op([(45,65)], lambda x: x.sigmoid(), Tensor.sigmoid) def test_softplus(self): helper_test_op([(45,65)], lambda x: torch.nn.functional.softplus(x), Tensor.softplus, atol=1e-6, grad_atol=1e-6) + @unittest.skip("not supported in older pytorch") def test_gelu(self): helper_test_op([(45,65)], lambda x: torch.nn.functional.gelu(x, approximate="tanh"), Tensor.gelu) def test_quick_gelu(self): diff --git a/test/test_symbolic.py b/test/test_symbolic.py index b6288c8e84..6b7d76f697 100644 --- a/test/test_symbolic.py +++ b/test/test_symbolic.py @@ -44,6 +44,10 @@ class TestSymbolic(unittest.TestCase): @unittest.skip("mod max is wrong") def test_mod_factor(self): self.helper_test_variable(Variable.sum([Variable("a", 0, 7)*100, Variable("b", 0, 3)*50]) % 100, 0, 50, "(((a*100)+(b*50))%100)") + + @unittest.skip("this doesn't work yet") + def test_mod_mul(self): + self.helper_test_variable((Variable("a", 0, 6)*10)%9, 0, 6, "a") def test_sum_0(self): self.helper_test_variable(Variable.sum([Variable("a", 0, 7)]), 0, 7, "a") diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index ef91dfefe0..c602f8b1ef 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -1,6 +1,6 @@ # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py from __future__ import annotations -import functools, itertools +import math, functools, itertools import numpy as np from typing import List, Tuple, Callable, Optional, ClassVar, Type, Union from tinygrad.helpers import prod, argfix, make_pair, getenv, DEBUG @@ -284,7 +284,10 @@ class Tensor: oy = (iy - dy * (ky-1) - 1)//sy + 1 ox = (ix - dx * (kx-1) - 1)//sx + 1 # duplicate the inputs for each of the kernels - xup = self.reshape(bs, c, 1, iy, 1, ix).expand(bs, c, ky, iy, kx, ix).reshape(bs, c, ky*iy, kx*ix) + #xup = self.reshape(bs, c, 1, iy, 1, ix).expand(bs, c, ky, iy, kx, ix).reshape(bs, c, ky*iy, kx*ix) + # NOTE: if you oversize this, you can avoid the ZeroView creation. remove when optimizer can fix + ey, ex = math.ceil(ky*(iy+dy) / iy), math.ceil(kx*(ix+dx) / ix) + xup = self.reshape(bs, c, 1, iy, 1, ix).expand(bs, c, ey, iy, ex, ix).reshape(bs, c, ey*iy, ex*ix) # slide by dilation xup = xup.slice(((0,bs), (0,c), (0,ky*(iy+dy)), (0,kx*(ix+dx)))) xup = xup.reshape(bs, c, ky, iy+dy, kx, ix+dx)