NOOP means contiguous

George Hotz
2023-03-01 21:54:51 -08:00
parent d062cc82b8
commit fca055bd66
3 changed files with 10 additions and 4 deletions

test/test_speed_v_torch.py

@@ -1,5 +1,8 @@
 import os
 os.environ["NVIDIA_TF32_OVERRIDE"] = "0"
+os.environ["MKL_NUM_THREADS"] = "1"
+os.environ["NUMEXPR_NUM_THREADS"] = "1"
+os.environ["OMP_NUM_THREADS"] = "1"
 import unittest
 import torch
 torch.set_num_threads(1)
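
A note on the three added lines above: MKL, numexpr, and OpenMP size their thread pools when the libraries initialize, so the pinning only takes effect because it happens before `import torch`. A minimal sketch of that ordering (plain numpy stands in for torch here; the matmul is illustrative, not from the commit):

```python
import os
# Must be set before the libraries are imported: thread pools are sized at init.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

import numpy as np  # BLAS now runs single-threaded, making timings reproducible
a = np.ones((512, 512), dtype=np.float32)
print((a @ a).sum())
```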
@@ -10,7 +13,7 @@ from functools import partial
 from tinygrad.ops import GlobalCounters
 from tinygrad.tensor import Tensor
 from tinygrad.nn import Conv2d
-from tinygrad.helpers import colored, getenv, DEBUG
+from tinygrad.helpers import colored, getenv, DEBUG, Timing
 from tinygrad.jit import TinyJit
 IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")]
@@ -39,6 +42,9 @@ def helper_test_speed(f1, *args):
   # force syncing
   [x.numpy() if isinstance(x, Tensor) or str(torch_device) == "cpu" else x.cpu().numpy() for x in args if x is not None]
+
+  # L2 defeat (64 MB)
+  np.zeros((4096, 4096), dtype=np.float32) - 1
   GlobalCounters.global_ops = 0
   GlobalCounters.global_mem = 0
   if DEBUG >= 4: print("benchmark start")
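
The added `np.zeros((4096, 4096), dtype=np.float32) - 1` streams 64 MB through memory between syncing and the timed region, which should evict the benchmark's data from the CPU caches so each run starts cold. A standalone sketch of that cache-defeat pattern, with hypothetical names (`defeat_l2`, `bench_sketch`) and a simplified timing loop:

```python
import time
import numpy as np

def defeat_l2(mbytes: int = 64) -> None:
  # Stream a buffer larger than the last-level cache through memory so
  # data cached by the previous run is evicted before the next timing.
  n = int((mbytes * 1024 * 1024 // 4) ** 0.5)  # 4096 for 64 MB of float32
  np.zeros((n, n), dtype=np.float32) - 1  # the subtraction forces a full read+write pass

def bench_sketch(f, runs: int = 8) -> float:
  # Return the best wall-clock time (ms) over `runs` cold-cache invocations.
  best = float("inf")
  for _ in range(runs):
    defeat_l2()
    st = time.perf_counter()
    f()
    best = min(best, time.perf_counter() - st)
  return best * 1000
```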

tinygrad/runtime/ops_cpu.py

@@ -5,7 +5,7 @@ from tinygrad.ops import UnaryOps, BinaryOps, MovementOps, ReduceOps, FusedOps,
 from tinygrad.helpers import shape_to_axis
 base_fxn_for_op : Dict[Op, Callable] = {
-  UnaryOps.NOOP: lambda x: x[:], UnaryOps.NEG: lambda x: -x, UnaryOps.NOT: lambda x: (1.0 - x),
+  UnaryOps.NEG: lambda x: -x, UnaryOps.NOT: lambda x: (1.0 - x),
   BinaryOps.ADD: operator.add, BinaryOps.SUB: operator.sub, BinaryOps.MUL: operator.mul, BinaryOps.DIV: operator.truediv, BinaryOps.POW: operator.pow,
   ReduceOps.SUM: lambda x, new_shape: x.sum(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
   ReduceOps.MAX: lambda x, new_shape: (x.amax if hasattr(x, 'amax') else x.max)(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
@@ -23,7 +23,7 @@ def einsum_mulacc(einsum, get_strides, expand):
   return mulacc
 numpy_fxn_for_op : Dict[Op, Callable] = {**base_fxn_for_op, **{
-  UnaryOps.EXP: lambda x: np.exp(x), UnaryOps.LOG: lambda x: np.log(x),
+  UnaryOps.NOOP: lambda x: np.ascontiguousarray(x), UnaryOps.EXP: lambda x: np.exp(x), UnaryOps.LOG: lambda x: np.log(x),
   BinaryOps.MAX: np.maximum, BinaryOps.CMPEQ: lambda x,y: (x==y).astype(np.float32),
   MovementOps.FLIP: lambda x, axis: np.flip(x, axis), MovementOps.PERMUTE: lambda x, order: x.transpose(order),
   MovementOps.PAD: lambda x, padding: np.pad(x, padding), MovementOps.EXPAND: lambda x, new_shape: np.broadcast_to(x, new_shape),
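
This hunk is the heart of the commit: on the numpy backend, `UnaryOps.NOOP` was a free view (`x[:]` from `base_fxn_for_op`) and is now `np.ascontiguousarray`, so a NOOP guarantees a C-contiguous buffer. A quick demonstration of the difference in plain numpy (not tinygrad API):

```python
import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3).T  # a transposed view: not C-contiguous
print(x.flags["C_CONTIGUOUS"])      # False

view = x[:]                         # the old NOOP: still a non-contiguous view
print(view.flags["C_CONTIGUOUS"])   # False
print(np.shares_memory(x, view))    # True: no copy was made

cont = np.ascontiguousarray(x)      # the new NOOP: copies into a C-contiguous buffer
print(cont.flags["C_CONTIGUOUS"])   # True
print(np.shares_memory(x, cont))    # False: a fresh buffer
```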

tinygrad/runtime/ops_torch.py

@@ -5,7 +5,7 @@ from tinygrad.helpers import getenv
 from tinygrad.runtime.ops_cpu import base_fxn_for_op, einsum_mulacc
 torch_fxn_for_op : Dict[Op, Callable] = {**base_fxn_for_op, **{
-  UnaryOps.EXP: lambda x: x.exp(), UnaryOps.LOG: lambda x: x.log(),
+  UnaryOps.NOOP: lambda x: x.contiguous(), UnaryOps.EXP: lambda x: x.exp(), UnaryOps.LOG: lambda x: x.log(),
   BinaryOps.MAX: torch.maximum, BinaryOps.CMPEQ: lambda x,y: (x==y).float(),
   MovementOps.PAD: lambda x, padding: torch.nn.functional.pad(x, [item for sublist in padding[::-1] for item in sublist]),
   FusedOps.MULACC: einsum_mulacc(torch.einsum, lambda x: x.stride(), lambda x,s: x.expand(s))
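
The torch backend gets the matching semantics through `Tensor.contiguous()`, which materializes a contiguous copy when the input is a strided view. A quick check in plain torch:

```python
import torch

x = torch.arange(6, dtype=torch.float32).reshape(2, 3).t()  # transposed view
print(x.is_contiguous())             # False

y = x.contiguous()                   # the new NOOP: materializes a contiguous copy
print(y.is_contiguous())             # True
print(y.data_ptr() == x.data_ptr())  # False: new storage backs the copy
```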