Mirror of https://github.com/tinygrad/tinygrad.git
NOOP means contiguous
@@ -1,5 +1,8 @@
import os
os.environ["NVIDIA_TF32_OVERRIDE"] = "0"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
import unittest
import torch
torch.set_num_threads(1)
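The block above pins the math libraries to a single thread (and disables TF32 matmuls via NVIDIA_TF32_OVERRIDE) before torch is imported, so the speed comparison between torch and tinygrad is not skewed by multithreading. A minimal standalone sketch of the same idea; the env var names come from the hunk above, the timing loop is illustrative only:

import os
# thread caps must be set before the libraries that read them are imported
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
import time
import numpy as np
a = np.random.rand(1024, 1024).astype(np.float32)
b = np.random.rand(1024, 1024).astype(np.float32)
st = time.perf_counter()
a @ b
print(f"single-threaded matmul took {(time.perf_counter() - st) * 1e3:.2f} ms")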
@@ -10,7 +13,7 @@ from functools import partial
from tinygrad.ops import GlobalCounters
from tinygrad.tensor import Tensor
from tinygrad.nn import Conv2d
-from tinygrad.helpers import colored, getenv, DEBUG
+from tinygrad.helpers import colored, getenv, DEBUG, Timing
from tinygrad.jit import TinyJit

IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")]
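IN_CHANS selects which input-channel counts the conv benchmarks sweep; tinygrad's getenv helper reads it from the environment with "4,16,64" as the default. A rough standalone equivalent of that parsing, using plain os.getenv rather than tinygrad.helpers.getenv (assumption: getenv falls back to the default string when the variable is unset):

import os
# e.g. run the test with IN_CHANS=8,32 to benchmark only 8- and 32-channel convs
IN_CHANS = [int(x) for x in os.getenv("IN_CHANS", "4,16,64").split(",")]
print(IN_CHANS)  # [4, 16, 64] when IN_CHANS is unset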
@@ -39,6 +42,9 @@ def helper_test_speed(f1, *args):
  # force syncing
  [x.numpy() if isinstance(x, Tensor) or str(torch_device) == "cpu" else x.cpu().numpy() for x in args if x is not None]

  # L2 defeat (64 MB)
  np.zeros((4096, 4096), dtype=np.float32) - 1

  GlobalCounters.global_ops = 0
  GlobalCounters.global_mem = 0
  if DEBUG >= 4: print("benchmark start")
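The "L2 defeat" line writes a 4096x4096 float32 buffer (64 MiB, well beyond typical L2/L3 capacity) between runs, so each timed call starts from a cold cache instead of re-reading data the previous run left resident. A hedged sketch of the effect; the working-set size and the timed kernel are illustrative, not taken from the test:

import time
import numpy as np

x = np.random.rand(1 << 20).astype(np.float32)  # 4 MB working set

def timed_sum():
  st = time.perf_counter()
  x.sum()
  return (time.perf_counter() - st) * 1e6

timed_sum()                                   # warm the cache
hot = timed_sum()                             # data likely still cached
np.zeros((4096, 4096), dtype=np.float32) - 1  # 64 MiB of traffic evicts it
cold = timed_sum()                            # closer to a DRAM-bound timing
print(f"hot {hot:.0f} us vs cold {cold:.0f} us")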
@@ -5,7 +5,7 @@ from tinygrad.ops import UnaryOps, BinaryOps, MovementOps, ReduceOps, FusedOps,
from tinygrad.helpers import shape_to_axis

base_fxn_for_op : Dict[Op, Callable] = {
-  UnaryOps.NOOP: lambda x: x[:], UnaryOps.NEG: lambda x: -x, UnaryOps.NOT: lambda x: (1.0 - x),
+  UnaryOps.NEG: lambda x: -x, UnaryOps.NOT: lambda x: (1.0 - x),
  BinaryOps.ADD: operator.add, BinaryOps.SUB: operator.sub, BinaryOps.MUL: operator.mul, BinaryOps.DIV: operator.truediv, BinaryOps.POW: operator.pow,
  ReduceOps.SUM: lambda x, new_shape: x.sum(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
  ReduceOps.MAX: lambda x, new_shape: (x.amax if hasattr(x, 'amax') else x.max)(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
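This hunk (apparently the shared CPU op table, tinygrad/runtime/ops_cpu.py, since the torch hunk below imports base_fxn_for_op from there) removes UnaryOps.NOOP's generic slice-copy `x[:]`. Per the commit title, NOOP now has to mean "make the buffer contiguous", which `x[:]` does not guarantee for numpy views, so each backend supplies its own implementation in the hunks that follow. A small illustration of why the slice is not enough (plain numpy, outside tinygrad):

import numpy as np

x = np.arange(16, dtype=np.float32).reshape(4, 4).T  # transposed view, not C-contiguous
print(x.flags['C_CONTIGUOUS'])                        # False
print(x[:].flags['C_CONTIGUOUS'])                     # False: slicing keeps the strides
print(np.ascontiguousarray(x).flags['C_CONTIGUOUS'])  # True: what NOOP now does on the numpy backend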
@@ -23,7 +23,7 @@ def einsum_mulacc(einsum, get_strides, expand):
  return mulacc

numpy_fxn_for_op : Dict[Op, Callable] = {**base_fxn_for_op, **{
-  UnaryOps.EXP: lambda x: np.exp(x), UnaryOps.LOG: lambda x: np.log(x),
+  UnaryOps.NOOP: lambda x: np.ascontiguousarray(x), UnaryOps.EXP: lambda x: np.exp(x), UnaryOps.LOG: lambda x: np.log(x),
  BinaryOps.MAX: np.maximum, BinaryOps.CMPEQ: lambda x,y: (x==y).astype(np.float32),
  MovementOps.FLIP: lambda x, axis: np.flip(x, axis), MovementOps.PERMUTE: lambda x, order: x.transpose(order),
  MovementOps.PAD: lambda x, padding: np.pad(x, padding), MovementOps.EXPAND: lambda x, new_shape: np.broadcast_to(x, new_shape),
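In the numpy op table, NOOP becomes np.ascontiguousarray, which returns the input untouched when it is already C-contiguous and otherwise materializes a contiguous copy, exactly the "NOOP means contiguous" reading of the commit title. A quick check of that behavior (plain numpy, not tinygrad code):

import numpy as np

a = np.ones((3, 3), dtype=np.float32)
print(np.ascontiguousarray(a) is a)      # True: already contiguous, no copy is made
v = a[:, ::2]                            # strided view, not contiguous
c = np.ascontiguousarray(v)
print(v.flags['C_CONTIGUOUS'], c.flags['C_CONTIGUOUS'])  # False True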
@@ -5,7 +5,7 @@ from tinygrad.helpers import getenv
from tinygrad.runtime.ops_cpu import base_fxn_for_op, einsum_mulacc

torch_fxn_for_op : Dict[Op, Callable] = {**base_fxn_for_op, **{
-  UnaryOps.EXP: lambda x: x.exp(), UnaryOps.LOG: lambda x: x.log(),
+  UnaryOps.NOOP: lambda x: x.contiguous(), UnaryOps.EXP: lambda x: x.exp(), UnaryOps.LOG: lambda x: x.log(),
  BinaryOps.MAX: torch.maximum, BinaryOps.CMPEQ: lambda x,y: (x==y).float(),
  MovementOps.PAD: lambda x, padding: torch.nn.functional.pad(x, [item for sublist in padding[::-1] for item in sublist]),
  FusedOps.MULACC: einsum_mulacc(torch.einsum, lambda x: x.stride(), lambda x,s: x.expand(s))
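The torch backend mirrors the numpy change: NOOP maps to Tensor.contiguous(), which is likewise a no-op for tensors already laid out contiguously and otherwise copies into a contiguous layout. A minimal illustration (plain PyTorch, not tinygrad code):

import torch

a = torch.ones(3, 3)
print(a.contiguous() is a)              # True: already contiguous, returned as-is
t = a.t()                               # transposed view shares storage, non-contiguous
print(t.is_contiguous())                # False
print(t.contiguous().is_contiguous())   # True: a contiguous copy is made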