mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
example to benchmark onnx [pr] (#8459)
* example to benchmark onnx [pr] * reset global count
This commit is contained in:
39
examples/benchmark_onnx.py
Normal file
39
examples/benchmark_onnx.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Benchmark an ONNX model under tinygrad: JIT-compile it, warm up, then time 20 runs."""
import sys, onnx, time
from tinygrad import Tensor, TinyJit, Device, GlobalCounters, fetch
from tinygrad.tensor import _from_np_dtype
from extra.onnx import get_run_onnx

if __name__ == "__main__":
  # download (or locate in cache) the model given as argv[1]; fetch returns a local path
  onnx_file = fetch(sys.argv[1])
  print(onnx_file)
  onnx_model = onnx.load(onnx_file)
  # benchmark in pure inference mode: no gradient tracking, no training-mode ops
  Tensor.no_grad = True
  Tensor.training = False
  run_onnx = get_run_onnx(onnx_model)
  print("loaded model")

  # find preinitted tensors (graph initializers, i.e. weights) and ignore them:
  # only the true runtime inputs get random data fed in.
  # FIX: was a dict with None values used purely for membership — a set is the right structure.
  initted_tensors = {inp.name for inp in onnx_model.graph.initializer}
  expected_inputs = [inp for inp in onnx_model.graph.input if inp.name not in initted_tensors]

  # collect shape and numpy dtype for each real input.
  # NOTE(review): dynamic dims come through as dim_value=0 — presumably the models
  # benchmarked here are fully static; confirm before feeding models with symbolic dims.
  input_shapes = {inp.name:tuple(x.dim_value for x in inp.type.tensor_type.shape.dim) for inp in expected_inputs}
  input_types = {inp.name:onnx.helper.tensor_dtype_to_np_dtype(inp.type.tensor_type.elem_type) for inp in expected_inputs}
  # JIT wrapper: move every input to the default device, run the model, return the first output
  run_onnx_jit = TinyJit(lambda **kwargs: next(iter(run_onnx({k:v.to(Device.DEFAULT) for k,v in kwargs.items()}).values())), prune=True)

  # three warmup runs so the JIT can capture/compile before timing starts
  for i in range(3):
    new_inputs = {k:Tensor.randn(*shp, dtype=_from_np_dtype(input_types[k])).mul(8).realize() for k,shp in sorted(input_shapes.items())}
    GlobalCounters.reset()
    print(f"run {i}")
    run_onnx_jit(**new_inputs)

  # run 20 times, reporting enqueue time (JIT call returns) vs total time (including copy-out)
  for _ in range(20):
    new_inputs = {k:Tensor.randn(*shp, dtype=_from_np_dtype(input_types[k])).mul(8).realize() for k,shp in sorted(input_shapes.items())}
    GlobalCounters.reset()
    st = time.perf_counter()
    out = run_onnx_jit(**new_inputs)
    mt = time.perf_counter()
    # FIX: the result was bound to an unused local; .numpy() is called only to force
    # the device to finish and the output to materialize before taking the end time.
    out.numpy()
    et = time.perf_counter()
    print(f"enqueue {(mt-st)*1e3:6.2f} ms -- total run {(et-st)*1e3:6.2f} ms")
|
||||
@@ -293,10 +293,13 @@ def SpaceToDepth(X:Tensor, blocksize:int):
|
||||
return X.rearrange("b c (h h1) (w w1) -> b (h1 w1 c) h w", h1=blocksize, w1=blocksize)
|
||||
|
||||
# Reimplemented here because you need legacy RNG for passing ONNX tests.
def Dropout_7(data:Tensor, ratio:float=0.5, training_mode:bool=False, seed:int|None=None):
  # Inference path: data passes through untouched; if mask is requested as output it will contain all True's.
  if not training_mode: return data, Tensor.ones(data.shape, dtype=dtypes.bool)
  # Training path: the keep-mask is drawn with numpy's legacy RandomState so results
  # match the reference implementation, then kept elements are rescaled by 1/(1-ratio).
  keep = np.random.RandomState(seed).random(cast(tuple[int,...], data.shape)) >= ratio
  mask = Tensor(keep, requires_grad=False, device=data.device)
  scale = 1/(1.0 - ratio)
  return data * mask * scale, mask
|
||||
# 6 with 'is_test' needed for https://github.com/MTlab/onnx2caffe/raw/refs/heads/master/model/MobileNetV2.onnx
def Dropout_6(data:Tensor, ratio:float=0.5, is_test=0):
  # opset 6 signals inference with 'is_test' rather than 'training_mode'; invert and delegate
  return Dropout_7(data, ratio, training_mode=not is_test)
Dropout = {6:Dropout_6, 7:Dropout_7}
|
||||
|
||||
def LRN(x:Tensor, size:int, alpha:float=1e-4, beta:float=0.75, bias:float=1.0):
|
||||
pooled_x = (x**2).rearrange('b c h w -> b 1 c (h w)').pad((0,0,(size-1)//2, size//2)).avg_pool2d((size, 1), 1)
|
||||
|
||||
Reference in New Issue
Block a user