Files
tinygrad/extra/onnx_helpers.py
geohotstan 0bed9b6cd2 benchmark huggingface onnx models (#8493)
* add ability to ORT=1

* test_vs_ort

* useless f

* actually have benchmark take in modelproto for more flexibility in huggingface stuff

* ok runs

* good

* oops fix benchmark_onnx __main__

* 224 as default

* add ORT=1 option to huggingface_onnx

* use Tensor to get_input

* add ability to do single onnx model testing

* better names

* merge properly...

* copy in onnx_helpers

* better

* decent script

* need to add debug tool first

* new limit usage

* why did narrowing_error come back..

* pretty decent

* revert validate change

* more ops bug fixes

* revert unnecessary changes

* fix InstanceNorm too

* remove op from O4

* minimize diff

* address old feedback

* unsure of this, just revert

* remove that assert

* working attention

* to_python_const Attention

* can't init from np constant so just do this

* final

* fix bug in attention

* attention clean ups

* add hard TODOs and REPOPATH and TRUNCATE envvar

* fix input_ids default value

* final

* fix scatter

* cleaner _prepare_quantize

* use new attention and tempfile for huggingface script

* more stats

* update

* remove outdated code

* big refactor to something usable by CI

* booooooom

* clean up

* update to using yaml as env var input

* add dry run

* try

* valid pad

* use argparser and fix gather bug

* ignore all yaml

* tiny bit more polish

* woah ignoring all yaml was not right

* typo

* decouple huggingface_onnx_run debug run with huggingface_onnx_download

* bug fix for downloading single model

* WOOOO ok much better

* oops argparse 'required' is an invalid argument for positionals

* oops argparse 'required' is an invalid argument for positionals

* add assert

* fix types

---------

Co-authored-by: chenyu <chenyu@fastmail.com>
2025-03-12 20:13:12 -04:00

65 lines
3.0 KiB
Python

from tinygrad import Tensor
from tinygrad.tensor import _to_np_dtype
from extra.onnx import OnnxRunner, OnnxValue
import onnx
import numpy as np
import onnxruntime as ort
def get_example_inputs(graph_inputs:dict[str, OnnxValue], config={}):
def _get_shape(onnx_shape: tuple[str|int]):
shape = []
for onnx_dim in onnx_shape:
match onnx_dim:
case int(): shape.append(onnx_dim)
case "width" | "height":
size = config.get("size", {})
shape.append(size) if isinstance(size, int) else shape.append(size.get(onnx_dim, 224))
case "sequence" | "sequence_length" | "decoder_sequence_length": shape.append(64)
case "encoder_sequence_length": shape.append(config.get("nb_max_frames", 64))
case "past_decoder_sequence_length" | "encoder_sequence_length_out": shape.append(64)
case "encoder_sequence_length / 2": shape.append(32)
case "batch_size": shape.append(1)
case "num_channels": shape.append(config.get("in_channels", 3))
case "num_channels_latent": shape.append(config.get("latent_channels", 4))
case "height_latent" | "width_latent": shape.append(config.get("sample_size", 1024) // 8)
case "feature_size": shape.append(config.get("num_mel_bins", 128))
case _: shape.append(1)
return shape
def _get_value(name, shape, dtype):
match name:
case "input_ids":
vocab_size = config.get("text_config", {}).get("vocab_size") or config.get("vocab_size", 32)
val = np.random.randint(0, vocab_size-1, shape)
case "attention_mask": val = np.random.randint(0, 2, size=shape)
case "token_type_ids": val = np.random.randint(0, config.get("type_vocab_size", 2), shape)
case "image_tensor": val = np.random.randint(0, 256, shape)
case "task_id": return Tensor(0, dtype=dtype)
case _: val = np.random.uniform(size=shape) * 8
return Tensor(val.astype(_to_np_dtype(dtype))).realize()
ret: dict[str, Tensor] = {}
for name, spec in graph_inputs.items():
assert not spec.is_optional and not spec.is_sequence, "only allow tensor input for now"
shape = _get_shape(spec.shape)
value = _get_value(name, shape, spec.dtype)
ret.update({name:value})
return ret
def validate(onnx_file, inputs, rtol=1e-5, atol=1e-5):
  """Verify tinygrad's execution of an ONNX model against onnxruntime.

  Runs `onnx_file` with the same `inputs` through tinygrad's OnnxRunner and an
  onnxruntime CPU session, then asserts every output agrees elementwise within
  the given rtol/atol tolerances. Raises AssertionError on any mismatch.
  """
  tiny_runner = OnnxRunner(onnx.load(onnx_file))

  # Reference run: onnxruntime on CPU, logging limited to warnings and above.
  sess_opts = ort.SessionOptions()
  sess_opts.log_severity_level = 3
  session = ort.InferenceSession(onnx_file, sess_opts, ["CPUExecutionProvider"])
  np_inputs = {name: (val.numpy() if isinstance(val, Tensor) else val) for name, val in inputs.items()}
  out_names = list(tiny_runner.graph_outputs)
  ort_out = dict(zip(out_names, session.run(out_names, np_inputs)))

  # Candidate run: tinygrad.
  tinygrad_out = tiny_runner(inputs)

  # Same output set, then per-tensor closeness; None outputs must match as None.
  assert tinygrad_out.keys() == ort_out.keys()
  for k in tinygrad_out.keys():
    tiny_v, onnx_v = tinygrad_out[k], ort_out[k]
    if tiny_v is None:
      assert onnx_v is None, f"{k}: {tiny_v=}, {onnx_v=}"
    else:
      np.testing.assert_allclose(tiny_v.numpy(), onnx_v, rtol=rtol, atol=atol, err_msg=f"For tensor '{k}' in {tinygrad_out.keys()}")