Files
tinygrad/extra/huggingface_onnx/collect_metadata.py
geohotstan 0bed9b6cd2 benchmark huggingface onnx models (#8493)
* add ability to ORT=1

* test_vs_ort

* useless f

* actually have benchmark take in modelproto for more flexibility in huggingface stuff

* ok runs

* good

* oops fix benchmark_onnx __main__

* 224 as default

* add ORT=1 option to huggingface_onnx

* use Tensor to get_input

* add ability to do single onnx model testing

* better names

* merge properly...

* copy in onnx_helpers

* better

* decent script

* need to add debug tool first

* new limit usage

* why did narrowing_error come back..

* pretty decent

* revert validate change

* more ops bug fixes

* revert unnecessary changes

* fix InstanceNorm too

* remove op from O4

* minimize diff

* address old feedback

* unsure of this, just revert

* remove that assert

* working attention

* to_python_const Attention

* cant init from np constant so just do this

* final

* fix bug in attention

* attention clean ups

* add hard TODOs and REPOPATH and TRUNCATE envvar

* fix input_ids default value

* final

* fix scatter

* cleaner _prepare_quantize

* use new attention and tempfile for huggingface script

* more stats

* update

* remove outdated code

* big refactor to something usable by CI

* booooooom

* clean up

* update to using yaml as env var input

* add dry run

* try

* valid pad

* use argparser and fix gather bug

* ignore all yaml

* tiny bit more polish

* woah ignoring all yaml was not right

* typo

* decouple huggingface_onnx_run debug run with huggingface_onnx_download

* bug fix for downloading single model

* WOOOO ok much better

* oops argparse 'required' is an invalid argument for positionals

* oops argparse 'required' is an invalid argument for positionals

* add assert

* fix types

---------

Co-authored-by: chenyu <chenyu@fastmail.com>
2025-03-12 20:13:12 -04:00

86 lines
3.7 KiB
Python

import yaml, time, requests, argparse
from pathlib import Path
from huggingface_hub import list_models, HfApi
from tinygrad.helpers import tqdm
# Base URL from which repository pages and file download links are built.
HUGGINGFACE_URL = "https://huggingface.co"

# Substrings of file names that mark a model file as one to leave out.
SKIPPED_FILES = [
    # numerical accuracy issues
    "fp16",
    "int8",
    "uint8",
    "quantized",
    # numerical accuracy issues
    "avx2",
    "arm64",
    "avx512",
    "avx512_vnni",
    # unimplemented quantization
    "q4",
    "q4f16",
    "bnb4",
    # requires non cpu ort runner and MemcpyFromHost op
    "model_O4",
    # TODO: implement attribute with graph type and Loop op
    "merged",
]

# Repository ids that are excluded outright, grouped by the reason for exclusion.
SKIPPED_REPO_PATHS = [
    # Invalid model-index
    "AdamCodd/vit-base-nsfw-detector",
    # TODO: implement attribute with graph type and Loop op
    "minishlab/potion-base-8M",
    "minishlab/M2V_base_output",
    "minishlab/potion-retrieval-32M",
    # TODO: implement SimplifiedLayerNormalization, SkipSimplifiedLayerNormalization, GroupQueryAttention
    "HuggingFaceTB/SmolLM2-360M-Instruct",
    # TODO: implement SimplifiedLayerNormalization, SkipSimplifiedLayerNormalization, RotaryEmbedding,
    # MultiHeadAttention
    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    # TODO: implement RandomNormalLike
    "stabilityai/stable-diffusion-xl-base-1.0",
    "stabilityai/sdxl-turbo",
    "SimianLuo/LCM_Dreamshaper_v7",
    # TODO: implement NonZero
    "mangoapps/fb_zeroshot_mnli_onnx",
    # TODO: huge Concat in here with 1024 (1, 3, 32, 32) Tensors, and maybe a MOD bug with const folding
    "briaai/RMBG-2.0",
]
def get_top_repos(n: int, sort: str) -> list[str]:  # list["FacebookAI/xlm-roberta-large", ...]
    """Collect the ids of the first ``n`` models listed with the "onnx" filter.

    Models whose id is in ``SKIPPED_REPO_PATHS`` are passed over and do not
    count towards ``n``. One progress line is printed per accepted model.

    Args:
        n: Maximum number of repository ids to return. A value of zero or
            less yields an empty list without querying the hub.
        sort: Ordering key forwarded to ``list_models``; the attribute of the
            same name is also read from each model for the progress line.

    Returns:
        Repository ids in the order ``list_models`` produced them. The list
        is shorter than ``n`` if the listing runs out first.
    """
    print(f"** Getting top {n} models sorted by {sort} **")
    repos: list[str] = []
    # An equality-based stop condition is never met for n <= 0, which would
    # make the loop walk the entire hub listing; bail out up front instead.
    if n <= 0:
        return repos
    for model in list_models(filter="onnx", sort=sort):
        if model.id in SKIPPED_REPO_PATHS:
            continue
        print(f"{len(repos)+1}/{n}: {model.id} ({getattr(model, sort)})")
        repos.append(model.id)
        if len(repos) >= n:
            break
    return repos
def get_metadata(repos:list[str], *, head_timeout: float = 30.0) -> dict:
    """Build the metadata report for the given repositories.

    For every repository the file listing is fetched through ``HfApi.model_info``
    and narrowed down to ``.onnx`` / ``.onnx_data`` files whose name contains
    none of the ``SKIPPED_FILES`` substrings. When the listing carries no size
    for a file, the size is taken from the ``Content-Length`` header of a HEAD
    request against the file's ``resolve/main`` URL (0 if the header is absent).

    Args:
        repos: Repository ids, e.g. ``"FacebookAI/xlm-roberta-large"``.
        head_timeout: Seconds to wait for each HEAD request before
            ``requests`` raises a timeout error.

    Returns:
        A dict with three keys:
          * ``"repositories"``: repo id -> ``{"url", "download_path", "files"}``,
            where ``download_path`` is ``None`` and ``files`` is a list of
            ``{"file": name, "size": "<MB with two decimals>MB"}``.
          * ``"total_size"``: sum of all kept file sizes, formatted in GB.
          * ``"created_at"``: current UTC time as ``%Y-%m-%dT%H:%M:%SZ``.
    """
    api = HfApi()
    repos_metadata = {"repositories": {}}
    total_size = 0
    # TODO: speed head requests up with async?
    for repo in tqdm(repos, desc="Getting metadata"):
        files_metadata = []
        model_info = api.model_info(repo)
        # Guard against a response without a sibling list (treated as "no files").
        for file in model_info.siblings or []:
            filename = file.rfilename
            if not filename.endswith(('.onnx', '.onnx_data')):
                continue
            if any(skip_str in filename for skip_str in SKIPPED_FILES):
                continue
            file_size = file.size
            if not file_size:
                # Only hit the network when the listing did not provide a size, and
                # never wait forever: requests has no default timeout.
                head = requests.head(
                    f"{HUGGINGFACE_URL}/{repo}/resolve/main/{filename}",
                    allow_redirects=True,
                    timeout=head_timeout,
                )
                file_size = int(head.headers.get('Content-Length', 0))
            files_metadata.append({"file": filename, "size": f"{file_size/1e6:.2f}MB"})
            total_size += file_size
        repos_metadata["repositories"][repo] = {
            "url": f"{HUGGINGFACE_URL}/{repo}",
            "download_path": None,
            "files": files_metadata,
        }
    repos_metadata['total_size'] = f"{total_size/1e9:.2f}GB"
    repos_metadata['created_at'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    return repos_metadata
if __name__ == "__main__":
    # Command line: how many repositories to collect and where to write the report.
    parser = argparse.ArgumentParser(description="Produces a YAML file with metadata of top huggingface onnx models")
    parser.add_argument("--limit", type=int, required=True, help="Number of top repositories to process (e.g., 100)")
    parser.add_argument("--output", type=str, default="huggingface_repos.yaml", help="Output YAML file name to save the report")
    args = parser.parse_args()

    sort = "downloads"  # recent 30 days downloads
    metadata = get_metadata(get_top_repos(args.limit, sort))

    # The report is written next to this script, under the requested file name.
    yaml_path = Path(__file__).parent / args.output
    with yaml_path.open('w') as f:
        # sort_keys=False keeps the keys in insertion order in the output file.
        yaml.dump(metadata, f, sort_keys=False)
    print(f"YAML saved to: {str(yaml_path)}")