very tiny generate_dataset (#11013)

Takes one minute to generate on my Mac.
This commit is contained in:
chenyu
2025-06-27 17:10:45 -04:00
committed by GitHub
parent 382fa6a325
commit a6485d00c8
3 changed files with 5 additions and 17 deletions

View File

@@ -6,24 +6,12 @@ export CAPTURE_PROCESS_REPLAY=1
rm $LOGOPS
test/external/process_replay/reset.py
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
STEPS=3 python3 examples/hlb_cifar10.py
WINO=1 STEPS=3 python3 examples/hlb_cifar10.py
python3 examples/stable_diffusion.py --noshow
python3 examples/llama.py --prompt "hello" --count 5
python3 examples/gpt2.py --count 5
HALF=1 python3 examples/gpt2.py --count 5
python3 examples/beautiful_mnist.py
python3 examples/beautiful_cartpole.py
python3 examples/mlperf/model_spec.py
python3 examples/yolov8.py ./test/models/efficientnet/Chicken.jpg
examples/openpilot/go.sh
JIT=2 BIG=1 MPS=1 pytest -n=auto test/ --ignore=test/test_fusion_op.py --ignore=test/test_gc.py --ignore=test/test_speed_v_torch.py --ignore=test/test_jit.py
JIT=2 BIG=1 MPS=1 python -m pytest test/test_gc.py
JIT=2 BIG=1 MPS=1 python -m pytest test/test_jit.py
JIT=2 BIG=1 MPS=1 python -m pytest test/test_speed_v_torch.py
CI=1 python3 -m pytest -n=auto test/test_ops.py test/test_nn.py test/test_winograd.py test/models/test_real_world.py --durations=20
GPU=1 python3 -m pytest test/test_tiny.py
# extract, sort and uniq
extra/optimization/extract_dataset.py
sort -u /tmp/ops > /tmp/sops
ls -lh /tmp/ops /tmp/sops
# gzip -k /tmp/sops
# mv /tmp/sops.gz extra/datasets/

View File

@@ -27,7 +27,7 @@ from tinygrad.helpers import dedup, DEBUG
def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True):
fn = Path(__file__).parent.parent / "datasets/sops.gz"
ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n"))
assert len(ast_strs) >= getenv("MIN_ASTS", 5000), f"dataset size = {len(ast_strs)} is too small"
assert len(ast_strs) >= getenv("MIN_ASTS", 1000), f"dataset size = {len(ast_strs)} is too small"
if DEBUG >= 1: print(f"loaded {len(ast_strs)=} before filters")
if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x]
if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x]