diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index 0bce7bb72c..849d1ed80c 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -3,7 +3,7 @@ from pathlib import Path
 import multiprocessing
 
 from tinygrad import Device, GlobalCounters, Tensor, TinyJit, dtypes
-from tinygrad.helpers import getenv, BEAM, WINO, round_up, diskcache_clear, FUSE_CONV_BW
+from tinygrad.helpers import getenv, BEAM, WINO, round_up, diskcache_clear, FUSE_CONV_BW, Profiling
 from tinygrad.nn.state import get_parameters, get_state_dict, safe_load, safe_save
 from tinygrad.nn.optim import LAMB, LARS, SGD, OptimizerGroup, Adam
 
@@ -1211,4 +1211,4 @@ if __name__ == "__main__":
     nm = f"train_{m}"
     if nm in globals():
       print(f"training {m}")
-      globals()[nm]()
+      with Profiling(enabled=getenv("PYPROFILE")): globals()[nm]()
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index eb5f2afa82..bf0bdf426f 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -26,7 +26,7 @@ def _apply_map_to_tensors(applied_map:dict[UOp, UOp], name:str|None=None) -> Non
   all_uops: set[UOp] = set()
   search_uops = list(applied_map)
   while len(search_uops):
-    x = search_uops.pop(0)
+    x = search_uops.pop()
     if x in all_uops: continue
     all_uops.add(x)
     search_uops.extend([u for c in x.children if (u:=c()) is not None])
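
Notes on the two changes (not part of the patch):

The `model_train.py` hunk gates a Python-level profiler on an env var. `Profiling` from `tinygrad.helpers` is a context manager around cProfile; with a falsy `enabled` it is a no-op, so default behavior is unchanged. A minimal sketch of the same pattern follows; `slow_step` is a made-up stand-in for a training step:

```python
from tinygrad.helpers import Profiling, getenv

def slow_step():
  # hypothetical workload, purely illustrative
  sum(i * i for i in range(1_000_000))

# getenv defaults to 0, so stats print only when PYPROFILE is set,
# e.g. PYPROFILE=1 MODEL=bert python3 examples/mlperf/model_train.py
with Profiling(enabled=getenv("PYPROFILE")):
  slow_step()
```

The `tensor.py` hunk swaps `list.pop(0)` (O(n) per pop, breadth-first order) for `list.pop()` (O(1), depth-first) in the UOp graph walk. Since the loop only collects the set of reachable UOps, the visit order does not affect the result, and dropping the front-pop avoids quadratic behavior on large graphs. A toy walk showing the order-independence, using an invented `children` map in place of `UOp.children`:

```python
# Invented graph; keys are nodes, values are child lists.
children = {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}

def reachable(start: str) -> set[str]:
  seen: set[str] = set()
  stack = [start]
  while stack:
    x = stack.pop()  # LIFO stack (DFS); pop(0) would give FIFO (BFS), same final set
    if x in seen: continue
    seen.add(x)
    stack.extend(children[x])
  return seen

assert reachable("a") == {"a", "b", "c", "d"}
```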