check datasets/sops.gz size to be > 5000 (#7555)

The dataset currently has more than 12000 rows, but the exact count depends on the backend that generates it, so this sets a lower but still meaningful threshold.
This commit is contained in:
chenyu
2024-11-05 09:03:19 -05:00
committed by GitHub
parent 24f0a24f95
commit 0db5f52b2a

View File

@@ -28,6 +28,7 @@ from tinygrad.helpers import dedup
def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True):
fn = Path(__file__).parent.parent / "datasets/sops.gz"
ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n"))
assert len(ast_strs) > 5000, "dataset size too small"
if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x]
if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x]
if filter_novariable: ast_strs = [x for x in ast_strs if "Variable" not in x]