fix extract_dataset + add tests to CI (#10995)

* fix extract_dataset + tests

* add CI

* sops.gz itself is the same as master

* yml + gzip -c + ge

* don't commit that

* bump limit to 1000

* axis=7

* test_tiny
This commit is contained in:
qazal
2025-06-27 01:51:36 +03:00
committed by GitHub
parent 4572e65f0f
commit 712980e167
5 changed files with 41 additions and 24 deletions

View File

@@ -6,8 +6,8 @@ from test.external.process_replay.process_replay import _pmap
LOGOPS = os.getenv("LOGOPS", "/tmp/sops")
def extract_ast(*args) -> None:
open(LOGOPS, "a").write(str(args[0]).replace("\n", "").replace(" ", "")+"\n")
open(LOGOPS, "a").write(str(args[1]).replace("\n", "").replace(" ", "")+"\n")
return None
if __name__ == "__main__":
_pmap("kernel", extract_ast)
_pmap({"get_program":extract_ast})

View File

@@ -5,6 +5,7 @@ from tinygrad.uop.ops import UOp, Ops, KernelInfo
from tinygrad.dtype import dtypes, PtrDType
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.view import View
from tinygrad.helpers import getenv
inf, nan = float('inf'), float('nan')
UOps = Ops
@@ -26,7 +27,7 @@ from tinygrad.helpers import dedup, DEBUG
def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True):
fn = Path(__file__).parent.parent / "datasets/sops.gz"
ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n"))
assert len(ast_strs) > 5000, f"dataset size = {len(ast_strs)} is too small"
assert len(ast_strs) >= getenv("MIN_ASTS", 5000), f"dataset size = {len(ast_strs)} is too small"
if DEBUG >= 1: print(f"loaded {len(ast_strs)=} before filters")
if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x]
if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x]