diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fe59e83e0a..99d0cd2048 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -320,7 +320,7 @@ jobs:
         CLOUDDEV=GPU IMAGE=2 CLOUD=1 python3 test/test_tiny.py
     - if: ${{ matrix.task == 'onnx' }}
      name: Test Action Space
-      run: PYTHONPATH="." GPU=1 python3 extra/optimization/get_action_space.py
+      run: PYTHONPATH="." DEBUG=1 GPU=1 python3 extra/optimization/get_action_space.py
     - if: ${{ matrix.task == 'onnx' }}
      name: Test Beam Search
      run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
diff --git a/extra/datasets/sops.gz b/extra/datasets/sops.gz
index 3e41b88fa4..5e1d668ce4 100644
Binary files a/extra/datasets/sops.gz and b/extra/datasets/sops.gz differ
diff --git a/extra/optimization/extract_dataset.py b/extra/optimization/extract_dataset.py
index e1eccca05f..17683006f1 100755
--- a/extra/optimization/extract_dataset.py
+++ b/extra/optimization/extract_dataset.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
 # extract asts from process replay artifacts
 import os
-from tinygrad.helpers import db_connection, VERSION
 from test.external.process_replay.process_replay import _pmap
 
-PAGE_SIZE = 100
-TABLE_NAME = f"kernel_process_replay_{VERSION}"
 LOGOPS = os.getenv("LOGOPS", "/tmp/sops")
 
 def extract_ast(*args) -> bool:
@@ -13,6 +10,4 @@ def extract_ast(*args) -> bool:
   return args[-1]
 
 if __name__ == "__main__":
-  conn = db_connection()
-  row_count = conn.execute(f"SELECT COUNT(*) FROM '{TABLE_NAME}'").fetchone()[0]
   _pmap("kernel", extract_ast)
diff --git a/extra/optimization/generate_dataset.sh b/extra/optimization/generate_dataset.sh
index 600634235e..afff89185e 100755
--- a/extra/optimization/generate_dataset.sh
+++ b/extra/optimization/generate_dataset.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+export PAGE_SIZE=1
 export LOGOPS=/tmp/ops
 export RUN_PROCESS_REPLAY=1
 rm $LOGOPS
diff --git a/extra/optimization/helpers.py b/extra/optimization/helpers.py
index 3f86af98d1..9dcaf8133c 100644
--- a/extra/optimization/helpers.py
+++ b/extra/optimization/helpers.py
@@ -24,14 +24,16 @@ def kern_str_to_lin(kern_str:str, opts=None):
 import gzip
 from pathlib import Path
 import random
-from tinygrad.helpers import dedup
+from tinygrad.helpers import dedup, DEBUG
 def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True):
   fn = Path(__file__).parent.parent / "datasets/sops.gz"
   ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n"))
   assert len(ast_strs) > 5000, f"dataset size = {len(ast_strs)} is too small"
+  if DEBUG >= 1: print(f"loaded {len(ast_strs)=} before filters")
   if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x]
   if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x]
   if filter_novariable: ast_strs = [x for x in ast_strs if "Variable" not in x]
+  if DEBUG >= 1: print(f"loaded {len(ast_strs)=} after filters")
   random.seed(1337)
   random.shuffle(ast_strs)
   return ast_strs
diff --git a/test/external/process_replay/process_replay.py b/test/external/process_replay/process_replay.py
index b25fc537ab..5ac274e011 100755
--- a/test/external/process_replay/process_replay.py
+++ b/test/external/process_replay/process_replay.py
@@ -12,7 +12,7 @@ from test.helpers import print_diff
 
 # *** process replay settings
 # internal
-PAGE_SIZE = 100
+PAGE_SIZE = getenv("PAGE_SIZE", 100)
 REF = os.getenv("GITHUB_REF_NAME", "")
 MAX_DIFF_PCT = getenv("PROCESS_REPLAY_MAX_DIFF_PCT", 20)
 TABLE_NAME = f"process_replay_{VERSION}"