diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8526fee94e..0e304cb7ba 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -337,8 +337,8 @@ jobs:
       run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
     - name: Test Beam Search
       run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
-    - name: Fuzz Test linearizer, TODO fix failure
-      run: PYTHONPATH="." METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=24 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py --expected-failures 1
+    - name: Fuzz Test linearizer
+      run: PYTHONPATH="." METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=24 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py
     - name: Fuzz Test models schedule
       run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
     - name: Run TRANSCENDENTAL math
diff --git a/extra/datasets/sops.gz b/extra/datasets/sops.gz
index f15ff0b6a6..bce0f6f7b2 100644
Binary files a/extra/datasets/sops.gz and b/extra/datasets/sops.gz differ
diff --git a/extra/optimization/extract_dataset.py b/extra/optimization/extract_dataset.py
new file mode 100755
index 0000000000..595ddb7dfe
--- /dev/null
+++ b/extra/optimization/extract_dataset.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+# extract asts from process replay artifacts
+import os, pickle
+from tinygrad.helpers import db_connection, getenv, VERSION
+from test.external.process_replay.process_replay import _run_differ
+
+PAGE_SIZE = 100
+RUN_ID = os.getenv("GITHUB_RUN_ID", "HEAD")
+TABLE_NAME = f"process_replay_{RUN_ID}_{getenv('GITHUB_RUN_ATTEMPT')}_{VERSION}"
+LOGOPS = os.getenv("LOGOPS", "/tmp/sops")
+
+def extract_ast(offset:int):
+  logops = open(LOGOPS, "a")
+  conn = db_connection()
+  for row in conn.execute(f"SELECT val FROM '{TABLE_NAME}' LIMIT ? OFFSET ?", (PAGE_SIZE, offset)).fetchall():
+    logops.write(str(pickle.loads(row[0])[0]).replace("\n", "").replace(" ", "")+"\n")
+  return False
+
+if __name__ == "__main__":
+  conn = db_connection()
+  row_count = conn.execute(f"SELECT COUNT(*) FROM '{TABLE_NAME}'").fetchone()[0]
+  _run_differ(row_count, extract_ast)
diff --git a/extra/optimization/generate_dataset.sh b/extra/optimization/generate_dataset.sh
index a9a41e8f51..36f57890f0 100755
--- a/extra/optimization/generate_dataset.sh
+++ b/extra/optimization/generate_dataset.sh
@@ -1,12 +1,10 @@
 #!/bin/bash
-export LOGOPS=/tmp/ops
+export LOGOPS=/tmp/sops
+export RUN_PROCESS_REPLAY=1
 rm $LOGOPS
+test/external/process_replay/reset.py
 
-# generate many kernels
-PYTHONPATH="." OPT=2 GPU=1 python3 test/external/external_test_opt.py
-PYTHONPATH="." OPT=3 GPU=1 python3 test/external/external_test_opt.py
-GPU=1 IMAGE=1 python3 test/test_ops.py
-FORWARD_ONLY=1 GPU=1 IMAGE=2 python test/test_ops.py
+python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
 STEPS=3 python3 examples/hlb_cifar10.py
 WINO=1 STEPS=3 python3 examples/hlb_cifar10.py
 python3 examples/stable_diffusion.py --noshow
@@ -23,6 +21,7 @@ JIT=2 BIG=1 MPS=1 python -m pytest test/test_gc.py
 JIT=2 BIG=1 MPS=1 python -m pytest test/test_jit.py
 JIT=2 BIG=1 MPS=1 python -m pytest test/test_speed_v_torch.py
 
-# sort and uniq
-sort -u /tmp/ops > /tmp/sops
-ls -lh /tmp/ops /tmp/sops
+# extract, sort and uniq
+extra/optimization/extract_dataset.py
+sort -u /tmp/sops -o /tmp/sops
+ls -lh /tmp/sops
diff --git a/tinygrad/engine/schedule.py b/tinygrad/engine/schedule.py
index 35736fb702..5976969aa5 100644
--- a/tinygrad/engine/schedule.py
+++ b/tinygrad/engine/schedule.py
@@ -17,9 +17,6 @@ from tinygrad.shape.view import View, strides_for_shape
 # creation can recurse a lot
 sys.setrecursionlimit(10000)
 
-# optionally log the ops to disk
-logops = open(getenv("LOGOPS", ""), "a") if getenv("LOGOPS", "") else None
-
 # *** ScheduleItem return type ***
 
 @dataclass(frozen=True)
@@ -471,9 +468,7 @@ def create_schedule_with_vars(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffe
     for out in lsi.outputs: realized_lazybuffer(out, kernel_number)
     var_vals = merge_dicts([var_vals, lsi.var_vals])
     for out in lsi.outputs: del out.srcs  # can only schedule once
-    schedule.append(si:=ScheduleItem(lsi.ast, tuple(x.buffer for x in lsi.outputs+lsi.inputs if x.size != 0), lsi.metadata))
-    if logops and si.ast.op is UOps.SINK and not any(i.device.startswith("DISK:") for i in si.inputs):
-      logops.write(str(si.ast).replace("\n", "").replace(" ", "")+"\n")
+    schedule.append(ScheduleItem(lsi.ast, tuple(x.buffer for x in lsi.outputs+lsi.inputs if x.size != 0), lsi.metadata))
     for x in graph[lsi]:
       in_degree[x] -= 1
       if in_degree[x] == 0: queue.append(x)
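
After this change, each line of the extracted dataset file is one whitespace-stripped str() of a kernel AST, as written by extract_dataset.py above. A minimal sketch for sanity-checking the output file (not part of the diff; it assumes only the LOGOPS default of /tmp/sops established above):

#!/usr/bin/env python3
# minimal sketch: count the ASTs extracted by extract_dataset.py
# assumes LOGOPS defaults to /tmp/sops, matching the diff above
import os

LOGOPS = os.getenv("LOGOPS", "/tmp/sops")
with open(LOGOPS) as f:
  asts = [line.strip() for line in f if line.strip()]
print(f"{len(asts)} ASTs logged, {len(set(asts))} unique")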