diff --git a/extra/optimization/helpers.py b/extra/optimization/helpers.py
index 2b51ba3cb7..cda0916418 100644
--- a/extra/optimization/helpers.py
+++ b/extra/optimization/helpers.py
@@ -10,7 +10,7 @@ inf, nan = float('inf'), float('nan')
 
 # kernel unpacker
 from tinygrad.codegen.kernel import Kernel
-def ast_str_to_ast(ast_str:str) -> Tuple[LazyOp,...]: return LazyOp(MetaOps.SINK, val) if isinstance(val:=eval(ast_str), tuple) else val
+def ast_str_to_ast(ast_str:str) -> LazyOp: return LazyOp(MetaOps.SINK, val) if isinstance(val:=eval(ast_str), tuple) else val
 def ast_str_to_lin(ast_str:str, opts=None): return Kernel(ast_str_to_ast(ast_str), opts=opts)
 def kern_str_to_lin(kern_str:str, opts=None):
   (ast, applied_opts,) = eval(kern_str)
diff --git a/test/external/fuzz_linearizer.py b/test/external/fuzz_linearizer.py
index 319830c873..25f0a89b7c 100644
--- a/test/external/fuzz_linearizer.py
+++ b/test/external/fuzz_linearizer.py
@@ -2,7 +2,7 @@ import random, traceback, ctypes, argparse
 from typing import List, Tuple, DefaultDict
 import numpy as np
 from collections import defaultdict
-from extra.optimization.helpers import load_worlds, ast_str_to_lin
+from extra.optimization.helpers import load_worlds, ast_str_to_lin, kern_str_to_lin
 
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.tensor import _to_np_dtype
@@ -157,7 +157,14 @@ def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
         if not FUZZ_ALL_ACTIONS and test_lin.applied_opts: print(f"applied opts: {test_lin.applied_opts}")
 
         # stop if kernel uops repeat
-        tuops = tuplize_uops(test_lin.linearize().uops.uops)
+        try: tuops = tuplize_uops(test_lin.linearize().uops.uops)
+        except Exception as e:  # not BaseException: KeyboardInterrupt/SystemExit must still abort the fuzz run
+          print(test_lin.ast)
+          print(test_lin.applied_opts)
+          print(e)
+          failures["LINEARIZE_ERROR"].append((test_lin.ast, test_lin.applied_opts))
+          continue
+
         if tuops in seen_uops: continue
         seen_uops[tuops] = tuple(test_lin.applied_opts)
 
@@ -187,6 +194,7 @@ if __name__ == "__main__":
   parser = argparse.ArgumentParser(description="Run a fuzz testing on one or more kernels", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument("--ast", type=str, default=None, help="the ast for the kernel to be optimized")
   parser.add_argument("--file", type=str, default=None, help="a file containing asts to be optimized, one per line")
+  parser.add_argument("--logfile", type=str, default=None, help="a file containing a tuple of ast and applied_opts, one per line")
   parser.add_argument("--expected-failures", type=int, default=0, help="the number of expected failed kernels")
   parser.add_argument("--rtol", type=float, default=1e-2, help="relative tolerance for numerical comparison")
   parser.add_argument("--atol", type=float, default=1e-2, help="absolute tolerance for numerical comparison")
@@ -199,6 +207,12 @@ if __name__ == "__main__":
     print(f"loading ASTs from file '{args.file}'")
     with open(args.file, 'r') as file:
       ast_strs = file.readlines()
+  elif args.logfile is not None:
+    print(f"loading ASTs from LOGKERNS file '{args.logfile}'")  # report the path that is actually opened below
+    with open(args.logfile, 'r') as file:
+      kern_strs = file.readlines()
+    test_lins = [kern_str_to_lin(kern_str) for kern_str in kern_strs]
+    ast_strs = [f"{lin.ast}" for lin in test_lins]
   else:
     print("loading ASTs from world")
     ast_strs = load_worlds(filter_reduce=False, filter_novariable=False)
diff --git a/test/external/verify_kernel.py b/test/external/verify_kernel.py
index c614bfad6c..c9f34b2527 100644
--- a/test/external/verify_kernel.py
+++ b/test/external/verify_kernel.py
@@ -51,9 +51,8 @@ if __name__ == "__main__":
   failures = defaultdict(list)
   for i, test_lin in enumerate(test_lins):
     print(f"testing kernel {i}")
-    for op in test_lin.ast:
-      print_tree(op)
-      print(op)
+    print_tree(test_lin.ast)
+    print(test_lin.ast)
     print(test_lin.applied_opts)
     unoptimized_lin = Kernel(test_lin.ast)
     unoptimized_lin.required_optimizations()
diff --git a/test/test_linearizer_failures.py b/test/test_linearizer_failures.py
index d9ccb798ad..db1cb7bfd0 100644
--- a/test/test_linearizer_failures.py
+++ b/test/test_linearizer_failures.py
@@ -280,5 +280,37 @@ class TestLinearizerFailures(unittest.TestCase):
     opts = [Opt(op=OptOps.GROUPTOP, axis=0, amt=16)]
     helper_test_lin(Kernel(ast), opts=opts, failed_platforms=["METAL", "GPU", "CUDA", "AMD", "NV"])
 
+  # from world fuzz_linearizer: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_N=100 FUZZ_NTH=84 python3 ./test/external/fuzz_linearizer.py
+  def test_failure_36(self):
+    # UOps.UNMUL left after linearize
+    ast = LazyOp(op=MetaOps.SINK, src=(LazyOp(op=BufferOps.STORE, src=(LazyOp(op=UnaryOps.CAST, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=UnaryOps.CAST, src=(LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=1, dtype=dtypes.uchar, st=ShapeTracker(views=(View(shape=(6, 9), strides=(0, 0), offset=0, mask=((0, 6), (4, 9)), contiguous=False), View(shape=(5, 5), strides=(1, 10), offset=0, mask=None, contiguous=False))))),), arg=dtypes.uint),), arg=(1,)), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=-1, dtype=dtypes.uint, st=ShapeTracker(views=(View(shape=(5, 1), strides=(0, 0), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=dtypes.uchar),), arg=MemBuffer(idx=0, dtype=dtypes.uchar, st=ShapeTracker(views=(View(shape=(5, 1), strides=(1, 0), offset=0, mask=None, contiguous=True),)))),), arg=None)
+    opts = [Opt(op=OptOps.UPCAST, axis=0, amt=0)]
+    helper_test_lin(Kernel(ast), opts=opts, failed_platforms=["METAL", "GPU", "CUDA", "AMD", "NV", "CLANG", "LLVM"])
+
+  # BEGIN METAL=1 ./examples/beautiful_mnist.py failures
+  # log : PYTHONPATH=. LOGKERNS=/tmp/beautiful_mnist.kernels.txt METAL=1 python3 ./examples/beautiful_mnist.py
+  def test_failure_37(self):
+    # beautiful mnist kernel number 28: 6 possible TC axis_choices (3 for axis_buf1 and 2 reduce) and all fail
+    # fuzz: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_NTH=28 DEBUG=2 python3 ./test/external/fuzz_linearizer.py --logfile /tmp/beautiful_mnist.kernels.txt
+    ast = LazyOp(op=MetaOps.SINK, src=(LazyOp(op=BufferOps.STORE, src=(LazyOp(op=BinaryOps.MAX, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=UnaryOps.CAST, src=(LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=1, dtype=dtypes.uchar, st=ShapeTracker(views=(View(shape=(512, 1, 32, 24, 24, 1, 5, 5), strides=(784, 0, 0, 28, 1, 0, 28, 1), offset=0, mask=None, contiguous=False),)))),), arg=dtypes.float), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=2, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(512, 1, 32, 24, 24, 1, 5, 5), strides=(0, 0, 25, 0, 0, 0, 5, 1), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=(6, 7)), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=3, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(512, 1, 32, 24, 24, 1, 1, 1), strides=(0, 0, 1, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(512, 1, 32, 24, 24, 1, 1, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=MemBuffer(idx=0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(512, 1, 32, 24, 24, 1, 1, 1), strides=(18432, 0, 576, 24, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)))),), arg=None)
+    for axis in [0,1,2,3,4,5]:
+      opts = [Opt(op=OptOps.TC, axis=axis, amt=2)]
+      helper_test_lin(Kernel(ast), opts=opts, failed_platforms=["METAL"])
+  def test_failure_38(self):
+    # beautiful mnist kernel number 87: 6 possible TC axis_choices (2 for axis_buf1 and 3 reduce) and first/second reduce axis fail for both axis_buf1 choices
+    # fuzz: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_NTH=87 DEBUG=2 python3 ./test/external/fuzz_linearizer.py --logfile /tmp/beautiful_mnist.kernels.txt
+    ast = LazyOp(op=MetaOps.SINK, src=(LazyOp(op=BufferOps.STORE, src=(LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=UnaryOps.CAST, src=(LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=1, dtype=dtypes.uchar, st=ShapeTracker(views=(View(shape=(2, 1, 32, 24, 24, 1, 5, 5, 256), strides=(784, 0, 0, 28, 1, 0, 28, 1, 1568), offset=0, mask=None, contiguous=False),)))),), arg=dtypes.float), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=2, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(2, 1, 32, 24, 24, 1, 5, 5, 256), strides=(18432, 0, 576, 24, 1, 0, 0, 0, 36864), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=(0, 3, 4)),), arg=MemBuffer(idx=0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(1, 1, 32, 1, 1, 1, 5, 5, 256), strides=(0, 0, 6400, 0, 0, 0, 1280, 256, 1), offset=0, mask=None, contiguous=True),)))),), arg=None)
+    for axis in [0,1,3,4]:
+      opts = [Opt(op=OptOps.TC, axis=axis, amt=2)]
+      helper_test_lin(Kernel(ast), opts=opts, failed_platforms=["METAL"])
+  def test_failure_39(self):
+    # beautiful mnist kernel number 127: 6 possible TC axis_choices (3 for axis_buf1 and 2 reduce) and all fail
+    # fuzz: PYTHONPATH=. METAL=1 FUZZ_ALL_ACTIONS=1 DEPTH=1 FUZZ_NTH=127 DEBUG=2 python3 ./test/external/fuzz_linearizer.py --logfile /tmp/beautiful_mnist.kernels.txt
+    ast = LazyOp(op=MetaOps.SINK, src=(LazyOp(op=BufferOps.STORE, src=(LazyOp(op=BinaryOps.MAX, src=(LazyOp(op=BinaryOps.ADD, src=(LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=BinaryOps.MUL, src=(LazyOp(op=UnaryOps.CAST, src=(LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=1, dtype=dtypes.uchar, st=ShapeTracker(views=(View(shape=(10000, 1, 32, 24, 24, 1, 5, 5), strides=(784, 0, 0, 28, 1, 0, 28, 1), offset=0, mask=None, contiguous=False),)))),), arg=dtypes.float), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=2, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(10000, 1, 32, 24, 24, 1, 5, 5), strides=(0, 0, 25, 0, 0, 0, 5, 1), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=(6, 7)), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=3, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(10000, 1, 32, 24, 24, 1, 1, 1), strides=(0, 0, 1, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None), LazyOp(op=BufferOps.CONST, src=(), arg=ConstBuffer(val=0.0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(10000, 1, 32, 24, 24, 1, 1, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),))))), arg=None),), arg=MemBuffer(idx=0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(10000, 1, 32, 24, 24, 1, 1, 1), strides=(18432, 0, 576, 24, 1, 0, 0, 0), offset=0, mask=None, contiguous=True),)))),), arg=None)
+    for axis in [0,1,2,3,4,5]:
+      opts = [Opt(op=OptOps.TC, axis=axis, amt=2)]
+      helper_test_lin(Kernel(ast), opts=opts, failed_platforms=["METAL"])
+  # END METAL=1 ./examples/beautiful_mnist.py failures
+
 if __name__ == '__main__':
   unittest.main()