From 7f1d41c9f9bdb425db1b57930d42b9ddc6f0de49 Mon Sep 17 00:00:00 2001 From: chenyu Date: Mon, 22 Dec 2025 15:54:18 -0500 Subject: [PATCH] delete files that import ShapeTracker (#13805) --- docs/developer/developer.md | 2 +- extra/optimization/extract_policynet.py | 114 ---------------- extra/optimization/extract_sa_pairs.py | 129 ------------------ extra/optimization/helpers.py | 124 ----------------- extra/optimization/pretrain_valuenet.py | 88 ------------ extra/test_pyrender.py | 20 --- .../external_test_hcq_fuzz_failures.py | 61 --------- test/external/external_test_nv.py | 60 -------- test/external/fuzz_shapetracker.py | 61 --------- test/external/fuzz_shapetracker_math.py | 34 ----- 10 files changed, 1 insertion(+), 692 deletions(-) delete mode 100644 extra/optimization/extract_policynet.py delete mode 100644 extra/optimization/extract_sa_pairs.py delete mode 100644 extra/optimization/helpers.py delete mode 100644 extra/optimization/pretrain_valuenet.py delete mode 100644 extra/test_pyrender.py delete mode 100644 test/external/external_test_hcq_fuzz_failures.py delete mode 100644 test/external/external_test_nv.py delete mode 100644 test/external/fuzz_shapetracker.py delete mode 100644 test/external/fuzz_shapetracker_math.py diff --git a/docs/developer/developer.md b/docs/developer/developer.md index d7c7518ff6..f83f59dd58 100644 --- a/docs/developer/developer.md +++ b/docs/developer/developer.md @@ -13,7 +13,7 @@ There's also a [doc describing speed](../developer/speed.md) Everything in [Tensor](../tensor/index.md) is syntactic sugar around constructing a graph of [UOps](../developer/uop.md). -The `UOp` graph specifies the compute in terms of low level tinygrad ops. Not all UOps will actually become realized. There's two types of UOps, base and view. base contains compute into a contiguous buffer, and view is a view (specified by a ShapeTracker). Inputs to a base can be either base or view, inputs to a view can only be a single base. +The `UOp` graph specifies the compute in terms of low level tinygrad ops. Not all UOps will actually become realized. There's two types of UOps, base and view. base contains compute into a contiguous buffer, and view is a view. Inputs to a base can be either base or view, inputs to a view can only be a single base. ## Scheduling diff --git a/extra/optimization/extract_policynet.py b/extra/optimization/extract_policynet.py deleted file mode 100644 index c6a6865c7c..0000000000 --- a/extra/optimization/extract_policynet.py +++ /dev/null @@ -1,114 +0,0 @@ -import os, sys, sqlite3, pickle, random -from tqdm import tqdm, trange -from copy import deepcopy -from tinygrad.nn import Linear -from tinygrad.tensor import Tensor -from tinygrad.nn.optim import Adam -from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict -from tinygrad.codegen.opt.search import actions -from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats, assert_same_lin -from tinygrad.codegen.opt.kernel import Kernel -from tinygrad.helpers import getenv - -# stuff needed to unpack a kernel -from tinygrad.uop.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer -from tinygrad.dtype import dtypes -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View -from tinygrad.uop.ops import Variable -inf, nan = float('inf'), float('nan') -from tinygrad.codegen.opt.kernel import Opt, OptOps - -INNER = 256 -class PolicyNet: - def __init__(self): - self.l1 = Linear(1021,INNER) - self.l2 = Linear(INNER,INNER) - self.l3 = Linear(INNER,1+len(actions)) - def __call__(self, x): - x = self.l1(x).relu() - x = self.l2(x).relu().dropout(0.9) - return self.l3(x).log_softmax() - -def dataset_from_cache(fn): - conn = sqlite3.connect(fn) - cur = conn.cursor() - cur.execute("SELECT * FROM beam_search") - X,A = [], [] - for f in tqdm(cur.fetchall()): - Xs,As = [], [] - try: - lin = Kernel(eval(f[0])) - opts = pickle.loads(f[-1]) - for o in opts: - Xs.append(lin_to_feats(lin, use_sts=True)) - As.append(actions.index(o)) - lin.apply_opt(o) - Xs.append(lin_to_feats(lin, use_sts=True)) - As.append(0) - except Exception: - pass - X += Xs - A += As - return X,A - -if __name__ == "__main__": - if getenv("REGEN"): - X,V = dataset_from_cache(sys.argv[1] if len(sys.argv) > 1 else "/tmp/tinygrad_cache") - safe_save({"X": Tensor(X), "V": Tensor(V)}, "/tmp/dataset_policy") - else: - ld = safe_load("/tmp/dataset_policy") - X,V = ld['X'].numpy(), ld['V'].numpy() - - print(X.shape, V.shape) - order = list(range(X.shape[0])) - random.shuffle(order) - X, V = X[order], V[order] - - ratio = -256 - X_test, V_test = Tensor(X[ratio:]), Tensor(V[ratio:]) - X,V = X[:ratio], V[:ratio] - print(X.shape, V.shape) - - net = PolicyNet() - #if os.path.isfile("/tmp/policynet.safetensors"): load_state_dict(net, safe_load("/tmp/policynet.safetensors")) - optim = Adam(get_parameters(net)) - - def get_minibatch(X,Y,bs): - xs, ys = [], [] - for _ in range(bs): - sel = random.randint(0, len(X)-1) - xs.append(X[sel]) - ys.append(Y[sel]) - return Tensor(xs), Tensor(ys) - - Tensor.training = True - losses = [] - test_losses = [] - test_accuracy = 0 - test_loss = float('inf') - for i in (t:=trange(500)): - x,y = get_minibatch(X,V,bs=256) - out = net(x) - loss = out.sparse_categorical_crossentropy(y) - optim.zero_grad() - loss.backward() - optim.step() - cat = out.argmax(axis=-1) - accuracy = (cat == y).mean() - t.set_description(f"loss {loss.numpy():7.2f} accuracy {accuracy.numpy()*100:7.2f}%, test loss {test_loss:7.2f} test accuracy {test_accuracy*100:7.2f}%") - - losses.append(loss.numpy().item()) - test_losses.append(test_loss) - if i % 10: - out = net(X_test) - test_loss = out.sparse_categorical_crossentropy(V_test).square().mean().numpy().item() - cat = out.argmax(axis=-1) - test_accuracy = (cat == y).mean().numpy() - - safe_save(get_state_dict(net), "/tmp/policynet.safetensors") - - import matplotlib.pyplot as plt - plt.plot(losses[10:]) - plt.plot(test_losses[10:]) - plt.show() diff --git a/extra/optimization/extract_sa_pairs.py b/extra/optimization/extract_sa_pairs.py deleted file mode 100644 index 2a91e49326..0000000000 --- a/extra/optimization/extract_sa_pairs.py +++ /dev/null @@ -1,129 +0,0 @@ -import sys, sqlite3, pickle, math -from collections import defaultdict -from tqdm import tqdm, trange -import numpy as np - -# stuff needed to unpack a kernel -from tinygrad.uop.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer -from tinygrad.dtype import dtypes -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View -from tinygrad.uop.ops import Variable -inf, nan = float('inf'), float('nan') -from tinygrad.codegen.opt.kernel import Opt, OptOps - -# more stuff -from tinygrad.codegen.opt.kernel import Kernel -from tinygrad.codegen.opt.search import actions -from extra.optimization.helpers import lin_to_feats -from extra.optimization.pretrain_valuenet import ValueNet -from tinygrad.nn.optim import Adam -from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict -import random -from tinygrad.tensor import Tensor -from tinygrad.helpers import getenv - -def dataset_from_cache(fn): - conn = sqlite3.connect(fn) - cur = conn.cursor() - cur.execute("SELECT * FROM time_linearizer") - grouped = defaultdict(dict) - for f in tqdm(cur.fetchall()): grouped[f[0]][f[1:-1]] = pickle.loads(f[-1]) - - opts_to_outcome = {} - - for ast,sk in grouped.items(): - cnts = defaultdict(int) - for sks,tm in sk.items(): - if sks[1] != 1: continue - opts = eval(sks[0]) - cnts[(len(opts), sks[1])] += 1 - opts_to_outcome[(ast, tuple(opts))] = tm - #print(cnts) - - S,A,V = [], [], [] - for ast,k in tqdm(opts_to_outcome): - if len(k) == 0: continue - old_tm = min(opts_to_outcome[(ast,k[:-1])]) - new_tm = min(opts_to_outcome[(ast,k)]) - if math.isinf(old_tm) or math.isinf(new_tm) or old_tm < 1e-9 or new_tm < 1e-9: continue - try: - lin = Kernel(eval(ast)) - except Exception: - continue - lin.apply_opts(k[:-1]) - act = k[-1] - log_ratio = math.log(old_tm/new_tm) - #print(f"ratio: {old_tm/new_tm:6.2f}x (log {log_ratio:5.2f}) from {str(act):50s} on {lin.colored_shape()}") - S.append(lin_to_feats(lin, use_sts=True)) - A.append(actions.index(act)) - V.append([log_ratio]) # NOTE: i have written the bug many times with this having the wrong dim - - S, A, V = np.array(S), np.array(A), np.array(V, dtype=np.float32) - X = np.zeros((S.shape[0], S.shape[1]+len(actions)), dtype=np.float32) - X[:, :S.shape[1]] = S - X[range(S.shape[0]), S.shape[1]+A] = 1.0 - return X, V - -def log_likelihood(x:Tensor, mu:Tensor, log_sigma:Tensor): - #print(x.shape, mu.shape, log_sigma.shape) - #return (x-mu).abs() * (-log_sigma).exp() + log_sigma - return (x-mu).square() * (-2*log_sigma).exp() / 2 + log_sigma - -if __name__ == "__main__": - if getenv("REGEN"): - X,V = dataset_from_cache(sys.argv[1] if len(sys.argv) > 1 else "/tmp/tinygrad_cache") - safe_save({"X": Tensor(X), "V": Tensor(V)}, "/tmp/dataset") - else: - ld = safe_load("/tmp/dataset") - X,V = ld['X'].numpy(), ld['V'].numpy() - - print(X.shape, V.shape) - order = list(range(X.shape[0])) - random.shuffle(order) - X, V = X[order], V[order] - - ratio = -512 - X_test, V_test = Tensor(X[ratio:]), Tensor(V[ratio:]) - X,V = X[:ratio], V[:ratio] - print(X.shape, V.shape) - - #print(X[0], V[0]) - #print(X[-1], V[-1]) - print(X.shape) - - net = ValueNet(X.shape[1], 2) - optim = Adam(get_parameters(net)) - - def get_minibatch(X,Y,bs): - xs, ys = [], [] - #random.seed(1337) - for _ in range(bs): - sel = random.randint(0, len(X)-1) - xs.append(X[sel]) - ys.append(Y[sel]) - return Tensor(xs), Tensor(ys) - - Tensor.training = True - losses = [] - test_losses = [] - test_loss = float('inf') - for i in (t:=trange(2000)): - x,y = get_minibatch(X,V,bs=256) - out = net(x) - #loss = (out-y).square().mean() - loss = log_likelihood(y, out[:, 0:1], out[:, 1:2]).mean() - optim.zero_grad() - loss.backward() - optim.step() - t.set_description(f"loss {loss.numpy():7.2f}, test loss {test_loss:7.2f}") - losses.append(loss.numpy().item()) - test_losses.append(test_loss) - if i % 10: test_loss = (net(X_test)[:, 0:1]-V_test).square().mean().numpy().item() - - safe_save(get_state_dict(net), "/tmp/qnet.safetensors") - - import matplotlib.pyplot as plt - plt.plot(losses[20:]) - plt.plot(test_losses[20:]) - plt.show() diff --git a/extra/optimization/helpers.py b/extra/optimization/helpers.py deleted file mode 100644 index 88807fba97..0000000000 --- a/extra/optimization/helpers.py +++ /dev/null @@ -1,124 +0,0 @@ -# stuff needed to unpack a kernel -from tinygrad import Variable -from tinygrad.codegen.opt import Opt, OptOps -from tinygrad.uop.ops import UOp, Ops, KernelInfo -from tinygrad.dtype import dtypes, PtrDType -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View -from tinygrad.helpers import getenv -from tinygrad.engine.realize import get_program -inf, nan = float('inf'), float('nan') -UOps = Ops - -# kernel unpacker -from tinygrad.codegen.opt.kernel import Kernel -def ast_str_to_ast(ast_str:str) -> UOp: return eval(ast_str) -def ast_str_to_lin(ast_str:str, opts=None): return Kernel(ast_str_to_ast(ast_str), opts=opts) -def kern_str_to_lin(kern_str:str, opts=None): - (ast, applied_opts,) = eval(kern_str) - k = Kernel(ast, opts=opts) - k.apply_opts(applied_opts) - return k - -# load worlds, a dataset of about 12k kernels -import gzip -from pathlib import Path -import random -from tinygrad.helpers import dedup, DEBUG -def load_worlds(filter_reduce=True, filter_noimage=True, filter_novariable=True): - fn = Path(__file__).parent.parent / "datasets/sops.gz" - ast_strs = dedup(gzip.open(fn).read().decode('utf-8').strip().split("\n")) - assert len(ast_strs) >= getenv("MIN_ASTS", 1000), f"dataset size = {len(ast_strs)} is too small" - if DEBUG >= 1: print(f"loaded {len(ast_strs)=} before filters") - if filter_reduce: ast_strs = [x for x in ast_strs if "REDUCE_AXIS" in x] - if filter_noimage: ast_strs = [x for x in ast_strs if "dtypes.image" not in x] - if filter_novariable: ast_strs = [x for x in ast_strs if "DEFINE_VAR" not in x] - if DEBUG >= 1: print(f"loaded {len(ast_strs)=} after filters {filter_reduce=}, {filter_noimage=}, {filter_novariable=}") - random.seed(1337) - random.shuffle(ast_strs) - return ast_strs - -def assert_same_lin(l1, l2): - assert l1.colored_shape() == l2.colored_shape() - assert all(x==y for x,y in zip(l1.sts, l2.sts)) - -# get features -import math - -MAX_DIMS = 16 -MAX_BUFS = 9 -def lin_to_feats(lin:Kernel, use_sts=True): - assert lin.shape_len < MAX_DIMS, "too many dims" - - all_colors = ["blue", "cyan", "white", "green", "red", "magenta", "yellow"] - lc = [all_colors.index(x) for x in lin.colors()] - - ret = [] - # before, some generic linearizer stuff - ret.append(lin.upcasted) - ret.append(lin.local_dims) - - # first, the full shape, including the colors - for s,os,c in zip(lin.full_shape,lin.output_shape,lc): - if isinstance(s, UOp): - ret.append(False) - ret += [0]*9 - else: - ret.append(True) - ret.append(math.log2(s)) - ret.append(min(33, s)) - ret.append(math.log2(os)) - ret.append(min(33, os)) - ret.append(s%2 == 0) - ret.append(s%3 == 0) - ret.append(s%4 == 0) - ret.append(s%8 == 0) - ret.append(s%16 == 0) - cc = [0]*7 - cc[c] = 1 - ret += cc - ret += [0] * (17*(MAX_DIMS-len(lin.full_shape))) - ret = [float(x) for x in ret] - - if use_sts: - my_sts = dedup([(x.shape == lin.full_shape, x.is_expanded(), any(v.mask is not None for v in x.views), len(x.views)) for x in lin.sts]) - assert len(my_sts) < MAX_BUFS - sts_len = 3 + 5*MAX_DIMS - for s in my_sts: - ret.append(s[0]) # reduce - ret.append(s[2]) # has mask - ret.append(s[3]) # len views - for d in s[1]: - ret.append(d is None) - ret.append(d == 0) - ret.append(d == 1) - ret.append(min(33, d) if d is not None else -1) - if d is not None and d >= 1: ret.append(math.log2(d)) - else: ret.append(-1) - ret += [0] * (5*(MAX_DIMS - len(s[1]))) - ret += [0] * (sts_len*(MAX_BUFS - len(my_sts))) - assert len(ret) == 1021, f"wrong len {len(ret)}" - else: - assert len(ret) == 274, f"wrong len {len(ret)}" - return ret - -from tinygrad.device import Device, Buffer -from tinygrad.codegen.opt.search import _ensure_buffer_alloc, _time_program -from tinygrad.helpers import to_function_name, CACHELEVEL, diskcache_get, diskcache_put - -def time_linearizer(lin:Kernel, rawbufs:list[Buffer], allow_test_size=True, max_global_size=65536, cnt=3, disable_cache=False, clear_l2=False) -> float: # noqa: E501 - key = {"ast": lin.ast.key, "opts": str(lin.applied_opts), "allow_test_size": allow_test_size, - "max_global_size": max_global_size, "clear_l2": clear_l2, "device": lin.opts.device, "suffix": lin.opts.suffix} - if not disable_cache and CACHELEVEL >= 2 and (val:=diskcache_get("time_linearizer", key)) is not None: return min(val) - - dev = Device[lin.opts.device] - assert dev.compiler is not None - - rawbufs = _ensure_buffer_alloc(rawbufs) - var_vals: dict[str, int] = {k.expr:int(k.vmax+k.vmin)//2 for k in lin.ast.variables()} - p = get_program(lin.get_optimized_ast(), lin.opts) - tms = _time_program(p, dev.compiler.compile(p.src), var_vals, rawbufs, - max_global_size=max_global_size if allow_test_size else None, clear_l2=clear_l2, cnt=cnt, name=to_function_name(lin.name)) - - if CACHELEVEL >= 2: diskcache_put("time_linearizer", key, tms) - return min(tms) diff --git a/extra/optimization/pretrain_valuenet.py b/extra/optimization/pretrain_valuenet.py deleted file mode 100644 index c73e429a49..0000000000 --- a/extra/optimization/pretrain_valuenet.py +++ /dev/null @@ -1,88 +0,0 @@ -from tinygrad.codegen.opt.kernel import Kernel -from tqdm import tqdm, trange -import math -import random -from tinygrad.tensor import Tensor -from tinygrad.nn import Linear -from tinygrad.nn.optim import Adam -from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict - -# stuff needed to unpack a kernel -from tinygrad.uop.ops import LazyOp, TernaryOps, BinaryOps, UnaryOps, ReduceOps, BufferOps, MemBuffer, ConstBuffer -from tinygrad.dtype import dtypes -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View -from tinygrad.uop.ops import Variable -inf, nan = float('inf'), float('nan') -from tinygrad.codegen.opt.kernel import Opt, OptOps - -from extra.optimization.helpers import lin_to_feats, MAX_DIMS - -# NOTE: this is not real value of the state, it's just a prediction of the runtime -INNER = 512 -class ValueNet: - def __init__(self, feats=240, out=1): - self.l1 = Linear(feats,INNER) - self.l2 = Linear(INNER,INNER) - self.l3 = Linear(INNER,INNER) - self.l4 = Linear(INNER,out) - def __call__(self, x): - x = self.l1(x).relu() - x = self.l2(x).relu() - x = self.l3(x).relu().dropout(0.8) - return self.l4(x) - -if __name__ == "__main__": - net = ValueNet() - optim = Adam(get_parameters(net)) - - TEST_SIZE = 256 - - dset = open("/tmp/logtm").read().strip().split("\n") - random.seed(1337) - random.shuffle(dset) - - X,Y = [], [] - for i,x in enumerate(tqdm(dset)): - ast, opts, tms = eval(x) - lin = Kernel(ast) - for o in opts: lin.apply_opt(o) - if lin.shape_len >= MAX_DIMS: continue - if min(tms) == float('inf'): continue - X.append(lin_to_feats(lin)) - Y.append([math.log(min(tms))]) - print(f"got {len(X)} samples") - - X_test,Y_test = Tensor(X[-TEST_SIZE:]), Tensor(Y[-TEST_SIZE:]) - X,Y = X[:-TEST_SIZE], Y[:-TEST_SIZE] - - def get_minibatch(X,Y,bs): - xs, ys = [], [] - for _ in range(bs): - sel = random.randint(0, len(X)-1) - xs.append(X[sel]) - ys.append(Y[sel]) - return Tensor(xs), Tensor(ys) - - Tensor.training = True - losses = [] - test_losses = [] - test_loss = float('inf') - for i in (t:=trange(2000)): - x,y = get_minibatch(X,Y,bs=256) - out = net(x) - loss = (out-y).square().mean() - optim.zero_grad() - loss.backward() - optim.step() - t.set_description(f"loss {loss.numpy():7.2f}, test loss {test_loss:7.2f}") - losses.append(loss.numpy().item()) - test_losses.append(test_loss) - if i % 10: test_loss = (net(X_test)-Y_test).square().mean().numpy().item() - - safe_save(get_state_dict(net), "/tmp/valuenet.safetensors") - - import matplotlib.pyplot as plt - plt.plot(losses[200:]) - plt.plot(test_losses[200:]) - plt.show() diff --git a/extra/test_pyrender.py b/extra/test_pyrender.py deleted file mode 100644 index 8954dde4a6..0000000000 --- a/extra/test_pyrender.py +++ /dev/null @@ -1,20 +0,0 @@ -from extra.optimization.helpers import load_worlds, ast_str_to_ast -from tinygrad.helpers import tqdm -from tinygrad.uop.ops import pyrender, UOp, Ops -from tinygrad import dtypes -from tinygrad.shape.shapetracker import ShapeTracker, View -inf, nan = float('inf'), float('nan') - -if __name__ == "__main__": - ast_strs = load_worlds() - for i, ast_str in enumerate(tqdm(ast_strs)): - good_ast = ast_str_to_ast(ast_str) - code = '\n'.join(pyrender(good_ast)) - print("\n***************\n\n"+code) - exec(code) - if str(good_ast) != str(ast): - print(code) - print("MISMATCH") - print(good_ast) - print(ast) - break \ No newline at end of file diff --git a/test/external/external_test_hcq_fuzz_failures.py b/test/external/external_test_hcq_fuzz_failures.py deleted file mode 100644 index 4a381d5336..0000000000 --- a/test/external/external_test_hcq_fuzz_failures.py +++ /dev/null @@ -1,61 +0,0 @@ -# ruff: noqa: E501 -import os -os.environ["VALIDATE_HCQ"]="1" - -import unittest, random -import numpy as np -from tinygrad.codegen.opt.kernel import Kernel, KernelOptError -from tinygrad.device import is_dtype_supported -from tinygrad.uop.ops import UOp, Ops -from tinygrad.codegen.opt.search import Opt, OptOps -from tinygrad import Device, dtypes, Tensor -from test.external.fuzz_linearizer import compare_linearizer, compare_states, get_fuzz_rawbuf_like - -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View - -def helper_test_lin(lin: Kernel, opts, failed_platforms, validate_device, rtol=1e-2, atol=1e-2): - if any(b.dtype.base == dtypes.half for b in lin.bufs) and not is_dtype_supported(dtypes.half): return - if any(b.dtype.base == dtypes.bfloat16 for b in lin.bufs) and not is_dtype_supported(dtypes.bfloat16): return - - try: - lin.apply_opts(opts) - except KernelOptError: - # it's considered fixed if we invalidated the opts - assert Device.DEFAULT not in failed_platforms, f"unexpected success on {Device.DEFAULT}" - return - - (msg, rawbufs, var_vals, ground_truth, state1) = compare_linearizer(lin, rtol=rtol, atol=atol) - if msg in ["PASS", "KernelOptError"]: - # it's considered fixed if we invalidated the opts - assert Device.DEFAULT not in failed_platforms, f"unexpected success on {Device.DEFAULT}" - else: - assert Device.DEFAULT in failed_platforms, f"failed on {Device.DEFAULT} with {msg}" - - validate_lin = lin.copy() - validate_lin.opts = validate_device.renderer - validate_rawbufs = [get_fuzz_rawbuf_like(x, copy=True, force_device=validate_device.dname) for x in rawbufs] - (_msg, _, _, _, state2) = compare_linearizer(validate_lin, validate_rawbufs, var_vals, ground_truth, rtol=rtol, atol=atol) - - if _msg in ["PASS"] and compare_states(state1, state2): - assert Device.DEFAULT not in failed_platforms, f"unexpected success on {Device.DEFAULT}" - else: - assert Device.DEFAULT in failed_platforms, f"failed on {Device.DEFAULT} with {msg}" - - return lin - -class TestHCQFuzzFailures(unittest.TestCase): - def setUp(self): - random.seed(42) - np.random.seed(42) - Tensor.manual_seed(42) - - @unittest.skipUnless(Device.DEFAULT in {"QCOM"}, "for QCOM") - def test_failure_1(self): - ast = UOp(Ops.SINK, dtypes.void, arg=None, src=( UOp(Ops.STORE, dtypes.void, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=0, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=0, mask=None, contiguous=True),)), src=()), UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 2, 4)), arg=1, src=()), x39:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=0, mask=((0, 1), (0, 6)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=2, src=()), x39,)),)),)), UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 2, 4)), arg=3, src=()), x46:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-6, mask=((0, 1), (6, 12)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=4, src=()), x46,)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=5, src=()), x54:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (12, 13)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=6, src=()), x54,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=7, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-13, mask=((0, 1), (13, 17)), contiguous=False),)), src=()),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=8, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-17, mask=((0, 1), (17, 21)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=9, src=()), x68:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (21, 22)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=10, src=()), x68,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=11, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-22, mask=((0, 1), (22, 26)), contiguous=False),)), src=()),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=12, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-26, mask=((0, 1), (26, 30)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=13, src=()), x82:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (30, 31)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=14, src=()), x82,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=15, src=()), x90:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (31, 32)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=16, src=()), x90,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=17, src=()), x98:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (32, 33)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=18, src=()), x98,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=19, src=()), x106:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (33, 34)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=20, src=()), x106,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=21, src=()), x114:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (34, 35)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=22, src=()), x114,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=23, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-35, mask=((0, 1), (35, 39)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=24, src=()), x125:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (39, 40)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=25, src=()), x125,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=26, src=()), x133:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (40, 41)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=27, src=()), x133,)),)),)),)),)), UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 2, 4)), arg=28, src=()), x140:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-41, mask=((0, 1), (41, 47)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=29, src=()), x140,)),)),)),)), UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 2, 4)), arg=30, src=()), x147:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-47, mask=((0, 1), (47, 53)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=31, src=()), x147,)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=32, src=()), x155:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (53, 54)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=33, src=()), x155,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=34, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-54, mask=((0, 1), (54, 58)), contiguous=False),)), src=()),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=35, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-58, mask=((0, 1), (58, 62)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=36, src=()), x169:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (62, 63)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=37, src=()), x169,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=38, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-63, mask=((0, 1), (63, 67)), contiguous=False),)), src=()),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=39, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-67, mask=((0, 1), (67, 71)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=40, src=()), x183:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (71, 72)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=41, src=()), x183,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=42, src=()), x191:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (72, 73)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=43, src=()), x191,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=44, src=()), x199:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (73, 74)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=45, src=()), x199,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=46, src=()), x207:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (74, 75)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=47, src=()), x207,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=48, src=()), x215:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (75, 76)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=49, src=()), x215,)),)),)),)),)), UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=50, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-76, mask=((0, 1), (76, 80)), contiguous=False),)), src=()),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=51, src=()), x226:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (80, 81)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=52, src=()), x226,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=53, src=()), x234:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (81, 82)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=54, src=()), x234,)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=55, src=()), x243:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (82, 83)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=56, src=()), x243,)),)),)), UOp(Ops.ADD, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 1, 4)), arg=57, src=()), x250:=UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 0), offset=0, mask=((0, 1), (83, 84)), contiguous=False),)), src=()),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(), arg=58, src=()), x250,)),)),)),)),)),)), UOp(Ops.CAST, dtypes.float, arg=None, src=( UOp(Ops.LOAD, dtypes.float, arg=None, src=( UOp(Ops.DEFINE_GLOBAL, dtypes.imageh((1, 128, 4)), arg=59, src=()), UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1, 596), strides=(0, 1), offset=-84, mask=((0, 1), (84, 596)), contiguous=False),)), src=()),)),)),)),)),)) # noqa: E501 - - opts = [Opt(op=OptOps.UPCAST, axis=0, arg=4)] - helper_test_lin(Kernel(ast), opts, failed_platforms=[], validate_device=Device["CL"]) - -if __name__ == '__main__': - unittest.main() diff --git a/test/external/external_test_nv.py b/test/external/external_test_nv.py deleted file mode 100644 index 8be738d951..0000000000 --- a/test/external/external_test_nv.py +++ /dev/null @@ -1,60 +0,0 @@ -import unittest, struct, array, ctypes -from tinygrad import Device, dtypes, Tensor -from tinygrad.helpers import to_mv -from tinygrad.runtime.ops_nv import NVDevice, HWQueue -from tinygrad.codegen.opt.search import Opt, OptOps -from tinygrad.engine.realize import get_runner, CompiledRunner, get_program -from test.external.fuzz_linearizer import get_fuzz_rawbufs - -from tinygrad.codegen.opt.kernel import Kernel -from tinygrad.uop.ops import LazyOp, Ops, ReduceOps, BufferOps, MemBuffer -from tinygrad.shape.shapetracker import ShapeTracker -from tinygrad.shape.view import View - -@unittest.skipUnless(Device.DEFAULT == "NV", "NV specific tests/fixes") -class TestNV(unittest.TestCase): - @classmethod - def setUpClass(self): - TestNV.d0: NVDevice = Device["NV"] - TestNV.a = Tensor([0.,1.], device="NV").realize() - TestNV.b = self.a + 1 - si = self.b.schedule()[-1] - TestNV.d0_runner = get_runner(TestNV.d0.device, si.ast) - TestNV.b.uop.buffer.allocate() - TestNV.addr = struct.pack("QQ", TestNV.b.uop.buffer._buf.va_addr, TestNV.a.uop.buffer._buf.va_addr) - - def test_error_on_huge_dims(self): - ast = LazyOp(op=BufferOps.STORE, src=(LazyOp(op=ReduceOps.SUM, src=(LazyOp(op=Ops.CAST, src=(LazyOp(op=Ops.MUL, src=(LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=1, dtype=dtypes.half, st=ShapeTracker(views=(View(shape=(1, 1, 1024, 683), strides=(0, 0, 0, 1), offset=0, mask=None, contiguous=False),)))), LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=2, dtype=dtypes.half, st=ShapeTracker(views=(View(shape=(1, 1, 1024, 683), strides=(0, 0, 683, 1), offset=0, mask=None, contiguous=True),))))), arg=None),), arg=dtypes.float),), arg=(3,)),), arg=MemBuffer(idx=0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(1, 1, 1024, 1), strides=(0, 0, 1, 0), offset=0, mask=None, contiguous=True),)))) # noqa: E501 - opts = [Opt(op=OptOps.GROUP, axis=0, arg=0), Opt(op=OptOps.PADTO, axis=1, arg=32), Opt(op=OptOps.UNROLL, axis=0, arg=4), Opt(op=OptOps.LOCAL, axis=0, arg=2), Opt(op=OptOps.LOCAL, axis=0, arg=2)] # noqa: E501 - with self.assertRaises(RuntimeError) as cm: - lin = Kernel(ast) - lin.apply_opts(opts) - rawbufs = get_fuzz_rawbufs(lin) - prg = CompiledRunner(get_program(lin.get_optimized_ast(), lin.opts)) - prg(rawbufs, {}, wait=True) - self.assertEqual(str(cm.exception), "This is a runtime error message") - - def test_buf4_usage(self): - TestNV.along = Tensor([105615], device="NV").realize() - ast = LazyOp(op=BufferOps.STORE, src=(LazyOp(op=Ops.SIN, src=(LazyOp(op=Ops.CAST, src=(LazyOp(op=BufferOps.LOAD, src=(), arg=MemBuffer(idx=1, dtype=dtypes.ulong, st=ShapeTracker(views=(View(shape=(3,), strides=(1,), offset=0, mask=None, contiguous=True),)))),), arg=dtypes.float),), arg=None),), arg=MemBuffer(idx=0, dtype=dtypes.float, st=ShapeTracker(views=(View(shape=(3,), strides=(1,), offset=0, mask=None, contiguous=True),)))) # noqa: E501 - temp_runner = get_runner(TestNV.d0.device, (ast,)) - temp_runner([TestNV.b.uop.buffer, TestNV.along.uop.buffer], var_vals={}) - val = TestNV.b.uop.buffer.as_buffer().cast("f")[0] - assert abs(val - 0.80647) < 0.001, f"got val {val}" - - def test_kernargs_no_oob_access(self): - kernargs_start = TestNV.d0._gpu_alloc((2 << 20), map_to_cpu=True).va_addr - kernargs = kernargs_start + ((2 << 20) - TestNV.d0_runner._prg.kernargs_alloc_size) - to_mv(kernargs, 0x160).cast('I')[:] = array.array('I', TestNV.d0_runner._prg.constbuffer_0) - ctypes.memmove(kernargs + TestNV.d0_runner._prg.kernargs_offset, TestNV.addr, len(TestNV.addr)) - - q = HWQueue() - q.exec(TestNV.d0_runner._prg, kernargs, TestNV.d0_runner.global_size, TestNV.d0_runner.local_size) - q.signal(TestNV.d0.timeline_signal, TestNV.d0.timeline_value).submit(TestNV.d0) - TestNV.d0._wait_signal(TestNV.d0.timeline_signal, TestNV.d0.timeline_value) - TestNV.d0.timeline_value += 1 - val = TestNV.b.uop.buffer.as_buffer().cast("f")[0] - assert val == 1.0, f"got val {val}" - -if __name__ == "__main__": - unittest.main() diff --git a/test/external/fuzz_shapetracker.py b/test/external/fuzz_shapetracker.py deleted file mode 100644 index ba11fa1ebd..0000000000 --- a/test/external/fuzz_shapetracker.py +++ /dev/null @@ -1,61 +0,0 @@ -import random -from tinygrad.helpers import DEBUG, getenv -from test.unit.test_shapetracker import CheckingShapeTracker - -def do_permute(st): - perm = list(range(0, len(st.shape))) - random.shuffle(perm) - perm = tuple(perm) - if DEBUG >= 1: print("st.permute(", perm, ")") - st.permute(perm) - -def do_pad(st): - c = random.randint(0, len(st.shape)-1) - pad = tuple((random.randint(0,2), random.randint(0,2)) if i==c else (0,0) for i in range(len(st.shape))) - if DEBUG >= 1: print("st.pad(", pad, ")") - st.pad(pad) - -def do_reshape_split_one(st): - c = random.randint(0, len(st.shape)-1) - poss = [n for n in [1,2,3,4,5] if st.shape[c]%n == 0] - spl = random.choice(poss) - shp = st.shape[0:c] + (st.shape[c]//spl, spl) + st.shape[c+1:] - if DEBUG >= 1: print("st.reshape(", shp, ")") - st.reshape(shp) - -def do_reshape_combine_two(st): - if len(st.shape) < 2: return - c = random.randint(0, len(st.shape)-2) - shp = st.shape[:c] + (st.shape[c] * st.shape[c+1], ) + st.shape[c+2:] - if DEBUG >= 1: print("st.reshape(", shp, ")") - st.reshape(shp) - -def do_shrink(st): - c = random.randint(0, len(st.shape)-1) - while 1: - shrink = tuple((random.randint(0,s), random.randint(0,s)) if i == c else (0,s) for i,s in enumerate(st.shape)) - if all(x= 1: print("st.shrink(", shrink, ")") - st.shrink(shrink) - -def do_flip(st): - flip = tuple(random.random() < 0.5 for _ in st.shape) - if DEBUG >= 1: print("st.flip(", flip, ")") - st.flip(flip) - -def do_expand(st): - c = [i for i,s in enumerate(st.shape) if s==1] - if len(c) == 0: return - c = random.choice(c) - expand = tuple(random.choice([2,3,4]) if i==c else s for i,s in enumerate(st.shape)) - if DEBUG >= 1: print("st.expand(", expand, ")") - st.expand(expand) - -shapetracker_ops = [do_permute, do_pad, do_shrink, do_reshape_split_one, do_reshape_combine_two, do_flip, do_expand] - -if __name__ == "__main__": - random.seed(42) - for _ in range(getenv("CNT", 200)): - st = CheckingShapeTracker((random.randint(2, 10), random.randint(2, 10), random.randint(2, 10))) - for i in range(8): random.choice(shapetracker_ops)(st) - st.assert_same() diff --git a/test/external/fuzz_shapetracker_math.py b/test/external/fuzz_shapetracker_math.py deleted file mode 100644 index 9d1e86a654..0000000000 --- a/test/external/fuzz_shapetracker_math.py +++ /dev/null @@ -1,34 +0,0 @@ -import random -from tinygrad.helpers import getenv, DEBUG, colored, trange -from tinygrad.shape.shapetracker import ShapeTracker -from test.external.fuzz_shapetracker import shapetracker_ops -from test.unit.test_shapetracker_math import st_equal, MultiShapeTracker - -def fuzz_plus() -> tuple[ShapeTracker, ShapeTracker]: - m = MultiShapeTracker([ShapeTracker.from_shape((random.randint(1, 10), random.randint(1, 10), random.randint(1, 10)))]) - for _ in range(4): random.choice(shapetracker_ops)(m) - backup = m.sts[0] - m.sts.append(ShapeTracker.from_shape(m.sts[0].shape)) - for _ in range(4): random.choice(shapetracker_ops)(m) - st_sum = backup + m.sts[1] - return m.sts[0], st_sum - -if __name__ == "__main__": - if seed:=getenv("SEED"): random.seed(seed) - total = getenv("CNT", 1000) - for fuzz in [globals()[f'fuzz_{x}'] for x in getenv("FUZZ", "plus").split(",")]: - same_but_neq = 0 - for _ in trange(total, desc=f"{fuzz}"): - st1, st2 = fuzz() - eq = st_equal(st1, st2) - if getenv("CHECK_NEQ") and eq and st1.simplify() != st2.simplify(): - print(colored("same but unequal", "yellow")) - print(st1.simplify()) - print(st2.simplify()) - same_but_neq += 1 - if DEBUG >= 1: - print(f"EXP: {st1}") - print(f"GOT: {st2}") - print(colored("****", "green" if eq else "red")) - if not eq: exit(0) - if getenv("CHECK_NEQ"): print(f"same but unequal {same_but_neq}/{total} = {(same_but_neq/total)*100:.2f}%")