mirror of https://github.com/tinygrad/tinygrad.git
remove required_optimizations (#9848)
@@ -54,7 +54,6 @@ def randoms():

 def ast_to_cuda_prog(compiler, ast, opts):
   k = Kernel(ast)
-  k.required_optimizations()
   k.apply_opts(opts)
   p = k.to_program()
   return CUDAProgram(device, p.function_name, compiler.compile(p.src))
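The hunk above is the whole post-change recipe for compiling an ast by hand: construct a Kernel, apply an explicit list of opts, and lower it with to_program(); nothing is substituted for the deleted required_optimizations() call. Below is a minimal device-agnostic sketch of the same three steps; the tinygrad.codegen.kernel import path and the choice of the default device's renderer are assumptions, not something this diff shows.

from tinygrad import Device
from tinygrad.codegen.kernel import Kernel   # assumed import path for this revision

def ast_to_prog(ast, opts):
  # same flow as ast_to_cuda_prog above, minus the removed required_optimizations() call
  k = Kernel(ast, opts=Device[Device.DEFAULT].renderer)
  k.apply_opts(opts)      # explicit Opt list, applied in order
  return k.to_program()   # render the kernel into a program for the backend compiler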
test/external/fuzz_linearizer.py (1 change, vendored)
@@ -132,7 +132,6 @@ def compare_linearizer(lin: Kernel, rawbufs=None, var_vals=None, ground_truth=No

   if ground_truth is None and not has_bf16:
     unoptimized = Kernel(lin.ast)
-    unoptimized.required_optimizations()
     if run_linearizer(unoptimized, rawbufs, var_vals)[0] != "PASS":
       return ("BASELINE_ERROR", rawbufs, var_vals, ground_truth, None)
     ground_truth = np.frombuffer(rawbufs[0].as_buffer(), _to_np_dtype(rawbufs[0].dtype)).copy()
test/external/speed_beam_v_hcopt.py (2 changes, vendored)
@@ -19,7 +19,6 @@ if __name__ == "__main__":

   def new_lin(): return ast_str_to_lin(ast, opts=dev.renderer)

   k = new_lin()
-  # k.required_optimizations()

   if not (used_tensor_cores:=k.apply_tensor_cores(getenv("TC", 1))): k = hand_coded_optimizations(k)
@@ -30,7 +29,6 @@ if __name__ == "__main__":
     lins.append(("hc", new_lin()))
     lins[-1][1] = hand_coded_optimizations(lins[-1][1])
   kb = new_lin()
-  # kb.required_optimizations()
   test_rawbuffers = bufs_from_lin(kb) # allocate scratch buffers for optimization
   lins.append((f"beam{BEAM.value}", beam_search(kb, test_rawbuffers, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))))
   timed = sorted([(nm, tk, time_linearizer(tk, test_rawbuffers, allow_test_size=False, clear_l2=True)) for nm, tk in lins], key=lambda x: x[2])
test/external/verify_kernel.py (1 change, vendored)
@@ -51,7 +51,6 @@ if __name__ == "__main__":
     print(test_lin.ast)
     print(test_lin.applied_opts)
     unoptimized_lin = Kernel(test_lin.ast)
-    unoptimized_lin.required_optimizations()
     print(f"{unoptimized_lin.colored_shape()} -> {test_lin.colored_shape()}")
     (msg,rb,vv,gt) = compare_linearizer(test_lin, None, None, None, rtol=args.rtol, atol=args.atol)
     if msg != "PASS":
@@ -37,7 +37,6 @@ class TestLinearizerDumb(unittest.TestCase):
             UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(64, 1, 512, 7, 7, 1, 1, 1), strides=(0, 0, 0, 0, 0, 0, 0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),))
     opts = [Opt(op=OptOps.TC, axis=2, arg=(-1, 2)), Opt(op=OptOps.UPCAST, axis=2, arg=0), Opt(op=OptOps.UNROLL, axis=1, arg=0)]
     k = Kernel(ast, opts=Device["METAL"].renderer)
-    k.required_optimizations()
     k.apply_opts(opts)
     prg = k.to_program()
     print(prg.src)
@@ -72,7 +71,6 @@ class TestLinearizerDumb(unittest.TestCase):
             UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(1000, 1), strides=(0, 0), offset=0, mask=None, contiguous=False),)), src=()),)),)),)),)),))
     opts = [Opt(op=OptOps.UNROLL, axis=0, arg=4), Opt(op=OptOps.LOCAL, axis=0, arg=8)]
     k = Kernel(ast, opts=Device[Device.DEFAULT].renderer)
-    k.required_optimizations()
     k.apply_opts(opts)
     prg = k.to_program()
     print(prg.src)
@@ -90,7 +88,6 @@ class TestLinearizerDumb(unittest.TestCase):
             UOp(Ops.VIEW, dtypes.void, arg=ShapeTracker(views=(View(shape=(26, 49), strides=(0, -1), offset=48, mask=((0, 26), (24, 49)), contiguous=False), View(shape=(25, 25), strides=(1, 50), offset=0, mask=None, contiguous=False))), src=()),)),)),)),))
     opts = [Opt(op=OptOps.GROUP, axis=0, arg=0), Opt(op=OptOps.PADTO, axis=0, arg=32), Opt(op=OptOps.LOCAL, axis=0, arg=4), Opt(op=OptOps.UPCAST, axis=0, arg=0)]
     k = Kernel(ast, opts=Device[Device.DEFAULT].renderer)
-    k.required_optimizations()
     k.apply_opts(opts)
     prg = k.to_program()
     print(prg.src)
@@ -22,7 +22,8 @@ def simplify_valid_load(buf:UOp, start_idx:UOp, valid:UOp) -> UOp|None:
   # can drop valid if idx is out of bound when valid is False
   drop_stmt = []
   for stmt in split_uop(valid, Ops.AND):
-    X, is_upper_bound, c = parse_valid(stmt)
+    try: X, is_upper_bound, c = parse_valid(stmt)
+    except ValueError: return None

     # for X0 + X1 + ... >= 1, check if it's out of bound when Xi = 0 for all i
     if not is_upper_bound and c == 1 and all(u.op in GroupOp.Irreducible and u.vmin == 0 for u in split_uop(X, Ops.ADD)):
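For context, the change above turns a hard failure inside simplify_valid_load into a graceful bail-out: if any AND-clause of the valid expression cannot be parsed, the function now returns None (leave the load unsimplified) instead of letting parse_valid raise. A self-contained sketch of that control-flow pattern, using a hypothetical parse_clause stand-in rather than tinygrad's real parse_valid/UOp machinery:

# Schematic only: parse_clause and string clauses stand in for parse_valid and UOps.
def parse_clause(stmt: str) -> tuple[str, bool, int]:
  if ">=" not in stmt: raise ValueError(f"cannot parse {stmt!r}")
  expr, bound = stmt.split(">=")
  return expr.strip(), False, int(bound)

def simplify(valid_clauses: list[str]) -> list[str] | None:
  drop: list[str] = []
  for stmt in valid_clauses:
    # same shape as the diff: a parse failure on any clause abandons the whole
    # simplification (return None) rather than propagating an exception
    try: expr, is_upper_bound, c = parse_clause(stmt)
    except ValueError: return None
    if not is_upper_bound and c == 1: drop.append(stmt)  # stand-in for the out-of-bound check
  return [s for s in valid_clauses if s not in drop]

print(simplify(["x + y >= 1", "z >= 2"]))  # every clause parses -> simplified list
print(simplify(["x + y >= 1", "???"]))     # one unparseable clause -> None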
@@ -6,7 +6,7 @@ from tinygrad.ops import Ops, resolve

 def hand_coded_optimizations(k:Kernel) -> Kernel:
   # make a copy so it does not mutate the input
-  k = k.copy().required_optimizations()
+  k = k.copy()

   # should use matvec - TODO: adjust/tune based on the wide vs tall/large vs small mat
   MV_BLOCKSIZE, MV_THREADS_PER_ROW, MV_ROWS_PER_THREAD = getenv("MV_BLOCKSIZE", 4), getenv("MV_THREADS_PER_ROW", 8), getenv("MV_ROWS_PER_THREAD", 4)
@@ -432,13 +432,6 @@ class Kernel:
   def apply_opts(self, opts:Sequence[Opt]):
     for opt in opts: self.apply_opt(opt)

-  def required_optimizations(self) -> Kernel:
-    if isinstance(self.membufs[0].dtype, ImageDType):
-      unit_stride_axes_mul_4 = [i for i in self.sts[0].unit_stride_axes(ignore_valid=True) if self.sts[0].shape[i]%4 == 0]
-      assert unit_stride_axes_mul_4, f"needs a unit stride axis in {self.bufs[0]}"
-      if all(x < self.first_upcast for x in unit_stride_axes_mul_4): self.apply_opt(Opt(OptOps.UPCAST, unit_stride_axes_mul_4[0], 4))
-    return self
-
   # **** kernel outputs ****

   kernel_cnt: Final[defaultdict[str, int]] = defaultdict(int)
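The deleted method above is the entire feature being removed: for an image-typed output buffer it forced a 4-wide UPCAST on a unit-stride axis before any other optimization ran. A caller that still wanted that behavior after this commit could apply the equivalent opt by hand; a minimal sketch reusing the names visible in the removed body (the import paths are assumptions about this tinygrad revision, and this helper does not exist in the repo):

from tinygrad.codegen.kernel import Kernel, Opt, OptOps   # assumed import path
from tinygrad.dtype import ImageDType                      # assumed import path

def upcast_image_output(k: Kernel) -> Kernel:
  # external re-implementation of the deleted required_optimizations body
  if isinstance(k.membufs[0].dtype, ImageDType):
    axes = [i for i in k.sts[0].unit_stride_axes(ignore_valid=True) if k.sts[0].shape[i] % 4 == 0]
    assert axes, f"needs a unit stride axis in {k.bufs[0]}"
    if all(x < k.first_upcast for x in axes): k.apply_opt(Opt(OptOps.UPCAST, axes[0], 4))
  return k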
@@ -14,12 +14,12 @@ from tinygrad.engine.schedule import ScheduleItem

 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
 def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
-  k = Kernel(ast, opts=renderer).required_optimizations()
+  k = Kernel(ast, opts=renderer)
   if not NOOPT:
     if not k.apply_tensor_cores(getenv("TC", 1)): k = hand_coded_optimizations(k)
     if BEAM >= 1:
       from tinygrad.engine.search import beam_search, bufs_from_lin
-      kb = Kernel(ast, opts=renderer).required_optimizations()
+      kb = Kernel(ast, opts=renderer)
       rawbufs = bufs_from_lin(kb, allocate=False)
       k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
   if logkerns is not None: logkerns.writelines([f"{(k.ast, k.applied_opts)}\n"])
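After this hunk, get_kernel is the single place where the optimization strategy is chosen: NOOPT skips everything, TC (default 1) tries tensor cores first, hand_coded_optimizations runs as the fallback, and BEAM >= 1 replaces the result with a beam_search over a fresh, unoptimized Kernel. A rough usage sketch follows; the Tensor.schedule() route to an ast, the tinygrad.engine.realize module path, and driving the BEAM/TC branches via environment variables are assumptions about this revision, not shown in the diff.

from tinygrad import Tensor, Device
from tinygrad.engine.realize import get_kernel   # assumed module path for the hunk above

# build a small computation, take the ast of its last schedule item, and let
# get_kernel pick the opts (hand-coded, tensor cores, or BEAM search per env vars)
out = (Tensor.rand(16, 16) @ Tensor.rand(16, 16)).sum()
si = out.schedule()[-1]
k = get_kernel(Device[Device.DEFAULT].renderer, si.ast)
print(k.applied_opts)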