Remove ZeroView (#748)

* no zeroview start

* closer

* stride mask

* st tests pass, delete ZeroView

* byebye zv

* close to working

* not contiguous with mask

* subtract, don't add

* mask on view

* ugh, that shouldn't have been in there

* shape merge

* bugfixes

* fuzzer + 4 fuzzer failures

* fuzzer for symbolic

* more fuzzing and nothing

* that fuzzer doesn't hit either

* fixes padding...ugh

* no more offsets

* working

* rewrite load and store

* all checks

* fix idxs

* progress

* bugfix

* float4_axis

* works

* cleanups

* complex valids_okay
This commit is contained in:
George Hotz
2023-04-17 08:21:46 -07:00
committed by GitHub
parent 4e17d27d09
commit 8b7ecd63bb
9 changed files with 391 additions and 181 deletions

View File

@@ -127,6 +127,7 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
else:
kk(f"{newvar.render(True)} = {code_for_op[args](*[x.render() for x in vin])};")
elif uop == UOps.LOAD and newvar is not None and newvar.ltype == LocalTypes.float:
assert not isinstance(bufs[args.i].dtype, ImageDType), "image load must be float4"
# TODO: merge with CONST?
if bufs[args.i] is not None and isinstance(bufs[args.i].realized, RawConst):
# nan? inf?
@@ -151,6 +152,7 @@ def uops_to_cstyle(uops:List[UOp], bufs:List[Union[LocalBuffer,LazyBuffer]], lan
if args[2].min == 1: kk(f"{newvar.render(True)} = {val};")
else: kk(f"{newvar.render(True)} = ({args.valid.render(render_cl)}) ? ({val}) : {lang.float4}(0.0f, 0.0f, 0.0f, 0.0f);")
elif uop == UOps.STORE and (vin[0].ltype == LocalTypes.float or (vin[0].ltype == LocalTypes.float4 and vin[0].offset is not None)):
assert not isinstance(bufs[args.i].dtype, ImageDType), "image store must be float4"
assert args.valid.min == 1, "store must be valid"
if lang.uses_vload and bufs[args.i].dtype == dtypes.float16:
kk(f"vstore_half({vin[0].render()}, {args.idx.render(render_cl)}, {bufnames[args.i]});")