add loads at the end (#12988)

* add loads at the end

* simpler

* late load

* tests passing

* fix matvec

* spec test passes

* fix where on load

* fix abs2

* fix more tests
This commit is contained in:
George Hotz
2025-10-30 10:42:19 +08:00
committed by GitHub
parent 4b001ec723
commit 2da02f1ae1
21 changed files with 120 additions and 101 deletions

View File

@@ -53,9 +53,7 @@ b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struc
 idx = UOp.const(dtypes.index, 0)
 buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)
 buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2)
-ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1.index(idx),))
-ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2.index(idx),))
-alu = ld_1 + ld_2
+alu = buf_1.index(idx) + buf_2.index(idx)
 output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
 st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.index(idx), alu))
 s = UOp(Ops.SINK, dtypes.void, (st_0,))