assign after copy shouldn't contig (#14847)

* assign after copy shouldn't contig

* fix assign copy
This commit is contained in:
George Hotz
2026-02-18 12:23:49 +08:00
committed by GitHub
parent ab55e8c6b9
commit d5636fba90
2 changed files with 13 additions and 0 deletions

View File

@@ -35,6 +35,14 @@ class TestAssign(unittest.TestCase):
a.realize()
np.testing.assert_allclose(b.numpy(), 0)
def test_assign_copy(self):
  # Source data is created on the "PYTHON" device.
  src = Tensor([1.,2,3], device="PYTHON")
  # Allocate the uninitialized destination first, then assign the moved source into it.
  # NOTE(review): .to(None) presumably targets the default device — confirm.
  dst = Tensor.empty(3)
  dst = dst.assign(src.to(None))
  # it should copy into the empty buffer
  # Reset the counters so only the work done by the realize below is measured.
  GlobalCounters.reset()
  dst.realize()
  # Exactly one kernel is expected to be counted for the whole assign.
  self.assertEqual(GlobalCounters.kernel_count, 1)
def test_assign_add(self):
for T in (1, 2, 10):#, 100): # this crashes in CI, not sure why
x = Tensor([0]).realize()

View File

@@ -167,6 +167,11 @@ def run_rangeify(tsink:UOp, debug:bool=False) -> tuple[UOp, IndexingContext]:
# get ops to realize
graph_rewrite(tsink, pm_generate_realize_map, ctx=rctx.realize_map, bottom_up=True, name="get realize")
# don't realize COPY/BUFFER_VIEW/ENCDEC when they are the direct source of ASSIGN — the ASSIGN target buffer is the output
for u in tsink.toposort():
if u.op is Ops.ASSIGN and u.src[1].op in {Ops.COPY, Ops.BUFFER_VIEW, Ops.ENCDEC} and u.src[1] in rctx.realize_map \
and not u.src[0].op_in_backward_slice_with_self(Ops.SHRINK, Ops.PERMUTE, Ops.FLIP, Ops.PAD):
del rctx.realize_map[u.src[1]]
# get the consumer map
with cpu_profile("consumer map in rangeify", "TINY"):