add device to local, fix PCONTIG=2 (#14266)

* add device to local, fix PCONTIG=2

* regression test

* remove the device when we render

* viz slowness

* no long
This commit is contained in:
George Hotz
2026-01-21 22:12:18 +09:00
committed by GitHub
parent c1d14ea832
commit 41d00a046d
4 changed files with 18 additions and 3 deletions

View File

@@ -76,6 +76,18 @@ class TestRangeifyEdgeCase(unittest.TestCase):
res = Tensor.cat(a, c, dim=0)
self.assertEqual(res.numpy()[-1, :16].tolist(), [512] * 16)
def test_pcontig_multi_gather(self):
    # Regression test: the local bufferize must have its device set for
    # const_like to work. Exercised by summing two gathers from the same
    # source tensor while PCONTIG=2 is active.
    with Context(PCONTIG=2):
        # NOTE: keep the dtype as 'int' -- with a uint type this will become
        # a long and fail on WEBGPU.
        table = Tensor(list(range(8)), dtype='int')
        first_idx = Tensor([0, 0], dtype='int')
        first_vals = table.gather(0, first_idx)
        # Second index set is derived from the first: [0, 0] * 2 + 1.
        second_idx = first_idx * 2 + 1
        second_vals = table.gather(0, second_idx)
        summed = first_vals + second_vals
        # Realize inside the context so PCONTIG=2 applies to the computation.
        result = summed.numpy()
    # Presumably table[0] + table[1] == 0 + 1 for both entries.
    self.assertEqual(result.tolist(), [1, 1])
if getenv("BIG") > 2:
# llama 8B (8192)
BS, HEADS, SEQLEN, EMB = 4, 32, 8192, 128