no_vectorized_acc keeps single DEFINE_REG (#11387)

* no_vectorized_acc keeps single DEFINE_REG

* fix ptx, skip flaky test
This commit is contained in:
George Hotz
2025-07-26 11:44:09 -07:00
committed by GitHub
parent 4866ad57da
commit 3923e78061
3 changed files with 15 additions and 8 deletions

View File

@@ -1126,6 +1126,7 @@ class TestMultiRamUsage(unittest.TestCase):
# NOTE: the first one on the DEFAULT device should be freed
self.assertUsed(self.N*self.N*4*2)
@unittest.skip("flaky")
def test_zeros_shard(self, devices=(d1, d2)):
_ = Tensor.zeros(self.N, self.N).contiguous().shard(devices, axis=0).realize()
self.assertUsed(self.N*self.N*4) # sharding should not increase total ram usage