remove hack from cast (#14760)

* remove hack from cast * skip tests * linters to 3.12, another skip * fix rand * m_
2026-04-29 03:00:14 -04:00 · 2026-02-15 13:56:38 +08:00
parent d176af6269
commit 0e215c433d
4 changed files with 19 additions and 14 deletions
--- a/test/backend/test_dtype_alu.py
+++ b/test/backend/test_dtype_alu.py
@@ -296,18 +296,21 @@ class TestDTypeALU(unittest.TestCase):
  @given(ht.int32, strat.sampled_from(dtypes_float+dtypes_int+dtypes_bool))
  def test_int32_cast(self, a, dtype): universal_test_cast(a, dtypes.int32, dtype)

+  @unittest.skip("relied on hacks")
  @given(strat.floats(width=32, min_value=1.0, max_value=254.0, allow_subnormal=False),
         strat.sampled_from(dtypes_float), strat.sampled_from((dtypes.uint8, dtypes.uint16)))
  def test_float_cast_to_unsigned(self, a, float_dtype, unsigned_dtype):
    if not is_dtype_supported(float_dtype): float_dtype = dtypes.float32
    universal_test_cast(a, float_dtype, unsigned_dtype)

+  @unittest.skip("relied on hacks")
  @given(strat.floats(width=32, min_value=256.0, max_value=65000.0, allow_subnormal=False),
         strat.sampled_from(dtypes_float), strat.sampled_from((dtypes.uint8, dtypes.uint16)))
  def test_float_cast_to_unsigned_overflow(self, a, float_dtype, unsigned_dtype):
    if not is_dtype_supported(float_dtype): float_dtype = dtypes.float32
    universal_test_cast(a, float_dtype, unsigned_dtype)

+  @unittest.skip("relied on hacks")
  @given(strat.floats(width=32, min_value=-65000.0, max_value=-1.0, allow_subnormal=False),
         strat.sampled_from(dtypes_float), strat.sampled_from((dtypes.uint8, dtypes.uint16)))
  def test_float_cast_to_unsigned_underflow(self, a, float_dtype, unsigned_dtype):
--- a/test/backend/test_ops.py
+++ b/test/backend/test_ops.py
@@ -3295,6 +3295,7 @@ class TestOps(unittest.TestCase):

@unittest.skipUnless(is_dtype_supported(dtypes.uchar), f"no uint8 on {Device.DEFAULT}")
 class TestOpsUint8(unittest.TestCase):
+  @unittest.skip("relied on hacks")
  def test_cast(self):
    helper_test_op([(2,3,64,64)], lambda x: x.type(torch.uint8), lambda x: x.cast('uint8'), forward_only=True)

--- a/test/unit/test_disk_tensor.py
+++ b/test/unit/test_disk_tensor.py
@@ -456,6 +456,7 @@ class TestDiskTensor(TempDirTestCase):
    np.testing.assert_equal(t1.numpy(), np.arange(128, dtype=np.uint8))
    np.testing.assert_equal(t2.numpy(), np.arange(64, dtype=np.uint8))

+  @unittest.skip("fails with setup_python_cap run")
  def test_disk_open_failure_state(self):
    from tinygrad.runtime.ops_disk import DiskDevice
    fn = pathlib.Path(self.tmp("dt_open_failure"))
@@ -476,6 +477,7 @@ class TestDiskTensor(TempDirTestCase):
    t2.to("CPU").realize()
    assert disk_device.size == 200

+  @unittest.skip("fails with setup_python_cap run")
  def test_disk_permission_error(self):
    fn = pathlib.Path(self.tmp("dt_permission"))
    fn.write_bytes(bytes(range(256)))
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -614,14 +614,15 @@ class Tensor(OpMixin):
    print(t.numpy())
    ```
    """
-    if not dtypes.is_float(dtype := to_dtype(dtype or dtypes.default_float)): raise ValueError(f"rand only supports float dtypes, got {dtype}")
+    dt = to_dtype(dtype or dtypes.default_float)
+    if not dtypes.is_float(dt): raise ValueError(f"rand only supports float dtypes, got {dt}")
    if not all_int(shape:=argfix(*shape)) or not all(s >= 0 for s in shape): raise ValueError(f"invalid input {shape=}")
    if device is not None and not isinstance(device, str): raise ValueError(f"rand only supports single device, got {device=}")
    device = cast(str, canonicalize_device(device))

    # if shape has 0, return zero tensor
-    if (numel := prod(shape)) == 0: return Tensor.zeros(shape, device=device, dtype=dtype, **kwargs)
-    num = ceildiv(numel * dtype.itemsize, 4)
+    if (numel := prod(shape)) == 0: return Tensor.zeros(shape, device=device, dtype=dt, **kwargs)
+    num = ceildiv(numel * dt.itemsize, 4)

    # generate per device seeds and rng counter if we haven't seen this device yet
    if device not in Tensor._device_seeds:
@@ -639,14 +640,14 @@ class Tensor(OpMixin):
    bits = Tensor._threefry_random_bits(Tensor._device_seeds[device], counts0, counts1)[:num]

    # bitcast to uint with same number of bits
-    _, nmant = dtypes.finfo(dtype)
-    uint_dtype = {1: dtypes.uint8, 2: dtypes.uint16, 4: dtypes.uint32, 8: dtypes.uint64}[dtype.itemsize]
+    _, nmant = dtypes.finfo(dt)
+    uint_dtype = {1: dtypes.uint8, 2: dtypes.uint16, 4: dtypes.uint32, 8: dtypes.uint64}[dt.itemsize]
    bits = bits.bitcast(uint_dtype)
    # only randomize the mantissa bits and set the exponent to 1
-    one = Tensor.ones_like(bits, device=bits.device, dtype=dtype).bitcast(uint_dtype)
-    bits = bits.rshift(dtype.bitsize - nmant).bitwise_or(one)
+    one = Tensor.ones_like(bits, device=bits.device, dtype=dt).bitcast(uint_dtype)
+    bits = bits.rshift(dt.bitsize - nmant).bitwise_or(one)
    # bitcast back to the original dtype and reshape
-    out = bits.bitcast(dtype)[:numel].sub(1).reshape(shape).requires_grad_(kwargs.get("requires_grad"))
+    out = bits.bitcast(dt)[:numel].sub(1).reshape(shape).requires_grad_(kwargs.get("requires_grad"))
    return out.contiguous() if contiguous else out

  # ***** creation helper functions *****
@@ -770,8 +771,9 @@ class Tensor(OpMixin):
    print(Tensor.eye(2, 4).numpy())
    ```
    """
-    if n < 0 or ((m := n if m is None else m) < 0): raise ValueError(f"cannot have negative {n=}, {m=}")
-    t = (Tensor.arange(n, device=device).unsqueeze(-1) == Tensor.arange(m, device=device))
+    m_ = n if m is None else m
+    if n < 0 or m_ < 0: raise ValueError(f"cannot have negative {n=}, {m_=}")
+    t = (Tensor.arange(n, device=device).unsqueeze(-1) == Tensor.arange(m_, device=device))
    return t.cast(dtype or dtypes.default_float).requires_grad_(requires_grad)

  def _multi_like(self, fxn, *args, **kwargs) -> Tensor:
@@ -3902,10 +3904,7 @@ class Tensor(OpMixin):
    print(t.dtype, t.numpy())
    ```
    """
-    if (dt:=to_dtype(dtype)) in {dtypes.uint8, dtypes.uint16} and dtypes.is_float(self.dtype):
-      # NOTE: values within the int32 range and outside the unsigned dtype range will cause values to wrap around
-      return self._apply_uop(UOp.cast, dtype=dtypes.int32)._apply_uop(UOp.cast, dtype=dt)
-    return self if self.dtype == dt else self._apply_uop(UOp.cast, dtype=dt)
+    return self if self.dtype == (dt:=to_dtype(dtype)) else self._apply_uop(UOp.cast, dtype=dt)

  def bitcast(self, dtype:DTypeLike) -> Tensor:
    """