diff --git a/tinygrad/fastmath.py b/tinygrad/fastmath.py
index 109c861ef3..de57efaf3d 100644
--- a/tinygrad/fastmath.py
+++ b/tinygrad/fastmath.py
@@ -147,13 +147,11 @@ def payne_hanek_reduction(d: LazyBuffer, d_base: LazyBuffer) -> LazyBuffer:
   a1 = _take(i.const(0).cast(dtypes.uint32), 0)
   a2 = _take(i.const(0).cast(dtypes.uint32), 1)
   a3 = _take(i.const(0).cast(dtypes.uint32), 2)
-  a1p1 = _take(a1.const(0), 1)
-  a2p1 = _take(a2.const(0), 2)
-  a3p1 = _take(a3.const(0), 3)
+  a4 = _take(i.const(0).cast(dtypes.uint32), 3)
   # assume e != 0 because this reduction is only applied for x >= 39000.0
-  hi = _shl_lazy(a1, e).e(BinaryOps.OR, _shr_lazy(a1p1, offset))
-  mi = _shl_lazy(a2, e).e(BinaryOps.OR, _shr_lazy(a2p1, offset))
-  lo = _shl_lazy(a3, e).e(BinaryOps.OR, _shr_lazy(a3p1, offset))
+  hi = _shl_lazy(a1, e).e(BinaryOps.OR, _shr_lazy(a2, offset))
+  mi = _shl_lazy(a2, e).e(BinaryOps.OR, _shr_lazy(a3, offset))
+  lo = _shl_lazy(a3, e).e(BinaryOps.OR, _shr_lazy(a4, offset))
 
   def _hp_mul(x: LazyBuffer, y: LazyBuffer) -> LazyBuffer: return x.cast(dtypes.uint64).e(BinaryOps.MUL, y.cast(dtypes.uint64))
   p = _hp_mul(ia, lo)