diff --git a/docs/abstractions4.py b/docs/abstractions4.py
index c646327ae4..466f64f261 100644
--- a/docs/abstractions4.py
+++ b/docs/abstractions4.py
@@ -16,12 +16,13 @@ def eval_harness(name, tensor, fxn, check=None):
   print(f"computed in {GlobalCounters.time_sum_s*1000:.2f} ms, {(a.nbytes()/1e9)/GlobalCounters.time_sum_s:.2f} GB/s")
   return out
 
-SZ = 32*1024 if getenv("MOCKGPU") else 1024*1024*1024
+SZ = 256*1024 if getenv("MOCKGPU") else 1024*1024*1024  # MOCKGPU: 256*1024 == GLOBALS*THREADS in example_2_hip (CHUNK == 1)
 
 def example_2_hip(a:Tensor, correct):
   GLOBALS = 1024
   THREADS = 256
   def hip_reduce_sum(out:UOp, buf:UOp) -> UOp:
+    assert SZ % (GLOBALS * THREADS) == 0
     CHUNK = SZ // (GLOBALS * THREADS)
     # NOTE: tinygrad doesn't populate HIP hidden kernargs, so blockDim.x/gridDim.x read as 0.
     # We hardcode block/grid sizes as constexpr to avoid any dependency on those builtins.