diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 09711578de..82120bd264 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -664,6 +664,9 @@ jobs:
         run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
       - name: Run process replay tests
         uses: ./.github/actions/process-replay
+      - name: Run macOS-specific unit test  # the pytest step in this job passes --ignore=test/unit, so this test is invoked explicitly
+        if: matrix.backend == 'cpu'
+        run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated
 
 # ****** Windows Tests ******
 
diff --git a/test/unit/test_disk_tensor.py b/test/unit/test_disk_tensor.py
index 8f37ff7eb0..33d9c3f153 100644
--- a/test/unit/test_disk_tensor.py
+++ b/test/unit/test_disk_tensor.py
@@ -343,6 +343,16 @@ class TestDiskTensor(unittest.TestCase):
       on_dev = t.to(Device.DEFAULT).realize()
       np.testing.assert_equal(on_dev.numpy(), t.numpy())
 
+  @unittest.skipUnless(OSX, "seems to only be an issue on macOS with file size >2 GiB")
+  def test_copy_to_cpu_not_truncated(self):
+    # regression test: a disk -> CPU copy of a >2 GiB file must also deliver the bytes past the 2 GiB mark
+    size, chunk, test = 2 * 1024**3, b'\x01' * 1024**2, b"test"
+    with open((fn:=temp("dt_copy_to_cpu_not_truncated")), "wb") as f:
+      # write the padding in 1 MiB pieces so the test never holds multi-GiB bytes objects in memory
+      for _ in range(size // len(chunk)): f.write(chunk)
+      f.write(test)
+    x = Tensor.empty(size + len(test), dtype=dtypes.uint8, device=f"disk:{fn}").to("CPU").realize()
+    self.assertEqual(x[size:].data().tobytes(), test)
 
 class TestPathTensor(unittest.TestCase):
   def setUp(self):
diff --git a/tinygrad/runtime/ops_disk.py b/tinygrad/runtime/ops_disk.py
index 7039389a1d..fb680340b4 100644
--- a/tinygrad/runtime/ops_disk.py
+++ b/tinygrad/runtime/ops_disk.py
@@ -84,7 +84,8 @@ class DiskAllocator(Allocator):
       # OSX doesn't seem great at mmap, this is faster
       with io.FileIO(self.dev.fd, "a+b", closefd=False) as fo:
         fo.seek(src.offset)
-        fo.readinto(dest)
+        bytes_read = 0
+        while (n := fo.readinto(dest[bytes_read:])) is not None and n > 0: bytes_read += n
     else:
       dest[:] = src._buf()