|
|
|
|
@@ -1,5 +1,5 @@
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
import os, sys, mmap, io, ctypes, ctypes.util, platform, contextlib
|
|
|
|
|
import os, sys, mmap, io, ctypes, ctypes.util, contextlib
|
|
|
|
|
from typing import Optional, Generator, Tuple, Callable, List
|
|
|
|
|
from tinygrad.helpers import OSX, round_up
|
|
|
|
|
from tinygrad.device import Compiled, Allocator
|
|
|
|
|
@@ -84,7 +84,7 @@ class DiskDevice(Compiled):
|
|
|
|
|
filename = self.dname[len("disk:"):]
|
|
|
|
|
self.size = size
|
|
|
|
|
|
|
|
|
|
if filename.startswith("shm:"):
|
|
|
|
|
if sys.platform != "win32" and filename.startswith("shm:"):
|
|
|
|
|
fd = _posixshmem.shm_open("/"+filename[4:].lstrip("/"), os.O_RDWR, 0o600)
|
|
|
|
|
self.mem = mmap.mmap(fd, self.size, mmap.MAP_SHARED | MAP_POPULATE | MAP_LOCKED)
|
|
|
|
|
os.close(fd)
|
|
|
|
|
@@ -93,7 +93,7 @@ class DiskDevice(Compiled):
|
|
|
|
|
except OSError: self.fd = os.open(filename, os.O_RDWR|os.O_CREAT)
|
|
|
|
|
if os.fstat(self.fd).st_size < self.size: os.ftruncate(self.fd, self.size)
|
|
|
|
|
self.mem = mmap.mmap(self.fd, self.size)
|
|
|
|
|
if (hp := getattr(mmap, "MADV_HUGEPAGE", None)) is not None:
|
|
|
|
|
if hasattr(self.mem, 'madvise') and (hp := getattr(mmap, "MADV_HUGEPAGE", None)) is not None:
|
|
|
|
|
with contextlib.suppress(OSError): self.mem.madvise(hp) # some systems have transparent_hugepage disabled
|
|
|
|
|
def _might_close(self):
|
|
|
|
|
self.count -= 1
|
|
|
|
|
@@ -103,22 +103,22 @@ class DiskDevice(Compiled):
|
|
|
|
|
def _iouring_setup(self):
|
|
|
|
|
DiskDevice._tried_io_uring_init = True
|
|
|
|
|
|
|
|
|
|
if platform.system() != 'Linux' or hasattr(sys, "getandroidapilevel"): return
|
|
|
|
|
if sys.platform == 'linux' and not hasattr(sys, "getandroidapilevel"):
|
|
|
|
|
fd = libc.syscall(io_uring.NR_io_uring_setup, 4096, ctypes.byref(p:=io_uring.struct_io_uring_params()))
|
|
|
|
|
if fd < 0: return
|
|
|
|
|
|
|
|
|
|
fd = libc.syscall(io_uring.NR_io_uring_setup, 4096, ctypes.byref(p:=io_uring.struct_io_uring_params()))
|
|
|
|
|
if fd < 0: return
|
|
|
|
|
sq_ptr = libc.mmap(0, p.sq_off.array + p.sq_entries * 4, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, 0)
|
|
|
|
|
cq_ptr = libc.mmap(0, p.cq_off.cqes + p.cq_entries * ctypes.sizeof(io_uring.struct_io_uring_cqe),
|
|
|
|
|
mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, io_uring.IORING_OFF_CQ_RING)
|
|
|
|
|
sqes = libc.mmap(0, p.sq_entries * ctypes.sizeof(io_uring.struct_io_uring_sqe),
|
|
|
|
|
mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, io_uring.IORING_OFF_SQES)
|
|
|
|
|
|
|
|
|
|
sq_ptr = libc.mmap(0, p.sq_off.array + p.sq_entries * 4, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, 0)
|
|
|
|
|
cq_ptr = libc.mmap(0, p.cq_off.cqes + p.cq_entries * ctypes.sizeof(io_uring.struct_io_uring_cqe),
|
|
|
|
|
mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, io_uring.IORING_OFF_CQ_RING)
|
|
|
|
|
sqes = libc.mmap(0, p.sq_entries * ctypes.sizeof(io_uring.struct_io_uring_sqe),
|
|
|
|
|
mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_POPULATE, fd, io_uring.IORING_OFF_SQES)
|
|
|
|
|
def u32ptr(val): return ctypes.cast(val, ctypes.POINTER(ctypes.c_uint32))
|
|
|
|
|
sqdesc = io_uring.struct_io_uring_sq(khead=u32ptr(sq_ptr+p.sq_off.head), ktail=u32ptr(sq_ptr+p.sq_off.tail),
|
|
|
|
|
array=u32ptr(sq_ptr+p.sq_off.array),
|
|
|
|
|
kring_mask=u32ptr(sq_ptr+p.sq_off.ring_mask), sqes=ctypes.cast(sqes, ctypes.POINTER(io_uring.struct_io_uring_sqe)))
|
|
|
|
|
|
|
|
|
|
def u32ptr(val): return ctypes.cast(val, ctypes.POINTER(ctypes.c_uint32))
|
|
|
|
|
sqdesc = io_uring.struct_io_uring_sq(khead=u32ptr(sq_ptr+p.sq_off.head), ktail=u32ptr(sq_ptr+p.sq_off.tail), array=u32ptr(sq_ptr+p.sq_off.array),
|
|
|
|
|
kring_mask=u32ptr(sq_ptr+p.sq_off.ring_mask), sqes=ctypes.cast(sqes, ctypes.POINTER(io_uring.struct_io_uring_sqe)))
|
|
|
|
|
cqdesc = io_uring.struct_io_uring_cq(khead=u32ptr(cq_ptr+p.cq_off.head), ktail=u32ptr(cq_ptr+p.cq_off.tail),
|
|
|
|
|
kring_mask=u32ptr(sq_ptr+p.cq_off.ring_mask), cqes=ctypes.cast(cq_ptr+p.cq_off.cqes, ctypes.POINTER(io_uring.struct_io_uring_cqe)))
|
|
|
|
|
|
|
|
|
|
cqdesc = io_uring.struct_io_uring_cq(khead=u32ptr(cq_ptr+p.cq_off.head), ktail=u32ptr(cq_ptr+p.cq_off.tail),
|
|
|
|
|
kring_mask=u32ptr(sq_ptr+p.cq_off.ring_mask), cqes=ctypes.cast(cq_ptr+p.cq_off.cqes, ctypes.POINTER(io_uring.struct_io_uring_cqe)))
|
|
|
|
|
|
|
|
|
|
DiskDevice.io_uring = io_uring.struct_io_uring(ring_fd=fd, sq=sqdesc, cq=cqdesc) # type: ignore
|
|
|
|
|
DiskDevice.io_uring = io_uring.struct_io_uring(ring_fd=fd, sq=sqdesc, cq=cqdesc) # type: ignore
|
|
|
|
|
|