rename HWInterface -> FileIOInterface (#9989)

* rename HWInterface -> FileIOInterface

* ugh
This commit is contained in:
nimlgen
2025-04-22 22:18:57 +03:00
committed by GitHub
parent c1539b0319
commit db51133537
8 changed files with 69 additions and 69 deletions

View File

@@ -80,8 +80,8 @@ generate_kfd() {
fixup $BASE/kfd.py
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
sed -i "/import functools/a from tinygrad.runtime.support.hcq import HWInterface" $BASE/kfd.py
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, \*\*kwargs):/g" $BASE/kfd.py
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
python3 -c "import tinygrad.runtime.autogen.kfd"
}
@@ -287,7 +287,7 @@ generate_vfio() {
fixup $BASE/vfio.py
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/vfio.py
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import FileIOInterface\g" $BASE/vfio.py
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
}

View File

@@ -1,5 +1,5 @@
import ctypes, ctypes.util, time, os, builtins, fcntl
from tinygrad.runtime.support.hcq import HWInterface
from tinygrad.runtime.support.hcq import FileIOInterface
from test.mockgpu.nv.nvdriver import NVDriver
from test.mockgpu.amd.amddriver import AMDDriver
start = time.perf_counter()
@@ -53,7 +53,7 @@ def _open(path, flags):
return virtfd.fd
return os.open(path, flags, 0o777) if os.path.exists(path) else None
class MockHWInterface(HWInterface):
class MockFileIOInterface(FileIOInterface):
def __init__(self, path:str="", flags:int=os.O_RDONLY, fd:int|None=None):
self.path = path
self.fd = fd or _open(path, flags)

View File

@@ -11,9 +11,9 @@ import ctypes, os
import functools
from tinygrad.runtime.support.hcq import HWInterface
from tinygrad.runtime.support.hcq import FileIOInterface
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, **kwargs):
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, **kwargs):
  """
  Build a `__user_struct` from `kwargs`, issue the ioctl on `__fd`, and return the struct.

  The request number packs the direction at bit 30, the struct size at bit 16, the base at bit 8 and `__nr` in the low bits.
  Raises RuntimeError when the ioctl returns a nonzero value.
  """
  made = __user_struct(**kwargs)
  request = (__idir<<30) | (ctypes.sizeof(made)<<16) | (__base<<8) | __nr
  # the same struct instance is handed to the ioctl and returned to the caller
  ret = __fd.ioctl(request, made)
  if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
  return made

View File

@@ -9,13 +9,13 @@
import ctypes
from tinygrad.runtime.support.hcq import HWInterface
from tinygrad.runtime.support.hcq import FileIOInterface
import functools
def _do_ioctl_io(__idir, __base, __nr, __fd:HWInterface, val=0, __len=0):
def _do_ioctl_io(__idir, __base, __nr, __fd:FileIOInterface, val=0, __len=0):
  """
  Issue an ioctl on `__fd` that passes `val` directly as the argument and return the ioctl's result unchanged.

  The request number packs the direction at bit 30, `__len` at bit 16, the base at bit 8 and `__nr` in the low bits.
  """
  request = (__idir<<30) | (__len<<16) | (__base<<8) | __nr
  return __fd.ioctl(request, val)
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, __val=None, **kwargs):
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, __val=None, **kwargs):
  """
  Issue an ioctl on `__fd` with a ctypes payload and return that payload.

  If `__val` is given it is used as the payload as-is; otherwise a `__user_struct` is built from `kwargs`.
  The request number packs the direction at bit 30, the payload size at bit 16, the base at bit 8 and `__nr` in the low bits.
  Raises RuntimeError when the ioctl returns a nonzero value.
  """
  # The previous body read an undefined name (`__made`) instead of the `__val` parameter.
  # Compare against None explicitly: a zero-valued ctypes scalar (e.g. c_int(0)) is falsy and must still be used.
  made = __val if __val is not None else __user_struct(**kwargs)
  request = (__idir<<30) | (ctypes.sizeof(made)<<16) | (__base<<8) | __nr
  ret = __fd.ioctl(request, made)
  if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
  return made

View File

@@ -3,7 +3,7 @@ from typing import Any, cast, ClassVar
import os, ctypes, ctypes.util, struct, hashlib, functools, importlib, mmap, errno, array, contextlib, sys, select
assert sys.platform != 'win32'
from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, HWInterface
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
from tinygrad.ops import sint
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, CPUProgram, PROFILE
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, all_same, flatten, DEBUG, OSX
@@ -520,9 +520,9 @@ class AMDIP:
return getattr(self.module, name)
class KFDIface:
kfd:HWInterface|None = None
kfd:FileIOInterface|None = None
event_page:HCQBuffer|None = None
gpus:list[HWInterface] = []
gpus:list[FileIOInterface] = []
def _is_usable_gpu(self, gpu_id):
with contextlib.suppress(OSError): return int(gpu_id.read()) != 0
@@ -535,23 +535,23 @@ class KFDIface:
# Initialize KFD interface during first run
if KFDIface.kfd is None:
KFDIface.kfd = HWInterface("/dev/kfd", os.O_RDWR)
gpus = [g for g in HWInterface(kfd_topo_path).listdir() if self._is_usable_gpu(HWInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
KFDIface.kfd = FileIOInterface("/dev/kfd", os.O_RDWR)
gpus = [g for g in FileIOInterface(kfd_topo_path).listdir() if self._is_usable_gpu(FileIOInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
gpus = sorted(gpus, key=lambda x: int(x.split('/')[-1]))
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('HIP_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
KFDIface.gpus = [gpus[x] for x in visible_devices] if visible_devices else gpus
if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")
self.gpu_id = int(HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
self.props = {l.split()[0]: int(l.split()[1]) for l in HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
self.gpu_id = int(FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
self.props = {(p:=l.split())[0]: int(p[1]) for l in FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
ip_base = f"/sys/class/drm/renderD{self.props['drm_render_minor']}/device/ip_discovery/die/0"
id2ip = {am.GC_HWID: am.GC_HWIP, am.SDMA0_HWID: am.SDMA0_HWIP, am.NBIF_HWID: am.NBIF_HWIP}
self.ip_versions = {id2ip[int(hwid)]:tuple(int(HWInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in HWInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
self.drm_fd = HWInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
self.ip_versions = {id2ip[int(hwid)]:tuple(int(FileIOInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in FileIOInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
self.drm_fd = FileIOInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
kfd.AMDKFD_IOC_ACQUIRE_VM(KFDIface.kfd, drm_fd=self.drm_fd.fd, gpu_id=self.gpu_id)
@@ -580,8 +580,8 @@ class KFDIface:
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
else: buf, addr = 0, HWInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
buf = addr = FileIOInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
else: buf, addr = 0, FileIOInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
assert addr != 0xffffffffffffffff
try: mem = kfd.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id,
@@ -604,7 +604,7 @@ class KFDIface:
c_gpus = (ctypes.c_int32 * len(gpus))(*gpus)
stm = kfd.AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU(self.kfd, handle=mem.meta.handle, device_ids_array_ptr=ctypes.addressof(c_gpus), n_devices=len(gpus))
assert stm.n_success == len(gpus)
if mem.va_addr: HWInterface.munmap(mem.va_addr, mem.size)
if mem.va_addr: FileIOInterface.munmap(mem.va_addr, mem.size)
kfd.AMDKFD_IOC_FREE_MEMORY_OF_GPU(self.kfd, handle=mem.meta.handle)
def map(self, mem):
@@ -624,7 +624,7 @@ class KFDIface:
if not hasattr(self, 'doorbells'):
self.doorbells_base = queue.doorbell_offset & (~0x1fff) # doorbell is two pages
self.doorbells = cast(HWInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
self.doorbells = cast(FileIOInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
return AMDQueueDesc(ring=to_mv(ring.va_addr, ring.size).cast("I"),
read_ptrs=[to_mv(queue.read_pointer_address, 8).cast("Q")], write_ptrs=[to_mv(queue.write_pointer_address, 8).cast("Q")],
@@ -650,17 +650,17 @@ class AMAllocationMeta: owner:AMDDevice; mapped_devs:list[AMDDevice]; mapping:AM
class PCIIface:
supported_devs:list[int] = [0x744c, 0x7480, 0x7550]
vfio:bool = getenv("VFIO", 1) and HWInterface.exists("/dev/vfio/vfio")
vfio_fd:HWInterface
vfio:bool = getenv("VFIO", 1) and FileIOInterface.exists("/dev/vfio/vfio")
vfio_fd:FileIOInterface
gpus:list[Any] = []
def __init__(self, dev, dev_id):
self.dev = dev
if first_dev:=len(PCIIface.gpus) == 0:
for pcibus in HWInterface("/sys/bus/pci/devices").listdir():
vendor = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
device = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
for pcibus in FileIOInterface("/sys/bus/pci/devices").listdir():
vendor = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
device = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
if vendor == 0x1002 and device in PCIIface.supported_devs: PCIIface.gpus.append(pcibus)
PCIIface.gpus = sorted(PCIIface.gpus)
@@ -671,51 +671,51 @@ class PCIIface:
self.pcibus = PCIIface.gpus[dev_id]
# Unbind the device from the kernel driver
if HWInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
if FileIOInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
supported_sizes = int(HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
try: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
supported_sizes = int(FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
try: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
except OSError as e: raise RuntimeError(f"Cannot resize BAR: {e}. Ensure the resizable BAR option is enabled on your system.") from e
# Try to init vfio. Use it if success.
if PCIIface.vfio:
try:
if first_dev:
HWInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
PCIIface.vfio_fd = HWInterface("/dev/vfio/vfio", os.O_RDWR)
FileIOInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
PCIIface.vfio_fd = FileIOInterface("/dev/vfio/vfio", os.O_RDWR)
vfio.VFIO_CHECK_EXTENSION(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
HWInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
FileIOInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
iommu_group = HWInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
iommu_group = FileIOInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
except OSError:
if DEBUG >= 1: print(f"am {self.pcibus}: failed to init vfio-pci module (run `sudo modprobe vfio-pci`).")
PCIIface.vfio = False
# Init vfio for the device
if PCIIface.vfio:
self.vfio_group = HWInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
self.vfio_group = FileIOInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
vfio.VFIO_GROUP_SET_CONTAINER(self.vfio_group, ctypes.c_int(PCIIface.vfio_fd.fd))
if first_dev: vfio.VFIO_SET_IOMMU(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
self.vfio_dev = HWInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
self.vfio_dev = FileIOInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
self.irq_fd = HWInterface.eventfd(0, 0)
self.irq_fd = FileIOInterface.eventfd(0, 0)
self.irq_poller = select.poll()
self.irq_poller.register(self.irq_fd.fd, select.POLLIN)
irqs = vfio.struct_vfio_irq_set(index=vfio.VFIO_PCI_MSI_IRQ_INDEX, flags=vfio.VFIO_IRQ_SET_DATA_EVENTFD|vfio.VFIO_IRQ_SET_ACTION_TRIGGER,
argsz=ctypes.sizeof(vfio.struct_vfio_irq_set), count=1, data=(ctypes.c_int * 1)(self.irq_fd.fd))
vfio.VFIO_DEVICE_SET_IRQS(self.vfio_dev, irqs)
else: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
else: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
self.pagemap = HWInterface("/proc/self/pagemap", os.O_RDONLY)
self.cfg_fd = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
self.bar_fds = {bar: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{bar}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for bar in [0, 2, 5]}
self.pagemap = FileIOInterface("/proc/self/pagemap", os.O_RDONLY)
self.cfg_fd = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
self.bar_fds = {b: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{b}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for b in [0, 2, 5]}
bar_info = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
bar_info = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
self.bar_info = {j:(int(start,16), int(end,16), int(flgs,16)) for j,(start,end,flgs) in enumerate(l.split() for l in bar_info)}
self.adev = AMDev(self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I'))
@@ -741,7 +741,7 @@ class PCIIface:
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
va = HWInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
# Read pagemap to get the physical address of each page. The pages are locked.
self.pagemap.seek(va // mmap.PAGESIZE * 8)
@@ -791,7 +791,7 @@ class AMDDevice(HCQCompiled):
signal_pages: ClassVar[list[Any]] = []
signal_pool: ClassVar[list[int]] = []
driverless:bool = not HWInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
driverless:bool = not FileIOInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
def __init__(self, device:str=""):
self.device_id = int(device.split(":")[1]) if ":" in device else 0
@@ -852,7 +852,7 @@ class AMDDevice(HCQCompiled):
self.sqtt_enabled = PROFILE and bool(getenv("SQTT", 0))
if self.sqtt_enabled:
if self.arch != 'gfx1100': raise RuntimeError('SQ Thread Tracing is only supported on 7900XTX')
if not self.driverless and (ppfeaturemask:=int(HWInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16)) & 0x8000:
if not self.driverless and (ppfeaturemask:=int(FileIOInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16))&0x8000:
raise RuntimeError("SQTT can't be enabled because of hardware bug, to workaround either use driverless or add "
f"ppfeaturemask={(ppfeaturemask&~0x8000):#x} (current {ppfeaturemask=:#x} & ~PP_GFXOFF_MASK) to amdgpu module parameters\n"
"For more information read https://github.com/tinygrad/tinygrad/blob/master/extra/sqtt/README.md")

View File

@@ -4,7 +4,7 @@ assert sys.platform != 'win32'
from typing import Any, cast, Union, Type, ClassVar
from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
from tinygrad.runtime.support.hcq import MMIOInterface, HWInterface, MOCKGPU
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU
from tinygrad.ops import sint
from tinygrad.device import BufferSpec, CPUProgram
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, round_up, data64, data64_le, DEBUG, prod, OSX
@@ -20,7 +20,7 @@ def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status
NV_PFAULT_FAULT_TYPE = {dt:name for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_FAULT_TYPE_")}
NV_PFAULT_ACCESS_TYPE = {dt:name.split("_")[-1] for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_ACCESS_TYPE_")}
def nv_iowr(fd:HWInterface, nr, args):
def nv_iowr(fd:FileIOInterface, nr, args):
ret = fd.ioctl((3 << 30) | (ctypes.sizeof(args) & 0x1FFF) << 16 | (ord('F') & 0xFF) << 8 | (nr & 0xFF), args)
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
@@ -46,7 +46,7 @@ def make_rmctrl_type():
getattr(nv_gpu, name+"_PARAMS", getattr(nv_gpu, name.replace("_CTRL_CMD_", "_CTRL_DEBUG_")+"_PARAMETERS", None))))})
rmctrl = make_rmctrl_type()
def uvm_ioctl(cmd, sttyp, fd:HWInterface, **kwargs):
def uvm_ioctl(cmd, sttyp, fd:FileIOInterface, **kwargs):
ret = fd.ioctl(cmd, made:=sttyp(**kwargs))
if ret != 0: raise RuntimeError(f"ioctl(uvm) returned {ret}")
if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl returned {get_error_str(made.rmStatus)}")
@@ -293,8 +293,8 @@ class NVDevice(HCQCompiled[NVSignal]):
signal_pool: ClassVar[list[int]] = []
root = None
fd_ctl: HWInterface
fd_uvm: HWInterface
fd_ctl: FileIOInterface
fd_uvm: FileIOInterface
gpus_info: Union[list, ctypes.Array] = []
# TODO: Need a proper allocator for va addresses
@@ -305,12 +305,12 @@ class NVDevice(HCQCompiled[NVSignal]):
host_object_enumerator: int = 0x1000
def _new_gpu_fd(self):
fd_dev = HWInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
fd_dev = FileIOInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
nv_iowr(fd_dev, nv_gpu.NV_ESC_REGISTER_FD, nv_gpu.nv_ioctl_register_fd_t(ctl_fd=self.fd_ctl.fd))
return fd_dev
def _gpu_map_to_cpu(self, memory_handle, size, target=None, flags=0, system=False):
fd_dev = self._new_gpu_fd() if not system else HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
fd_dev = self._new_gpu_fd() if not system else FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
made = nv_gpu.nv_ioctl_nvos33_parameters_with_fd(fd=fd_dev.fd,
params=nv_gpu.NVOS33_PARAMETERS(hClient=self.root, hDevice=self.nvdevice, hMemory=memory_handle, length=size, flags=flags))
nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY, made)
@@ -324,7 +324,7 @@ class NVDevice(HCQCompiled[NVSignal]):
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access)
if host:
va_addr = HWInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
va_addr = FileIOInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
flags = (nv_gpu.NVOS02_FLAGS_PHYSICALITY_NONCONTIGUOUS << 4) | (nv_gpu.NVOS02_FLAGS_COHERENCY_CACHED << 12) \
| (nv_gpu.NVOS02_FLAGS_MAPPING_NO_MAP << 30)
@@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]):
self._debug_mappings.pop((cast(int, mem.va_addr), mem.size))
uvm.free(self.fd_uvm, base=cast(int, mem.va_addr), length=mem.size)
if mem.meta.has_cpu_mapping: HWInterface.munmap(cast(int, mem.va_addr), mem.size)
if mem.meta.has_cpu_mapping: FileIOInterface.munmap(cast(int, mem.va_addr), mem.size)
def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False, tag="") -> HCQBuffer:
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
@@ -391,9 +391,9 @@ class NVDevice(HCQCompiled[NVSignal]):
def __init__(self, device:str=""):
if NVDevice.root is None:
NVDevice.fd_ctl = HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
NVDevice.fd_uvm = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
self.fd_uvm_2 = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
NVDevice.fd_ctl = FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
NVDevice.fd_uvm = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
self.fd_uvm_2 = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
NVDevice.root = rm_alloc(self.fd_ctl, nv_gpu.NV01_ROOT_CLIENT, 0, 0, None).hObjectNew
uvm.initialize(self.fd_uvm)
with contextlib.suppress(RuntimeError): uvm.mm_initialize(self.fd_uvm_2, uvmFd=self.fd_uvm.fd) # this error is okay, CUDA hits it too

View File

@@ -5,7 +5,7 @@ from types import SimpleNamespace
from typing import Any, cast, ClassVar
from tinygrad.device import BufferSpec
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
from tinygrad.runtime.support.hcq import HWInterface
from tinygrad.runtime.support.hcq import FileIOInterface
from tinygrad.runtime.autogen import kgsl, adreno
from tinygrad.runtime.ops_gpu import CLCompiler, CLDevice
from tinygrad.renderer.cstyle import QCOMRenderer
@@ -325,7 +325,7 @@ class QCOMDevice(HCQCompiled):
dummy_addr: int = 0
def __init__(self, device:str=""):
self.fd = HWInterface('/dev/kgsl-3d0', os.O_RDWR)
self.fd = FileIOInterface('/dev/kgsl-3d0', os.O_RDWR)
QCOMDevice.dummy_addr = cast(int, self._gpu_alloc(0x1000).va_addr)
flags = kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT | kgsl.KGSL_CONTEXT_NO_FAULT_TOLERANCE | kgsl.KGSL_CONTEXT_NO_GMEM_ALLOC \
@@ -364,7 +364,7 @@ class QCOMDevice(HCQCompiled):
def _gpu_free(self, mem:HCQBuffer):
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta.id)
HWInterface.munmap(mem.va_addr, mem.meta.mmapsize)
FileIOInterface.munmap(mem.va_addr, mem.meta.mmapsize)
def _ensure_stack_size(self, sz):
if not hasattr(self, '_stack'): self._stack = self._gpu_alloc(sz)

View File

@@ -13,7 +13,7 @@ class MMIOInterface:
def __getitem__(self, k) -> int|list[int]: return self.mv[k].tolist() if isinstance(k, slice) else self.mv[k]
def __setitem__(self, k, v:int|array.array): self.mv[k] = v
class HWInterface:
class FileIOInterface:
"""
File I/O abstraction for HCQ devices. The class provides a unified interface for the file-descriptor operations (read/write, ioctl, mmap, etc.) used to interact with hardware devices.
"""
@@ -42,9 +42,9 @@ class HWInterface:
@staticmethod
def readlink(path): return os.readlink(path)
@staticmethod
def eventfd(initval, flags=None): return HWInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
def eventfd(initval, flags=None): return FileIOInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockHWInterface as HWInterface # noqa: F401 # pylint: disable=unused-import
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockFileIOInterface as FileIOInterface # noqa: F401 # pylint: disable=unused-import
# **************** for HCQ Compatible Devices ****************