mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 23:18:04 -05:00
rename HWInterface -> FileIOInterface (#9989)
* rename HWInterface -> FileIOInterface * ugh
This commit is contained in:
@@ -80,8 +80,8 @@ generate_kfd() {
|
||||
fixup $BASE/kfd.py
|
||||
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
|
||||
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
|
||||
sed -i "/import functools/a from tinygrad.runtime.support.hcq import HWInterface" $BASE/kfd.py
|
||||
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, \*\*kwargs):/g" $BASE/kfd.py
|
||||
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
|
||||
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
|
||||
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
|
||||
python3 -c "import tinygrad.runtime.autogen.kfd"
|
||||
}
|
||||
@@ -287,7 +287,7 @@ generate_vfio() {
|
||||
fixup $BASE/vfio.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
|
||||
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
|
||||
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/vfio.py
|
||||
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import FileIOInterface\g" $BASE/vfio.py
|
||||
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import ctypes, ctypes.util, time, os, builtins, fcntl
|
||||
from tinygrad.runtime.support.hcq import HWInterface
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||
from test.mockgpu.nv.nvdriver import NVDriver
|
||||
from test.mockgpu.amd.amddriver import AMDDriver
|
||||
start = time.perf_counter()
|
||||
@@ -53,7 +53,7 @@ def _open(path, flags):
|
||||
return virtfd.fd
|
||||
return os.open(path, flags, 0o777) if os.path.exists(path) else None
|
||||
|
||||
class MockHWInterface(HWInterface):
|
||||
class MockFileIOInterface(FileIOInterface):
|
||||
def __init__(self, path:str="", flags:int=os.O_RDONLY, fd:int|None=None):
|
||||
self.path = path
|
||||
self.fd = fd or _open(path, flags)
|
||||
|
||||
@@ -11,9 +11,9 @@ import ctypes, os
|
||||
|
||||
|
||||
import functools
|
||||
from tinygrad.runtime.support.hcq import HWInterface
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||
|
||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, **kwargs):
|
||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, **kwargs):
|
||||
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := __user_struct(**kwargs))<<16) | (__base<<8) | __nr, made)
|
||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||
return made
|
||||
|
||||
@@ -9,13 +9,13 @@
|
||||
import ctypes
|
||||
|
||||
|
||||
from tinygrad.runtime.support.hcq import HWInterface
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||
import functools
|
||||
|
||||
def _do_ioctl_io(__idir, __base, __nr, __fd:HWInterface, val=0, __len=0):
|
||||
def _do_ioctl_io(__idir, __base, __nr, __fd:FileIOInterface, val=0, __len=0):
|
||||
return __fd.ioctl((__idir<<30) | (__len<<16) | (__base<<8) | __nr, val)
|
||||
|
||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, __val=None, **kwargs):
|
||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, __val=None, **kwargs):
|
||||
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := (__made or __user_struct(**kwargs)))<<16) | (__base<<8) | __nr, made)
|
||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||
return made
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Any, cast, ClassVar
|
||||
import os, ctypes, ctypes.util, struct, hashlib, functools, importlib, mmap, errno, array, contextlib, sys, select
|
||||
assert sys.platform != 'win32'
|
||||
from dataclasses import dataclass
|
||||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, HWInterface
|
||||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
|
||||
from tinygrad.ops import sint
|
||||
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, CPUProgram, PROFILE
|
||||
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, all_same, flatten, DEBUG, OSX
|
||||
@@ -520,9 +520,9 @@ class AMDIP:
|
||||
return getattr(self.module, name)
|
||||
|
||||
class KFDIface:
|
||||
kfd:HWInterface|None = None
|
||||
kfd:FileIOInterface|None = None
|
||||
event_page:HCQBuffer|None = None
|
||||
gpus:list[HWInterface] = []
|
||||
gpus:list[FileIOInterface] = []
|
||||
|
||||
def _is_usable_gpu(self, gpu_id):
|
||||
with contextlib.suppress(OSError): return int(gpu_id.read()) != 0
|
||||
@@ -535,23 +535,23 @@ class KFDIface:
|
||||
|
||||
# Initialize KFD interface during first run
|
||||
if KFDIface.kfd is None:
|
||||
KFDIface.kfd = HWInterface("/dev/kfd", os.O_RDWR)
|
||||
gpus = [g for g in HWInterface(kfd_topo_path).listdir() if self._is_usable_gpu(HWInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
|
||||
KFDIface.kfd = FileIOInterface("/dev/kfd", os.O_RDWR)
|
||||
gpus = [g for g in FileIOInterface(kfd_topo_path).listdir() if self._is_usable_gpu(FileIOInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
|
||||
gpus = sorted(gpus, key=lambda x: int(x.split('/')[-1]))
|
||||
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('HIP_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
|
||||
KFDIface.gpus = [gpus[x] for x in visible_devices] if visible_devices else gpus
|
||||
|
||||
if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")
|
||||
|
||||
self.gpu_id = int(HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
|
||||
self.props = {l.split()[0]: int(l.split()[1]) for l in HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
|
||||
self.gpu_id = int(FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
|
||||
self.props = {(p:=l.split())[0]: int(p[1]) for l in FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
|
||||
ip_base = f"/sys/class/drm/renderD{self.props['drm_render_minor']}/device/ip_discovery/die/0"
|
||||
id2ip = {am.GC_HWID: am.GC_HWIP, am.SDMA0_HWID: am.SDMA0_HWIP, am.NBIF_HWID: am.NBIF_HWIP}
|
||||
self.ip_versions = {id2ip[int(hwid)]:tuple(int(HWInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
|
||||
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in HWInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
|
||||
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||
self.drm_fd = HWInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
|
||||
self.ip_versions = {id2ip[int(hwid)]:tuple(int(FileIOInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
|
||||
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in FileIOInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
|
||||
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||
self.drm_fd = FileIOInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
|
||||
|
||||
kfd.AMDKFD_IOC_ACQUIRE_VM(KFDIface.kfd, drm_fd=self.drm_fd.fd, gpu_id=self.gpu_id)
|
||||
|
||||
@@ -580,8 +580,8 @@ class KFDIface:
|
||||
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
|
||||
|
||||
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
|
||||
buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
else: buf, addr = 0, HWInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
||||
buf = addr = FileIOInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
else: buf, addr = 0, FileIOInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
||||
assert addr != 0xffffffffffffffff
|
||||
|
||||
try: mem = kfd.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id,
|
||||
@@ -604,7 +604,7 @@ class KFDIface:
|
||||
c_gpus = (ctypes.c_int32 * len(gpus))(*gpus)
|
||||
stm = kfd.AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU(self.kfd, handle=mem.meta.handle, device_ids_array_ptr=ctypes.addressof(c_gpus), n_devices=len(gpus))
|
||||
assert stm.n_success == len(gpus)
|
||||
if mem.va_addr: HWInterface.munmap(mem.va_addr, mem.size)
|
||||
if mem.va_addr: FileIOInterface.munmap(mem.va_addr, mem.size)
|
||||
kfd.AMDKFD_IOC_FREE_MEMORY_OF_GPU(self.kfd, handle=mem.meta.handle)
|
||||
|
||||
def map(self, mem):
|
||||
@@ -624,7 +624,7 @@ class KFDIface:
|
||||
|
||||
if not hasattr(self, 'doorbells'):
|
||||
self.doorbells_base = queue.doorbell_offset & (~0x1fff) # doorbell is two pages
|
||||
self.doorbells = cast(HWInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
|
||||
self.doorbells = cast(FileIOInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
|
||||
|
||||
return AMDQueueDesc(ring=to_mv(ring.va_addr, ring.size).cast("I"),
|
||||
read_ptrs=[to_mv(queue.read_pointer_address, 8).cast("Q")], write_ptrs=[to_mv(queue.write_pointer_address, 8).cast("Q")],
|
||||
@@ -650,17 +650,17 @@ class AMAllocationMeta: owner:AMDDevice; mapped_devs:list[AMDDevice]; mapping:AM
|
||||
|
||||
class PCIIface:
|
||||
supported_devs:list[int] = [0x744c, 0x7480, 0x7550]
|
||||
vfio:bool = getenv("VFIO", 1) and HWInterface.exists("/dev/vfio/vfio")
|
||||
vfio_fd:HWInterface
|
||||
vfio:bool = getenv("VFIO", 1) and FileIOInterface.exists("/dev/vfio/vfio")
|
||||
vfio_fd:FileIOInterface
|
||||
gpus:list[Any] = []
|
||||
|
||||
def __init__(self, dev, dev_id):
|
||||
self.dev = dev
|
||||
|
||||
if first_dev:=len(PCIIface.gpus) == 0:
|
||||
for pcibus in HWInterface("/sys/bus/pci/devices").listdir():
|
||||
vendor = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
|
||||
device = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
|
||||
for pcibus in FileIOInterface("/sys/bus/pci/devices").listdir():
|
||||
vendor = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
|
||||
device = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
|
||||
if vendor == 0x1002 and device in PCIIface.supported_devs: PCIIface.gpus.append(pcibus)
|
||||
PCIIface.gpus = sorted(PCIIface.gpus)
|
||||
|
||||
@@ -671,51 +671,51 @@ class PCIIface:
|
||||
self.pcibus = PCIIface.gpus[dev_id]
|
||||
|
||||
# Unbind the device from the kernel driver
|
||||
if HWInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
|
||||
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
|
||||
if FileIOInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
|
||||
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
|
||||
|
||||
supported_sizes = int(HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
|
||||
try: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
|
||||
supported_sizes = int(FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
|
||||
try: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
|
||||
except OSError as e: raise RuntimeError(f"Cannot resize BAR: {e}. Ensure the resizable BAR option is enabled on your system.") from e
|
||||
|
||||
# Try to init vfio. Use it if success.
|
||||
if PCIIface.vfio:
|
||||
try:
|
||||
if first_dev:
|
||||
HWInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
|
||||
PCIIface.vfio_fd = HWInterface("/dev/vfio/vfio", os.O_RDWR)
|
||||
FileIOInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
|
||||
PCIIface.vfio_fd = FileIOInterface("/dev/vfio/vfio", os.O_RDWR)
|
||||
vfio.VFIO_CHECK_EXTENSION(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
||||
|
||||
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
|
||||
HWInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
|
||||
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
|
||||
FileIOInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
|
||||
|
||||
iommu_group = HWInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
|
||||
iommu_group = FileIOInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
|
||||
except OSError:
|
||||
if DEBUG >= 1: print(f"am {self.pcibus}: failed to init vfio-pci module (run `sudo modprobe vfio-pci`).")
|
||||
PCIIface.vfio = False
|
||||
|
||||
# Init vfio for the device
|
||||
if PCIIface.vfio:
|
||||
self.vfio_group = HWInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
|
||||
self.vfio_group = FileIOInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
|
||||
vfio.VFIO_GROUP_SET_CONTAINER(self.vfio_group, ctypes.c_int(PCIIface.vfio_fd.fd))
|
||||
|
||||
if first_dev: vfio.VFIO_SET_IOMMU(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
||||
self.vfio_dev = HWInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
|
||||
self.vfio_dev = FileIOInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
|
||||
|
||||
self.irq_fd = HWInterface.eventfd(0, 0)
|
||||
self.irq_fd = FileIOInterface.eventfd(0, 0)
|
||||
self.irq_poller = select.poll()
|
||||
self.irq_poller.register(self.irq_fd.fd, select.POLLIN)
|
||||
|
||||
irqs = vfio.struct_vfio_irq_set(index=vfio.VFIO_PCI_MSI_IRQ_INDEX, flags=vfio.VFIO_IRQ_SET_DATA_EVENTFD|vfio.VFIO_IRQ_SET_ACTION_TRIGGER,
|
||||
argsz=ctypes.sizeof(vfio.struct_vfio_irq_set), count=1, data=(ctypes.c_int * 1)(self.irq_fd.fd))
|
||||
vfio.VFIO_DEVICE_SET_IRQS(self.vfio_dev, irqs)
|
||||
else: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
|
||||
else: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
|
||||
|
||||
self.pagemap = HWInterface("/proc/self/pagemap", os.O_RDONLY)
|
||||
self.cfg_fd = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
|
||||
self.bar_fds = {bar: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{bar}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for bar in [0, 2, 5]}
|
||||
self.pagemap = FileIOInterface("/proc/self/pagemap", os.O_RDONLY)
|
||||
self.cfg_fd = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
|
||||
self.bar_fds = {b: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{b}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for b in [0, 2, 5]}
|
||||
|
||||
bar_info = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
|
||||
bar_info = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
|
||||
self.bar_info = {j:(int(start,16), int(end,16), int(flgs,16)) for j,(start,end,flgs) in enumerate(l.split() for l in bar_info)}
|
||||
|
||||
self.adev = AMDev(self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I'))
|
||||
@@ -741,7 +741,7 @@ class PCIIface:
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
|
||||
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
|
||||
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
||||
va = HWInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
||||
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
||||
|
||||
# Read pagemap to get the physical address of each page. The pages are locked.
|
||||
self.pagemap.seek(va // mmap.PAGESIZE * 8)
|
||||
@@ -791,7 +791,7 @@ class AMDDevice(HCQCompiled):
|
||||
signal_pages: ClassVar[list[Any]] = []
|
||||
signal_pool: ClassVar[list[int]] = []
|
||||
|
||||
driverless:bool = not HWInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
|
||||
driverless:bool = not FileIOInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||||
@@ -852,7 +852,7 @@ class AMDDevice(HCQCompiled):
|
||||
self.sqtt_enabled = PROFILE and bool(getenv("SQTT", 0))
|
||||
if self.sqtt_enabled:
|
||||
if self.arch != 'gfx1100': raise RuntimeError('SQ Thread Tracing is only supported on 7900XTX')
|
||||
if not self.driverless and (ppfeaturemask:=int(HWInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16)) & 0x8000:
|
||||
if not self.driverless and (ppfeaturemask:=int(FileIOInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16))&0x8000:
|
||||
raise RuntimeError("SQTT can't be enabled because of hardware bug, to workaround either use driverless or add "
|
||||
f"ppfeaturemask={(ppfeaturemask&~0x8000):#x} (current {ppfeaturemask=:#x} & ~PP_GFXOFF_MASK) to amdgpu module parameters\n"
|
||||
"For more information read https://github.com/tinygrad/tinygrad/blob/master/extra/sqtt/README.md")
|
||||
|
||||
@@ -4,7 +4,7 @@ assert sys.platform != 'win32'
|
||||
from typing import Any, cast, Union, Type, ClassVar
|
||||
from dataclasses import dataclass
|
||||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface, HWInterface, MOCKGPU
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU
|
||||
from tinygrad.ops import sint
|
||||
from tinygrad.device import BufferSpec, CPUProgram
|
||||
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, round_up, data64, data64_le, DEBUG, prod, OSX
|
||||
@@ -20,7 +20,7 @@ def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status
|
||||
NV_PFAULT_FAULT_TYPE = {dt:name for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_FAULT_TYPE_")}
|
||||
NV_PFAULT_ACCESS_TYPE = {dt:name.split("_")[-1] for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_ACCESS_TYPE_")}
|
||||
|
||||
def nv_iowr(fd:HWInterface, nr, args):
|
||||
def nv_iowr(fd:FileIOInterface, nr, args):
|
||||
ret = fd.ioctl((3 << 30) | (ctypes.sizeof(args) & 0x1FFF) << 16 | (ord('F') & 0xFF) << 8 | (nr & 0xFF), args)
|
||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||
|
||||
@@ -46,7 +46,7 @@ def make_rmctrl_type():
|
||||
getattr(nv_gpu, name+"_PARAMS", getattr(nv_gpu, name.replace("_CTRL_CMD_", "_CTRL_DEBUG_")+"_PARAMETERS", None))))})
|
||||
rmctrl = make_rmctrl_type()
|
||||
|
||||
def uvm_ioctl(cmd, sttyp, fd:HWInterface, **kwargs):
|
||||
def uvm_ioctl(cmd, sttyp, fd:FileIOInterface, **kwargs):
|
||||
ret = fd.ioctl(cmd, made:=sttyp(**kwargs))
|
||||
if ret != 0: raise RuntimeError(f"ioctl(uvm) returned {ret}")
|
||||
if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl returned {get_error_str(made.rmStatus)}")
|
||||
@@ -293,8 +293,8 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
signal_pool: ClassVar[list[int]] = []
|
||||
|
||||
root = None
|
||||
fd_ctl: HWInterface
|
||||
fd_uvm: HWInterface
|
||||
fd_ctl: FileIOInterface
|
||||
fd_uvm: FileIOInterface
|
||||
gpus_info: Union[list, ctypes.Array] = []
|
||||
|
||||
# TODO: Need a proper allocator for va addresses
|
||||
@@ -305,12 +305,12 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
host_object_enumerator: int = 0x1000
|
||||
|
||||
def _new_gpu_fd(self):
|
||||
fd_dev = HWInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
|
||||
fd_dev = FileIOInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
|
||||
nv_iowr(fd_dev, nv_gpu.NV_ESC_REGISTER_FD, nv_gpu.nv_ioctl_register_fd_t(ctl_fd=self.fd_ctl.fd))
|
||||
return fd_dev
|
||||
|
||||
def _gpu_map_to_cpu(self, memory_handle, size, target=None, flags=0, system=False):
|
||||
fd_dev = self._new_gpu_fd() if not system else HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||
fd_dev = self._new_gpu_fd() if not system else FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||
made = nv_gpu.nv_ioctl_nvos33_parameters_with_fd(fd=fd_dev.fd,
|
||||
params=nv_gpu.NVOS33_PARAMETERS(hClient=self.root, hDevice=self.nvdevice, hMemory=memory_handle, length=size, flags=flags))
|
||||
nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY, made)
|
||||
@@ -324,7 +324,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access)
|
||||
|
||||
if host:
|
||||
va_addr = HWInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
va_addr = FileIOInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
|
||||
flags = (nv_gpu.NVOS02_FLAGS_PHYSICALITY_NONCONTIGUOUS << 4) | (nv_gpu.NVOS02_FLAGS_COHERENCY_CACHED << 12) \
|
||||
| (nv_gpu.NVOS02_FLAGS_MAPPING_NO_MAP << 30)
|
||||
@@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
|
||||
self._debug_mappings.pop((cast(int, mem.va_addr), mem.size))
|
||||
uvm.free(self.fd_uvm, base=cast(int, mem.va_addr), length=mem.size)
|
||||
if mem.meta.has_cpu_mapping: HWInterface.munmap(cast(int, mem.va_addr), mem.size)
|
||||
if mem.meta.has_cpu_mapping: FileIOInterface.munmap(cast(int, mem.va_addr), mem.size)
|
||||
|
||||
def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False, tag="") -> HCQBuffer:
|
||||
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
|
||||
@@ -391,9 +391,9 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
if NVDevice.root is None:
|
||||
NVDevice.fd_ctl = HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||
NVDevice.fd_uvm = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||
self.fd_uvm_2 = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||
NVDevice.fd_ctl = FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||
NVDevice.fd_uvm = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||
self.fd_uvm_2 = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||
NVDevice.root = rm_alloc(self.fd_ctl, nv_gpu.NV01_ROOT_CLIENT, 0, 0, None).hObjectNew
|
||||
uvm.initialize(self.fd_uvm)
|
||||
with contextlib.suppress(RuntimeError): uvm.mm_initialize(self.fd_uvm_2, uvmFd=self.fd_uvm.fd) # this error is okay, CUDA hits it too
|
||||
|
||||
@@ -5,7 +5,7 @@ from types import SimpleNamespace
|
||||
from typing import Any, cast, ClassVar
|
||||
from tinygrad.device import BufferSpec
|
||||
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
|
||||
from tinygrad.runtime.support.hcq import HWInterface
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||
from tinygrad.runtime.autogen import kgsl, adreno
|
||||
from tinygrad.runtime.ops_gpu import CLCompiler, CLDevice
|
||||
from tinygrad.renderer.cstyle import QCOMRenderer
|
||||
@@ -325,7 +325,7 @@ class QCOMDevice(HCQCompiled):
|
||||
dummy_addr: int = 0
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
self.fd = HWInterface('/dev/kgsl-3d0', os.O_RDWR)
|
||||
self.fd = FileIOInterface('/dev/kgsl-3d0', os.O_RDWR)
|
||||
QCOMDevice.dummy_addr = cast(int, self._gpu_alloc(0x1000).va_addr)
|
||||
|
||||
flags = kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT | kgsl.KGSL_CONTEXT_NO_FAULT_TOLERANCE | kgsl.KGSL_CONTEXT_NO_GMEM_ALLOC \
|
||||
@@ -364,7 +364,7 @@ class QCOMDevice(HCQCompiled):
|
||||
|
||||
def _gpu_free(self, mem:HCQBuffer):
|
||||
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta.id)
|
||||
HWInterface.munmap(mem.va_addr, mem.meta.mmapsize)
|
||||
FileIOInterface.munmap(mem.va_addr, mem.meta.mmapsize)
|
||||
|
||||
def _ensure_stack_size(self, sz):
|
||||
if not hasattr(self, '_stack'): self._stack = self._gpu_alloc(sz)
|
||||
|
||||
@@ -13,7 +13,7 @@ class MMIOInterface:
|
||||
def __getitem__(self, k) -> int|list[int]: return self.mv[k].tolist() if isinstance(k, slice) else self.mv[k]
|
||||
def __setitem__(self, k, v:int|array.array): self.mv[k] = v
|
||||
|
||||
class HWInterface:
|
||||
class FileIOInterface:
|
||||
"""
|
||||
Hardware Abstraction Layer for HCQ devices. The class provides a unified interface for interacting with hardware devices.
|
||||
"""
|
||||
@@ -42,9 +42,9 @@ class HWInterface:
|
||||
@staticmethod
|
||||
def readlink(path): return os.readlink(path)
|
||||
@staticmethod
|
||||
def eventfd(initval, flags=None): return HWInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
|
||||
def eventfd(initval, flags=None): return FileIOInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
|
||||
|
||||
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockHWInterface as HWInterface # noqa: F401 # pylint: disable=unused-import
|
||||
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockFileIOInterface as FileIOInterface # noqa: F401 # pylint: disable=unused-import
|
||||
|
||||
# **************** for HCQ Compatible Devices ****************
|
||||
|
||||
|
||||
Reference in New Issue
Block a user