mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
rename HWInterface -> FileIOInterface (#9989)
* rename HWInterface -> FileIOInterface * ugh
This commit is contained in:
@@ -80,8 +80,8 @@ generate_kfd() {
|
|||||||
fixup $BASE/kfd.py
|
fixup $BASE/kfd.py
|
||||||
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
|
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
|
||||||
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
|
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
|
||||||
sed -i "/import functools/a from tinygrad.runtime.support.hcq import HWInterface" $BASE/kfd.py
|
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
|
||||||
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, \*\*kwargs):/g" $BASE/kfd.py
|
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
|
||||||
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
|
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
|
||||||
python3 -c "import tinygrad.runtime.autogen.kfd"
|
python3 -c "import tinygrad.runtime.autogen.kfd"
|
||||||
}
|
}
|
||||||
@@ -287,7 +287,7 @@ generate_vfio() {
|
|||||||
fixup $BASE/vfio.py
|
fixup $BASE/vfio.py
|
||||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
|
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
|
||||||
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
|
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
|
||||||
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import HWInterface\g" $BASE/vfio.py
|
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import FileIOInterface\g" $BASE/vfio.py
|
||||||
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
|
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import ctypes, ctypes.util, time, os, builtins, fcntl
|
import ctypes, ctypes.util, time, os, builtins, fcntl
|
||||||
from tinygrad.runtime.support.hcq import HWInterface
|
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||||
from test.mockgpu.nv.nvdriver import NVDriver
|
from test.mockgpu.nv.nvdriver import NVDriver
|
||||||
from test.mockgpu.amd.amddriver import AMDDriver
|
from test.mockgpu.amd.amddriver import AMDDriver
|
||||||
start = time.perf_counter()
|
start = time.perf_counter()
|
||||||
@@ -53,7 +53,7 @@ def _open(path, flags):
|
|||||||
return virtfd.fd
|
return virtfd.fd
|
||||||
return os.open(path, flags, 0o777) if os.path.exists(path) else None
|
return os.open(path, flags, 0o777) if os.path.exists(path) else None
|
||||||
|
|
||||||
class MockHWInterface(HWInterface):
|
class MockFileIOInterface(FileIOInterface):
|
||||||
def __init__(self, path:str="", flags:int=os.O_RDONLY, fd:int|None=None):
|
def __init__(self, path:str="", flags:int=os.O_RDONLY, fd:int|None=None):
|
||||||
self.path = path
|
self.path = path
|
||||||
self.fd = fd or _open(path, flags)
|
self.fd = fd or _open(path, flags)
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ import ctypes, os
|
|||||||
|
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
from tinygrad.runtime.support.hcq import HWInterface
|
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||||
|
|
||||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, **kwargs):
|
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, **kwargs):
|
||||||
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := __user_struct(**kwargs))<<16) | (__base<<8) | __nr, made)
|
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := __user_struct(**kwargs))<<16) | (__base<<8) | __nr, made)
|
||||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||||
return made
|
return made
|
||||||
|
|||||||
@@ -9,13 +9,13 @@
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
|
||||||
from tinygrad.runtime.support.hcq import HWInterface
|
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
def _do_ioctl_io(__idir, __base, __nr, __fd:HWInterface, val=0, __len=0):
|
def _do_ioctl_io(__idir, __base, __nr, __fd:FileIOInterface, val=0, __len=0):
|
||||||
return __fd.ioctl((__idir<<30) | (__len<<16) | (__base<<8) | __nr, val)
|
return __fd.ioctl((__idir<<30) | (__len<<16) | (__base<<8) | __nr, val)
|
||||||
|
|
||||||
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:HWInterface, __val=None, **kwargs):
|
def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, __val=None, **kwargs):
|
||||||
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := (__made or __user_struct(**kwargs)))<<16) | (__base<<8) | __nr, made)
|
ret = __fd.ioctl((__idir<<30) | (ctypes.sizeof(made := (__made or __user_struct(**kwargs)))<<16) | (__base<<8) | __nr, made)
|
||||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||||
return made
|
return made
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from typing import Any, cast, ClassVar
|
|||||||
import os, ctypes, ctypes.util, struct, hashlib, functools, importlib, mmap, errno, array, contextlib, sys, select
|
import os, ctypes, ctypes.util, struct, hashlib, functools, importlib, mmap, errno, array, contextlib, sys, select
|
||||||
assert sys.platform != 'win32'
|
assert sys.platform != 'win32'
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, HWInterface
|
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
|
||||||
from tinygrad.ops import sint
|
from tinygrad.ops import sint
|
||||||
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, CPUProgram, PROFILE
|
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, CPUProgram, PROFILE
|
||||||
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, all_same, flatten, DEBUG, OSX
|
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, mv_address, all_same, flatten, DEBUG, OSX
|
||||||
@@ -520,9 +520,9 @@ class AMDIP:
|
|||||||
return getattr(self.module, name)
|
return getattr(self.module, name)
|
||||||
|
|
||||||
class KFDIface:
|
class KFDIface:
|
||||||
kfd:HWInterface|None = None
|
kfd:FileIOInterface|None = None
|
||||||
event_page:HCQBuffer|None = None
|
event_page:HCQBuffer|None = None
|
||||||
gpus:list[HWInterface] = []
|
gpus:list[FileIOInterface] = []
|
||||||
|
|
||||||
def _is_usable_gpu(self, gpu_id):
|
def _is_usable_gpu(self, gpu_id):
|
||||||
with contextlib.suppress(OSError): return int(gpu_id.read()) != 0
|
with contextlib.suppress(OSError): return int(gpu_id.read()) != 0
|
||||||
@@ -535,23 +535,23 @@ class KFDIface:
|
|||||||
|
|
||||||
# Initialize KFD interface during first run
|
# Initialize KFD interface during first run
|
||||||
if KFDIface.kfd is None:
|
if KFDIface.kfd is None:
|
||||||
KFDIface.kfd = HWInterface("/dev/kfd", os.O_RDWR)
|
KFDIface.kfd = FileIOInterface("/dev/kfd", os.O_RDWR)
|
||||||
gpus = [g for g in HWInterface(kfd_topo_path).listdir() if self._is_usable_gpu(HWInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
|
gpus = [g for g in FileIOInterface(kfd_topo_path).listdir() if self._is_usable_gpu(FileIOInterface(f"{kfd_topo_path}/{g}/gpu_id"))]
|
||||||
gpus = sorted(gpus, key=lambda x: int(x.split('/')[-1]))
|
gpus = sorted(gpus, key=lambda x: int(x.split('/')[-1]))
|
||||||
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('HIP_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
|
visible_devices = [int(x) for x in (getenv('VISIBLE_DEVICES', getenv('HIP_VISIBLE_DEVICES', ''))).split(',') if x.strip()]
|
||||||
KFDIface.gpus = [gpus[x] for x in visible_devices] if visible_devices else gpus
|
KFDIface.gpus = [gpus[x] for x in visible_devices] if visible_devices else gpus
|
||||||
|
|
||||||
if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")
|
if device_id >= len(KFDIface.gpus): raise RuntimeError(f"No device found for {device_id}. Requesting more devices than the system has?")
|
||||||
|
|
||||||
self.gpu_id = int(HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
|
self.gpu_id = int(FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/gpu_id").read())
|
||||||
self.props = {l.split()[0]: int(l.split()[1]) for l in HWInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
|
self.props = {(p:=l.split())[0]: int(p[1]) for l in FileIOInterface(f"{kfd_topo_path}/{KFDIface.gpus[device_id]}/properties").read().splitlines()}
|
||||||
ip_base = f"/sys/class/drm/renderD{self.props['drm_render_minor']}/device/ip_discovery/die/0"
|
ip_base = f"/sys/class/drm/renderD{self.props['drm_render_minor']}/device/ip_discovery/die/0"
|
||||||
id2ip = {am.GC_HWID: am.GC_HWIP, am.SDMA0_HWID: am.SDMA0_HWIP, am.NBIF_HWID: am.NBIF_HWIP}
|
id2ip = {am.GC_HWID: am.GC_HWIP, am.SDMA0_HWID: am.SDMA0_HWIP, am.NBIF_HWID: am.NBIF_HWIP}
|
||||||
self.ip_versions = {id2ip[int(hwid)]:tuple(int(HWInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
|
self.ip_versions = {id2ip[int(hwid)]:tuple(int(FileIOInterface(f'{ip_base}/{hwid}/0/{part}').read()) for part in ['major', 'minor', 'revision'])
|
||||||
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||||
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in HWInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
|
self.ip_offsets = {id2ip[int(hwid)]:tuple(int(x, 16) for x in FileIOInterface(f'{ip_base}/{hwid}/0/base_addr').read().splitlines())
|
||||||
for hwid in HWInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
for hwid in FileIOInterface(ip_base).listdir() if hwid.isnumeric() and int(hwid) in id2ip}
|
||||||
self.drm_fd = HWInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
|
self.drm_fd = FileIOInterface(f"/dev/dri/renderD{self.props['drm_render_minor']}", os.O_RDWR)
|
||||||
|
|
||||||
kfd.AMDKFD_IOC_ACQUIRE_VM(KFDIface.kfd, drm_fd=self.drm_fd.fd, gpu_id=self.gpu_id)
|
kfd.AMDKFD_IOC_ACQUIRE_VM(KFDIface.kfd, drm_fd=self.drm_fd.fd, gpu_id=self.gpu_id)
|
||||||
|
|
||||||
@@ -580,8 +580,8 @@ class KFDIface:
|
|||||||
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
|
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
|
||||||
|
|
||||||
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
|
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
|
||||||
buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
buf = addr = FileIOInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||||
else: buf, addr = 0, HWInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
else: buf, addr = 0, FileIOInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
||||||
assert addr != 0xffffffffffffffff
|
assert addr != 0xffffffffffffffff
|
||||||
|
|
||||||
try: mem = kfd.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id,
|
try: mem = kfd.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id,
|
||||||
@@ -604,7 +604,7 @@ class KFDIface:
|
|||||||
c_gpus = (ctypes.c_int32 * len(gpus))(*gpus)
|
c_gpus = (ctypes.c_int32 * len(gpus))(*gpus)
|
||||||
stm = kfd.AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU(self.kfd, handle=mem.meta.handle, device_ids_array_ptr=ctypes.addressof(c_gpus), n_devices=len(gpus))
|
stm = kfd.AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU(self.kfd, handle=mem.meta.handle, device_ids_array_ptr=ctypes.addressof(c_gpus), n_devices=len(gpus))
|
||||||
assert stm.n_success == len(gpus)
|
assert stm.n_success == len(gpus)
|
||||||
if mem.va_addr: HWInterface.munmap(mem.va_addr, mem.size)
|
if mem.va_addr: FileIOInterface.munmap(mem.va_addr, mem.size)
|
||||||
kfd.AMDKFD_IOC_FREE_MEMORY_OF_GPU(self.kfd, handle=mem.meta.handle)
|
kfd.AMDKFD_IOC_FREE_MEMORY_OF_GPU(self.kfd, handle=mem.meta.handle)
|
||||||
|
|
||||||
def map(self, mem):
|
def map(self, mem):
|
||||||
@@ -624,7 +624,7 @@ class KFDIface:
|
|||||||
|
|
||||||
if not hasattr(self, 'doorbells'):
|
if not hasattr(self, 'doorbells'):
|
||||||
self.doorbells_base = queue.doorbell_offset & (~0x1fff) # doorbell is two pages
|
self.doorbells_base = queue.doorbell_offset & (~0x1fff) # doorbell is two pages
|
||||||
self.doorbells = cast(HWInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
|
self.doorbells = cast(FileIOInterface, KFDIface.kfd).mmap(0, 0x2000, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, self.doorbells_base)
|
||||||
|
|
||||||
return AMDQueueDesc(ring=to_mv(ring.va_addr, ring.size).cast("I"),
|
return AMDQueueDesc(ring=to_mv(ring.va_addr, ring.size).cast("I"),
|
||||||
read_ptrs=[to_mv(queue.read_pointer_address, 8).cast("Q")], write_ptrs=[to_mv(queue.write_pointer_address, 8).cast("Q")],
|
read_ptrs=[to_mv(queue.read_pointer_address, 8).cast("Q")], write_ptrs=[to_mv(queue.write_pointer_address, 8).cast("Q")],
|
||||||
@@ -650,17 +650,17 @@ class AMAllocationMeta: owner:AMDDevice; mapped_devs:list[AMDDevice]; mapping:AM
|
|||||||
|
|
||||||
class PCIIface:
|
class PCIIface:
|
||||||
supported_devs:list[int] = [0x744c, 0x7480, 0x7550]
|
supported_devs:list[int] = [0x744c, 0x7480, 0x7550]
|
||||||
vfio:bool = getenv("VFIO", 1) and HWInterface.exists("/dev/vfio/vfio")
|
vfio:bool = getenv("VFIO", 1) and FileIOInterface.exists("/dev/vfio/vfio")
|
||||||
vfio_fd:HWInterface
|
vfio_fd:FileIOInterface
|
||||||
gpus:list[Any] = []
|
gpus:list[Any] = []
|
||||||
|
|
||||||
def __init__(self, dev, dev_id):
|
def __init__(self, dev, dev_id):
|
||||||
self.dev = dev
|
self.dev = dev
|
||||||
|
|
||||||
if first_dev:=len(PCIIface.gpus) == 0:
|
if first_dev:=len(PCIIface.gpus) == 0:
|
||||||
for pcibus in HWInterface("/sys/bus/pci/devices").listdir():
|
for pcibus in FileIOInterface("/sys/bus/pci/devices").listdir():
|
||||||
vendor = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
|
vendor = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/vendor").read(), 16)
|
||||||
device = int(HWInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
|
device = int(FileIOInterface(f"/sys/bus/pci/devices/{pcibus}/device").read(), 16)
|
||||||
if vendor == 0x1002 and device in PCIIface.supported_devs: PCIIface.gpus.append(pcibus)
|
if vendor == 0x1002 and device in PCIIface.supported_devs: PCIIface.gpus.append(pcibus)
|
||||||
PCIIface.gpus = sorted(PCIIface.gpus)
|
PCIIface.gpus = sorted(PCIIface.gpus)
|
||||||
|
|
||||||
@@ -671,51 +671,51 @@ class PCIIface:
|
|||||||
self.pcibus = PCIIface.gpus[dev_id]
|
self.pcibus = PCIIface.gpus[dev_id]
|
||||||
|
|
||||||
# Unbind the device from the kernel driver
|
# Unbind the device from the kernel driver
|
||||||
if HWInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
|
if FileIOInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
|
||||||
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
|
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver/unbind", os.O_WRONLY).write(self.pcibus)
|
||||||
|
|
||||||
supported_sizes = int(HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
|
supported_sizes = int(FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDONLY).read(), 16)
|
||||||
try: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
|
try: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource0_resize", os.O_RDWR).write(str(supported_sizes.bit_length() - 1))
|
||||||
except OSError as e: raise RuntimeError(f"Cannot resize BAR: {e}. Ensure the resizable BAR option is enabled on your system.") from e
|
except OSError as e: raise RuntimeError(f"Cannot resize BAR: {e}. Ensure the resizable BAR option is enabled on your system.") from e
|
||||||
|
|
||||||
# Try to init vfio. Use it if success.
|
# Try to init vfio. Use it if success.
|
||||||
if PCIIface.vfio:
|
if PCIIface.vfio:
|
||||||
try:
|
try:
|
||||||
if first_dev:
|
if first_dev:
|
||||||
HWInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
|
FileIOInterface("/sys/module/vfio/parameters/enable_unsafe_noiommu_mode", os.O_RDWR).write("1")
|
||||||
PCIIface.vfio_fd = HWInterface("/dev/vfio/vfio", os.O_RDWR)
|
PCIIface.vfio_fd = FileIOInterface("/dev/vfio/vfio", os.O_RDWR)
|
||||||
vfio.VFIO_CHECK_EXTENSION(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
vfio.VFIO_CHECK_EXTENSION(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
||||||
|
|
||||||
HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
|
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
|
||||||
HWInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
|
FileIOInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
|
||||||
|
|
||||||
iommu_group = HWInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
|
iommu_group = FileIOInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
|
||||||
except OSError:
|
except OSError:
|
||||||
if DEBUG >= 1: print(f"am {self.pcibus}: failed to init vfio-pci module (run `sudo modprobe vfio-pci`).")
|
if DEBUG >= 1: print(f"am {self.pcibus}: failed to init vfio-pci module (run `sudo modprobe vfio-pci`).")
|
||||||
PCIIface.vfio = False
|
PCIIface.vfio = False
|
||||||
|
|
||||||
# Init vfio for the device
|
# Init vfio for the device
|
||||||
if PCIIface.vfio:
|
if PCIIface.vfio:
|
||||||
self.vfio_group = HWInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
|
self.vfio_group = FileIOInterface(f"/dev/vfio/noiommu-{iommu_group}", os.O_RDWR)
|
||||||
vfio.VFIO_GROUP_SET_CONTAINER(self.vfio_group, ctypes.c_int(PCIIface.vfio_fd.fd))
|
vfio.VFIO_GROUP_SET_CONTAINER(self.vfio_group, ctypes.c_int(PCIIface.vfio_fd.fd))
|
||||||
|
|
||||||
if first_dev: vfio.VFIO_SET_IOMMU(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
if first_dev: vfio.VFIO_SET_IOMMU(PCIIface.vfio_fd, vfio.VFIO_NOIOMMU_IOMMU)
|
||||||
self.vfio_dev = HWInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
|
self.vfio_dev = FileIOInterface(fd=vfio.VFIO_GROUP_GET_DEVICE_FD(self.vfio_group, ctypes.create_string_buffer(self.pcibus.encode())))
|
||||||
|
|
||||||
self.irq_fd = HWInterface.eventfd(0, 0)
|
self.irq_fd = FileIOInterface.eventfd(0, 0)
|
||||||
self.irq_poller = select.poll()
|
self.irq_poller = select.poll()
|
||||||
self.irq_poller.register(self.irq_fd.fd, select.POLLIN)
|
self.irq_poller.register(self.irq_fd.fd, select.POLLIN)
|
||||||
|
|
||||||
irqs = vfio.struct_vfio_irq_set(index=vfio.VFIO_PCI_MSI_IRQ_INDEX, flags=vfio.VFIO_IRQ_SET_DATA_EVENTFD|vfio.VFIO_IRQ_SET_ACTION_TRIGGER,
|
irqs = vfio.struct_vfio_irq_set(index=vfio.VFIO_PCI_MSI_IRQ_INDEX, flags=vfio.VFIO_IRQ_SET_DATA_EVENTFD|vfio.VFIO_IRQ_SET_ACTION_TRIGGER,
|
||||||
argsz=ctypes.sizeof(vfio.struct_vfio_irq_set), count=1, data=(ctypes.c_int * 1)(self.irq_fd.fd))
|
argsz=ctypes.sizeof(vfio.struct_vfio_irq_set), count=1, data=(ctypes.c_int * 1)(self.irq_fd.fd))
|
||||||
vfio.VFIO_DEVICE_SET_IRQS(self.vfio_dev, irqs)
|
vfio.VFIO_DEVICE_SET_IRQS(self.vfio_dev, irqs)
|
||||||
else: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
|
else: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/enable", os.O_RDWR).write("1")
|
||||||
|
|
||||||
self.pagemap = HWInterface("/proc/self/pagemap", os.O_RDONLY)
|
self.pagemap = FileIOInterface("/proc/self/pagemap", os.O_RDONLY)
|
||||||
self.cfg_fd = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
|
self.cfg_fd = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/config", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC)
|
||||||
self.bar_fds = {bar: HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{bar}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for bar in [0, 2, 5]}
|
self.bar_fds = {b: FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource{b}", os.O_RDWR | os.O_SYNC | os.O_CLOEXEC) for b in [0, 2, 5]}
|
||||||
|
|
||||||
bar_info = HWInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
|
bar_info = FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/resource", os.O_RDONLY).read().splitlines()
|
||||||
self.bar_info = {j:(int(start,16), int(end,16), int(flgs,16)) for j,(start,end,flgs) in enumerate(l.split() for l in bar_info)}
|
self.bar_info = {j:(int(start,16), int(end,16), int(flgs,16)) for j,(start,end,flgs) in enumerate(l.split() for l in bar_info)}
|
||||||
|
|
||||||
self.adev = AMDev(self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I'))
|
self.adev = AMDev(self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2).cast('Q'), self._map_pci_range(5).cast('I'))
|
||||||
@@ -741,7 +741,7 @@ class PCIIface:
|
|||||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
|
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
|
||||||
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
|
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
|
||||||
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
||||||
va = HWInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
||||||
|
|
||||||
# Read pagemap to get the physical address of each page. The pages are locked.
|
# Read pagemap to get the physical address of each page. The pages are locked.
|
||||||
self.pagemap.seek(va // mmap.PAGESIZE * 8)
|
self.pagemap.seek(va // mmap.PAGESIZE * 8)
|
||||||
@@ -791,7 +791,7 @@ class AMDDevice(HCQCompiled):
|
|||||||
signal_pages: ClassVar[list[Any]] = []
|
signal_pages: ClassVar[list[Any]] = []
|
||||||
signal_pool: ClassVar[list[int]] = []
|
signal_pool: ClassVar[list[int]] = []
|
||||||
|
|
||||||
driverless:bool = not HWInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
|
driverless:bool = not FileIOInterface.exists('/sys/module/amdgpu') or bool(getenv("AMD_DRIVERLESS", 0))
|
||||||
|
|
||||||
def __init__(self, device:str=""):
|
def __init__(self, device:str=""):
|
||||||
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||||||
@@ -852,7 +852,7 @@ class AMDDevice(HCQCompiled):
|
|||||||
self.sqtt_enabled = PROFILE and bool(getenv("SQTT", 0))
|
self.sqtt_enabled = PROFILE and bool(getenv("SQTT", 0))
|
||||||
if self.sqtt_enabled:
|
if self.sqtt_enabled:
|
||||||
if self.arch != 'gfx1100': raise RuntimeError('SQ Thread Tracing is only supported on 7900XTX')
|
if self.arch != 'gfx1100': raise RuntimeError('SQ Thread Tracing is only supported on 7900XTX')
|
||||||
if not self.driverless and (ppfeaturemask:=int(HWInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16)) & 0x8000:
|
if not self.driverless and (ppfeaturemask:=int(FileIOInterface('/sys/module/amdgpu/parameters/ppfeaturemask', os.O_RDONLY).read(), 16))&0x8000:
|
||||||
raise RuntimeError("SQTT can't be enabled because of hardware bug, to workaround either use driverless or add "
|
raise RuntimeError("SQTT can't be enabled because of hardware bug, to workaround either use driverless or add "
|
||||||
f"ppfeaturemask={(ppfeaturemask&~0x8000):#x} (current {ppfeaturemask=:#x} & ~PP_GFXOFF_MASK) to amdgpu module parameters\n"
|
f"ppfeaturemask={(ppfeaturemask&~0x8000):#x} (current {ppfeaturemask=:#x} & ~PP_GFXOFF_MASK) to amdgpu module parameters\n"
|
||||||
"For more information read https://github.com/tinygrad/tinygrad/blob/master/extra/sqtt/README.md")
|
"For more information read https://github.com/tinygrad/tinygrad/blob/master/extra/sqtt/README.md")
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ assert sys.platform != 'win32'
|
|||||||
from typing import Any, cast, Union, Type, ClassVar
|
from typing import Any, cast, Union, Type, ClassVar
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
|
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
|
||||||
from tinygrad.runtime.support.hcq import MMIOInterface, HWInterface, MOCKGPU
|
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU
|
||||||
from tinygrad.ops import sint
|
from tinygrad.ops import sint
|
||||||
from tinygrad.device import BufferSpec, CPUProgram
|
from tinygrad.device import BufferSpec, CPUProgram
|
||||||
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, round_up, data64, data64_le, DEBUG, prod, OSX
|
from tinygrad.helpers import getenv, mv_address, init_c_struct_t, round_up, data64, data64_le, DEBUG, prod, OSX
|
||||||
@@ -20,7 +20,7 @@ def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status
|
|||||||
NV_PFAULT_FAULT_TYPE = {dt:name for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_FAULT_TYPE_")}
|
NV_PFAULT_FAULT_TYPE = {dt:name for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_FAULT_TYPE_")}
|
||||||
NV_PFAULT_ACCESS_TYPE = {dt:name.split("_")[-1] for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_ACCESS_TYPE_")}
|
NV_PFAULT_ACCESS_TYPE = {dt:name.split("_")[-1] for name,dt in nv_gpu.__dict__.items() if name.startswith("NV_PFAULT_ACCESS_TYPE_")}
|
||||||
|
|
||||||
def nv_iowr(fd:HWInterface, nr, args):
|
def nv_iowr(fd:FileIOInterface, nr, args):
|
||||||
ret = fd.ioctl((3 << 30) | (ctypes.sizeof(args) & 0x1FFF) << 16 | (ord('F') & 0xFF) << 8 | (nr & 0xFF), args)
|
ret = fd.ioctl((3 << 30) | (ctypes.sizeof(args) & 0x1FFF) << 16 | (ord('F') & 0xFF) << 8 | (nr & 0xFF), args)
|
||||||
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
if ret != 0: raise RuntimeError(f"ioctl returned {ret}")
|
||||||
|
|
||||||
@@ -46,7 +46,7 @@ def make_rmctrl_type():
|
|||||||
getattr(nv_gpu, name+"_PARAMS", getattr(nv_gpu, name.replace("_CTRL_CMD_", "_CTRL_DEBUG_")+"_PARAMETERS", None))))})
|
getattr(nv_gpu, name+"_PARAMS", getattr(nv_gpu, name.replace("_CTRL_CMD_", "_CTRL_DEBUG_")+"_PARAMETERS", None))))})
|
||||||
rmctrl = make_rmctrl_type()
|
rmctrl = make_rmctrl_type()
|
||||||
|
|
||||||
def uvm_ioctl(cmd, sttyp, fd:HWInterface, **kwargs):
|
def uvm_ioctl(cmd, sttyp, fd:FileIOInterface, **kwargs):
|
||||||
ret = fd.ioctl(cmd, made:=sttyp(**kwargs))
|
ret = fd.ioctl(cmd, made:=sttyp(**kwargs))
|
||||||
if ret != 0: raise RuntimeError(f"ioctl(uvm) returned {ret}")
|
if ret != 0: raise RuntimeError(f"ioctl(uvm) returned {ret}")
|
||||||
if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl returned {get_error_str(made.rmStatus)}")
|
if made.rmStatus != 0: raise RuntimeError(f"uvm_ioctl returned {get_error_str(made.rmStatus)}")
|
||||||
@@ -293,8 +293,8 @@ class NVDevice(HCQCompiled[NVSignal]):
|
|||||||
signal_pool: ClassVar[list[int]] = []
|
signal_pool: ClassVar[list[int]] = []
|
||||||
|
|
||||||
root = None
|
root = None
|
||||||
fd_ctl: HWInterface
|
fd_ctl: FileIOInterface
|
||||||
fd_uvm: HWInterface
|
fd_uvm: FileIOInterface
|
||||||
gpus_info: Union[list, ctypes.Array] = []
|
gpus_info: Union[list, ctypes.Array] = []
|
||||||
|
|
||||||
# TODO: Need a proper allocator for va addresses
|
# TODO: Need a proper allocator for va addresses
|
||||||
@@ -305,12 +305,12 @@ class NVDevice(HCQCompiled[NVSignal]):
|
|||||||
host_object_enumerator: int = 0x1000
|
host_object_enumerator: int = 0x1000
|
||||||
|
|
||||||
def _new_gpu_fd(self):
|
def _new_gpu_fd(self):
|
||||||
fd_dev = HWInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
|
fd_dev = FileIOInterface(f"/dev/nvidia{NVDevice.gpus_info[self.device_id].minor_number}", os.O_RDWR | os.O_CLOEXEC)
|
||||||
nv_iowr(fd_dev, nv_gpu.NV_ESC_REGISTER_FD, nv_gpu.nv_ioctl_register_fd_t(ctl_fd=self.fd_ctl.fd))
|
nv_iowr(fd_dev, nv_gpu.NV_ESC_REGISTER_FD, nv_gpu.nv_ioctl_register_fd_t(ctl_fd=self.fd_ctl.fd))
|
||||||
return fd_dev
|
return fd_dev
|
||||||
|
|
||||||
def _gpu_map_to_cpu(self, memory_handle, size, target=None, flags=0, system=False):
|
def _gpu_map_to_cpu(self, memory_handle, size, target=None, flags=0, system=False):
|
||||||
fd_dev = self._new_gpu_fd() if not system else HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
fd_dev = self._new_gpu_fd() if not system else FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||||
made = nv_gpu.nv_ioctl_nvos33_parameters_with_fd(fd=fd_dev.fd,
|
made = nv_gpu.nv_ioctl_nvos33_parameters_with_fd(fd=fd_dev.fd,
|
||||||
params=nv_gpu.NVOS33_PARAMETERS(hClient=self.root, hDevice=self.nvdevice, hMemory=memory_handle, length=size, flags=flags))
|
params=nv_gpu.NVOS33_PARAMETERS(hClient=self.root, hDevice=self.nvdevice, hMemory=memory_handle, length=size, flags=flags))
|
||||||
nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY, made)
|
nv_iowr(self.fd_ctl, nv_gpu.NV_ESC_RM_MAP_MEMORY, made)
|
||||||
@@ -324,7 +324,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
|||||||
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access)
|
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access)
|
||||||
|
|
||||||
if host:
|
if host:
|
||||||
va_addr = HWInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
va_addr = FileIOInterface.anon_mmap(va_addr, size, mmap.PROT_READ | mmap.PROT_WRITE, MAP_FIXED | mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||||
|
|
||||||
flags = (nv_gpu.NVOS02_FLAGS_PHYSICALITY_NONCONTIGUOUS << 4) | (nv_gpu.NVOS02_FLAGS_COHERENCY_CACHED << 12) \
|
flags = (nv_gpu.NVOS02_FLAGS_PHYSICALITY_NONCONTIGUOUS << 4) | (nv_gpu.NVOS02_FLAGS_COHERENCY_CACHED << 12) \
|
||||||
| (nv_gpu.NVOS02_FLAGS_MAPPING_NO_MAP << 30)
|
| (nv_gpu.NVOS02_FLAGS_MAPPING_NO_MAP << 30)
|
||||||
@@ -363,7 +363,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
|||||||
|
|
||||||
self._debug_mappings.pop((cast(int, mem.va_addr), mem.size))
|
self._debug_mappings.pop((cast(int, mem.va_addr), mem.size))
|
||||||
uvm.free(self.fd_uvm, base=cast(int, mem.va_addr), length=mem.size)
|
uvm.free(self.fd_uvm, base=cast(int, mem.va_addr), length=mem.size)
|
||||||
if mem.meta.has_cpu_mapping: HWInterface.munmap(cast(int, mem.va_addr), mem.size)
|
if mem.meta.has_cpu_mapping: FileIOInterface.munmap(cast(int, mem.va_addr), mem.size)
|
||||||
|
|
||||||
def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False, tag="") -> HCQBuffer:
|
def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False, tag="") -> HCQBuffer:
|
||||||
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
|
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
|
||||||
@@ -391,9 +391,9 @@ class NVDevice(HCQCompiled[NVSignal]):
|
|||||||
|
|
||||||
def __init__(self, device:str=""):
|
def __init__(self, device:str=""):
|
||||||
if NVDevice.root is None:
|
if NVDevice.root is None:
|
||||||
NVDevice.fd_ctl = HWInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
NVDevice.fd_ctl = FileIOInterface("/dev/nvidiactl", os.O_RDWR | os.O_CLOEXEC)
|
||||||
NVDevice.fd_uvm = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
NVDevice.fd_uvm = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||||
self.fd_uvm_2 = HWInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
self.fd_uvm_2 = FileIOInterface("/dev/nvidia-uvm", os.O_RDWR | os.O_CLOEXEC)
|
||||||
NVDevice.root = rm_alloc(self.fd_ctl, nv_gpu.NV01_ROOT_CLIENT, 0, 0, None).hObjectNew
|
NVDevice.root = rm_alloc(self.fd_ctl, nv_gpu.NV01_ROOT_CLIENT, 0, 0, None).hObjectNew
|
||||||
uvm.initialize(self.fd_uvm)
|
uvm.initialize(self.fd_uvm)
|
||||||
with contextlib.suppress(RuntimeError): uvm.mm_initialize(self.fd_uvm_2, uvmFd=self.fd_uvm.fd) # this error is okay, CUDA hits it too
|
with contextlib.suppress(RuntimeError): uvm.mm_initialize(self.fd_uvm_2, uvmFd=self.fd_uvm.fd) # this error is okay, CUDA hits it too
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from types import SimpleNamespace
|
|||||||
from typing import Any, cast, ClassVar
|
from typing import Any, cast, ClassVar
|
||||||
from tinygrad.device import BufferSpec
|
from tinygrad.device import BufferSpec
|
||||||
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
|
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
|
||||||
from tinygrad.runtime.support.hcq import HWInterface
|
from tinygrad.runtime.support.hcq import FileIOInterface
|
||||||
from tinygrad.runtime.autogen import kgsl, adreno
|
from tinygrad.runtime.autogen import kgsl, adreno
|
||||||
from tinygrad.runtime.ops_gpu import CLCompiler, CLDevice
|
from tinygrad.runtime.ops_gpu import CLCompiler, CLDevice
|
||||||
from tinygrad.renderer.cstyle import QCOMRenderer
|
from tinygrad.renderer.cstyle import QCOMRenderer
|
||||||
@@ -325,7 +325,7 @@ class QCOMDevice(HCQCompiled):
|
|||||||
dummy_addr: int = 0
|
dummy_addr: int = 0
|
||||||
|
|
||||||
def __init__(self, device:str=""):
|
def __init__(self, device:str=""):
|
||||||
self.fd = HWInterface('/dev/kgsl-3d0', os.O_RDWR)
|
self.fd = FileIOInterface('/dev/kgsl-3d0', os.O_RDWR)
|
||||||
QCOMDevice.dummy_addr = cast(int, self._gpu_alloc(0x1000).va_addr)
|
QCOMDevice.dummy_addr = cast(int, self._gpu_alloc(0x1000).va_addr)
|
||||||
|
|
||||||
flags = kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT | kgsl.KGSL_CONTEXT_NO_FAULT_TOLERANCE | kgsl.KGSL_CONTEXT_NO_GMEM_ALLOC \
|
flags = kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT | kgsl.KGSL_CONTEXT_NO_FAULT_TOLERANCE | kgsl.KGSL_CONTEXT_NO_GMEM_ALLOC \
|
||||||
@@ -364,7 +364,7 @@ class QCOMDevice(HCQCompiled):
|
|||||||
|
|
||||||
def _gpu_free(self, mem:HCQBuffer):
|
def _gpu_free(self, mem:HCQBuffer):
|
||||||
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta.id)
|
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.meta.id)
|
||||||
HWInterface.munmap(mem.va_addr, mem.meta.mmapsize)
|
FileIOInterface.munmap(mem.va_addr, mem.meta.mmapsize)
|
||||||
|
|
||||||
def _ensure_stack_size(self, sz):
|
def _ensure_stack_size(self, sz):
|
||||||
if not hasattr(self, '_stack'): self._stack = self._gpu_alloc(sz)
|
if not hasattr(self, '_stack'): self._stack = self._gpu_alloc(sz)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class MMIOInterface:
|
|||||||
def __getitem__(self, k) -> int|list[int]: return self.mv[k].tolist() if isinstance(k, slice) else self.mv[k]
|
def __getitem__(self, k) -> int|list[int]: return self.mv[k].tolist() if isinstance(k, slice) else self.mv[k]
|
||||||
def __setitem__(self, k, v:int|array.array): self.mv[k] = v
|
def __setitem__(self, k, v:int|array.array): self.mv[k] = v
|
||||||
|
|
||||||
class HWInterface:
|
class FileIOInterface:
|
||||||
"""
|
"""
|
||||||
Hardware Abstraction Layer for HCQ devices. The class provides a unified interface for interacting with hardware devices.
|
Hardware Abstraction Layer for HCQ devices. The class provides a unified interface for interacting with hardware devices.
|
||||||
"""
|
"""
|
||||||
@@ -42,9 +42,9 @@ class HWInterface:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def readlink(path): return os.readlink(path)
|
def readlink(path): return os.readlink(path)
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def eventfd(initval, flags=None): return HWInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
|
def eventfd(initval, flags=None): return FileIOInterface(fd=os.eventfd(initval, flags)) # type: ignore[attr-defined]
|
||||||
|
|
||||||
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockHWInterface as HWInterface # noqa: F401 # pylint: disable=unused-import
|
if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.mockgpu import MockFileIOInterface as FileIOInterface # noqa: F401 # pylint: disable=unused-import
|
||||||
|
|
||||||
# **************** for HCQ Compatible Devices ****************
|
# **************** for HCQ Compatible Devices ****************
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user