move mockgpu to tests [pr] (#8396)

* move mockgpu to tests

* linter

* i'm so sorry

* sorry, python

* path
This commit is contained in:
nimlgen
2024-12-24 23:48:02 +03:00
committed by GitHub
parent 2c93f27652
commit a647f3dd2c
10 changed files with 50 additions and 51 deletions

View File

@@ -1,9 +1,8 @@
import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy
import pathlib, re, ctypes, mmap, collections, functools, copy
import tinygrad.runtime.autogen.kfd as kfd
from typing import Optional, Any
from tinygrad.helpers import from_mv
from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
from extra.mockgpu.amd.amdgpu import AMDGPU, gpu_props
from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props
libc = ctypes.CDLL(ctypes.util.find_library("c"))
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
@@ -13,7 +12,7 @@ def ioctls_from_header():
# hdrpy = (pathlib.Path(__file__).parent.parent.parent.parent / "tinygrad" / "runtime" / "autogen" / "kfd.py").read_text()
# pattern = r'# (AMDKFD_IOC_[A-Z0-9_]+)\s=\s_(IOW?R?).*\(( 0x[0-9a-fA-F]+) ,\s+struct\s([A-Za-z0-9_]+)\s+\)'
# matches = re.findall(pattern, hdrpy, re.MULTILINE)
hdr = (pathlib.Path(__file__).parent.parent.parent / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "")
hdr = (pathlib.Path(__file__).parent.parent.parent.parent / "extra" / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "")
pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)'
matches = re.findall(pattern, hdr, re.MULTILINE)
return type("KFD_IOCTLS", (object, ), {name: int(nr, 0x10) for name, _, nr, _ in matches}), \

View File

@@ -1,5 +1,5 @@
import ctypes, time
from extra.mockgpu.gpu import VirtGPU
from test.mockgpu.gpu import VirtGPU
from tinygrad.helpers import to_mv, init_c_struct_t, mv_address
import tinygrad.runtime.autogen.amd_gpu as amd_gpu
@@ -25,7 +25,8 @@ def _try_dlopen_remu():
try:
remu = ctypes.CDLL(path)
remu.run_asm.restype = ctypes.c_int32
remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p]
remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32,
ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p]
except OSError: pass
else: return remu
print("Could not find libremu.so")
@@ -55,7 +56,7 @@ def create_sdma_packets():
return type("SDMA_PKTS", (object, ), structs)
sdma_pkts = create_sdma_packets()
class AMDQueue():
class AMDQueue:
def __init__(self, base, size, rptr, wptr):
self.queue, self.size = to_mv(base, size).cast("I"), size
self.rptr = to_mv(rptr, 8).cast("Q")
@@ -98,14 +99,14 @@ class PM4Executor(AMDQueue):
mem_event_type = (self._next_dword() >> 0) & 0xff
selectors = self._next_dword()
mem_data_sel = (selectors >> 29) & 0b111
int_sel = (selectors >> 24) & 0b11
mem_dst_sel = (selectors >> 16) & 0b1
# int_sel = (selectors >> 24) & 0b11
# mem_dst_sel = (selectors >> 16) & 0b1
addr_lo = self._next_dword()
addr_hi = self._next_dword()
val_lo = self._next_dword()
val_hi = self._next_dword()
val = val_lo + (val_hi << 32)
ev = self._next_dword()
_ = self._next_dword() # ev
ptr = to_mv(addr_lo + (addr_hi << 32), 8)
if mem_data_sel == 1 or mem_data_sel == 2: ptr.cast('Q')[0] = val
@@ -120,24 +121,22 @@ class PM4Executor(AMDQueue):
addr_lo = self._next_dword()
addr_hi = self._next_dword()
val = self._next_dword()
mask = self._next_dword()
timeout = self._next_dword()
_ = self._next_dword() # mask
_ = self._next_dword() # timeout
mem_function = (info >> 0) & 0b111
mem_space = (info >> 4) & 0b1
mem_op = (info >> 6) & 0b1
mem_engine = (info >> 8) & 0b1
_ = (info >> 6) & 0b1 # memop
_ = (info >> 8) & 0b1 # mem_engine
if mem_space == 0: read_op = lambda: val
elif mem_space == 1: read_op = lambda: to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0]
if mem_space == 0: mval = val
elif mem_space == 1: mval = to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0]
if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y
elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y
if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= val)
elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == val)
else: raise RuntimeError(f"Do not support {mem_function=}")
mval = read_op()
can_cont = cmp(mval, val)
if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert packet, need to wait again
if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert this packet, need to wait again
return can_cont
def _exec_set_sh_reg(self, n):
@@ -149,7 +148,7 @@ class PM4Executor(AMDQueue):
def _exec_dispatch_direct(self, n):
assert n == 3
gl = [self._next_dword() for _ in range(3)]
flags = self._next_dword()
_ = self._next_dword() # flags
prg_addr = (self.gpu.regs[regCOMPUTE_PGM_LO] + (self.gpu.regs[regCOMPUTE_PGM_LO + 1] << 32)) << 8
args_addr = self.gpu.regs[regCOMPUTE_USER_DATA_0] + (self.gpu.regs[regCOMPUTE_USER_DATA_0 + 1] << 32)
@@ -211,16 +210,15 @@ class SDMAExecutor(AMDQueue):
def _execute_poll_regmem(self):
struct = sdma_pkts.poll_regmem.from_address(self.base + self.rptr[0] % self.size)
if struct.mem_poll == 0: read_op = lambda: struct.value
elif struct.mem_poll == 1: read_op = lambda: to_mv(struct.addr, 4).cast('I')[0]
if struct.mem_poll == 0: mval = struct.value & struct.mask
elif struct.mem_poll == 1: mval = to_mv(struct.addr, 4).cast('I')[0] & struct.mask
if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y
elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y
elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: cmp = lambda x,y: True
if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= struct.value)
elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == struct.value)
elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: can_cont = True
else: raise RuntimeError(f"Do not support {struct.func=}")
mval = read_op() & struct.mask
if not cmp(mval, struct.value): return False
if not can_cont: return False
self.rptr[0] += ctypes.sizeof(struct)
return True

View File

@@ -69,14 +69,15 @@ class DirFileDesc(VirtFileDesc):
return 0
@dataclass(frozen=True)
class VirtFile():
class VirtFile:
path: str
fdcls: Any # TODO: fix this Union[VirtFileDesc, functools.partial[VirtFileDesc]]
@staticmethod
def build_fstat(st_dev=0x20, st_ino=0x100000, st_mode=0o100777, st_nlink=1, st_uid=0, st_gid=0, st_rdev=0, st_size=0,
st_blksize=4096, st_blocks=0, st_atime=0, st_mtime=0, st_ctime=0):
assert (ssz:=struct.calcsize(fmt_string:='QQQIIIQQiQqqq')) == 96, f"{ssz} != 96"
fmt_string = 'QQQIIIQQiQqqq'
assert (ssz:=struct.calcsize(fmt_string)) == 96, f"{ssz} != 96"
return struct.pack(fmt_string, st_dev, st_ino, st_nlink, st_mode, st_uid, st_gid,
st_rdev, st_size, st_blksize, st_blocks, st_atime, st_mtime, st_ctime)

View File

@@ -1,7 +1,7 @@
import ctypes, ctypes.util, struct, platform, pathlib, re, time, os, builtins, atexit
from extra.mockgpu.nv.nvdriver import NVDriver
from extra.mockgpu.amd.amddriver import AMDDriver
from tinygrad.helpers import from_mv, to_mv
import ctypes, ctypes.util, struct, platform, time, os, builtins, atexit
from test.mockgpu.nv.nvdriver import NVDriver
from test.mockgpu.amd.amddriver import AMDDriver
from tinygrad.helpers import to_mv
start = time.perf_counter()
# *** ioctl lib ***

View File

@@ -1,9 +1,9 @@
import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy
import ctypes, mmap, collections, functools
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
from typing import Optional, Any
from typing import Any
from tinygrad.helpers import to_mv
from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
from extra.mockgpu.nv.nvgpu import NVGPU
from test.mockgpu.driver import VirtDriver, VirtFileDesc, VirtFile
from test.mockgpu.nv.nvgpu import NVGPU
MAP_FIXED = 0x10
libc = ctypes.CDLL(ctypes.util.find_library("c"))
@@ -45,7 +45,8 @@ class NVDevFileDesc(VirtFileDesc):
def ioctl(self, fd, request, argp): return self.driver.dev_ioctl(self.gpu, request, argp)
def mmap(self, start, sz, prot, flags, fd, offset):
start = libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
if self._mapping_userland: self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu))
if self._mapping_userland:
self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu))
return start
class NVDriver(VirtDriver):
@@ -88,7 +89,7 @@ class NVDriver(VirtDriver):
def rm_alloc(self, argp):
struct = nv_gpu.NVOS21_PARAMETERS.from_address(argp)
params_ptr = struct.pAllocParms if struct.pAllocParms else None
params_ptr = struct.pAllocParms
if struct.hClass == nv_gpu.NV01_ROOT_CLIENT: self.root_handle = struct.hObjectNew = self._alloc_handle()
elif struct.hClass == nv_gpu.NV01_DEVICE_0:
params:Any = nv_gpu.NV0080_ALLOC_PARAMETERS.from_address(params_ptr)
@@ -137,7 +138,7 @@ class NVDriver(VirtDriver):
def rm_control(self, argp):
struct = nv_gpu.NVOS54_PARAMETERS.from_address(argp)
params_ptr = struct.params if struct.params else None
params_ptr = struct.params
if struct.cmd == nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2:
params:Any = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS.from_address(params_ptr)
params.deviceInstance = params.gpuId # emulate them to be the same
@@ -171,7 +172,7 @@ class NVDriver(VirtDriver):
assert struct.hObject in self.object_by_handle and isinstance(self.object_by_handle[struct.hObject], NVSubDevice)
gpu = self.object_by_handle[struct.hObject].device
params = nv_gpu.NV2080_CTRL_GPU_GET_GID_INFO_PARAMS.from_address(params_ptr)
if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError(f"Unknown format")
if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError("Unknown format")
bts = gpu.gpu_uuid(sz=params.length)
for i in range(params.length): params.data[i] = bts[i]
elif struct.cmd == nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN:
@@ -243,5 +244,5 @@ class NVDriver(VirtDriver):
any_progress = False
for gpu in self.gpus.values():
for q in gpu.queues:
if (prev_rptr:=q.ctrl.GPGet) != q.ctrl.GPPut:
if q.ctrl.GPGet != q.ctrl.GPPut:
any_progress |= q.execute()

View File

@@ -1,7 +1,7 @@
import ctypes, ctypes.util, time
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
from enum import Enum, auto
from extra.mockgpu.gpu import VirtGPU
from test.mockgpu.gpu import VirtGPU
from tinygrad.helpers import to_mv, init_c_struct_t
def make_qmd_struct_type():
@@ -77,7 +77,7 @@ class GPFIFO:
def execute_buf(self) -> bool:
while self.buf_ptr < self.buf_sz:
init_off = self.buf_ptr
typ, size, subc, mthd = self._next_header()
_, size, _, mthd = self._next_header()
cmd_end_off = self.buf_ptr + size
while self.buf_ptr < cmd_end_off:
@@ -151,7 +151,7 @@ class GPFIFO:
assert lanes == 1, f"unsupported lanes > 1 in _exec_nvc6c0_dma: {lanes}"
flags = self._next_dword()
assert flags == 0x41, f"unsupported flags in _exec_nvc6c0_dma: {flags}"
typ, dsize, subc, mthd = self._next_header()
typ, dsize, _, mthd = self._next_header()
assert typ == 6 and mthd == nv_gpu.NVC6C0_LOAD_INLINE_DATA, f"Expected inline data not found after nvc6c0_dma, {typ=} {mthd=}"
copy_data = [self._next_dword() for _ in range(dsize)]
assert len(copy_data) * 4 == sz, f"different copy sizes in _exec_nvc6c0_dma: {len(copy_data) * 4} != {sz}"

View File

@@ -5,7 +5,7 @@ import unittest, importlib
class TestMockGPU(unittest.TestCase):
# https://github.com/tinygrad/tinygrad/pull/7627
def test_import_typing_extensions(self):
import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
import typing_extensions
importlib.reload(typing_extensions) # pytest imports typing_extension before mockgpu

View File

@@ -12,7 +12,7 @@ from tinygrad.runtime.autogen import kfd, hsa, amd_gpu, libc
from tinygrad.runtime.support.compiler_hip import AMDCompiler
from tinygrad.runtime.support.elf import elf_loader
if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
if getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
if getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
def is_usable_gpu(gpu_id):
with contextlib.suppress(OSError): return int(pathlib.Path(gpu_id).read_text()) != 0

View File

@@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PT
from tinygrad.runtime.autogen import nv_gpu, libc
from tinygrad.runtime.support.elf import elf_loader
if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: disable=unused-import
if MOCKGPU:=getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
if MOCKGPU:=getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
def get_error_str(status):
  """Render an NV status code as ``"<status>: <description>"``.

  The description is looked up in ``nv_gpu.nv_status_codes``; a status that is
  not a key of that table is rendered with the text ``'Unknown error'``.
  """
  description = nv_gpu.nv_status_codes.get(status, 'Unknown error')
  return f"{status}: {description}"