From a647f3dd2c46c4959688058f977f933a6ec358e3 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Tue, 24 Dec 2024 23:48:02 +0300 Subject: [PATCH] move mockgpu to tests [pr] (#8396) * move mockgpu to tests * linter * i'm so sorry * sorry, python * path --- {extra => test}/mockgpu/amd/amddriver.py | 9 ++--- {extra => test}/mockgpu/amd/amdgpu.py | 48 ++++++++++++------------ {extra => test}/mockgpu/driver.py | 5 ++- {extra => test}/mockgpu/gpu.py | 0 {extra => test}/mockgpu/mockgpu.py | 8 ++-- {extra => test}/mockgpu/nv/nvdriver.py | 19 +++++----- {extra => test}/mockgpu/nv/nvgpu.py | 6 +-- test/testextra/test_mockgpu.py | 2 +- tinygrad/runtime/ops_amd.py | 2 +- tinygrad/runtime/ops_nv.py | 2 +- 10 files changed, 50 insertions(+), 51 deletions(-) rename {extra => test}/mockgpu/amd/amddriver.py (94%) rename {extra => test}/mockgpu/amd/amdgpu.py (87%) rename {extra => test}/mockgpu/driver.py (96%) rename {extra => test}/mockgpu/gpu.py (100%) rename {extra => test}/mockgpu/mockgpu.py (97%) rename {extra => test}/mockgpu/nv/nvdriver.py (95%) rename {extra => test}/mockgpu/nv/nvgpu.py (98%) diff --git a/extra/mockgpu/amd/amddriver.py b/test/mockgpu/amd/amddriver.py similarity index 94% rename from extra/mockgpu/amd/amddriver.py rename to test/mockgpu/amd/amddriver.py index 07b41fbfab..2005dc0fef 100644 --- a/extra/mockgpu/amd/amddriver.py +++ b/test/mockgpu/amd/amddriver.py @@ -1,9 +1,8 @@ -import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy +import pathlib, re, ctypes, mmap, collections, functools, copy import tinygrad.runtime.autogen.kfd as kfd -from typing import Optional, Any from tinygrad.helpers import from_mv -from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile -from extra.mockgpu.amd.amdgpu import AMDGPU, gpu_props +from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile +from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props libc = ctypes.CDLL(ctypes.util.find_library("c")) libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long] @@ -13,7 +12,7 @@ def ioctls_from_header(): # hdrpy = (pathlib.Path(__file__).parent.parent.parent.parent / "tinygrad" / "runtime" / "autogen" / "kfd.py").read_text() # pattern = r'# (AMDKFD_IOC_[A-Z0-9_]+)\s=\s_(IOW?R?).*\(( 0x[0-9a-fA-F]+) ,\s+struct\s([A-Za-z0-9_]+)\s+\)' # matches = re.findall(pattern, hdrpy, re.MULTILINE) - hdr = (pathlib.Path(__file__).parent.parent.parent / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "") + hdr = (pathlib.Path(__file__).parent.parent.parent.parent / "extra" / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "") pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)' matches = re.findall(pattern, hdr, re.MULTILINE) return type("KFD_IOCTLS", (object, ), {name: int(nr, 0x10) for name, _, nr, _ in matches}), \ diff --git a/extra/mockgpu/amd/amdgpu.py b/test/mockgpu/amd/amdgpu.py similarity index 87% rename from extra/mockgpu/amd/amdgpu.py rename to test/mockgpu/amd/amdgpu.py index b9cd7dcfe6..1a12061310 100644 --- a/extra/mockgpu/amd/amdgpu.py +++ b/test/mockgpu/amd/amdgpu.py @@ -1,5 +1,5 @@ import ctypes, time -from extra.mockgpu.gpu import VirtGPU +from test.mockgpu.gpu import VirtGPU from tinygrad.helpers import to_mv, init_c_struct_t, mv_address import tinygrad.runtime.autogen.amd_gpu as amd_gpu @@ -25,7 +25,8 @@ def _try_dlopen_remu(): try: remu = ctypes.CDLL(path) remu.run_asm.restype = ctypes.c_int32 - remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p] + remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, + ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p] except OSError: pass else: return remu print("Could not find libremu.so") @@ -55,7 +56,7 @@ def create_sdma_packets(): return type("SDMA_PKTS", (object, ), structs) sdma_pkts = create_sdma_packets() -class AMDQueue(): +class AMDQueue: def __init__(self, base, size, rptr, wptr): self.queue, self.size = to_mv(base, size).cast("I"), size self.rptr = to_mv(rptr, 8).cast("Q") @@ -98,14 +99,14 @@ class PM4Executor(AMDQueue): mem_event_type = (self._next_dword() >> 0) & 0xff selectors = self._next_dword() mem_data_sel = (selectors >> 29) & 0b111 - int_sel = (selectors >> 24) & 0b11 - mem_dst_sel = (selectors >> 16) & 0b1 + # int_sel = (selectors >> 24) & 0b11 + # mem_dst_sel = (selectors >> 16) & 0b1 addr_lo = self._next_dword() addr_hi = self._next_dword() val_lo = self._next_dword() val_hi = self._next_dword() val = val_lo + (val_hi << 32) - ev = self._next_dword() + _ = self._next_dword() # ev ptr = to_mv(addr_lo + (addr_hi << 32), 8) if mem_data_sel == 1 or mem_data_sel == 2: ptr.cast('Q')[0] = val @@ -120,24 +121,22 @@ class PM4Executor(AMDQueue): addr_lo = self._next_dword() addr_hi = self._next_dword() val = self._next_dword() - mask = self._next_dword() - timeout = self._next_dword() + _ = self._next_dword() # mask + _ = self._next_dword() # timeout mem_function = (info >> 0) & 0b111 mem_space = (info >> 4) & 0b1 - mem_op = (info >> 6) & 0b1 - mem_engine = (info >> 8) & 0b1 + _ = (info >> 6) & 0b1 # memop + _ = (info >> 8) & 0b1 # mem_engine - if mem_space == 0: read_op = lambda: val - elif mem_space == 1: read_op = lambda: to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0] + if mem_space == 0: mval = val + elif mem_space == 1: mval = to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0] - if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y - elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y + if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= val) + elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == val) else: raise RuntimeError(f"Do not support {mem_function=}") - mval = read_op() - can_cont = cmp(mval, val) - if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert packet, need to wait again + if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert this packet, need to wait again return can_cont def _exec_set_sh_reg(self, n): @@ -149,7 +148,7 @@ class PM4Executor(AMDQueue): def _exec_dispatch_direct(self, n): assert n == 3 gl = [self._next_dword() for _ in range(3)] - flags = self._next_dword() + _ = self._next_dword() # flags prg_addr = (self.gpu.regs[regCOMPUTE_PGM_LO] + (self.gpu.regs[regCOMPUTE_PGM_LO + 1] << 32)) << 8 args_addr = self.gpu.regs[regCOMPUTE_USER_DATA_0] + (self.gpu.regs[regCOMPUTE_USER_DATA_0 + 1] << 32) @@ -211,16 +210,15 @@ class SDMAExecutor(AMDQueue): def _execute_poll_regmem(self): struct = sdma_pkts.poll_regmem.from_address(self.base + self.rptr[0] % self.size) - if struct.mem_poll == 0: read_op = lambda: struct.value - elif struct.mem_poll == 1: read_op = lambda: to_mv(struct.addr, 4).cast('I')[0] + if struct.mem_poll == 0: mval = struct.value & struct.mask + elif struct.mem_poll == 1: mval = to_mv(struct.addr, 4).cast('I')[0] & struct.mask - if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y - elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y - elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: cmp = lambda x,y: True + if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= struct.value) + elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == struct.value) + elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: can_cont = True else: raise RuntimeError(f"Do not support {struct.func=}") - mval = read_op() & struct.mask - if not cmp(mval, struct.value): return False + if not can_cont: return False self.rptr[0] += ctypes.sizeof(struct) return True diff --git a/extra/mockgpu/driver.py b/test/mockgpu/driver.py similarity index 96% rename from extra/mockgpu/driver.py rename to test/mockgpu/driver.py index e820657010..83891bde40 100644 --- a/extra/mockgpu/driver.py +++ b/test/mockgpu/driver.py @@ -69,14 +69,15 @@ class DirFileDesc(VirtFileDesc): return 0 @dataclass(frozen=True) -class VirtFile(): +class VirtFile: path: str fdcls: Any # TODO: fix this Union[VirtFileDesc, functools.partial[VirtFileDesc]] @staticmethod def build_fstat(st_dev=0x20, st_ino=0x100000, st_mode=0o100777, st_nlink=1, st_uid=0, st_gid=0, st_rdev=0, st_size=0, st_blksize=4096, st_blocks=0, st_atime=0, st_mtime=0, st_ctime=0): - assert (ssz:=struct.calcsize(fmt_string:='QQQIIIQQiQqqq')) == 96, f"{ssz} != 96" + fmt_string = 'QQQIIIQQiQqqq' + assert (ssz:=struct.calcsize(fmt_string)) == 96, f"{ssz} != 96" return struct.pack(fmt_string, st_dev, st_ino, st_nlink, st_mode, st_uid, st_gid, st_rdev, st_size, st_blksize, st_blocks, st_atime, st_mtime, st_ctime) diff --git a/extra/mockgpu/gpu.py b/test/mockgpu/gpu.py similarity index 100% rename from extra/mockgpu/gpu.py rename to test/mockgpu/gpu.py diff --git a/extra/mockgpu/mockgpu.py b/test/mockgpu/mockgpu.py similarity index 97% rename from extra/mockgpu/mockgpu.py rename to test/mockgpu/mockgpu.py index 4a118dce91..9f120df0e7 100644 --- a/extra/mockgpu/mockgpu.py +++ b/test/mockgpu/mockgpu.py @@ -1,7 +1,7 @@ -import ctypes, ctypes.util, struct, platform, pathlib, re, time, os, builtins, atexit -from extra.mockgpu.nv.nvdriver import NVDriver -from extra.mockgpu.amd.amddriver import AMDDriver -from tinygrad.helpers import from_mv, to_mv +import ctypes, ctypes.util, struct, platform, time, os, builtins, atexit +from test.mockgpu.nv.nvdriver import NVDriver +from test.mockgpu.amd.amddriver import AMDDriver +from tinygrad.helpers import to_mv start = time.perf_counter() # *** ioctl lib *** diff --git a/extra/mockgpu/nv/nvdriver.py b/test/mockgpu/nv/nvdriver.py similarity index 95% rename from extra/mockgpu/nv/nvdriver.py rename to test/mockgpu/nv/nvdriver.py index eddd720d38..a347f5c613 100644 --- a/extra/mockgpu/nv/nvdriver.py +++ b/test/mockgpu/nv/nvdriver.py @@ -1,9 +1,9 @@ -import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy +import ctypes, mmap, collections, functools import tinygrad.runtime.autogen.nv_gpu as nv_gpu -from typing import Optional, Any +from typing import Any from tinygrad.helpers import to_mv -from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile -from extra.mockgpu.nv.nvgpu import NVGPU +from test.mockgpu.driver import VirtDriver, VirtFileDesc, VirtFile +from test.mockgpu.nv.nvgpu import NVGPU MAP_FIXED = 0x10 libc = ctypes.CDLL(ctypes.util.find_library("c")) @@ -45,7 +45,8 @@ class NVDevFileDesc(VirtFileDesc): def ioctl(self, fd, request, argp): return self.driver.dev_ioctl(self.gpu, request, argp) def mmap(self, start, sz, prot, flags, fd, offset): start = libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0) - if self._mapping_userland: self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu)) + if self._mapping_userland: + self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu)) return start class NVDriver(VirtDriver): @@ -88,7 +89,7 @@ class NVDriver(VirtDriver): def rm_alloc(self, argp): struct = nv_gpu.NVOS21_PARAMETERS.from_address(argp) - params_ptr = struct.pAllocParms if struct.pAllocParms else None + params_ptr = struct.pAllocParms if struct.hClass == nv_gpu.NV01_ROOT_CLIENT: self.root_handle = struct.hObjectNew = self._alloc_handle() elif struct.hClass == nv_gpu.NV01_DEVICE_0: params:Any = nv_gpu.NV0080_ALLOC_PARAMETERS.from_address(params_ptr) @@ -137,7 +138,7 @@ class NVDriver(VirtDriver): def rm_control(self, argp): struct = nv_gpu.NVOS54_PARAMETERS.from_address(argp) - params_ptr = struct.params if struct.params else None + params_ptr = struct.params if struct.cmd == nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2: params:Any = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS.from_address(params_ptr) params.deviceInstance = params.gpuId # emulate them to be the same @@ -171,7 +172,7 @@ class NVDriver(VirtDriver): assert struct.hObject in self.object_by_handle and isinstance(self.object_by_handle[struct.hObject], NVSubDevice) gpu = self.object_by_handle[struct.hObject].device params = nv_gpu.NV2080_CTRL_GPU_GET_GID_INFO_PARAMS.from_address(params_ptr) - if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError(f"Unknown format") + if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError("Unknown format") bts = gpu.gpu_uuid(sz=params.length) for i in range(params.length): params.data[i] = bts[i] elif struct.cmd == nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN: @@ -243,5 +244,5 @@ class NVDriver(VirtDriver): any_progress = False for gpu in self.gpus.values(): for q in gpu.queues: - if (prev_rptr:=q.ctrl.GPGet) != q.ctrl.GPPut: + if q.ctrl.GPGet != q.ctrl.GPPut: any_progress |= q.execute() \ No newline at end of file diff --git a/extra/mockgpu/nv/nvgpu.py b/test/mockgpu/nv/nvgpu.py similarity index 98% rename from extra/mockgpu/nv/nvgpu.py rename to test/mockgpu/nv/nvgpu.py index 3bb401de5f..8a7f8e3074 100644 --- a/extra/mockgpu/nv/nvgpu.py +++ b/test/mockgpu/nv/nvgpu.py @@ -1,7 +1,7 @@ import ctypes, ctypes.util, time import tinygrad.runtime.autogen.nv_gpu as nv_gpu from enum import Enum, auto -from extra.mockgpu.gpu import VirtGPU +from test.mockgpu.gpu import VirtGPU from tinygrad.helpers import to_mv, init_c_struct_t def make_qmd_struct_type(): @@ -77,7 +77,7 @@ class GPFIFO: def execute_buf(self) -> bool: while self.buf_ptr < self.buf_sz: init_off = self.buf_ptr - typ, size, subc, mthd = self._next_header() + _, size, _, mthd = self._next_header() cmd_end_off = self.buf_ptr + size while self.buf_ptr < cmd_end_off: @@ -151,7 +151,7 @@ class GPFIFO: assert lanes == 1, f"unsupported lanes > 1 in _exec_nvc6c0_dma: {lanes}" flags = self._next_dword() assert flags == 0x41, f"unsupported flags in _exec_nvc6c0_dma: {flags}" - typ, dsize, subc, mthd = self._next_header() + typ, dsize, _, mthd = self._next_header() assert typ == 6 and mthd == nv_gpu.NVC6C0_LOAD_INLINE_DATA, f"Expected inline data not found after nvc6c0_dma, {typ=} {mthd=}" copy_data = [self._next_dword() for _ in range(dsize)] assert len(copy_data) * 4 == sz, f"different copy sizes in _exec_nvc6c0_dma: {len(copy_data) * 4} != {sz}" diff --git a/test/testextra/test_mockgpu.py b/test/testextra/test_mockgpu.py index ec6b76a3d4..0f1d616480 100644 --- a/test/testextra/test_mockgpu.py +++ b/test/testextra/test_mockgpu.py @@ -5,7 +5,7 @@ import unittest, importlib class TestMockGPU(unittest.TestCase): # https://github.com/tinygrad/tinygrad/pull/7627 def test_import_typing_extensions(self): - import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import + import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import import typing_extensions importlib.reload(typing_extensions) # pytest imports typing_extension before mockgpu diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 8619aaa2ac..7a20d1c0db 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -12,7 +12,7 @@ from tinygrad.runtime.autogen import kfd, hsa, amd_gpu, libc from tinygrad.runtime.support.compiler_hip import AMDCompiler from tinygrad.runtime.support.elf import elf_loader if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import -if getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import +if getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import def is_usable_gpu(gpu_id): with contextlib.suppress(OSError): return int(pathlib.Path(gpu_id).read_text()) != 0 diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index d2395e97db..9fa1cc9513 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PT from tinygrad.runtime.autogen import nv_gpu, libc from tinygrad.runtime.support.elf import elf_loader if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: disable=unused-import -if MOCKGPU:=getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import +if MOCKGPU:=getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status, 'Unknown error')}"