Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-08 22:48:25 -05:00
move mockgpu to tests [pr] (#8396)
* move mockgpu to tests
* linter
* i'm so sorry
* sorry, python
* path
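The practical effect of the move (a sketch under assumptions, not part of the commit message): the mock GPU layer is still switched on the same way, only the import path changes from extra.mockgpu to test.mockgpu, as the last two hunks of this diff show.

# hedged sketch of the gate the runtimes keep using; tinygrad's own code uses its
# getenv helper, plain os.getenv is used here only to keep the example self-contained
import os
if os.getenv("MOCKGPU"):
  import test.mockgpu.mockgpu  # noqa: F401  # importing it installs the fake driver hooks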
@@ -1,9 +1,8 @@
-import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy
+import pathlib, re, ctypes, mmap, collections, functools, copy
 import tinygrad.runtime.autogen.kfd as kfd
 from typing import Optional, Any
 from tinygrad.helpers import from_mv
-from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
-from extra.mockgpu.amd.amdgpu import AMDGPU, gpu_props
+from test.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
+from test.mockgpu.amd.amdgpu import AMDGPU, gpu_props
 
 libc = ctypes.CDLL(ctypes.util.find_library("c"))
 libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
@@ -13,7 +12,7 @@ def ioctls_from_header():
   # hdrpy = (pathlib.Path(__file__).parent.parent.parent.parent / "tinygrad" / "runtime" / "autogen" / "kfd.py").read_text()
   # pattern = r'# (AMDKFD_IOC_[A-Z0-9_]+)\s=\s_(IOW?R?).*\(( 0x[0-9a-fA-F]+) ,\s+struct\s([A-Za-z0-9_]+)\s+\)'
   # matches = re.findall(pattern, hdrpy, re.MULTILINE)
-  hdr = (pathlib.Path(__file__).parent.parent.parent / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "")
+  hdr = (pathlib.Path(__file__).parent.parent.parent.parent / "extra" / "hip_gpu_driver" / "kfd_ioctl.h").read_text().replace("\\\n", "")
   pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)'
   matches = re.findall(pattern, hdr, re.MULTILINE)
   return type("KFD_IOCTLS", (object, ), {name: int(nr, 0x10) for name, _, nr, _ in matches}), \
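For context (not part of the change): the pattern above lifts each ioctl's name and request number straight out of the header. A minimal sketch of what it matches, assuming the usual kfd_ioctl.h wording:

import re
pattern = r'#define\s+(AMDKFD_IOC_[A-Z0-9_]+)\s+AMDKFD_(IOW?R?)\((0x[0-9a-fA-F]+),\s+struct\s([A-Za-z0-9_]+)\)'
line = "#define AMDKFD_IOC_GET_VERSION AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)"
print(re.findall(pattern, line))
# [('AMDKFD_IOC_GET_VERSION', 'IOR', '0x01', 'kfd_ioctl_get_version_args')]
# int(nr, 0x10) then turns '0x01' into 1, so the generated KFD_IOCTLS type carries the ioctl nr per name.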
@@ -1,5 +1,5 @@
 import ctypes, time
-from extra.mockgpu.gpu import VirtGPU
+from test.mockgpu.gpu import VirtGPU
 from tinygrad.helpers import to_mv, init_c_struct_t, mv_address
 import tinygrad.runtime.autogen.amd_gpu as amd_gpu
 
@@ -25,7 +25,8 @@ def _try_dlopen_remu():
   try:
     remu = ctypes.CDLL(path)
     remu.run_asm.restype = ctypes.c_int32
-    remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p]
+    remu.run_asm.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32,
+                             ctypes.c_uint32, ctypes.c_uint32, ctypes.c_uint32, ctypes.c_void_p]
   except OSError: pass
   else: return remu
   print("Could not find libremu.so")
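For orientation (an assumption from the argument layout, not stated in the diff): run_asm appears to take the kernel binary pointer and size, the three grid and three workgroup dimensions, and a pointer to the kernel arguments, roughly:

# hypothetical wrapper matching the argtypes above; parameter names are guesses
def run_asm(remu, lib, lib_sz, gx, gy, gz, lx, ly, lz, args_ptr) -> int:
  return remu.run_asm(lib, lib_sz, gx, gy, gz, lx, ly, lz, args_ptr)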
@@ -55,7 +56,7 @@ def create_sdma_packets():
   return type("SDMA_PKTS", (object, ), structs)
 sdma_pkts = create_sdma_packets()
 
-class AMDQueue():
+class AMDQueue:
   def __init__(self, base, size, rptr, wptr):
     self.queue, self.size = to_mv(base, size).cast("I"), size
     self.rptr = to_mv(rptr, 8).cast("Q")
@@ -98,14 +99,14 @@ class PM4Executor(AMDQueue):
     mem_event_type = (self._next_dword() >> 0) & 0xff
     selectors = self._next_dword()
     mem_data_sel = (selectors >> 29) & 0b111
-    int_sel = (selectors >> 24) & 0b11
-    mem_dst_sel = (selectors >> 16) & 0b1
+    # int_sel = (selectors >> 24) & 0b11
+    # mem_dst_sel = (selectors >> 16) & 0b1
     addr_lo = self._next_dword()
     addr_hi = self._next_dword()
     val_lo = self._next_dword()
     val_hi = self._next_dword()
     val = val_lo + (val_hi << 32)
-    ev = self._next_dword()
+    _ = self._next_dword() # ev
 
     ptr = to_mv(addr_lo + (addr_hi << 32), 8)
     if mem_data_sel == 1 or mem_data_sel == 2: ptr.cast('Q')[0] = val
@@ -120,24 +121,22 @@ class PM4Executor(AMDQueue):
     addr_lo = self._next_dword()
     addr_hi = self._next_dword()
     val = self._next_dword()
-    mask = self._next_dword()
-    timeout = self._next_dword()
+    _ = self._next_dword() # mask
+    _ = self._next_dword() # timeout
 
     mem_function = (info >> 0) & 0b111
     mem_space = (info >> 4) & 0b1
-    mem_op = (info >> 6) & 0b1
-    mem_engine = (info >> 8) & 0b1
+    _ = (info >> 6) & 0b1 # memop
+    _ = (info >> 8) & 0b1 # mem_engine
 
-    if mem_space == 0: read_op = lambda: val
-    elif mem_space == 1: read_op = lambda: to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0]
+    if mem_space == 0: mval = val
+    elif mem_space == 1: mval = to_mv(addr_lo + (addr_hi << 32), 4).cast('I')[0]
 
-    if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y
-    elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y
+    if mem_function == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= val)
+    elif mem_function == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == val)
     else: raise RuntimeError(f"Do not support {mem_function=}")
 
-    mval = read_op()
-    can_cont = cmp(mval, val)
-    if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert packet, need to wait again
+    if not can_cont: self.rptr[0] = self.rptr[0] - 7 # revert this packet, need to wait again
     return can_cont
 
   def _exec_set_sh_reg(self, n):
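Not part of the diff, just a condensed sketch of the WAIT_REG_MEM semantics the rewrite keeps: the packet reads either the immediate value or a memory word, compares it against the reference with the selected function, and if the condition does not hold the executor rewinds rptr so the same packet is polled again. The constants below are assumed PM4 encodings, not taken from this change.

WAIT_REG_MEM_FUNCTION_EQ, WAIT_REG_MEM_FUNCTION_GEQ = 3, 5  # assumed encodings

def wait_condition_met(mval: int, ref: int, func: int) -> bool:
  # True once the queue may advance past the WAIT_REG_MEM packet
  if func == WAIT_REG_MEM_FUNCTION_GEQ: return mval >= ref
  if func == WAIT_REG_MEM_FUNCTION_EQ: return mval == ref
  raise RuntimeError(f"Do not support {func=}")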
@@ -149,7 +148,7 @@ class PM4Executor(AMDQueue):
   def _exec_dispatch_direct(self, n):
     assert n == 3
     gl = [self._next_dword() for _ in range(3)]
-    flags = self._next_dword()
+    _ = self._next_dword() # flags
 
     prg_addr = (self.gpu.regs[regCOMPUTE_PGM_LO] + (self.gpu.regs[regCOMPUTE_PGM_LO + 1] << 32)) << 8
     args_addr = self.gpu.regs[regCOMPUTE_USER_DATA_0] + (self.gpu.regs[regCOMPUTE_USER_DATA_0 + 1] << 32)
@@ -211,16 +210,15 @@ class SDMAExecutor(AMDQueue):
   def _execute_poll_regmem(self):
     struct = sdma_pkts.poll_regmem.from_address(self.base + self.rptr[0] % self.size)
 
-    if struct.mem_poll == 0: read_op = lambda: struct.value
-    elif struct.mem_poll == 1: read_op = lambda: to_mv(struct.addr, 4).cast('I')[0]
+    if struct.mem_poll == 0: mval = struct.value & struct.mask
+    elif struct.mem_poll == 1: mval = to_mv(struct.addr, 4).cast('I')[0] & struct.mask
 
-    if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: cmp = lambda x,y: x >= y
-    elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: cmp = lambda x,y: x == y
-    elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: cmp = lambda x,y: True
+    if struct.func == WAIT_REG_MEM_FUNCTION_GEQ: can_cont = bool(mval >= struct.value)
+    elif struct.func == WAIT_REG_MEM_FUNCTION_EQ: can_cont = bool(mval == struct.value)
+    elif struct.func == WAIT_REG_MEM_FUNCTION_ALWAYS: can_cont = True
    else: raise RuntimeError(f"Do not support {struct.func=}")
 
-    mval = read_op() & struct.mask
-    if not cmp(mval, struct.value): return False
+    if not can_cont: return False
 
     self.rptr[0] += ctypes.sizeof(struct)
     return True
@@ -69,14 +69,15 @@ class DirFileDesc(VirtFileDesc):
     return 0
 
 @dataclass(frozen=True)
-class VirtFile():
+class VirtFile:
   path: str
   fdcls: Any # TODO: fix this Union[VirtFileDesc, functools.partial[VirtFileDesc]]
 
   @staticmethod
   def build_fstat(st_dev=0x20, st_ino=0x100000, st_mode=0o100777, st_nlink=1, st_uid=0, st_gid=0, st_rdev=0, st_size=0,
                   st_blksize=4096, st_blocks=0, st_atime=0, st_mtime=0, st_ctime=0):
-    assert (ssz:=struct.calcsize(fmt_string:='QQQIIIQQiQqqq')) == 96, f"{ssz} != 96"
+    fmt_string = 'QQQIIIQQiQqqq'
+    assert (ssz:=struct.calcsize(fmt_string)) == 96, f"{ssz} != 96"
     return struct.pack(fmt_string, st_dev, st_ino, st_nlink, st_mode, st_uid, st_gid,
                        st_rdev, st_size, st_blksize, st_blocks, st_atime, st_mtime, st_ctime)
 
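Likely the motivation for splitting the assert above (the commit message does not say): the old form assigned fmt_string inside the assert, so running under python -O, which strips asserts, would leave fmt_string undefined at the struct.pack call. A minimal reproduction of that hazard:

# python repro.py    -> prints 96 bytes of packed zeros
# python -O repro.py -> NameError: name 'fmt' is not defined
import struct
assert (sz := struct.calcsize(fmt := 'QQQIIIQQiQqqq')) == 96
print(struct.pack(fmt, *([0] * 13)).hex())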
@@ -1,7 +1,7 @@
-import ctypes, ctypes.util, struct, platform, pathlib, re, time, os, builtins, atexit
-from extra.mockgpu.nv.nvdriver import NVDriver
-from extra.mockgpu.amd.amddriver import AMDDriver
-from tinygrad.helpers import from_mv, to_mv
+import ctypes, ctypes.util, struct, platform, time, os, builtins, atexit
+from test.mockgpu.nv.nvdriver import NVDriver
+from test.mockgpu.amd.amddriver import AMDDriver
+from tinygrad.helpers import to_mv
 start = time.perf_counter()
 
 # *** ioctl lib ***
@@ -1,9 +1,9 @@
-import pathlib, re, ctypes, mmap, collections, struct, functools, os, copy
+import ctypes, mmap, collections, functools
 import tinygrad.runtime.autogen.nv_gpu as nv_gpu
-from typing import Optional, Any
+from typing import Any
 from tinygrad.helpers import to_mv
-from extra.mockgpu.driver import VirtDriver, VirtFileDesc, TextFileDesc, DirFileDesc, VirtFile
-from extra.mockgpu.nv.nvgpu import NVGPU
+from test.mockgpu.driver import VirtDriver, VirtFileDesc, VirtFile
+from test.mockgpu.nv.nvgpu import NVGPU
 
 MAP_FIXED = 0x10
 libc = ctypes.CDLL(ctypes.util.find_library("c"))
@@ -45,7 +45,8 @@ class NVDevFileDesc(VirtFileDesc):
   def ioctl(self, fd, request, argp): return self.driver.dev_ioctl(self.gpu, request, argp)
   def mmap(self, start, sz, prot, flags, fd, offset):
     start = libc.mmap(start, sz, prot, flags|mmap.MAP_ANONYMOUS, -1, 0)
-    if self._mapping_userland: self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu))
+    if self._mapping_userland:
+      self.driver.track_address(start, start+sz, lambda mv,off: None, lambda mv, off: self.driver._gpu_mmio_write(mv, off, self.gpu))
     return start
 
 class NVDriver(VirtDriver):
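For orientation, inferred only from the call above: track_address registers a read and a write callback for an address range, so stores into the mock-mapped MMIO window get routed back into the emulated GPU. An illustrative sketch, not the real VirtDriver implementation:

# illustrative only; the real tracking lives in test/mockgpu/driver.py and its
# exact behaviour beyond (start, end, read_cb, write_cb) is not shown in this diff
class RangeTracker:
  def __init__(self): self.ranges = []
  def track_address(self, start, end, on_read, on_write): self.ranges.append((start, end, on_read, on_write))
  def write(self, addr, mv):
    for start, end, _, on_write in self.ranges:
      if start <= addr < end: on_write(mv, addr - start)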
@@ -88,7 +89,7 @@ class NVDriver(VirtDriver):
 
   def rm_alloc(self, argp):
     struct = nv_gpu.NVOS21_PARAMETERS.from_address(argp)
-    params_ptr = struct.pAllocParms if struct.pAllocParms else None
+    params_ptr = struct.pAllocParms
     if struct.hClass == nv_gpu.NV01_ROOT_CLIENT: self.root_handle = struct.hObjectNew = self._alloc_handle()
     elif struct.hClass == nv_gpu.NV01_DEVICE_0:
       params:Any = nv_gpu.NV0080_ALLOC_PARAMETERS.from_address(params_ptr)
@@ -137,7 +138,7 @@ class NVDriver(VirtDriver):
 
   def rm_control(self, argp):
     struct = nv_gpu.NVOS54_PARAMETERS.from_address(argp)
-    params_ptr = struct.params if struct.params else None
+    params_ptr = struct.params
     if struct.cmd == nv_gpu.NV0000_CTRL_CMD_GPU_GET_ID_INFO_V2:
       params:Any = nv_gpu.NV0000_CTRL_GPU_GET_ID_INFO_V2_PARAMS.from_address(params_ptr)
       params.deviceInstance = params.gpuId # emulate them to be the same
@@ -171,7 +172,7 @@ class NVDriver(VirtDriver):
       assert struct.hObject in self.object_by_handle and isinstance(self.object_by_handle[struct.hObject], NVSubDevice)
       gpu = self.object_by_handle[struct.hObject].device
       params = nv_gpu.NV2080_CTRL_GPU_GET_GID_INFO_PARAMS.from_address(params_ptr)
-      if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError(f"Unknown format")
+      if params.flags != nv_gpu.NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY: raise RuntimeError("Unknown format")
       bts = gpu.gpu_uuid(sz=params.length)
       for i in range(params.length): params.data[i] = bts[i]
     elif struct.cmd == nv_gpu.NVC36F_CTRL_CMD_GPFIFO_GET_WORK_SUBMIT_TOKEN:
@@ -243,5 +244,5 @@ class NVDriver(VirtDriver):
     any_progress = False
     for gpu in self.gpus.values():
       for q in gpu.queues:
-        if (prev_rptr:=q.ctrl.GPGet) != q.ctrl.GPPut:
+        if q.ctrl.GPGet != q.ctrl.GPPut:
          any_progress |= q.execute()
@@ -1,7 +1,7 @@
 import ctypes, ctypes.util, time
 import tinygrad.runtime.autogen.nv_gpu as nv_gpu
 from enum import Enum, auto
-from extra.mockgpu.gpu import VirtGPU
+from test.mockgpu.gpu import VirtGPU
 from tinygrad.helpers import to_mv, init_c_struct_t
 
 def make_qmd_struct_type():
@@ -77,7 +77,7 @@ class GPFIFO:
   def execute_buf(self) -> bool:
     while self.buf_ptr < self.buf_sz:
       init_off = self.buf_ptr
-      typ, size, subc, mthd = self._next_header()
+      _, size, _, mthd = self._next_header()
       cmd_end_off = self.buf_ptr + size
 
       while self.buf_ptr < cmd_end_off:
@@ -151,7 +151,7 @@ class GPFIFO:
     assert lanes == 1, f"unsupported lanes > 1 in _exec_nvc6c0_dma: {lanes}"
     flags = self._next_dword()
     assert flags == 0x41, f"unsupported flags in _exec_nvc6c0_dma: {flags}"
-    typ, dsize, subc, mthd = self._next_header()
+    typ, dsize, _, mthd = self._next_header()
     assert typ == 6 and mthd == nv_gpu.NVC6C0_LOAD_INLINE_DATA, f"Expected inline data not found after nvc6c0_dma, {typ=} {mthd=}"
     copy_data = [self._next_dword() for _ in range(dsize)]
     assert len(copy_data) * 4 == sz, f"different copy sizes in _exec_nvc6c0_dma: {len(copy_data) * 4} != {sz}"
@@ -5,7 +5,7 @@ import unittest, importlib
 class TestMockGPU(unittest.TestCase):
   # https://github.com/tinygrad/tinygrad/pull/7627
   def test_import_typing_extensions(self):
-    import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
+    import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
     import typing_extensions
     importlib.reload(typing_extensions) # pytest imports typing_extension before mockgpu
 
@@ -12,7 +12,7 @@ from tinygrad.runtime.autogen import kfd, hsa, amd_gpu, libc
 from tinygrad.runtime.support.compiler_hip import AMDCompiler
 from tinygrad.runtime.support.elf import elf_loader
 if getenv("IOCTL"): import extra.hip_gpu_driver.hip_ioctl # noqa: F401 # pylint: disable=unused-import
-if getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
+if getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
 
 def is_usable_gpu(gpu_id):
   with contextlib.suppress(OSError): return int(pathlib.Path(gpu_id).read_text()) != 0
@@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cuda import CUDACompiler, PTXCompiler, PT
 from tinygrad.runtime.autogen import nv_gpu, libc
 from tinygrad.runtime.support.elf import elf_loader
 if getenv("IOCTL"): import extra.nv_gpu_driver.nv_ioctl # noqa: F401 # pylint: disable=unused-import
-if MOCKGPU:=getenv("MOCKGPU"): import extra.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
+if MOCKGPU:=getenv("MOCKGPU"): import test.mockgpu.mockgpu # noqa: F401 # pylint: disable=unused-import
 
 def get_error_str(status): return f"{status}: {nv_gpu.nv_status_codes.get(status, 'Unknown error')}"