Files
tinygrad/test/mockgpu/helpers.py
George Hotz 25ef866e89 write python emulator from RDNA3 pseudocode in pdf (#13841)
* write python emulator from RDNA3 pseudocode in pdf

* emu2

* more emu

* working

* more psueod

* progress

* cleanups

* delete junk

* delete stale files

* just emu

* work

* emu compare

* bemu

* cleanups and more failures

* revert bench emu

* fix emu cmp

* four tests fail

* bugfixes

* dsl

* ext

* refactor

* dsl

* div scale fix

* test_emu

* fix emu tests

* pcode

* test pcode

* top imports

* fix test_emu to use run_asm

* emu tests on real hardware

* more tests

* more emu tests

* more

* work

* work

* bug fix

* bugfixes

* fix fp16 gemm

* all ops tests pass in emulator

* fix llvm tests

* fix a few more tests

* fix mockgpu timeout
2025-12-29 07:39:53 -05:00

45 lines
2.2 KiB
Python

import ctypes, ctypes.util
from tinygrad.helpers import getenv
def _try_dlopen_gpuocelot():
  """Load the gpuocelot shared library and declare the argument types of its `ptx_run` entry point.

  The path reported by `ctypes.util.find_library` (when there is one) is tried first, followed by a
  fixed list of Linux and macOS library names/locations. The first candidate that `ctypes.CDLL`
  manages to open is used.

  Returns:
    The loaded `ctypes.CDLL` with `ptx_run.argtypes` set, or None (after printing a notice) when
    none of the candidates could be opened.
  """
  # find_library can run external programs on Linux, so query it once rather than
  # once for the None check and a second time for the value.
  located = ctypes.util.find_library("gpuocelot")
  candidates = [] if located is None else [located]
  candidates += ["libgpuocelot.so", "/usr/local/lib/libgpuocelot.so",
                 "libgpuocelot.dylib", "/usr/local/lib/libgpuocelot.dylib", "/opt/homebrew/lib/libgpuocelot.dylib"]
  for path in candidates:
    # only the dlopen itself is expected to fail with OSError; keep the try body to that one call
    try:
      gpuocelot_lib = ctypes.CDLL(path)
    except OSError:
      continue  # not loadable from this location, move on to the next candidate
    gpuocelot_lib.ptx_run.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_void_p), ctypes.c_int, ctypes.c_int,
                                      ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int]
    return gpuocelot_lib
  print("Could not find libgpuocelot.so")
  return None
class PythonRemu:
  """Adapter that exposes the Python RDNA3 emulator through the same `run_asm` call as libremu.so."""
  # (start, size) pairs that are handed to the emulator as its valid memory ranges on every run
  valid_mem_ranges: set[tuple[int, int]] = set()
  rsrc2: int = 0x19c  # Default: USER_SGPR_COUNT=14, enable X and Y workgroup IDs

  def run_asm(self, lib: int, lib_sz: int, gx: int, gy: int, gz: int, lx: int, ly: int, lz: int, args_ptr: int) -> int:
    """Publish the current memory ranges to the emulator, then run it and return its result.

    All arguments are forwarded unchanged to `extra.assembly.rdna3.emu.run_asm`, with `self.rsrc2`
    appended as the final argument.
    """
    # the emulator module is imported at call time, not when this module is imported
    from extra.assembly.rdna3.emu import run_asm as emu_run_asm, set_valid_mem_ranges
    # Every size is grown by 4096 because GPU loads may read past small buffers
    # (e.g. s_load_b128 on 12-byte buffer)
    padded_ranges = set()
    for base, length in self.valid_mem_ranges:
      padded_ranges.add((base, length + 4096))
    set_valid_mem_ranges(padded_ranges)
    return emu_run_asm(lib, lib_sz, gx, gy, gz, lx, ly, lz, args_ptr, self.rsrc2)
def _try_dlopen_remu():
  """Return an emulator backend that provides `run_asm`, or None when no backend is available.

  When `getenv("PYTHON_REMU")` is truthy a `PythonRemu` instance is returned. Otherwise a fixed list
  of libremu locations (.so first, then .dylib) is tried in order; the first one `ctypes.CDLL` can
  open has `run_asm.restype` / `run_asm.argtypes` declared and is returned. If nothing loads, a
  notice is printed and None is returned.
  """
  # Use Python emulator only if PYTHON_REMU=1
  if getenv("PYTHON_REMU"): return PythonRemu()

  candidates = (
    "extra/remu/target/release/libremu.so", "libremu.so", "/usr/local/lib/libremu.so",
    "extra/remu/target/release/libremu.dylib", "libremu.dylib", "/usr/local/lib/libremu.dylib", "/opt/homebrew/lib/libremu.dylib",
  )
  for candidate in candidates:
    try:
      lib = ctypes.CDLL(candidate)
    except OSError:
      continue  # could not be opened from this location, try the next one
    entry = lib.run_asm
    entry.restype = ctypes.c_int32
    # void pointer, seven 32-bit unsigned values, void pointer
    entry.argtypes = [ctypes.c_void_p, *([ctypes.c_uint32] * 7), ctypes.c_void_p]
    return lib
  print("Could not find libremu.so")
  return None