Acquire the per-device lock file in the PCIDevice constructors instead of in AMDev/NVDev.

Summary of what the hunks below do:
  * PCIDevice, APLPCIDevice and USBPCIDevice take a new leading `devpref` argument and call
    System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock") as the first statement of __init__.
  * Callers (LNXPCIIfaceBase, APLPCIIfaceBase, USBIface) pass dev.__class__.__name__[:2] as `devpref`.
  * AMDev and NVDev no longer acquire "am_<bus>.lock" / "nv_<bus>.lock"; AMDev also stops closing lock_fd.
  * test/external/external_fuzz_am_interrupts.py is deleted and test/external/external_fuzz_hcq_mp.py is added
    (process pool instead of thread pool, MAX_WORKERS / FULL_RUN read via getenv, __main__ guard).

NOTE(review): line breaks, indentation and blank context lines were re-derived from the hunk line counts
after the patch text had been whitespace-collapsed; regenerate from git if a byte-exact patch is required.

diff --git a/test/external/external_fuzz_am_interrupts.py b/test/external/external_fuzz_am_interrupts.py
deleted file mode 100644
index 2ed5724288..0000000000
--- a/test/external/external_fuzz_am_interrupts.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import subprocess
-import random
-import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-def run_test(i, full_run=False):
-  print(f"\rRunning iteration {i}...", end=" ", flush=True)
-
-  p = subprocess.Popen(['python3', 'test/test_tiny.py', 'TestTiny.test_plus'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-  if not full_run:
-    time.sleep(random.uniform(0, 1200) / 1000)
-    p.kill()
-    _, stderr = p.communicate()
-  else:
-    _, stderr = p.communicate()
-
-  if full_run:
-    stderr_text = stderr.decode()
-    print(stderr_text)
-    assert "Ran 1 test in" in stderr_text and "OK" in stderr_text
-
-max_workers = 4
-with ThreadPoolExecutor(max_workers=max_workers) as executor:
-  futures = []
-  for i in range(1000000):
-    if i % 100 == 0:
-      for future in as_completed(futures):
-        try: future.result()
-        except Exception as e:
-          print(f"\nError in iteration: {e}")
-      futures = []
-
-      run_test(i, True)
-    else:
-      future = executor.submit(run_test, i, False)
-      futures.append(future)
-
-    if len(futures) > max_workers * 2: futures = [f for f in futures if not f.done()]
\ No newline at end of file
diff --git a/test/external/external_fuzz_hcq_mp.py b/test/external/external_fuzz_hcq_mp.py
new file mode 100644
index 0000000000..5fb68bdeaf
--- /dev/null
+++ b/test/external/external_fuzz_hcq_mp.py
@@ -0,0 +1,44 @@
+import subprocess
+import random
+import time
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from tinygrad.helpers import getenv
+
+# checks that HCQ drivers can be killed during operation without causing issues
+
+def run_test(i, full_run=False, force_ok=False):
+  print(f"\rRunning iteration {i}...", end=" ", flush=True)
+
+  p = subprocess.Popen(["python3", "test/test_tiny.py", "TestTiny.test_plus"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+  if not full_run:
+    time.sleep(random.uniform(0, 1200) / 1000.0)
+    p.kill()
+    _, stderr = p.communicate()
+  else:
+    _, stderr = p.communicate()
+    stderr_text = stderr.decode()
+    assert ("Ran 1 test in" in stderr_text and "OK" in stderr_text) or (not force_ok and "Failed to take lock file" in stderr_text), stderr_text
+
+if __name__ == "__main__":
+  max_workers = getenv("MAX_WORKERS", 4)
+  with ProcessPoolExecutor(max_workers=max_workers) as executor:
+    futures = []
+    for i in range(1000000):
+      if i % 100 == 0:
+        # wait for everything we launched so far
+        for f in as_completed(futures):
+          try:
+            f.result()
+          except Exception as e:
+            print(f"\nError in iteration: {e}")
+        futures = []
+
+        # do a full run in the main proc
+        run_test(i, True, force_ok=True)
+      else:
+        futures.append(executor.submit(run_test, i, bool(getenv("FULL_RUN", 0))))
+
+      # keep list small
+      if len(futures) > max_workers * 2:
+        futures = [f for f in futures if not f.done()]
diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py
index 0ee414cae7..656b7597ee 100644
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@@ -831,7 +831,7 @@ class PCIIface(PCIIfaceBase):
 
 class USBIface(PCIIface):
   def __init__(self, dev, dev_id): # pylint: disable=super-init-not-called
-    self.dev, self.pci_dev = dev, USBPCIDevice(f"usb:{dev_id}", bars=[0, 2, 5])
+    self.dev, self.pci_dev = dev, USBPCIDevice(dev.__class__.__name__[:2], f"usb:{dev_id}", bars=[0, 2, 5])
     self._setup_adev(self.pci_dev, dma_regions=[(0x200000, self.pci_dev.dma_view(0xf000, 0x80000))])
     self.pci_dev.usb._pci_cacheable += [(self.pci_dev.bar_info[2].addr, self.pci_dev.bar_info[2].size)] # doorbell region is cacheable
 
diff --git a/tinygrad/runtime/support/am/amdev.py b/tinygrad/runtime/support/am/amdev.py
index 5ce913e349..3a8efd8a8e 100644
--- a/tinygrad/runtime/support/am/amdev.py
+++ b/tinygrad/runtime/support/am/amdev.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
-import ctypes, collections, dataclasses, functools, os, hashlib, array
+import ctypes, collections, dataclasses, functools, hashlib, array
 from tinygrad.helpers import mv_address, getenv, DEBUG, fetch
 from tinygrad.runtime.autogen.am import am
 from tinygrad.runtime.support.hcq import MMIOInterface
 from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
 from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager
-from tinygrad.runtime.support.system import System, PCIDevice, PCIDevImplBase
+from tinygrad.runtime.support.system import PCIDevice, PCIDevImplBase
 from tinygrad.runtime.support.am.ip import AM_SOC, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
 
 AM_DEBUG = getenv("AM_DEBUG", 0)
@@ -122,8 +122,6 @@ class AMDev(PCIDevImplBase):
     self.pci_dev, self.devfmt, self.dma_regions = pci_dev, pci_dev.pcibus, dma_regions
     self.vram, self.doorbell64, self.mmio = self.pci_dev.map_bar(0), self.pci_dev.map_bar(2, fmt='Q'), self.pci_dev.map_bar(5, fmt='I')
 
-    self.lock_fd = System.flock_acquire(f"am_{self.devfmt}.lock")
-
     self._run_discovery()
     self._build_regs()
 
@@ -190,7 +188,6 @@ class AMDev(PCIDevImplBase):
     for ip in [self.sdma, self.gfx]: ip.fini_hw()
     self.smu.set_clocks(level=0)
     self.ih.interrupt_handler()
-    os.close(self.lock_fd)
 
   def paddr2mc(self, paddr:int) -> int: return self.gmc.mc_base + paddr
 
diff --git a/tinygrad/runtime/support/nv/nvdev.py b/tinygrad/runtime/support/nv/nvdev.py
index 1bf6919823..c004c43b20 100644
--- a/tinygrad/runtime/support/nv/nvdev.py
+++ b/tinygrad/runtime/support/nv/nvdev.py
@@ -73,8 +73,6 @@ class NVDev(PCIDevImplBase):
   def __init__(self, pci_dev:PCIDevice):
     self.pci_dev, self.devfmt, self.mmio = pci_dev, pci_dev.pcibus, pci_dev.map_bar(0, fmt='I')
 
-    self.lock_fd = System.flock_acquire(f"nv_{self.devfmt}.lock")
-
     self.smi_dev, self.is_booting = False, True
     self._early_init()
 
diff --git a/tinygrad/runtime/support/system.py b/tinygrad/runtime/support/system.py
index 49b1a32a37..5c49c5513d 100644
--- a/tinygrad/runtime/support/system.py
+++ b/tinygrad/runtime/support/system.py
@@ -165,7 +165,8 @@ class _System:
 System = _System()
 
 class PCIDevice:
-  def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+  def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+    self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
     self.pcibus, self.irq_poller = pcibus, None
 
     if FileIOInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
@@ -215,7 +216,8 @@ class PCIDevice:
   def reset(self): os.system(f"sudo sh -c 'echo 1 > /sys/bus/pci/devices/{self.pcibus}/reset'")
 
 class APLPCIDevice(PCIDevice):
-  def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+  def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+    self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
     self.pcibus, self.bars = pcibus, {b: System.iokit_pci_memmap(b) for b in bars}
     self.bar_info = {b:PCIBarInfo(0, self.bars[b].nbytes-1 if b in self.bars else 0) for b in range(6)} # NOTE: fake bar info for nv.
   def map_bar(self, bar:int, off:int=0, addr:int=0, size:int|None=None, fmt='B') -> MMIOInterface: return self.bars[bar].view(off, size, fmt)
@@ -224,7 +226,8 @@ class APLPCIDevice(PCIDevice):
   def reset(self): System.iokit_pci_rpc(__TinyGPURPCReset:=2)
 
 class USBPCIDevice(PCIDevice):
-  def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+  def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
+    self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
     self.usb = ASM24Controller()
     self.pcibus, self.bar_info = pcibus, System.pci_setup_usb_bars(self.usb, gpu_bus=4, mem_base=0x10000000, pref_mem_base=(32 << 30))
   def map_bar(self, bar, off=0, addr=0, size=None, fmt='B'):
@@ -247,7 +250,7 @@ class LNXPCIIfaceBase:
 
     # Acquire va range to avoid collisions.
     FileIOInterface.anon_mmap(va_start, va_size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, 0)
-    self.pci_dev, self.dev, self.vram_bar = PCIDevice(cls.gpus[dev_id], bars=bars, resize_bars=[vram_bar]), dev, vram_bar
+    self.pci_dev, self.dev, self.vram_bar = PCIDevice(dev.__class__.__name__[:2], cls.gpus[dev_id], bars=bars, resize_bars=[vram_bar]), dev, vram_bar
     self.p2p_base_addr = self.pci_dev.bar_info[vram_bar].addr
 
   def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs) -> HCQBuffer:
@@ -281,7 +284,7 @@ class LNXPCIIfaceBase:
 
 class APLPCIIfaceBase(LNXPCIIfaceBase):
   def __init__(self, dev, dev_id, vendor, devices, bars, vram_bar, va_start, va_size):
-    self.pci_dev, self.dev, self.vram_bar = APLPCIDevice(pcibus=f'usb4:{dev_id}', bars=bars), dev, vram_bar
+    self.pci_dev, self.dev, self.vram_bar = APLPCIDevice(dev.__class__.__name__[:2], pcibus=f'usb4:{dev_id}', bars=bars), dev, vram_bar
   def map(self, b:HCQBuffer): raise RuntimeError(f"map failed: {b.owner} -> {self.dev}")
 
 PCIIfaceBase:type = APLPCIIfaceBase if OSX else LNXPCIIfaceBase