mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
system: fix flock on pcidevs (#13123)
* system: fix locking of hcq devices * rename and fullrun * force ok * fix * fix
This commit is contained in:
39
test/external/external_fuzz_am_interrupts.py
vendored
39
test/external/external_fuzz_am_interrupts.py
vendored
@@ -1,39 +0,0 @@
|
||||
import subprocess
|
||||
import random
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
def run_test(i, full_run=False):
  """Launch `test/test_tiny.py TestTiny.test_plus` in a subprocess for iteration `i`.

  When `full_run` is false the child is killed after a random delay of up to 1.2 seconds
  and nothing is checked. When `full_run` is true the child runs to completion, its stderr
  is printed, and the run must contain both "Ran 1 test in" and "OK" there.

  Raises:
    AssertionError: on a full run whose stderr lacks the expected success markers.
  """
  print(f"\rRunning iteration {i}...", end=" ", flush=True)

  proc = subprocess.Popen(['python3', 'test/test_tiny.py', 'TestTiny.test_plus'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  if not full_run:
    # interrupt the child at an arbitrary point of its execution
    time.sleep(random.uniform(0, 1200) / 1000)
    proc.kill()

  # reaps the child in both modes (after the kill, or after a normal exit)
  _, err = proc.communicate()
  if not full_run: return

  output = err.decode()
  print(output)
  assert "Ran 1 test in" in output and "OK" in output
|
||||
|
||||
# Fuzz driver: worker threads keep launching-and-killing the test; every 100th iteration the
# pool is drained and one uninterrupted run is done in the main thread, which must pass.
max_workers = 4
with ThreadPoolExecutor(max_workers=max_workers) as executor:
  futures = []
  for i in range(1000000):
    if i % 100 == 0:
      # wait for everything submitted so far; a failed iteration is reported, not fatal
      for future in as_completed(futures):
        try: future.result()
        except Exception as e:
          print(f"\nError in iteration: {e}")
      futures = []

      run_test(i, True)
    else:
      future = executor.submit(run_test, i, False)
      futures.append(future)

    # keep the bookkeeping list small, but never drop a finished future without looking at
    # its outcome: an exception raised in the worker would otherwise be lost silently
    if len(futures) > max_workers * 2:
      pending = []
      for future in futures:
        if not future.done():
          pending.append(future)
          continue
        try: future.result()
        except Exception as e:
          print(f"\nError in iteration: {e}")
      futures = pending
|
||||
44
test/external/external_fuzz_hcq_mp.py
vendored
Normal file
44
test/external/external_fuzz_hcq_mp.py
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
import subprocess
|
||||
import random
|
||||
import time
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from tinygrad.helpers import getenv
|
||||
|
||||
# checks that HCQ drivers can be killed during operation without causing issues
|
||||
|
||||
def run_test(i, full_run=False, force_ok=False):
  """Launch `test/test_tiny.py TestTiny.test_plus` in a subprocess for iteration `i`.

  When `full_run` is false the child is killed after a random delay of up to 1.2 seconds
  and its output is ignored. When `full_run` is true the child runs to completion and its
  stderr must either report success ("Ran 1 test in" and "OK") or, unless `force_ok` is
  set, contain "Failed to take lock file".

  Raises:
    AssertionError: on a full run whose stderr matches neither accepted outcome; the
      captured stderr text is the assertion message.
  """
  print(f"\rRunning iteration {i}...", end=" ", flush=True)

  cmd = ["python3", "test/test_tiny.py", "TestTiny.test_plus"]
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

  if full_run:
    _, err = proc.communicate()
    err_text = err.decode()
    passed = "Ran 1 test in" in err_text and "OK" in err_text
    # losing the lock is tolerated only when the caller did not demand a clean pass
    lock_busy = not force_ok and "Failed to take lock file" in err_text
    assert passed or lock_busy, err_text
    return

  # interrupt the child at an arbitrary point of its execution, then reap it
  time.sleep(random.uniform(0, 1200) / 1000.0)
  proc.kill()
  proc.communicate()
|
||||
|
||||
if __name__ == "__main__":
  # Fuzz driver: worker processes keep launching the test (killing it mid-run unless
  # FULL_RUN is set); every 100th iteration the pool is drained and one uninterrupted run
  # is done in the main process, which must pass (force_ok=True).
  max_workers = getenv("MAX_WORKERS", 4)
  worker_full_run = bool(getenv("FULL_RUN", 0))

  def _report_errors(finished):
    # surface exceptions raised inside workers; a failed iteration must not stop the fuzzing
    for f in finished:
      try:
        f.result()
      except Exception as e:
        print(f"\nError in iteration: {e}")

  with ProcessPoolExecutor(max_workers=max_workers) as executor:
    futures = []
    for i in range(1000000):
      if i % 100 == 0:
        # wait for everything we launched so far
        _report_errors(as_completed(futures))
        futures = []

        # do a full run in the main proc
        run_test(i, True, force_ok=True)
      else:
        futures.append(executor.submit(run_test, i, worker_full_run))

      # keep list small, but never drop a finished future without looking at its outcome:
      # with FULL_RUN set, an assertion failure in a worker would otherwise be lost silently
      if len(futures) > max_workers * 2:
        pending = []
        for f in futures:
          # done() is checked once per future so none can slip between report and filter
          if f.done(): _report_errors([f])
          else: pending.append(f)
        futures = pending
|
||||
@@ -831,7 +831,7 @@ class PCIIface(PCIIfaceBase):
|
||||
|
||||
class USBIface(PCIIface):
|
||||
def __init__(self, dev, dev_id): # pylint: disable=super-init-not-called
|
||||
self.dev, self.pci_dev = dev, USBPCIDevice(f"usb:{dev_id}", bars=[0, 2, 5])
|
||||
self.dev, self.pci_dev = dev, USBPCIDevice(dev.__class__.__name__[:2], f"usb:{dev_id}", bars=[0, 2, 5])
|
||||
self._setup_adev(self.pci_dev, dma_regions=[(0x200000, self.pci_dev.dma_view(0xf000, 0x80000))])
|
||||
self.pci_dev.usb._pci_cacheable += [(self.pci_dev.bar_info[2].addr, self.pci_dev.bar_info[2].size)] # doorbell region is cacheable
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from __future__ import annotations
|
||||
import ctypes, collections, dataclasses, functools, os, hashlib, array
|
||||
import ctypes, collections, dataclasses, functools, hashlib, array
|
||||
from tinygrad.helpers import mv_address, getenv, DEBUG, fetch
|
||||
from tinygrad.runtime.autogen.am import am
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.runtime.support.amd import AMDReg, import_module, import_asic_regs
|
||||
from tinygrad.runtime.support.memory import TLSFAllocator, MemoryManager
|
||||
from tinygrad.runtime.support.system import System, PCIDevice, PCIDevImplBase
|
||||
from tinygrad.runtime.support.system import PCIDevice, PCIDevImplBase
|
||||
from tinygrad.runtime.support.am.ip import AM_SOC, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
|
||||
|
||||
AM_DEBUG = getenv("AM_DEBUG", 0)
|
||||
@@ -122,8 +122,6 @@ class AMDev(PCIDevImplBase):
|
||||
self.pci_dev, self.devfmt, self.dma_regions = pci_dev, pci_dev.pcibus, dma_regions
|
||||
self.vram, self.doorbell64, self.mmio = self.pci_dev.map_bar(0), self.pci_dev.map_bar(2, fmt='Q'), self.pci_dev.map_bar(5, fmt='I')
|
||||
|
||||
self.lock_fd = System.flock_acquire(f"am_{self.devfmt}.lock")
|
||||
|
||||
self._run_discovery()
|
||||
self._build_regs()
|
||||
|
||||
@@ -190,7 +188,6 @@ class AMDev(PCIDevImplBase):
|
||||
for ip in [self.sdma, self.gfx]: ip.fini_hw()
|
||||
self.smu.set_clocks(level=0)
|
||||
self.ih.interrupt_handler()
|
||||
os.close(self.lock_fd)
|
||||
|
||||
def paddr2mc(self, paddr:int) -> int: return self.gmc.mc_base + paddr
|
||||
|
||||
|
||||
@@ -73,8 +73,6 @@ class NVDev(PCIDevImplBase):
|
||||
def __init__(self, pci_dev:PCIDevice):
|
||||
self.pci_dev, self.devfmt, self.mmio = pci_dev, pci_dev.pcibus, pci_dev.map_bar(0, fmt='I')
|
||||
|
||||
self.lock_fd = System.flock_acquire(f"nv_{self.devfmt}.lock")
|
||||
|
||||
self.smi_dev, self.is_booting = False, True
|
||||
self._early_init()
|
||||
|
||||
|
||||
@@ -165,7 +165,8 @@ class _System:
|
||||
System = _System()
|
||||
|
||||
class PCIDevice:
|
||||
def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
|
||||
self.pcibus, self.irq_poller = pcibus, None
|
||||
|
||||
if FileIOInterface.exists(f"/sys/bus/pci/devices/{self.pcibus}/driver"):
|
||||
@@ -215,7 +216,8 @@ class PCIDevice:
|
||||
def reset(self): os.system(f"sudo sh -c 'echo 1 > /sys/bus/pci/devices/{self.pcibus}/reset'")
|
||||
|
||||
class APLPCIDevice(PCIDevice):
|
||||
def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
|
||||
self.pcibus, self.bars = pcibus, {b: System.iokit_pci_memmap(b) for b in bars}
|
||||
self.bar_info = {b:PCIBarInfo(0, self.bars[b].nbytes-1 if b in self.bars else 0) for b in range(6)} # NOTE: fake bar info for nv.
|
||||
def map_bar(self, bar:int, off:int=0, addr:int=0, size:int|None=None, fmt='B') -> MMIOInterface: return self.bars[bar].view(off, size, fmt)
|
||||
@@ -224,7 +226,8 @@ class APLPCIDevice(PCIDevice):
|
||||
def reset(self): System.iokit_pci_rpc(__TinyGPURPCReset:=2)
|
||||
|
||||
class USBPCIDevice(PCIDevice):
|
||||
def __init__(self, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
def __init__(self, devpref:str, pcibus:str, bars:list[int], resize_bars:list[int]|None=None):
|
||||
self.lock_fd = System.flock_acquire(f"{devpref.lower()}_{pcibus.lower()}.lock")
|
||||
self.usb = ASM24Controller()
|
||||
self.pcibus, self.bar_info = pcibus, System.pci_setup_usb_bars(self.usb, gpu_bus=4, mem_base=0x10000000, pref_mem_base=(32 << 30))
|
||||
def map_bar(self, bar, off=0, addr=0, size=None, fmt='B'):
|
||||
@@ -247,7 +250,7 @@ class LNXPCIIfaceBase:
|
||||
|
||||
# Acquire va range to avoid collisions.
|
||||
FileIOInterface.anon_mmap(va_start, va_size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, 0)
|
||||
self.pci_dev, self.dev, self.vram_bar = PCIDevice(cls.gpus[dev_id], bars=bars, resize_bars=[vram_bar]), dev, vram_bar
|
||||
self.pci_dev, self.dev, self.vram_bar = PCIDevice(dev.__class__.__name__[:2], cls.gpus[dev_id], bars=bars, resize_bars=[vram_bar]), dev, vram_bar
|
||||
self.p2p_base_addr = self.pci_dev.bar_info[vram_bar].addr
|
||||
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs) -> HCQBuffer:
|
||||
@@ -281,7 +284,7 @@ class LNXPCIIfaceBase:
|
||||
|
||||
class APLPCIIfaceBase(LNXPCIIfaceBase):
|
||||
def __init__(self, dev, dev_id, vendor, devices, bars, vram_bar, va_start, va_size):
|
||||
self.pci_dev, self.dev, self.vram_bar = APLPCIDevice(pcibus=f'usb4:{dev_id}', bars=bars), dev, vram_bar
|
||||
self.pci_dev, self.dev, self.vram_bar = APLPCIDevice(dev.__class__.__name__[:2], pcibus=f'usb4:{dev_id}', bars=bars), dev, vram_bar
|
||||
def map(self, b:HCQBuffer): raise RuntimeError(f"map failed: {b.owner} -> {self.dev}")
|
||||
|
||||
PCIIfaceBase:type = APLPCIIfaceBase if OSX else LNXPCIIfaceBase
|
||||
|
||||
Reference in New Issue
Block a user