mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-24 06:18:01 -05:00
am: lock to access dev (#8594)
* amm lock to access dev * wording * just works * disbale
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import ctypes, collections, time, dataclasses, pathlib
|
||||
from tinygrad.helpers import to_mv, mv_address, getenv, round_up, DEBUG
|
||||
import ctypes, collections, time, dataclasses, pathlib, fcntl, os, signal
|
||||
from tinygrad.helpers import to_mv, mv_address, getenv, round_up, DEBUG, temp
|
||||
from tinygrad.runtime.autogen.am import am, mp_11_0, mp_13_0_0, nbio_4_3_0, mmhub_3_0_0, gc_11_0_0, osssys_6_0_0
|
||||
from tinygrad.runtime.support.allocator import TLSFAllocator
|
||||
from tinygrad.runtime.support.am.ip import AM_SOC21, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
|
||||
@@ -255,6 +255,15 @@ class AMDev:
|
||||
self.pcidev, self.devfmt = pcidev, devfmt
|
||||
self.vram, self.doorbell64, self.mmio = vram_bar, doorbell_bar, mmio_bar
|
||||
|
||||
os.umask(0) # Set umask to 0 to allow creating files with 0666 permissions
|
||||
|
||||
# Avoid O_CREAT because we don’t want to re-create/replace an existing file (triggers extra perms checks) when opening as non-owner.
|
||||
if os.path.exists(lock_name:=temp(f"am_{self.devfmt}.lock")): self.lock_fd = os.open(lock_name, os.O_RDWR)
|
||||
else: self.lock_fd = os.open(lock_name, os.O_RDWR | os.O_CREAT, 0o666)
|
||||
|
||||
try: fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except OSError: raise RuntimeError(f"Failed to open AM device {self.devfmt}. It's already in use.")
|
||||
|
||||
self._run_discovery()
|
||||
self._build_regs()
|
||||
|
||||
@@ -284,14 +293,17 @@ class AMDev:
|
||||
self.sdma:AM_SDMA = AM_SDMA(self)
|
||||
|
||||
if self.partial_boot and (self.reg("regCP_MEC_RS64_CNTL").read() & gc_11_0_0.CP_MEC_RS64_CNTL__MEC_HALT_MASK == 0):
|
||||
print(f"am {self.devfmt}: MEC is active. Someone might be using the GPU? Issue a full reset.")
|
||||
if DEBUG >= 2: print(f"am {self.devfmt}: MEC is active. Issue a full reset.")
|
||||
self.partial_boot = False
|
||||
|
||||
if not self.partial_boot:
|
||||
if self.psp.is_sos_alive(): self.smu.mode1_reset()
|
||||
for ip in [self.soc21, self.gmc, self.ih, self.psp, self.smu]:
|
||||
ip.init()
|
||||
if DEBUG >= 2: print(f"am {self.devfmt}: {ip.__class__.__name__} initialized")
|
||||
try: # do not interrupt the boot process
|
||||
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
||||
if self.psp.is_sos_alive(): self.smu.mode1_reset()
|
||||
for ip in [self.soc21, self.gmc, self.ih, self.psp, self.smu]:
|
||||
ip.init()
|
||||
if DEBUG >= 2: print(f"am {self.devfmt}: {ip.__class__.__name__} initialized")
|
||||
finally: signal.signal(signal.SIGINT, signal.default_int_handler)
|
||||
|
||||
# Booting done
|
||||
self.is_booting = False
|
||||
|
||||
Reference in New Issue
Block a user