mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
am: reserve address space (#10564)
* am: reserve address space * f * cc * errno * fix * always has cpu mapping
This commit is contained in:
@@ -225,7 +225,7 @@ generate_libc() {
|
||||
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
|
||||
sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
|
||||
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path)\g" $BASE/libc.py
|
||||
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True)\g" $BASE/libc.py
|
||||
|
||||
fixup $BASE/libc.py
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.runtime.support.am.amdev import AMDev, AMMemoryManager, AMPageTableEntry
|
||||
from tinygrad.runtime.support.am.ip import AM_SOC, AM_GMC, AM_IH, AM_PSP, AM_SMU, AM_GFX, AM_SDMA
|
||||
|
||||
AM_VERSION = 0xA0000004
|
||||
AM_VERSION = 0xA0000005
|
||||
|
||||
def bold(s): return f"\033[1m{s}\033[0m"
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class FunctionFactoryStub:
|
||||
# You can either re-run clan2py with -l /path/to/library.so
|
||||
# Or manually fix this by comment the ctypes.CDLL loading
|
||||
_libraries = {}
|
||||
_libraries['libc'] = None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path) # ctypes.CDLL('libc')
|
||||
_libraries['libc'] = None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True) # ctypes.CDLL('libc')
|
||||
def string_cast(char_pointer, encoding='utf-8', errors='strict'):
|
||||
value = ctypes.cast(char_pointer, ctypes.c_char_p).value
|
||||
if value is not None and encoding is not None:
|
||||
|
||||
@@ -571,7 +571,6 @@ class KFDIface:
|
||||
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
|
||||
buf = addr = FileIOInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
else: buf, addr = 0, FileIOInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
||||
assert addr != 0xffffffffffffffff
|
||||
|
||||
try: mem = kfd.AMDKFD_IOC_ALLOC_MEMORY_OF_GPU(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id,
|
||||
flags=flags, mmap_offset=buf)
|
||||
@@ -711,6 +710,9 @@ class PCIIface:
|
||||
self._setup_adev(self.pcibus, self._map_pci_range(0), dbell:=self._map_pci_range(2, fmt='Q'), self._map_pci_range(5, fmt='I'))
|
||||
self.doorbell_cpu_addr = dbell.addr
|
||||
|
||||
if first_dev:
|
||||
FileIOInterface.anon_mmap((alloc:=self.adev.mm.va_allocator).base, alloc.size, 0, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS|MAP_NORESERVE, 0)
|
||||
|
||||
pci_cmd = int.from_bytes(self.cfg_fd.read(2, binary=True, offset=pci.PCI_COMMAND), byteorder='little') | pci.PCI_COMMAND_MASTER
|
||||
self.cfg_fd.write(pci_cmd.to_bytes(2, byteorder='little'), binary=True, offset=pci.PCI_COMMAND)
|
||||
|
||||
@@ -729,20 +731,18 @@ class PCIIface:
|
||||
def _map_pci_range(self, bar, off=0, addr=0, size=None, fmt='B'):
|
||||
fd, sz = self.bar_fds[bar], size or (self.bar_info[bar][1] - self.bar_info[bar][0] + 1)
|
||||
libc.madvise(loc:=fd.mmap(addr, sz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | (MAP_FIXED if addr else 0), off), sz, libc.MADV_DONTFORK)
|
||||
assert loc != 0xffffffffffffffff, f"Failed to mmap {size} bytes at {hex(addr)}"
|
||||
return MMIOInterface(loc, sz, fmt=fmt)
|
||||
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
|
||||
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
|
||||
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
||||
va = FileIOInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
||||
assert va != 0xffffffffffffffff, f"Failed to mmap {size} bytes at {hex(vaddr)}"
|
||||
|
||||
# Read pagemap to get the physical address of each page. The pages are locked.
|
||||
self.pagemap.seek(va // mmap.PAGESIZE * 8)
|
||||
paddrs = [((x & ((1<<55) - 1)) * mmap.PAGESIZE, mmap.PAGESIZE) for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]
|
||||
am_mapping = self.adev.mm.map_range(vaddr, size, paddrs, system=True, snooped=True, uncached=True)
|
||||
return HCQBuffer(vaddr, size, meta=AMAllocationMeta(self.dev, [self.dev], am_mapping, has_cpu_mapping=cpu_access),
|
||||
return HCQBuffer(vaddr, size, meta=AMAllocationMeta(self.dev, [self.dev], am_mapping, has_cpu_mapping=True),
|
||||
view=MMIOInterface(am_mapping.va_addr, size, fmt='B'))
|
||||
|
||||
am_mapping = self.adev.mm.valloc(size:=round_up(size, 4 << 10), uncached=uncached, contigous=cpu_access)
|
||||
|
||||
@@ -152,7 +152,7 @@ class AMPageTableTraverseContext:
|
||||
self.level_up()
|
||||
|
||||
class AMMemoryManager:
|
||||
va_allocator = TLSFAllocator(512 * (1 << 30), base=0x7F0000000000) # global for all devices.
|
||||
va_allocator = TLSFAllocator(512 * (1 << 30), base=0x200000000000) # global for all devices.
|
||||
|
||||
def __init__(self, adev:AMDev, vram_size:int):
|
||||
self.adev, self.vram_size = adev, vram_size
|
||||
@@ -265,7 +265,7 @@ class AMDev:
|
||||
# all blocks that are initialized only during the initial AM boot.
|
||||
# To determine if the GPU is in the third state, AM uses regSCRATCH_REG7 as a flag.
|
||||
self.is_booting, self.smi_dev = True, False # During boot only boot memory can be allocated. This flag is to validate this.
|
||||
self.partial_boot = (self.reg("regSCRATCH_REG7").read() == (am_version:=0xA0000004)) and (getenv("AM_RESET", 0) != 1)
|
||||
self.partial_boot = (self.reg("regSCRATCH_REG7").read() == (am_version:=0xA0000005)) and (getenv("AM_RESET", 0) != 1)
|
||||
|
||||
# Memory manager & firmware
|
||||
self.mm = AMMemoryManager(self, self.vram_size)
|
||||
|
||||
@@ -26,7 +26,10 @@ class FileIOInterface:
|
||||
def __del__(self):
|
||||
if hasattr(self, 'fd'): os.close(self.fd)
|
||||
def ioctl(self, request, arg): return fcntl.ioctl(self.fd, request, arg)
|
||||
def mmap(self, start, sz, prot, flags, offset): return libc.mmap(start, sz, prot, flags, self.fd, offset)
|
||||
def mmap(self, start, sz, prot, flags, offset):
|
||||
x = libc.mmap(start, sz, prot, flags, self.fd, offset)
|
||||
if x == 0xffffffffffffffff: raise OSError(f"Failed to mmap {sz} bytes at {hex(start)}: {os.strerror(ctypes.get_errno())}")
|
||||
return x
|
||||
def read(self, size=None, binary=False, offset=None):
|
||||
if offset is not None: self.seek(offset)
|
||||
with open(self.fd, "rb" if binary else "r", closefd=False) as file: return file.read(size)
|
||||
@@ -36,7 +39,10 @@ class FileIOInterface:
|
||||
def listdir(self): return os.listdir(self.path)
|
||||
def seek(self, offset): os.lseek(self.fd, offset, os.SEEK_SET)
|
||||
@staticmethod
|
||||
def anon_mmap(start, sz, prot, flags, offset): return libc.mmap(start, sz, prot, flags, -1, offset)
|
||||
def anon_mmap(start, sz, prot, flags, offset):
|
||||
x = libc.mmap(start, sz, prot, flags, -1, offset)
|
||||
if x == 0xffffffffffffffff: raise OSError(f"Failed to mmap {sz} bytes at {hex(start)}: {os.strerror(ctypes.get_errno())}")
|
||||
return x
|
||||
@staticmethod
|
||||
def munmap(buf, sz): return libc.munmap(buf, sz)
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user