mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
am: move queues to gpus (#8848)
* am: fix * add flag for this * do not depend on host parameter,
This commit is contained in:
@@ -353,7 +353,8 @@ class KFDIface:
|
||||
|
||||
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
|
||||
|
||||
if host: buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
|
||||
buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
|
||||
else: buf, addr = 0, HWInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
|
||||
assert addr != 0xffffffffffffffff
|
||||
|
||||
@@ -365,7 +366,7 @@ class KFDIface:
|
||||
if e.errno == errno.ENOMEM: raise MemoryError("Cannot allocate memory: no memory is available.") from e
|
||||
raise
|
||||
|
||||
if not host:
|
||||
if not (flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR):
|
||||
buf = self.drm_fd.mmap(mem.va_addr, mem.size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_FIXED, mem.mmap_offset)
|
||||
assert addr == buf == mem.va_addr
|
||||
|
||||
@@ -502,7 +503,7 @@ class PCIIface:
|
||||
return to_mv(fd.mmap(addr, sz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | (MAP_FIXED if addr else 0), off), sz)
|
||||
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
|
||||
if host or (uncached and cpu_access): # host or gtt-like memory.
|
||||
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
|
||||
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
|
||||
va = HWInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
|
||||
|
||||
@@ -564,7 +565,7 @@ class AMDDevice(HCQCompiled):
|
||||
if self.target < 100300 or self.target >= 120000: raise RuntimeError(f"Unsupported arch: {self.arch}")
|
||||
|
||||
if AMDDevice.signals_page is None:
|
||||
AMDDevice.signals_page = self.dev_iface.alloc(16 * 65536, uncached=True, cpu_access=True)
|
||||
AMDDevice.signals_page = self.dev_iface.alloc(16 * 65536, host=True, uncached=True, cpu_access=True)
|
||||
AMDDevice.signals_pool = [AMDDevice.signals_page.va_addr + off for off in range(0, AMDDevice.signals_page.size, 16)]
|
||||
else: self.dev_iface.map(AMDDevice.signals_page)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user