From 67f70cef0217582df1ffd40b0085a0095d049ff2 Mon Sep 17 00:00:00 2001
From: nimlgen <138685161+nimlgen@users.noreply.github.com>
Date: Sat, 13 Jul 2024 22:55:09 +0300
Subject: [PATCH] amd better allocation error messages (#5462)

* amd better allocation error messages

* a bit better
---
 tinygrad/runtime/ops_amd.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py
index 76ee47e397..6f4327e8f8 100644
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@@ -351,12 +351,8 @@ class AMDAllocator(HCQCompatAllocator):
   def __init__(self, device:AMDDevice): super().__init__(device, batch_size=SDMA_MAX_COPY_SIZE)
 
   def _alloc(self, size:int, options:BufferOptions) -> HCQCompatAllocRes:
-    try:
-      if options.host: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, public=True)
-      return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM, public=options.cpu_access)
-    except OSError as e:
-      if e.errno == errno.ENOMEM: raise MemoryError("Cannot allocate memory") from e
-      raise
+    if options.host: return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, public=True)
+    return self.device._gpu_alloc(size, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM, public=options.cpu_access)
 
   def _free(self, opaque, options:BufferOptions): self.device._gpu_free(opaque)
 
@@ -394,7 +390,14 @@ class AMDDevice(HCQCompatCompiled):
     else:
       buf, addr = 0, libc.mmap(0, size, 0, mmap.MAP_PRIVATE|mmap.MAP_ANONYMOUS|MAP_NORESERVE, -1, 0)
     assert addr != 0xffffffffffffffff
-    mem = kio.alloc_memory_of_gpu(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id, flags=flags, mmap_offset=buf)
+
+    try: mem = kio.alloc_memory_of_gpu(self.kfd, va_addr=addr, size=size, base=addr, length=size, gpu_id=self.gpu_id, flags=flags, mmap_offset=buf)
+    except OSError as e:
+      if e.errno == errno.EINVAL and (flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) and public:
+        raise MemoryError("Cannot allocate host-visible VRAM. Ensure the resizable BAR option is enabled on your system.") from e
+      if e.errno == errno.ENOMEM: raise MemoryError("Cannot allocate memory: no memory is available.") from e
+      raise
+
     if not (flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR):
       buf = libc.mmap(mem.va_addr, mem.size, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, self.drm_fd, mem.mmap_offset)
       assert addr == buf == mem.va_addr