From db2cf48828011b75a16cc01468c9146f11e305c3 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Wed, 28 Feb 2024 21:24:50 +0300 Subject: [PATCH] check buffers are seeable by other gpu before transfer (#3504) --- tinygrad/runtime/ops_hsa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tinygrad/runtime/ops_hsa.py b/tinygrad/runtime/ops_hsa.py index d5e17bd56c..82c0f398b5 100644 --- a/tinygrad/runtime/ops_hsa.py +++ b/tinygrad/runtime/ops_hsa.py @@ -61,9 +61,7 @@ class HSAAllocator(LRUAllocator): super().__init__() def _alloc(self, size:int): - c_agents = (hsa.hsa_agent_t * len(HSADevice.devices))(*[dev.agent for dev in HSADevice.devices]) check(hsa.hsa_amd_memory_pool_allocate(self.device.gpu_mempool, size, 0, ctypes.byref(buf := ctypes.c_void_p()))) - check(hsa.hsa_amd_agents_allow_access(len(HSADevice.devices), c_agents, None, buf)) return buf.value def _free(self, opaque:T): @@ -139,6 +137,8 @@ class HSAAllocator(LRUAllocator): sync_signal_1 = src_dev.hw_queue.submit_barrier(need_signal=True) sync_signal_2 = dest_dev.hw_queue.submit_barrier(need_signal=True) c_wait_signal = (hsa.hsa_signal_t*2)(sync_signal_1, sync_signal_2) + c_agents = (hsa.hsa_agent_t * len(HSADevice.devices))(*[dev.agent for dev in HSADevice.devices]) + check(hsa.hsa_amd_agents_allow_access(len(HSADevice.devices), c_agents, None, src)) check(hsa.hsa_amd_memory_async_copy_on_engine(dest, dest_dev.agent, src, src_dev.agent, sz, 2, c_wait_signal, copy_signal, hsa.HSA_AMD_SDMA_ENGINE_0, True)) # noqa: E501 src_dev.hw_queue.submit_barrier(wait_signals=[copy_signal]) dest_dev.hw_queue.submit_barrier(wait_signals=[copy_signal])