diff --git a/tinygrad/device.py b/tinygrad/device.py index 46e528bb6f..160ef59f46 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -130,7 +130,7 @@ class BufferXfer(BufferCopy): def copy(self, dest, src): if hasattr(dest.allocator.device, "track_cross_buffer") and hasattr(src.allocator, "track_cross_device"): dest.allocator.device.track_cross_buffer.append(src) - src.allocator.track_cross_device.append(dest.allocator.device) + src.allocator.track_cross_device.add(dest.allocator.device) dest.allocator.transfer(dest._buf, src._buf, dest.nbytes) # TODO: size, dest, src are the same type. can we enforce this? diff --git a/tinygrad/runtime/ops_hip.py b/tinygrad/runtime/ops_hip.py index df0c7ed248..cac340aeed 100644 --- a/tinygrad/runtime/ops_hip.py +++ b/tinygrad/runtime/ops_hip.py @@ -71,11 +71,14 @@ CHUNK_SIZE, PAGE_SIZE = 256*1024*1024, 0x1000 class HIPAllocator(LRUAllocator): def __init__(self, device:HIPDevice): self.device = device - self.track_cross_device: List[HIPDevice] = [] + self.track_cross_device: Set[HIPDevice] = set() super().__init__() - def free_cache(self): + def full_synchronize(self): self.device.synchronize() for x in self.track_cross_device: x.synchronize() + self.track_cross_device.clear() + def free_cache(self): + self.full_synchronize() return super().free_cache() def _alloc(self, size:int): hip_set_device(self.device.device) @@ -120,7 +123,7 @@ class HIPAllocator(LRUAllocator): ctypes.memmove(host_mem, from_mv(src), len(src)) check(hip.hipMemcpyAsync(dest, host_mem, len(src), hip.hipMemcpyHostToDevice, None)) def copyout(self, dest:memoryview, src:T): - self.device.synchronize() + self.full_synchronize() hip_set_device(self.device.device) check(hip.hipMemcpy(from_mv(dest), src, len(dest), hip.hipMemcpyDeviceToHost)) def transfer(self, dest:T, src:T, sz:int):