diff --git a/tinygrad/device.py b/tinygrad/device.py
index 46e528bb6f..160ef59f46 100644
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -130,7 +130,7 @@ class BufferXfer(BufferCopy):
   def copy(self, dest, src):
     if hasattr(dest.allocator.device, "track_cross_buffer") and hasattr(src.allocator, "track_cross_device"):
       dest.allocator.device.track_cross_buffer.append(src)
-      src.allocator.track_cross_device.append(dest.allocator.device)
+      src.allocator.track_cross_device.add(dest.allocator.device)
     dest.allocator.transfer(dest._buf, src._buf, dest.nbytes)
 
 # TODO: size, dest, src are the same type. can we enforce this?
diff --git a/tinygrad/runtime/ops_hip.py b/tinygrad/runtime/ops_hip.py
index df0c7ed248..cac340aeed 100644
--- a/tinygrad/runtime/ops_hip.py
+++ b/tinygrad/runtime/ops_hip.py
@@ -71,11 +71,14 @@ CHUNK_SIZE, PAGE_SIZE = 256*1024*1024, 0x1000
 class HIPAllocator(LRUAllocator):
   def __init__(self, device:HIPDevice):
     self.device = device
-    self.track_cross_device: List[HIPDevice] = []
+    self.track_cross_device: Set[HIPDevice] = set()
     super().__init__()
-  def free_cache(self):
+  def full_synchronize(self):
     self.device.synchronize()
     for x in self.track_cross_device: x.synchronize()
+    self.track_cross_device.clear()
+  def free_cache(self):
+    self.full_synchronize()
     return super().free_cache()
   def _alloc(self, size:int):
     hip_set_device(self.device.device)
@@ -120,7 +123,7 @@ class HIPAllocator(LRUAllocator):
     ctypes.memmove(host_mem, from_mv(src), len(src))
     check(hip.hipMemcpyAsync(dest, host_mem, len(src), hip.hipMemcpyHostToDevice, None))
   def copyout(self, dest:memoryview, src:T):
-    self.device.synchronize()
+    self.full_synchronize()
     hip_set_device(self.device.device)
     check(hip.hipMemcpy(from_mv(dest), src, len(dest), hip.hipMemcpyDeviceToHost))
   def transfer(self, dest:T, src:T, sz:int):