more sync in transfer (#3174)

This commit is contained in:
George Hotz
2024-01-18 17:17:03 -08:00
committed by GitHub
parent 28dcbf0e00
commit c51c90bcd4

View File

@@ -99,9 +99,13 @@ class Buffer:
def _internal_buffer_copy(dest:Buffer, src:Buffer):
if hasattr(src.allocator, 'transfer') and type(dest.allocator) is type(src.allocator): # noqa: E721
# fast path, used on HIP between GPUs
# NOTE: it's important we use the dest device here to ensure the transfer is ready
# NOTE: we have to block here so the data isn't copied too early. this is probably due to buffer reuse
if hasattr(src.d, "block") and hasattr(dest.d, "event"): src.d.block(dest.d.event())
else: dest.d.synchronize()
src.allocator.transfer(dest._buf, src._buf, dest.size*dest.dtype.itemsize)
# NOTE: we have to block here so the data is ready on dest when dest needs it
if hasattr(dest.d, "block") and hasattr(src.d, "event"): dest.d.block(src.d.event())
else: src.d.synchronize()
return
if getenv("FROM_BUFFER") and hasattr(dest.allocator, 'from_buffer') and hasattr(dest.allocator, 'transfer') and hasattr(src.allocator, 'as_buffer'):
# fast path, used on Metal in OS X Sonoma