diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index 63d9be42d0..cd0686e806 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -329,15 +329,11 @@ class QCOMAllocator(HCQAllocatorBase): return self.dev._gpu_map(opts.external_ptr, size) if opts.external_ptr else self.dev._gpu_alloc(size) def _do_copy(self, src_addr, dest_addr, size, prof_text): + self.dev.synchronize() with cpu_profile(prof_text, f"{self.dev.device}:COPY"): ctypes.memmove(dest_addr, src_addr, size) - def _copyin(self, dest:HCQBuffer, src:memoryview): - self._do_copy(mv_address(src), dest.cpu_view().addr, src.nbytes, f"TINY -> {self.dev.device}") - - def _copyout(self, dest:memoryview, src:HCQBuffer): - self.dev.synchronize() - - self._do_copy(src.cpu_view().addr, mv_address(dest), src.size, f"{self.dev.device} -> TINY") + def _copyin(self, dest:HCQBuffer, src:memoryview): self._do_copy(mv_address(src), dest.cpu_view().addr, src.nbytes, f"TINY -> {self.dev.device}") + def _copyout(self, dest:memoryview, src:HCQBuffer): self._do_copy(src.cpu_view().addr, mv_address(dest), src.size, f"{self.dev.device} -> TINY") def _as_buffer(self, src:HCQBuffer) -> memoryview: self.dev.synchronize()