From b95f47784a33ea9c2c50be9889d8d97c8ce80301 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:14:10 +0800 Subject: [PATCH] qcom sleep when sync (#6785) * qcom sleep when sync * linter * short --- tinygrad/device.py | 2 +- tinygrad/runtime/ops_qcom.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tinygrad/device.py b/tinygrad/device.py index 92f5732886..514cbf0e72 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -522,7 +522,7 @@ class HCQCompiled(Compiled): self.devices.append(self) def synchronize(self): - self.timeline_signal.wait(self.timeline_value - 1) + self.timeline_signal.wait(self.timeline_value - 1) if not hasattr(self, '_syncdev') else self._syncdev() if self.timeline_value > (1 << 31): self._wrap_timeline_signal() if PROFILE: diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index 65b58684a8..48dd4daa8e 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -101,7 +101,7 @@ class QCOMComputeQueue(HWComputeQueue): def _submit(self, device): if self.binded_device == device: submit_req = self.submit_req else: submit_req, _ = self._build_gpu_command(device) - kgsl.IOCTL_KGSL_GPU_COMMAND(device.fd, __payload=submit_req) + device.last_cmd = kgsl.IOCTL_KGSL_GPU_COMMAND(device.fd, __payload=submit_req).timestamp def _exec(self, prg, args_state, global_size, local_size): global_size_mp = [int(g*l) for g,l in zip(global_size, local_size)] @@ -345,7 +345,7 @@ class QCOMDevice(HCQCompiled): QCOMDevice.dummy_addr = self._gpu_alloc(0x1000, map_to_cpu=False).va_addr QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, map_to_cpu=True, uncached=True) QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)] - info, self.ctx, self.cmd_buf, self.cmd_buf_ptr = self._info(), self._ctx_create(), self._gpu_alloc(0x1000000, map_to_cpu=True), 0 + info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(0x1000000, map_to_cpu=True), 0,0 QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF) if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}") @@ -398,3 +398,5 @@ class QCOMDevice(HCQCompiled): self.synchronize() self._gpu_free(self._stack) self._stack = self._gpu_alloc(sz) + + def _syncdev(self): kgsl.IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(self.fd, context_id=self.ctx, timestamp=self.last_cmd, timeout=0xffffffff)