From e8b5f2040d8a3eb3f021d296b82856ae8ce028d3 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:47:24 +0300 Subject: [PATCH] nv faster signal on dma queue (#4789) --- tinygrad/runtime/ops_nv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index f936412db9..31bfd9dab4 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -186,6 +186,12 @@ class HWCopyQueue(HWQueue): self.next_cmd_index += 1 return self + def signal(self, signal, value=0): + self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 4), *nvdata64(ctypes.addressof(from_mv(signal))), value, 4] + self.q += [nvmethod(4, nv_gpu.NVC6B5_LAUNCH_DMA, 1), 0x14] + self.next_cmd_index += 1 + return self + def submit(self, dev:NVDevice): if len(self.q) == 0: return dev.dma_put_value = self._submit(dev, dev.dma_gpu_ring, dev.dma_put_value, dev.dma_gpfifo_entries,