From 1b8bed4a26703f63f2b2c1bcf732723344d417a4 Mon Sep 17 00:00:00 2001
From: nimlgen <138685161+nimlgen@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:22:58 +0300
Subject: [PATCH] nv check cmdq overrun (#4824)

* nv check cmdq overrun

* fix assert
---
 tinygrad/runtime/ops_nv.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py
index 2f5047c30c..ae80ed6ebb 100644
--- a/tinygrad/runtime/ops_nv.py
+++ b/tinygrad/runtime/ops_nv.py
@@ -115,8 +115,15 @@ class HWQueue:
     self.q = hw_view # type: ignore
 
   def _submit(self, dev, gpu_ring, put_value, gpfifo_entries, gpfifo_token, gpu_ring_controls):
+    if len(self.q) == 0: return put_value
+
     if dev == self.binded_device: cmdq_addr = self.hw_page.base
     else:
+      if dev.cmdq_wptr + len(self.q) * 4 > dev.cmdq_page.length:
+        assert (gpu_ring[gpu_ring_controls.GPGet] & 0xFFFFFFFFFC) >= dev.cmdq_page.base + len(self.q) * 4 or \
+               gpu_ring_controls.GPGet == gpu_ring_controls.GPPut, "cmdq overrun"
+        dev.cmdq_wptr = 0
+
       dev.cmdq[dev.cmdq_wptr//4:dev.cmdq_wptr//4+len(self.q)] = array.array('I', self.q)
       cmdq_addr = dev.cmdq_page.base+dev.cmdq_wptr
       dev.cmdq_wptr += len(self.q) * 4
@@ -173,10 +180,8 @@ class HWComputeQueue(HWQueue):
     qmd.cta_raster_width, qmd.cta_raster_height, qmd.cta_raster_depth = global_size
     qmd.cta_thread_dimension0, qmd.cta_thread_dimension1, qmd.cta_thread_dimension2 = local_size
 
-  def submit(self, dev:NVDevice):
-    if len(self.q) == 0: return
-    dev.compute_put_value = self._submit(dev, dev.compute_gpu_ring, dev.compute_put_value, dev.compute_gpfifo_entries,
-                                         dev.compute_gpfifo_token, dev.compute_gpu_ring_controls)
+  def submit(self, dev:NVDevice): dev.compute_put_value = self._submit(dev, dev.compute_gpu_ring, dev.compute_put_value, dev.compute_gpfifo_entries,
+                                                                       dev.compute_gpfifo_token, dev.compute_gpu_ring_controls)
 
 class HWCopyQueue(HWQueue):
   def copy(self, dest, src, copy_size):
@@ -192,10 +197,8 @@ class HWCopyQueue(HWQueue):
     self.next_cmd_index += 1
     return self
 
-  def submit(self, dev:NVDevice):
-    if len(self.q) == 0: return
-    dev.dma_put_value = self._submit(dev, dev.dma_gpu_ring, dev.dma_put_value, dev.dma_gpfifo_entries,
-                                     dev.dma_gpfifo_token, dev.dma_gpu_ring_controls)
+  def submit(self, dev:NVDevice): dev.dma_put_value = self._submit(dev, dev.dma_gpu_ring, dev.dma_put_value, dev.dma_gpfifo_entries,
+                                                                   dev.dma_gpfifo_token, dev.dma_gpu_ring_controls)
 
 SHT_PROGBITS, SHT_NOBITS, SHF_ALLOC, SHF_EXECINSTR = 0x1, 0x8, 0x2, 0x4
 class NVProgram: