diff --git a/tinygrad/runtime/ops_nv.py b/tinygrad/runtime/ops_nv.py index ae80ed6ebb..017444c1fc 100644 --- a/tinygrad/runtime/ops_nv.py +++ b/tinygrad/runtime/ops_nv.py @@ -243,6 +243,8 @@ class NVProgram: self.lib_sz = round_up(round_up(self.program.nbytes, 128) + round_up(0 if self.global_init is None else self.global_init.nbytes, 128) + sum([round_up(x.nbytes, 128) for i,x in constant_buffers_data.items()]), 0x1000) self.lib_gpu = self.device.allocator.alloc(self.lib_sz) + + HWComputeQueue().wait(self.device.timeline_signal, self.device.timeline_value - 1).submit(self.device) for st in range(0, len(self.program), 4095): HWComputeQueue().copy_from_cpu(self.lib_gpu.base+st*4, self.program[st:st+4095]).submit(self.device)