hcq move out synchronize to base class (#5634)

This commit is contained in:
nimlgen
2024-07-22 20:36:04 +03:00
committed by GitHub
parent 26fc4610a0
commit ee633c1988
3 changed files with 6 additions and 15 deletions

View File

@@ -443,6 +443,12 @@ class HCQCompiled(Compiled):
self.kernargs_page:HCQBuffer = self.allocator.alloc(16 << 20, BufferOptions(cpu_access=True))
self.kernargs_ptr:int = self.kernargs_page.va_addr
# Waits on this device's timeline signal, then does post-wait housekeeping (timeline wrap check, profiling).
def synchronize(self):
# Wait for the value one below the current counter; submit sites shown in this diff signal
# `timeline_value` and then increment it, so `timeline_value - 1` is the last value queued.
self.timeline_signal.wait(self.timeline_value - 1)
# Once the counter exceeds 2**31, delegate to _wrap_timeline_signal()
# (helper not shown here; presumably it resets the timeline -- TODO confirm).
if self.timeline_value > (1 << 31): self._wrap_timeline_signal()
# When PROFILE is truthy, process profiling events after the wait above has returned.
if PROFILE: self._prof_process_events()
def _gpu2cpu_time(self, gpu_time:float, is_copy:bool) -> float:
"""
Translates local gpu time (timestamp) into global cpu time.

View File

@@ -334,7 +334,6 @@ class AMDProgram(HCQProgram):
q.exec(self, kernargs_ptr, global_size, local_size)
q.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
self.device.timeline_value += 1
if wait:
@@ -471,12 +470,6 @@ class AMDDevice(HCQCompiled):
read_ptr=to_mv(queue.read_pointer_address, 8).cast("Q"), write_ptr=to_mv(queue.write_pointer_address, 8).cast("Q"),
doorbell=to_mv(self.doorbells + queue.doorbell_offset - self.doorbells_base, 8).cast("Q"))
# Waits on this device's timeline signal, then does post-wait housekeeping (timeline wrap check, profiling).
def synchronize(self):
# Wait for the value one below the current counter; the submit sites nearby signal
# `timeline_value` and then increment it, so `timeline_value - 1` is the last value queued.
self.timeline_signal.wait(self.timeline_value - 1)
# Once the counter exceeds 2**31, delegate to _wrap_timeline_signal()
# (helper not shown here; presumably it resets the timeline -- TODO confirm).
if self.timeline_value > (1 << 31): self._wrap_timeline_signal()
# When PROFILE is truthy, process profiling events after the wait above has returned.
if PROFILE: self._prof_process_events()
def invalidate_cache(self):
AMDComputeQueue().memory_barrier().signal(self.timeline_signal, self.timeline_value).submit(self)
self.timeline_value += 1

View File

@@ -314,7 +314,6 @@ class NVProgram(HCQProgram):
q.exec(self, kernargs_ptr, global_size, local_size)
q.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
self.device.timeline_value += 1
if wait:
@@ -526,13 +525,6 @@ class NVDevice(HCQCompiled):
self._setup_gpfifos()
NVDevice.devices.append(self)
# Waits on this device's timeline signal, resets the command-queue write pointer,
# then does post-wait housekeeping (timeline wrap check, profiling).
def synchronize(self):
# Wait for the value one below the current counter; the submit sites nearby signal
# `timeline_value` and then increment it, so `timeline_value - 1` is the last value queued.
self.timeline_signal.wait(self.timeline_value - 1)
# Reset the write pointer to 0 after the wait has returned
# (NOTE(review): presumably safe because queued commands have completed -- confirm).
self.cmdq_wptr = 0
# Once the counter exceeds 2**31, delegate to _wrap_timeline_signal()
# (helper not shown here; presumably it resets the timeline -- TODO confirm).
if self.timeline_value > (1 << 31): self._wrap_timeline_signal()
# When PROFILE is truthy, process profiling events after the wait above has returned.
if PROFILE: self._prof_process_events()
def _new_gpu_fifo(self, gpfifo_area, ctxshare, channel_group, offset=0, entries=0x400) -> GPFifo:
notifier = self._gpu_system_alloc(48 << 20)
params = nv_gpu.NV_CHANNELGPFIFO_ALLOCATION_PARAMETERS(hObjectError=notifier.hMemory, hObjectBuffer=gpfifo_area.hMemory,