mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
hcq: default timestamp metrics is ns (#11295)
This commit is contained in:
@@ -71,10 +71,7 @@ class QMD:
|
||||
if self.ver < 4: self.write(**{f'constant_buffer_addr_upper_{i}':hi32(addr), f'constant_buffer_addr_lower_{i}':lo32(addr)})
|
||||
else: self.write(**{f'constant_buffer_addr_upper_shifted6_{i}':hi32(addr >> 6), f'constant_buffer_addr_lower_shifted6_{i}':lo32(addr >> 6)})
|
||||
|
||||
class NVSignal(HCQSignal):
|
||||
def __init__(self, *args, **kwargs): super().__init__(*args, **{**kwargs, 'timestamp_divider': 1000})
|
||||
|
||||
class NVCommandQueue(HWQueue[NVSignal, 'NVDevice', 'NVProgram', 'NVArgsState']):
|
||||
class NVCommandQueue(HWQueue[HCQSignal, 'NVDevice', 'NVProgram', 'NVArgsState']):
|
||||
def __init__(self):
|
||||
self.active_qmd = None
|
||||
super().__init__()
|
||||
@@ -93,12 +90,12 @@ class NVCommandQueue(HWQueue[NVSignal, 'NVDevice', 'NVProgram', 'NVArgsState']):
|
||||
if local_mem_tpc_bytes: self.nvm(1, nv_gpu.NVC6C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A, *data64(local_mem_tpc_bytes), 0xff)
|
||||
return self
|
||||
|
||||
def wait(self, signal:NVSignal, value:sint=0):
|
||||
def wait(self, signal:HCQSignal, value:sint=0):
|
||||
self.nvm(0, nv_gpu.NVC56F_SEM_ADDR_LO, *data64_le(signal.value_addr), *data64_le(value), (3 << 0) | (1 << 24)) # ACQUIRE | PAYLOAD_SIZE_64BIT
|
||||
self.active_qmd = None
|
||||
return self
|
||||
|
||||
def timestamp(self, signal:NVSignal): return self.signal(signal, 0)
|
||||
def timestamp(self, signal:HCQSignal): return self.signal(signal, 0)
|
||||
|
||||
def bind(self, dev:NVDevice):
|
||||
self.binded_device = dev
|
||||
@@ -152,7 +149,7 @@ class NVComputeQueue(NVCommandQueue):
|
||||
self.active_qmd, self.active_qmd_buf = qmd, qmd_buf
|
||||
return self
|
||||
|
||||
def signal(self, signal:NVSignal, value:sint=0):
|
||||
def signal(self, signal:HCQSignal, value:sint=0):
|
||||
if self.active_qmd is not None:
|
||||
for i in range(2):
|
||||
if self.active_qmd.read(f'release{i}_enable') == 0:
|
||||
@@ -179,7 +176,7 @@ class NVCopyQueue(NVCommandQueue):
|
||||
self.nvm(4, nv_gpu.NVC6B5_LAUNCH_DMA, 0x182) # TRANSFER_TYPE_NON_PIPELINED | DST_MEMORY_LAYOUT_PITCH | SRC_MEMORY_LAYOUT_PITCH
|
||||
return self
|
||||
|
||||
def signal(self, signal:NVSignal, value:sint=0):
|
||||
def signal(self, signal:HCQSignal, value:sint=0):
|
||||
self.nvm(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, *data64(signal.value_addr), value)
|
||||
self.nvm(4, nv_gpu.NVC6B5_LAUNCH_DMA, 0x14)
|
||||
return self
|
||||
@@ -479,7 +476,7 @@ class PCIIface(PCIIfaceBase):
|
||||
|
||||
def device_fini(self): self.dev_impl.fini()
|
||||
|
||||
class NVDevice(HCQCompiled[NVSignal]):
|
||||
class NVDevice(HCQCompiled[HCQSignal]):
|
||||
def is_nvd(self) -> bool: return isinstance(self.iface, PCIIface)
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
@@ -527,7 +524,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
|
||||
compiler_t = (PTXCompiler if PTX else CUDACompiler) if MOCKGPU else (NVPTXCompiler if PTX else NVCompiler)
|
||||
super().__init__(device, NVAllocator(self), PTXRenderer(self.arch, device="NV") if PTX else NVRenderer(self.arch), compiler_t(self.arch),
|
||||
functools.partial(NVProgram, self), NVSignal, NVComputeQueue, NVCopyQueue)
|
||||
functools.partial(NVProgram, self), HCQSignal, NVComputeQueue, NVCopyQueue)
|
||||
|
||||
self._setup_gpfifos()
|
||||
|
||||
|
||||
@@ -217,7 +217,7 @@ class HWQueue(Generic[SignalType, HCQDeviceType, ProgramType, ArgsStateType]):
|
||||
def _submit(self, dev:HCQDeviceType): raise NotImplementedError("need _submit")
|
||||
|
||||
class HCQSignal(Generic[HCQDeviceType]):
|
||||
def __init__(self, base_buf:HCQBuffer, value:int=0, owner:HCQDeviceType|None=None, is_timeline:bool=False, timestamp_divider=1):
|
||||
def __init__(self, base_buf:HCQBuffer, value:int=0, owner:HCQDeviceType|None=None, is_timeline:bool=False, timestamp_divider=1000):
|
||||
self.base_buf, self.value_addr, self.timestamp_addr, self.owner = base_buf, base_buf.va_addr+0, base_buf.va_addr+8, owner
|
||||
self.is_timeline = is_timeline
|
||||
self.timestamp_divider:decimal.Decimal = decimal.Decimal(timestamp_divider)
|
||||
|
||||
Reference in New Issue
Block a user