Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-02-16 01:26:29 -05:00).
hcq signal tiny refactor (#7913)
* hcq signal tiny refactor
* no mv
* fix
* fix2
* fix3
This commit is contained in:
@@ -32,27 +32,24 @@ def nbioreg(reg): return reg + 0x00000d20 # NBIO_BASE__INST0_SEG2
|
||||
|
||||
class AMDSignal(HCQSignal):
|
||||
def __init__(self, value=0, is_timeline=False):
|
||||
self._signal = AMDDevice.signals_pool.pop()
|
||||
self._value_addr, self._timestamp_addr = mv_address(self._signal), mv_address(self._signal) + 8
|
||||
super().__init__(AMDDevice.signals_pool.pop(), value, is_timeline, decimal.Decimal(100), value_off=0, timestamp_off=8)
|
||||
|
||||
if is_timeline:
|
||||
self._event = kfd.AMDKFD_IOC_CREATE_EVENT(AMDDevice.kfd, auto_reset=1)
|
||||
self._event_mailbox_ptr = AMDDevice.event_page.va_addr + self._event.event_slot_index*8
|
||||
self._evt_array = (kfd.struct_kfd_event_data)(event_id=self._event.event_id)
|
||||
else: self._event_mailbox_ptr = 0
|
||||
super().__init__(value)
|
||||
def __del__(self): AMDDevice.signals_pool.append(self._signal)
|
||||
def _get_value(self) -> int: return self._signal[0]
|
||||
def _get_timestamp(self) -> decimal.Decimal: return decimal.Decimal(self._signal[1]) / decimal.Decimal(100)
|
||||
def _set_value(self, new_value:int): self._signal[0] = new_value
|
||||
|
||||
def __del__(self): AMDDevice.signals_pool.append(self.base_addr)
|
||||
|
||||
def wait(self, value:int, timeout:int=getenv("HCQDEV_WAIT_TIMEOUT_MS", 30000)):
|
||||
start_time = time.time() * 1000
|
||||
while (time_spent:=time.time() * 1000 - start_time) < timeout:
|
||||
if self._signal[0] >= value: return
|
||||
if self.value >= value: return
|
||||
|
||||
# Poll actively for the first 5s; after that, block in AMDKFD_IOC_WAIT_EVENTS between checks.
|
||||
if time_spent > 5000 and self._event_mailbox_ptr != 0:
|
||||
if time_spent > 5000 and self.is_timeline:
|
||||
kfd.AMDKFD_IOC_WAIT_EVENTS(AMDDevice.kfd, events_ptr=ctypes.addressof(self._evt_array), num_events=1, wait_for_all=1, timeout=1000)
|
||||
raise RuntimeError(f"wait_signal: not set to {value}, but {self._signal[0]}, {timeout} ms TIMEOUT!")
|
||||
raise RuntimeError(f"wait_signal: not set to {value}, but {self.value}, {timeout} ms TIMEOUT!")
|
||||
|
||||
class AMDComputeQueue(HWQueue):
|
||||
def __init__(self):
|
||||
@@ -140,24 +137,24 @@ class AMDComputeQueue(HWQueue):
|
||||
def _wait(self, signal:AMDSignal, value=0):
|
||||
self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_WAIT_REG_MEM, 5),
|
||||
amd_gpu.WAIT_REG_MEM_MEM_SPACE(1) | amd_gpu.WAIT_REG_MEM_OPERATION(0) | amd_gpu.WAIT_REG_MEM_FUNCTION(WAIT_REG_MEM_FUNCTION_GEQ) | \
|
||||
amd_gpu.WAIT_REG_MEM_ENGINE(0), *data64_le(signal._value_addr), value, 0xffffffff, 4]
|
||||
amd_gpu.WAIT_REG_MEM_ENGINE(0), *data64_le(signal.value_addr), value, 0xffffffff, 4]
|
||||
|
||||
def _timestamp(self, signal:AMDSignal):
|
||||
self._release_mem(CACHE_FLUSH_AND_INV_TS_EVENT, mem_data_sel=3, mem_int_sel=0, address=signal._timestamp_addr)
|
||||
self._release_mem(CACHE_FLUSH_AND_INV_TS_EVENT, mem_data_sel=3, mem_int_sel=0, address=signal.timestamp_addr)
|
||||
|
||||
def _signal(self, signal:AMDSignal, value=0):
|
||||
# NOTE: this needs an EOP buffer on the queue or it will NULL pointer
|
||||
self._release_mem(CACHE_FLUSH_AND_INV_TS_EVENT, mem_data_sel=1, mem_int_sel=2, address=signal._value_addr, value=value, cache_flush=True)
|
||||
if signal._event_mailbox_ptr != 0:
|
||||
self._release_mem(CACHE_FLUSH_AND_INV_TS_EVENT, mem_data_sel=1, mem_int_sel=2, address=signal.value_addr, value=value, cache_flush=True)
|
||||
if signal.is_timeline:
|
||||
self._release_mem(CACHE_FLUSH_AND_INV_TS_EVENT, mem_data_sel=1, mem_int_sel=2, address=signal._event_mailbox_ptr,
|
||||
value=signal._event.event_id, cst=signal._event.event_id, cache_flush=False)
|
||||
|
||||
def _update_wait(self, cmd_idx, signal:Optional[AMDSignal]=None, value=None):
|
||||
if signal is not None: self._patch(cmd_idx, offset=2, data=data64_le(signal._value_addr))
|
||||
if signal is not None: self._patch(cmd_idx, offset=2, data=data64_le(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=4, data=[value])
|
||||
|
||||
def _update_signal(self, cmd_idx, signal:Optional[AMDSignal]=None, value=None):
|
||||
if signal is not None: self._patch(cmd_idx, offset=3, data=data64_le(signal._value_addr))
|
||||
if signal is not None: self._patch(cmd_idx, offset=3, data=data64_le(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=5, data=data64_le(value))
|
||||
|
||||
# Check if the signal command has mailptr part
|
||||
@@ -210,27 +207,27 @@ class AMDCopyQueue(HWQueue):
|
||||
if dest is not None: self._patch(cmd_idx, offset=5+i*7, data=[*data64_le(dest + SDMA_MAX_COPY_SIZE*i)])
|
||||
|
||||
def _signal(self, signal:AMDSignal, value=0):
|
||||
self._q([amd_gpu.SDMA_OP_FENCE | amd_gpu.SDMA_PKT_FENCE_HEADER_MTYPE(3), *data64_le(signal._value_addr), value])
|
||||
self._q([amd_gpu.SDMA_OP_FENCE | amd_gpu.SDMA_PKT_FENCE_HEADER_MTYPE(3), *data64_le(signal.value_addr), value])
|
||||
|
||||
if signal._event_mailbox_ptr != 0:
|
||||
if signal.is_timeline:
|
||||
self._q([amd_gpu.SDMA_OP_FENCE | amd_gpu.SDMA_PKT_FENCE_HEADER_MTYPE(3), *data64_le(signal._event_mailbox_ptr), signal._event.event_id])
|
||||
self._q([amd_gpu.SDMA_OP_TRAP, amd_gpu.SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(signal._event.event_id)])
|
||||
|
||||
def _wait(self, signal:AMDSignal, value=0):
|
||||
self._q([amd_gpu.SDMA_OP_POLL_REGMEM | amd_gpu.SDMA_PKT_POLL_REGMEM_HEADER_FUNC(WAIT_REG_MEM_FUNCTION_GEQ) | \
|
||||
amd_gpu.SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1), *data64_le(signal._value_addr), value, 0xffffffff,
|
||||
amd_gpu.SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1), *data64_le(signal.value_addr), value, 0xffffffff,
|
||||
amd_gpu.SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(0x04) | amd_gpu.SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff)])
|
||||
|
||||
def _update_signal(self, cmd_idx, signal:Optional[AMDSignal]=None, value=None):
|
||||
return self._update_wait(cmd_idx, signal, value) # the same offsets and commands
|
||||
|
||||
def _update_wait(self, cmd_idx, signal:Optional[AMDSignal]=None, value=None):
|
||||
if signal is not None: self._patch(cmd_idx, offset=1, data=data64_le(signal._value_addr))
|
||||
if signal is not None: self._patch(cmd_idx, offset=1, data=data64_le(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=3, data=[value])
|
||||
|
||||
def _timestamp(self, signal:AMDSignal):
|
||||
self._q([amd_gpu.SDMA_OP_TIMESTAMP | amd_gpu.SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(amd_gpu.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL),
|
||||
*data64_le(signal._timestamp_addr)])
|
||||
*data64_le(signal.timestamp_addr)])
|
||||
|
||||
def _submit(self, dev:AMDDevice):
|
||||
if dev.sdma_queue.put_value - dev.sdma_queue.read_ptr[0] > dev.sdma_queue.ring.nbytes: raise RuntimeError("SDMA queue overrun")
|
||||
@@ -333,7 +330,7 @@ class AMDDevice(HCQCompiled):
|
||||
kfd:int = -1
|
||||
event_page:Any = None # TODO: fix types in kfd, Optional[kfd.struct_kfd_ioctl_alloc_memory_of_gpu_args]
|
||||
signals_page:Any = None
|
||||
signals_pool:List[memoryview] = []
|
||||
signals_pool:List[int] = []
|
||||
gpus:List[pathlib.Path] = []
|
||||
|
||||
def _gpu_map(self, mem):
|
||||
@@ -399,7 +396,7 @@ class AMDDevice(HCQCompiled):
|
||||
if AMDDevice.event_page is None:
|
||||
AMDDevice.signals_page = self._gpu_alloc(16 * 65536, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
|
||||
AMDDevice.event_page = self._gpu_alloc(0x8000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
|
||||
AMDDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, AMDDevice.signals_page.size, 16)]
|
||||
AMDDevice.signals_pool = [self.signals_page.va_addr + off for off in range(0, AMDDevice.signals_page.size, 16)]
|
||||
kfd.AMDKFD_IOC_CREATE_EVENT(AMDDevice.kfd, event_page_offset=AMDDevice.event_page.handle)
|
||||
else:
|
||||
self._gpu_map(AMDDevice.signals_page)
|
||||
|
||||
@@ -75,13 +75,8 @@ def nvmethod(subc, mthd, size, typ=2): return (typ << 28) | (size << 16) | (subc
|
||||
|
||||
class NVSignal(HCQSignal):
|
||||
def __init__(self, value=0, is_timeline=False):
|
||||
self._signal = NVDevice.signals_pool.pop()
|
||||
self.signal_addr = mv_address(self._signal)
|
||||
super().__init__(value)
|
||||
def __del__(self): NVDevice.signals_pool.append(self._signal)
|
||||
def _get_value(self) -> int: return self._signal[0]
|
||||
def _get_timestamp(self) -> decimal.Decimal: return decimal.Decimal(self._signal[1]) / decimal.Decimal(1000)
|
||||
def _set_value(self, new_value:int): self._signal[0] = new_value
|
||||
super().__init__(NVDevice.signals_pool.pop(), value, is_timeline, decimal.Decimal(1000), value_off=0, timestamp_off=8)
|
||||
def __del__(self): NVDevice.signals_pool.append(self.base_addr)
|
||||
|
||||
class NVCommandQueue(HWQueue[NVSignal, 'NVDevice', 'NVProgram', 'NVArgsState']):
|
||||
def __del__(self):
|
||||
@@ -97,11 +92,11 @@ class NVCommandQueue(HWQueue[NVSignal, 'NVDevice', 'NVProgram', 'NVArgsState']):
|
||||
if local_mem_tpc_bytes: self.q += [nvmethod(1, nv_gpu.NVC6C0_SET_SHADER_LOCAL_MEMORY_NON_THROTTLED_A, 3), *data64(local_mem_tpc_bytes), 0xff]
|
||||
|
||||
def _wait(self, signal:NVSignal, value=0):
|
||||
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *data64_le(signal.signal_addr), *data64_le(value),
|
||||
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *data64_le(signal.value_addr), *data64_le(value),
|
||||
(3 << 0) | (1 << 24)] # ACQUIRE | PAYLOAD_SIZE_64BIT
|
||||
|
||||
def _update_wait(self, cmd_idx, signal=None, value=None):
|
||||
if signal is not None: self.q[(sigoff:=self.cmds_offset[cmd_idx]+1):sigoff+2] = array.array('I', data64_le(signal.signal_addr))
|
||||
if signal is not None: self.q[(sigoff:=self.cmds_offset[cmd_idx]+1):sigoff+2] = array.array('I', data64_le(signal.value_addr))
|
||||
if value is not None: self.q[(valoff:=self.cmds_offset[cmd_idx]+3):valoff+2] = array.array('I', data64_le(value))
|
||||
|
||||
def _timestamp(self, signal): return self._signal(signal, 0)
|
||||
@@ -170,19 +165,19 @@ class NVComputeQueue(NVCommandQueue):
|
||||
for i in range(2):
|
||||
if getattr(prev_qmd, f'release{i}_enable') == 0:
|
||||
setattr(prev_qmd, f'release{i}_enable', 1)
|
||||
setattr(prev_qmd, f'release{i}_address', signal.signal_addr)
|
||||
setattr(prev_qmd, f'release{i}_address', signal.value_addr)
|
||||
setattr(prev_qmd, f'release{i}_payload', value)
|
||||
self.cmd_idx_to_qmd[self._cur_cmd_idx()] = prev_qmd
|
||||
self.cmd_idx_to_signal_id[self._cur_cmd_idx()] = i
|
||||
return
|
||||
|
||||
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *data64_le(signal.signal_addr), *data64_le(value),
|
||||
self.q += [nvmethod(0, nv_gpu.NVC56F_SEM_ADDR_LO, 5), *data64_le(signal.value_addr), *data64_le(value),
|
||||
(1 << 0) | (1 << 20) | (1 << 24) | (1 << 25)] # RELEASE | RELEASE_WFI | PAYLOAD_SIZE_64BIT | RELEASE_TIMESTAMP
|
||||
self.q += [nvmethod(0, nv_gpu.NVC56F_NON_STALL_INTERRUPT, 1), 0x0]
|
||||
|
||||
def _update_signal(self, cmd_idx, signal:Optional[NVSignal]=None, value=None):
|
||||
if (qmd:=self.cmd_idx_to_qmd.get(cmd_idx)) is None: return super()._update_wait(cmd_idx, signal, value) # reuse wait, same offsets to update.
|
||||
if signal is not None: setattr(qmd, f'release{self.cmd_idx_to_signal_id[cmd_idx]}_address', signal.signal_addr)
|
||||
if signal is not None: setattr(qmd, f'release{self.cmd_idx_to_signal_id[cmd_idx]}_address', signal.value_addr)
|
||||
if value is not None: setattr(qmd, f'release{self.cmd_idx_to_signal_id[cmd_idx]}_payload', value)
|
||||
|
||||
def _submit(self, dev): self._submit_to_gpfifo(dev, cast(NVDevice, dev).compute_gpfifo)
|
||||
@@ -198,11 +193,11 @@ class NVCopyQueue(NVCommandQueue):
|
||||
if src is not None: self._patch(cmd_idx, offset=1, data=data64(src))
|
||||
|
||||
def _signal(self, signal, value=0):
|
||||
self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 3), *data64(signal.signal_addr), value]
|
||||
self.q += [nvmethod(4, nv_gpu.NVC6B5_SET_SEMAPHORE_A, 3), *data64(signal.value_addr), value]
|
||||
self.q += [nvmethod(4, nv_gpu.NVC6B5_LAUNCH_DMA, 1), 0x14]
|
||||
|
||||
def _update_signal(self, cmd_idx, signal=None, value=None):
|
||||
if signal is not None: self._patch(cmd_idx, offset=1, data=data64(signal.signal_addr))
|
||||
if signal is not None: self._patch(cmd_idx, offset=1, data=data64(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=3, data=[value])
|
||||
|
||||
def _submit(self, dev): self._submit_to_gpfifo(dev, cast(NVDevice, dev).dma_gpfifo)
|
||||
@@ -316,7 +311,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
fd_uvm: int = -1
|
||||
gpus_info: Union[List, ctypes.Array] = []
|
||||
signals_page: Any = None
|
||||
signals_pool: List[Any] = []
|
||||
signals_pool: List[int] = []
|
||||
low_uvm_vaddr: int = 0x1000000000 # 0x1000000000 - 0x2000000000, reserved for system/cpu mappings
|
||||
uvm_vaddr: int = 0x2000000000 # 0x2000000000+
|
||||
host_object_enumerator: int = 0x1000
|
||||
@@ -467,7 +462,7 @@ class NVDevice(HCQCompiled[NVSignal]):
|
||||
|
||||
if NVDevice.signals_page is None:
|
||||
NVDevice.signals_page = self._gpu_system_alloc(16 * 65536, map_to_cpu=True)
|
||||
NVDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, NVDevice.signals_page.size, 16)]
|
||||
NVDevice.signals_pool = [self.signals_page.va_addr + off for off in range(0, NVDevice.signals_page.size, 16)]
|
||||
else: self._gpu_map(NVDevice.signals_page)
|
||||
|
||||
channel_params = nv_gpu.NV_CHANNEL_GROUP_ALLOCATION_PARAMETERS(engineType=nv_gpu.NV2080_ENGINE_TYPE_GRAPHICS)
|
||||
|
||||
@@ -36,13 +36,8 @@ class QCOMCompiler(CLCompiler):
|
||||
def disassemble(self, lib:bytes): fromimport('extra.disassemblers.adreno', 'disasm')(lib)
|
||||
|
||||
class QCOMSignal(HCQSignal):
|
||||
def __init__(self, value=0, is_timeline=False):
|
||||
self._signal = QCOMDevice.signals_pool.pop()
|
||||
super().__init__(value)
|
||||
def __del__(self): QCOMDevice.signals_pool.append(self._signal)
|
||||
def _get_value(self) -> int: return self._signal[0]
|
||||
def _get_timestamp(self) -> decimal.Decimal: return decimal.Decimal(self._signal[1]) / decimal.Decimal(19.2) # based on the 19.2MHz always-on timer
|
||||
def _set_value(self, new_value:int): self._signal[0] = new_value
|
||||
def __init__(self, value=0, is_timeline=False): super().__init__(QCOMDevice.signals_pool.pop(), value, is_timeline, decimal.Decimal(19.2))
|
||||
def __del__(self): QCOMDevice.signals_pool.append(self.base_addr)
|
||||
|
||||
class QCOMComputeQueue(HWQueue):
|
||||
def __init__(self):
|
||||
@@ -69,7 +64,7 @@ class QCOMComputeQueue(HWQueue):
|
||||
self.cmd(adreno.CP_WAIT_FOR_IDLE)
|
||||
if QCOMDevice.gpu_id < 700:
|
||||
self.cmd(adreno.CP_EVENT_WRITE, qreg.cp_event_write_0(event=adreno.CACHE_FLUSH_TS, timestamp=ts),
|
||||
*data64_le(mv_address(signal._signal) + (0 if not ts else 8)), qreg.cp_event_write_3(value & 0xFFFFFFFF))
|
||||
*data64_le(signal.timestamp_addr if ts else signal.value_addr), qreg.cp_event_write_3(value & 0xFFFFFFFF))
|
||||
self._cache_flush(write_back=True, invalidate=False, sync=False, memsync=False)
|
||||
else:
|
||||
# TODO: support devices starting with 8 Gen 1. Also, 700th series have convenient CP_GLOBAL_TIMESTAMP and CP_LOCAL_TIMESTAMP
|
||||
@@ -78,15 +73,15 @@ class QCOMComputeQueue(HWQueue):
|
||||
def _timestamp(self, signal:QCOMSignal): return self._signal(signal, 0, ts=True)
|
||||
|
||||
def _wait(self, signal:QCOMSignal, value=0):
|
||||
self.cmd(adreno.CP_WAIT_REG_MEM, qreg.cp_wait_reg_mem_0(function=adreno.WRITE_GE, poll=adreno.POLL_MEMORY),*data64_le(mv_address(signal._signal)),
|
||||
self.cmd(adreno.CP_WAIT_REG_MEM, qreg.cp_wait_reg_mem_0(function=adreno.WRITE_GE, poll=adreno.POLL_MEMORY),*data64_le(signal.value_addr),
|
||||
qreg.cp_wait_reg_mem_3(ref=value&0xFFFFFFFF), qreg.cp_wait_reg_mem_4(mask=0xFFFFFFFF), qreg.cp_wait_reg_mem_5(delay_loop_cycles=32))
|
||||
|
||||
def _update_signal(self, cmd_idx, signal:Optional[QCOMSignal], value):
|
||||
if signal is not None: self._patch(cmd_idx, offset=3, data=data64_le(mv_address(signal._signal)))
|
||||
if signal is not None: self._patch(cmd_idx, offset=3, data=data64_le(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=5, data=[value & 0xFFFFFFFF])
|
||||
|
||||
def _update_wait(self, cmd_idx, signal:Optional[QCOMSignal], value):
|
||||
if signal is not None: self._patch(cmd_idx, offset=2, data=data64_le(mv_address(signal._signal)))
|
||||
if signal is not None: self._patch(cmd_idx, offset=2, data=data64_le(signal.value_addr))
|
||||
if value is not None: self._patch(cmd_idx, offset=4, data=[value & 0xFFFFFFFF])
|
||||
|
||||
def _build_gpu_command(self, dev:QCOMDevice, hw_addr=None):
|
||||
@@ -343,7 +338,7 @@ class QCOMAllocator(HCQAllocator):
|
||||
|
||||
class QCOMDevice(HCQCompiled):
|
||||
signals_page: Any = None
|
||||
signals_pool: List[Any] = []
|
||||
signals_pool: List[int] = []
|
||||
gpu_id: int = 0
|
||||
dummy_addr: int = 0
|
||||
|
||||
@@ -351,7 +346,7 @@ class QCOMDevice(HCQCompiled):
|
||||
self.fd = os.open('/dev/kgsl-3d0', os.O_RDWR)
|
||||
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000).va_addr
|
||||
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, uncached=True)
|
||||
QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)]
|
||||
QCOMDevice.signals_pool = [self.signals_page.va_addr + off for off in range(0, self.signals_page.size, 16)]
|
||||
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(16 << 20), 0,0
|
||||
QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF)
|
||||
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
from typing import List, Optional, Dict, Tuple, cast, Protocol, Type, Union, TypeVar, Generic, Callable, ParamSpec, Concatenate
|
||||
import contextlib, decimal, statistics, random, json, atexit, time, array, ctypes, functools
|
||||
from tinygrad.helpers import PROFILEPATH, PROFILE, from_mv, getenv
|
||||
from tinygrad.helpers import PROFILEPATH, PROFILE, from_mv, getenv, to_mv
|
||||
from tinygrad.renderer import Renderer
|
||||
from tinygrad.device import BufferSpec, Compiler, Compiled, LRUAllocator
|
||||
|
||||
@@ -209,16 +209,16 @@ class HWQueue(Generic[SignalType, DeviceType, ProgramType, ArgsStateType]):
|
||||
raise NotImplementedError("backend should overload this function")
|
||||
|
||||
class HCQSignal:
|
||||
def __init__(self, value:int=0, is_timeline:bool=False): self._set_value(value)
|
||||
def __init__(self, base_addr:int, value:int=0, is_timeline:bool=False, timestamp_divider=decimal.Decimal(1), value_off=0, timestamp_off=8):
|
||||
self.base_addr, self.value_addr, self.timestamp_addr, self.ts_divider = base_addr, base_addr+value_off, base_addr+timestamp_off, timestamp_divider
|
||||
self.value_mv, self.timestamp_mv, self.is_timeline = to_mv(self.value_addr, 8).cast('Q'), to_mv(self.timestamp_addr, 8).cast('Q'), is_timeline
|
||||
self.value_mv[0] = value
|
||||
|
||||
@property
|
||||
def value(self) -> int: return self._get_value()
|
||||
def value(self) -> int: return self.value_mv[0]
|
||||
|
||||
@value.setter
|
||||
def value(self, new_value:int): self._set_value(new_value)
|
||||
|
||||
def _get_value(self) -> int: raise NotImplementedError("_get_value() method must be implemented")
|
||||
def _set_value(self, new_value:int): raise NotImplementedError("_set_value() method must be implemented")
|
||||
def value(self, new_value:int): self.value_mv[0] = new_value
|
||||
|
||||
@property
|
||||
def timestamp(self) -> decimal.Decimal:
|
||||
@@ -230,8 +230,7 @@ class HCQSignal:
|
||||
Returns:
|
||||
The timestamp in microseconds.
|
||||
"""
|
||||
return self._get_timestamp()
|
||||
def _get_timestamp(self) -> decimal.Decimal: raise NotImplementedError("_get_timestamp() method must be implemented")
|
||||
return self.timestamp_mv[0] / self.ts_divider
|
||||
|
||||
def wait(self, value:int, timeout:int=getenv("HCQDEV_WAIT_TIMEOUT_MS", 30000)):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user