diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index 31d30a9942..aab8fa7ade 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -13,6 +13,8 @@ if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # p BUFTYPE_BUF, BUFTYPE_TEX, BUFTYPE_IBO = 0, 1, 2 +#Parse C-style defines: <regname>_<field_x>__SHIFT and <regname>_<field_y>__MASK from the adreno module into the following format: +# qreg.<regname>(<field_x>=..., <field_y>=..., ..., <field_n>=...) def _qreg_exec(reg, __val=0, **kwargs): for k, v in kwargs.items(): __val |= (getattr(adreno, f'{reg[4:]}_{k.upper()}') if v else 0) if type(v) is bool else (v << getattr(adreno, f'{reg[4:]}_{k.upper()}__SHIFT')) @@ -21,13 +23,13 @@ qreg: Any = type("QREG", (object,), {name[4:].lower(): functools.partial(_qreg_e def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length() -def prt(val: int): +def parity(val: int): for i in range(4,1,-1): val ^= val >> (1 << i) return (~0x6996 >> (val & 0xf)) & 1 -def pkt7_hdr(opcode: int, cnt: int): return adreno.CP_TYPE7_PKT | cnt & 0x3FFF | prt(cnt) << 15 | (opcode & 0x7F) << 16 | prt(opcode) << 23 +def pkt7_hdr(opcode: int, cnt: int): return adreno.CP_TYPE7_PKT | cnt & 0x3FFF | parity(cnt) << 15 | (opcode & 0x7F) << 16 | parity(opcode) << 23 -def pkt4_hdr(reg: int, cnt: int): return adreno.CP_TYPE4_PKT | cnt & 0x7F | prt(cnt) << 7 | (reg & 0x3FFFF) << 8 | prt(reg) << 27 +def pkt4_hdr(reg: int, cnt: int): return adreno.CP_TYPE4_PKT | cnt & 0x7F | parity(cnt) << 7 | (reg & 0x3FFFF) << 8 | parity(reg) << 27 class QCOMCompiler(CLCompiler): def __init__(self, device:str=""): super().__init__(CLDevice(device), 'compile_qcom') @@ -183,12 +185,12 @@ class QCOMComputeQueue(HWComputeQueue): class QCOMArgsState(HCQArgsState): def __init__(self, ptr:int, prg:QCOMProgram, bufs:Tuple[HCQBuffer, ...], vals:Tuple[int, ...]=()): super().__init__(ptr, prg, bufs, vals=vals) - ctypes.memset(self.ptr, 0, prg.kernargs_alloc_size) if len(bufs) + len(vals) != len(prg.buf_info): raise 
RuntimeError(f'incorrect args size given={len(bufs)+len(vals)} != want={len(prg.buf_info)}') self.buf_info, self.args_info, self.args_view = prg.buf_info[:len(bufs)], prg.buf_info[len(bufs):], to_mv(ptr, prg.kernargs_alloc_size).cast('Q') + ctypes.memset(self.ptr, 0, prg.kernargs_alloc_size) for cnst_val, cnst_off, cnst_sz in prg.consts_info: to_mv(self.ptr + cnst_off, cnst_sz)[:] = cnst_val.to_bytes(cnst_sz, byteorder='little') if prg.samp_cnt > 0: to_mv(self.ptr + prg.samp_off, len(prg.samplers) * 4).cast('I')[:] = array.array('I', prg.samplers)