qcom a bit cleaner (#7380)

This commit is contained in:
nimlgen
2024-10-29 23:50:28 +03:00
committed by GitHub
parent 07ad6d20ed
commit 4ed2c40d48

View File

@@ -13,6 +13,8 @@ if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # p
BUFTYPE_BUF, BUFTYPE_TEX, BUFTYPE_IBO = 0, 1, 2
#Parse C-style defines: <regname>_<field_x>__SHIFT and <regname>_<field_y>__MASK from the adreno module into the following format:
# qreg.<regname>(<field_x>=..., <field_y>=..., ..., <field_n>=...)
def _qreg_exec(reg, __val=0, **kwargs):
for k, v in kwargs.items():
__val |= (getattr(adreno, f'{reg[4:]}_{k.upper()}') if v else 0) if type(v) is bool else (v << getattr(adreno, f'{reg[4:]}_{k.upper()}__SHIFT'))
@@ -21,13 +23,13 @@ qreg: Any = type("QREG", (object,), {name[4:].lower(): functools.partial(_qreg_e
def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length()
def prt(val: int):
def parity(val: int):
for i in range(4,1,-1): val ^= val >> (1 << i)
return (~0x6996 >> (val & 0xf)) & 1
def pkt7_hdr(opcode: int, cnt: int): return adreno.CP_TYPE7_PKT | cnt & 0x3FFF | prt(cnt) << 15 | (opcode & 0x7F) << 16 | prt(opcode) << 23
def pkt7_hdr(opcode: int, cnt: int): return adreno.CP_TYPE7_PKT | cnt & 0x3FFF | parity(cnt) << 15 | (opcode & 0x7F) << 16 | parity(opcode) << 23
def pkt4_hdr(reg: int, cnt: int): return adreno.CP_TYPE4_PKT | cnt & 0x7F | prt(cnt) << 7 | (reg & 0x3FFFF) << 8 | prt(reg) << 27
def pkt4_hdr(reg: int, cnt: int): return adreno.CP_TYPE4_PKT | cnt & 0x7F | parity(cnt) << 7 | (reg & 0x3FFFF) << 8 | parity(reg) << 27
class QCOMCompiler(CLCompiler):
def __init__(self, device:str=""): super().__init__(CLDevice(device), 'compile_qcom')
@@ -183,12 +185,12 @@ class QCOMComputeQueue(HWComputeQueue):
class QCOMArgsState(HCQArgsState):
def __init__(self, ptr:int, prg:QCOMProgram, bufs:Tuple[HCQBuffer, ...], vals:Tuple[int, ...]=()):
super().__init__(ptr, prg, bufs, vals=vals)
ctypes.memset(self.ptr, 0, prg.kernargs_alloc_size)
if len(bufs) + len(vals) != len(prg.buf_info): raise RuntimeError(f'incorrect args size given={len(bufs)+len(vals)} != want={len(prg.buf_info)}')
self.buf_info, self.args_info, self.args_view = prg.buf_info[:len(bufs)], prg.buf_info[len(bufs):], to_mv(ptr, prg.kernargs_alloc_size).cast('Q')
ctypes.memset(self.ptr, 0, prg.kernargs_alloc_size)
for cnst_val, cnst_off, cnst_sz in prg.consts_info: to_mv(self.ptr + cnst_off, cnst_sz)[:] = cnst_val.to_bytes(cnst_sz, byteorder='little')
if prg.samp_cnt > 0: to_mv(self.ptr + prg.samp_off, len(prg.samplers) * 4).cast('I')[:] = array.array('I', prg.samplers)