mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
qcom fix binded queue might be overwritten (#6712)
This commit is contained in:
@@ -44,6 +44,9 @@ class QCOMComputeQueue(HWComputeQueue):
|
||||
self.cmd_idx_to_dims = {}
|
||||
super().__init__()
|
||||
|
||||
def __del__(self):
|
||||
if self.binded_device is not None: self.binded_device.allocator.free(self.hw_page, self.hw_page.size, BufferOptions(cpu_access=True, nolru=True))
|
||||
|
||||
def cmd(self, opcode: int, *vals: int): self.q += [pkt7_hdr(opcode, len(vals)), *vals]
|
||||
|
||||
def reg(self, reg: int, *vals: int): self.q += [pkt4_hdr(reg, len(vals)), *vals]
|
||||
@@ -81,8 +84,8 @@ class QCOMComputeQueue(HWComputeQueue):
|
||||
if signal is not None: self._patch(cmd_idx, offset=2, data=data64_le(mv_address(signal._signal)))
|
||||
if value is not None: self._patch(cmd_idx, offset=4, data=[value & 0xFFFFFFFF])
|
||||
|
||||
def _build_gpu_command(self, device):
|
||||
to_mv((hw_page_addr:=device._alloc_cmd_buf(len(self.q) * 4)), len(self.q) * 4).cast('I')[:] = array.array('I', self.q)
|
||||
def _build_gpu_command(self, device, hw_addr=None):
|
||||
to_mv((hw_page_addr:=hw_addr or device._alloc_cmd_buf(len(self.q) * 4)), len(self.q) * 4).cast('I')[:] = array.array('I', self.q)
|
||||
obj = kgsl.struct_kgsl_command_object(gpuaddr=hw_page_addr, size=len(self.q) * 4, flags=kgsl.KGSL_CMDLIST_IB)
|
||||
submit_req = kgsl.struct_kgsl_gpu_command(cmdlist=ctypes.addressof(obj), numcmds=1, context_id=device.ctx,
|
||||
cmdsize=ctypes.sizeof(kgsl.struct_kgsl_command_object))
|
||||
@@ -90,7 +93,8 @@ class QCOMComputeQueue(HWComputeQueue):
|
||||
|
||||
def bind(self, device):
|
||||
self.binded_device = device
|
||||
self.submit_req, self.obj = self._build_gpu_command(self.binded_device)
|
||||
self.hw_page = device.allocator.alloc(len(self.q) * 4, BufferOptions(cpu_access=True, nolru=True))
|
||||
self.submit_req, self.obj = self._build_gpu_command(self.binded_device, self.hw_page.va_addr)
|
||||
# From now on, the queue is on the device for faster submission.
|
||||
self.q = to_mv(self.obj.gpuaddr, len(self.q) * 4).cast("I") # type: ignore
|
||||
|
||||
@@ -346,8 +350,7 @@ class QCOMDevice(HCQCompiled):
|
||||
super().__init__(device, QCOMAllocator(self), QCOMRenderer(), QCOMCompiler(device), functools.partial(QCOMProgram, self),
|
||||
QCOMSignal, QCOMComputeQueue, None, timeline_signals=(QCOMSignal(), QCOMSignal()))
|
||||
|
||||
QCOMComputeQueue().setup().signal(self.timeline_signal, self.timeline_value).submit(self)
|
||||
self.timeline_value += 1
|
||||
QCOMComputeQueue().setup().submit(self)
|
||||
|
||||
def _ctx_create(self):
|
||||
cr = kgsl.IOCTL_KGSL_DRAWCTXT_CREATE(self.fd, flags=(kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT |
|
||||
|
||||
Reference in New Issue
Block a user