qcom cleanup allocs (#7200)

* qcom cleanup allocs

* oops
This commit is contained in:
nimlgen
2024-10-21 23:20:15 +03:00
committed by GitHub
parent f37e6b453b
commit 21acfc39d4

View File

@@ -299,7 +299,7 @@ class QCOMAllocator(HCQAllocator):
pitch = round_up((real_stride:=imgw * 4 * options.image.itemsize), 1 << pitchalign) + pitch_add
if options.external_ptr: texture = QCOMBuffer(options.external_ptr, size)
else: texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE, map_to_cpu=True)
else: texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE)
texture.pitch, texture.real_stride = pitch, real_stride
@@ -312,7 +312,7 @@ class QCOMAllocator(HCQAllocator):
return texture
return QCOMBuffer(options.external_ptr, size) if options.external_ptr else self.device._gpu_alloc(size, map_to_cpu=True)
return QCOMBuffer(options.external_ptr, size) if options.external_ptr else self.device._gpu_alloc(size)
def _do_copy(self, src_addr, dest_addr, src_size, real_size, src_stride, dest_stride, dest_off=0, src_off=0):
while src_off < src_size:
@@ -336,7 +336,6 @@ class QCOMAllocator(HCQAllocator):
self.device.synchronize()
self.device._gpu_free(opaque)
MAP_FIXED = 0x10
class QCOMDevice(HCQCompiled):
signals_page: Any = None
signals_pool: List[Any] = []
@@ -345,10 +344,10 @@ class QCOMDevice(HCQCompiled):
def __init__(self, device:str=""):
self.fd = os.open('/dev/kgsl-3d0', os.O_RDWR)
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000, map_to_cpu=False).va_addr
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, map_to_cpu=True, uncached=True)
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000).va_addr
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, uncached=True)
QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)]
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(16 << 20, map_to_cpu=True), 0,0
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(16 << 20), 0,0
QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF)
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
@@ -370,29 +369,26 @@ class QCOMDevice(HCQCompiled):
kgsl.IOCTL_KGSL_DEVICE_GETPROPERTY(self.fd, type=kgsl.KGSL_PROP_DEVICE_INFO, value=ctypes.addressof(info), sizebytes=ctypes.sizeof(info))
return info
def _gpu_alloc(self, size:int, flags:int=0, map_to_cpu=False, uncached=False, fill_zeroes=False):
def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False):
flags |= kgsl.KGSL_MEMALIGN(alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP
if uncached: flags |= kgsl.KGSL_CACHEMODE(kgsl.KGSL_CACHEMODE_UNCACHED)
alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=round_up(size, 1<<(alignment_hint:=12)), flags=flags | kgsl.KGSL_MEMALIGN(alignment_hint))
info = kgsl.IOCTL_KGSL_GPUOBJ_INFO(self.fd, id=alloc.id)
va_addr, va_len = info.gpuaddr, info.va_len
alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=(bosz:=round_up(size, 1<<alignment_hint)), flags=flags, mmapsize=bosz)
va_addr = libc.mmap(0, bosz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED, self.fd, alloc.id * 0x1000)
if map_to_cpu:
va_addr = libc.mmap(va_addr, va_len, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, self.fd, alloc.id * 0x1000)
if fill_zeroes: ctypes.memset(va_addr, 0, va_len)
return QCOMBuffer(va_addr=va_addr, size=size, mapped=map_to_cpu, info=alloc)
if fill_zeroes: ctypes.memset(va_addr, 0, size)
return QCOMBuffer(va_addr=va_addr, size=size, info=alloc)
def _gpu_free(self, mem):
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.info.id)
if mem.mapped: libc.munmap(mem.va_addr, mem.info.va_len)
libc.munmap(mem.va_addr, mem.info.mmapsize)
def _alloc_cmd_buf(self, sz: int):
self.cmd_buf_ptr = (cur_ptr:=self.cmd_buf_ptr if self.cmd_buf_ptr + sz < self.cmd_buf.size else 0) + sz
return self.cmd_buf.va_addr + cur_ptr
def _border_color_base(self):
if not hasattr(self, '_border_color_gpu'): self._border_color_gpu = self._gpu_alloc(0x1000, map_to_cpu=True, fill_zeroes=True)
if not hasattr(self, '_border_color_gpu'): self._border_color_gpu = self._gpu_alloc(0x1000, fill_zeroes=True)
return self._border_color_gpu.va_addr
def _ensure_stack_size(self, sz):