qcom cleanups (#6351)

* qcom as_buffer

* revert asbuf
commit e8d1a37ecd (parent 99018a4aa1)
Author: nimlgen
Date: 2024-09-04 11:51:14 +03:00
Committed by: GitHub


@@ -290,17 +290,11 @@ class QCOMAllocator(HCQAllocator):
     texture.samplers, texture.descriptor, texture.ibo = [0] * 4, [0] * 16, [0] * 16
     texture.samplers[0:2] = [0x1b60, 0x30] # compiled sampler. always the same in tinygrad.
-    texture.descriptor[0] = adreno.A6XX_TEX_CONST_0_SWIZ_X(0) | adreno.A6XX_TEX_CONST_0_SWIZ_Y(1) | adreno.A6XX_TEX_CONST_0_SWIZ_Z(2) \
-      | adreno.A6XX_TEX_CONST_0_SWIZ_W(3) \
-      | adreno.A6XX_TEX_CONST_0_FMT(adreno.FMT6_32_32_32_32_FLOAT if options.image.itemsize == 4 else adreno.FMT6_16_16_16_16_FLOAT)
-    texture.descriptor[1] = adreno.A6XX_TEX_CONST_1_WIDTH(options.image.shape[1]) | adreno.A6XX_TEX_CONST_1_HEIGHT(options.image.shape[0])
-    texture.descriptor[2] = adreno.A6XX_TEX_CONST_2_TYPE(adreno.A6XX_TEX_2D) | adreno.A6XX_TEX_CONST_2_PITCH(pitch) \
-      | adreno.A6XX_TEX_CONST_2_PITCHALIGN(pitchalign - 6)
-    texture.descriptor[4:6] = data64_le(texture.va_addr)
-    texture.descriptor[6] = 0x40000000
-    texture.descriptor[7] = 0xe
+    tex_fmt = adreno.FMT6_32_32_32_32_FLOAT if options.image.itemsize == 4 else adreno.FMT6_16_16_16_16_FLOAT
+    texture.descriptor[0] = qreg.a6xx_tex_const_0(swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=tex_fmt)
+    texture.descriptor[1] = qreg.a6xx_tex_const_1(width=options.image.shape[1], height=options.image.shape[0])
+    texture.descriptor[2] = qreg.a6xx_tex_const_2(type=adreno.A6XX_TEX_2D, pitch=pitch, pitchalign=pitchalign-6)
+    texture.descriptor[4:7] = [*data64_le(texture.va_addr), qreg.a6xx_tex_const_6(plane_pitch=0x400000)]
     texture.ibo = [texture.descriptor[0] & (~0xffff), *texture.descriptor[1:len(texture.descriptor)]]
     return texture
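
The first hunk replaces hand-ORed A6XX_TEX_CONST_* shift macros with qreg helpers that take each bitfield as a keyword argument and pack the register in one call. A minimal sketch of that pattern, with invented field offsets and widths standing in for the real A6XX layout in the adreno headers:

# Hypothetical packer, not tinygrad's qreg module: the offsets/widths below
# are invented for illustration; the real ones come from the adreno registers.
A6XX_TEX_CONST_0_LAYOUT = {"swiz_x": (0, 3), "swiz_y": (3, 3), "swiz_z": (6, 3), "swiz_w": (9, 3), "fmt": (12, 8)}

def a6xx_tex_const_0(**fields) -> int:
  reg = 0
  for name, val in fields.items():
    off, width = A6XX_TEX_CONST_0_LAYOUT[name]
    assert 0 <= val < (1 << width), f"{name}={val} does not fit in {width} bits"
    reg |= val << off  # each keyword fills exactly one bitfield
  return reg

# reads like the new descriptor[0] line in the diff
desc0 = a6xx_tex_const_0(swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=0x31)

A packer like this can also range-check every field, which the bare shift-and-OR style silently truncates.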
@@ -349,26 +343,21 @@ class QCOMDevice(HCQCompiled):
     return info
   def _gpu_alloc(self, size:int, flags:int=0, map_to_cpu=False, uncached=False, fill_zeroes=False):
-    size = round_up(size, 1 << (alignment_hint:=12))
-    flags |= (kgsl.KGSL_MEMALIGN(alignment_hint))
-    if uncached: flags |= (kgsl.KGSL_CACHEMODE(kgsl.KGSL_CACHEMODE_UNCACHED))
+    if uncached: flags |= kgsl.KGSL_CACHEMODE(kgsl.KGSL_CACHEMODE_UNCACHED)
-    alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=size, flags=flags)
-    va_addr, va_len = None, 0
-    if not (flags & kgsl.KGSL_MEMFLAGS_USE_CPU_MAP):
-      info = kgsl.IOCTL_KGSL_GPUOBJ_INFO(self.fd, id=alloc.id)
-      va_addr, va_len = info.gpuaddr, info.va_len
+    alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=round_up(size, 1<<(alignment_hint:=12)), flags=flags | kgsl.KGSL_MEMALIGN(alignment_hint))
+    info = kgsl.IOCTL_KGSL_GPUOBJ_INFO(self.fd, id=alloc.id)
+    va_addr, va_len = info.gpuaddr, info.va_len
-    if map_to_cpu or (flags & kgsl.KGSL_MEMFLAGS_USE_CPU_MAP):
-      va_addr = libc.mmap(va_addr, va_len := (va_len or alloc.mmapsize), mmap.PROT_READ|mmap.PROT_WRITE,
-                          mmap.MAP_SHARED | (MAP_FIXED if va_addr is not None else 0), self.fd, alloc.id * 0x1000)
+    if map_to_cpu:
+      va_addr = libc.mmap(va_addr, va_len, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, self.fd, alloc.id * 0x1000)
       if fill_zeroes: ctypes.memset(va_addr, 0, va_len)
-    return SimpleNamespace(va_addr=va_addr, size=va_len, mapped=map_to_cpu or (flags & kgsl.KGSL_MEMFLAGS_USE_CPU_MAP), info=alloc)
+    return SimpleNamespace(va_addr=va_addr, size=size, mapped=map_to_cpu, info=alloc)
   def _gpu_free(self, mem):
     kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.info.id)
-    if mem.mapped: libc.munmap(mem.va_addr, mem.size)
+    if mem.mapped: libc.munmap(mem.va_addr, mem.info.va_len)
   def _alloc_cmd_buf(self, sz: int):
     self.cmd_buf_ptr = (cur_ptr:=self.cmd_buf_ptr if self.cmd_buf_ptr + sz < self.cmd_buf.size else 0) + sz
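
In the second hunk, _gpu_alloc now always issues GPUOBJ_ALLOC with the alignment hint folded into the flags, reads the GPU virtual address back via GPUOBJ_INFO, and, when map_to_cpu is set, pins the CPU mapping to that same address. A sketch of the mapping step, assuming standard Linux mmap semantics; the libc binding and the MAP_FIXED constant are spelled out here, while tinygrad takes them from its own autogen modules:

import ctypes, ctypes.util, mmap

libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
libc.mmap.restype = ctypes.c_void_p
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
MAP_FIXED = 0x10  # Linux value; not exported by python's mmap module

def map_gpu_buffer(fd: int, gpu_va: int, va_len: int, buf_id: int) -> int:
  # KGSL exposes each GPU object through the device fd at offset id * 0x1000.
  # MAP_FIXED places the CPU mapping at the buffer's GPU virtual address, so
  # the same pointer value is valid on both the CPU and the GPU.
  return libc.mmap(gpu_va, va_len, mmap.PROT_READ | mmap.PROT_WRITE,
                   mmap.MAP_SHARED | MAP_FIXED, fd, buf_id * 0x1000)

_gpu_free mirrors this: it now unmaps with mem.info.va_len, the kernel's view of the mapping length, since mem.size carries the caller's rounded request instead.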
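
_alloc_cmd_buf at the end of the hunk is a wrap-around bump allocator: it hands out sz bytes from a fixed command buffer and restarts at offset 0 once a request would run past the end, by which point the oldest commands are assumed retired. A standalone sketch of the same logic (class and method names here are illustrative; the real method works against self.cmd_buf):

class CmdBufRing:
  def __init__(self, size: int): self.size, self.ptr = size, 0
  def alloc(self, sz: int) -> int:
    assert sz <= self.size, "request larger than the whole ring"
    # same walrus pattern as the diff: keep bumping, or wrap to 0 if sz won't fit
    self.ptr = (cur_ptr := self.ptr if self.ptr + sz < self.size else 0) + sz
    return cur_ptr  # offset of the freshly reserved region

ring = CmdBufRing(0x1000)
a, b = ring.alloc(0x900), ring.alloc(0x900)
assert (a, b) == (0, 0)  # the second request did not fit and wrapped to 0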