mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-19 02:44:40 -05:00
@@ -299,7 +299,7 @@ class QCOMAllocator(HCQAllocator):
|
||||
pitch = round_up((real_stride:=imgw * 4 * options.image.itemsize), 1 << pitchalign) + pitch_add
|
||||
|
||||
if options.external_ptr: texture = QCOMBuffer(options.external_ptr, size)
|
||||
else: texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE, map_to_cpu=True)
|
||||
else: texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE)
|
||||
|
||||
texture.pitch, texture.real_stride = pitch, real_stride
|
||||
|
||||
@@ -312,7 +312,7 @@ class QCOMAllocator(HCQAllocator):
|
||||
|
||||
return texture
|
||||
|
||||
return QCOMBuffer(options.external_ptr, size) if options.external_ptr else self.device._gpu_alloc(size, map_to_cpu=True)
|
||||
return QCOMBuffer(options.external_ptr, size) if options.external_ptr else self.device._gpu_alloc(size)
|
||||
|
||||
def _do_copy(self, src_addr, dest_addr, src_size, real_size, src_stride, dest_stride, dest_off=0, src_off=0):
|
||||
while src_off < src_size:
|
||||
@@ -336,7 +336,6 @@ class QCOMAllocator(HCQAllocator):
|
||||
self.device.synchronize()
|
||||
self.device._gpu_free(opaque)
|
||||
|
||||
MAP_FIXED = 0x10
|
||||
class QCOMDevice(HCQCompiled):
|
||||
signals_page: Any = None
|
||||
signals_pool: List[Any] = []
|
||||
@@ -345,10 +344,10 @@ class QCOMDevice(HCQCompiled):
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
self.fd = os.open('/dev/kgsl-3d0', os.O_RDWR)
|
||||
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000, map_to_cpu=False).va_addr
|
||||
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, map_to_cpu=True, uncached=True)
|
||||
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000).va_addr
|
||||
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, uncached=True)
|
||||
QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)]
|
||||
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(16 << 20, map_to_cpu=True), 0,0
|
||||
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(16 << 20), 0,0
|
||||
QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF)
|
||||
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
|
||||
|
||||
@@ -370,29 +369,26 @@ class QCOMDevice(HCQCompiled):
|
||||
kgsl.IOCTL_KGSL_DEVICE_GETPROPERTY(self.fd, type=kgsl.KGSL_PROP_DEVICE_INFO, value=ctypes.addressof(info), sizebytes=ctypes.sizeof(info))
|
||||
return info
|
||||
|
||||
def _gpu_alloc(self, size:int, flags:int=0, map_to_cpu=False, uncached=False, fill_zeroes=False):
|
||||
def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False):
|
||||
flags |= kgsl.KGSL_MEMALIGN(alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP
|
||||
if uncached: flags |= kgsl.KGSL_CACHEMODE(kgsl.KGSL_CACHEMODE_UNCACHED)
|
||||
|
||||
alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=round_up(size, 1<<(alignment_hint:=12)), flags=flags | kgsl.KGSL_MEMALIGN(alignment_hint))
|
||||
info = kgsl.IOCTL_KGSL_GPUOBJ_INFO(self.fd, id=alloc.id)
|
||||
va_addr, va_len = info.gpuaddr, info.va_len
|
||||
alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=(bosz:=round_up(size, 1<<alignment_hint)), flags=flags, mmapsize=bosz)
|
||||
va_addr = libc.mmap(0, bosz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED, self.fd, alloc.id * 0x1000)
|
||||
|
||||
if map_to_cpu:
|
||||
va_addr = libc.mmap(va_addr, va_len, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED|MAP_FIXED, self.fd, alloc.id * 0x1000)
|
||||
if fill_zeroes: ctypes.memset(va_addr, 0, va_len)
|
||||
|
||||
return QCOMBuffer(va_addr=va_addr, size=size, mapped=map_to_cpu, info=alloc)
|
||||
if fill_zeroes: ctypes.memset(va_addr, 0, size)
|
||||
return QCOMBuffer(va_addr=va_addr, size=size, info=alloc)
|
||||
|
||||
def _gpu_free(self, mem):
|
||||
kgsl.IOCTL_KGSL_GPUOBJ_FREE(self.fd, id=mem.info.id)
|
||||
if mem.mapped: libc.munmap(mem.va_addr, mem.info.va_len)
|
||||
libc.munmap(mem.va_addr, mem.info.mmapsize)
|
||||
|
||||
def _alloc_cmd_buf(self, sz: int):
|
||||
self.cmd_buf_ptr = (cur_ptr:=self.cmd_buf_ptr if self.cmd_buf_ptr + sz < self.cmd_buf.size else 0) + sz
|
||||
return self.cmd_buf.va_addr + cur_ptr
|
||||
|
||||
def _border_color_base(self):
|
||||
if not hasattr(self, '_border_color_gpu'): self._border_color_gpu = self._gpu_alloc(0x1000, map_to_cpu=True, fill_zeroes=True)
|
||||
if not hasattr(self, '_border_color_gpu'): self._border_color_gpu = self._gpu_alloc(0x1000, fill_zeroes=True)
|
||||
return self._border_color_gpu.va_addr
|
||||
|
||||
def _ensure_stack_size(self, sz):
|
||||
|
||||
Reference in New Issue
Block a user