add Tensor.from_blob (#6765)

* draft tensor from pointer init

* some docs and types

* comment

* cleaner

* test

* malloc

* qcom cl interop

* jit example

* cleaner

* dealloc

* wording

* docs
This commit is contained in:
nimlgen
2024-09-26 18:33:19 +08:00
committed by GitHub
parent 14ad47b515
commit 3c56aeee70
6 changed files with 127 additions and 8 deletions

View File

@@ -294,7 +294,8 @@ class QCOMAllocator(HCQAllocator):
pitch_add = (1 << pitchalign) if min(next_power2(imgw), round_up(imgw, granularity)) - align_up + 1 <= imgw and imgw > granularity//2 else 0
pitch = round_up((real_stride:=imgw * 4 * options.image.itemsize), 1 << pitchalign) + pitch_add
texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE, map_to_cpu=True)
if options.external_ptr: texture = QCOMBuffer(options.external_ptr, size)
else: texture = self.device._gpu_alloc(pitch * imgh, kgsl.KGSL_MEMTYPE_TEXTURE, map_to_cpu=True)
# Extend HCQBuffer with texture-related info.
texture.pitch, texture.real_stride, texture.desc, texture.ibo = pitch, real_stride, [0] * 16, [0] * 16
@@ -308,7 +309,7 @@ class QCOMAllocator(HCQAllocator):
return texture
return self.device._gpu_alloc(size, map_to_cpu=True)
return QCOMBuffer(options.external_ptr, size) if options.external_ptr else self.device._gpu_alloc(size, map_to_cpu=True)
def _do_copy(self, src_addr, dest_addr, src_size, real_size, src_stride, dest_stride, dest_off=0, src_off=0):
while src_off < src_size: