From 7810be8d3c9d0d399dd2babcaad13fbce2d975bf Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Fri, 6 Mar 2026 03:24:27 -0800 Subject: [PATCH] compile QCOM without opening device (#15165) Co-authored-by: Comma Device --- .github/actions/setup-tinygrad/action.yml | 10 ++++ .github/workflows/test.yml | 23 +++++++++ test/backend/test_ops.py | 17 +++++-- tinygrad/device.py | 4 +- tinygrad/helpers.py | 3 +- tinygrad/renderer/cstyle.py | 7 ++- tinygrad/runtime/ops_null.py | 7 +-- tinygrad/runtime/ops_qcom.py | 15 +++--- tinygrad/runtime/support/compiler_qcom.py | 57 +++++++++++++++++++++++ 9 files changed, 122 insertions(+), 21 deletions(-) create mode 100644 tinygrad/runtime/support/compiler_qcom.py diff --git a/.github/actions/setup-tinygrad/action.yml b/.github/actions/setup-tinygrad/action.yml index b75bdc0f6c..dff7f3ebe2 100644 --- a/.github/actions/setup-tinygrad/action.yml +++ b/.github/actions/setup-tinygrad/action.yml @@ -45,6 +45,10 @@ inputs: description: "Install mesa" required: false default: 'false' + tinydreno: + description: "Install tinydreno" + required: false + default: 'false' runs: using: "composite" steps: @@ -326,3 +330,9 @@ runs: if: inputs.mesa == 'true' && runner.os == 'macOS' shell: bash run: brew install sirhcm/tinymesa/tinymesa_cpu + + # *** tinydreno *** + - name: Install tinydreno (linux) + if: inputs.tinydreno == 'true' && runner.os == 'Linux' + shell: bash + run: sudo curl -fL https://github.com/sirhcm/tinydreno/raw/refs/heads/master/libllvm-qcom.so -o /usr/lib/libllvm-qcom.so diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc2023ca43..afd546b2d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1011,3 +1011,26 @@ jobs: python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add python -m pytest -n=auto test/backend/test_ops.py --durations=20 + qcomclcompiletests: + name: Compile-only (QCOM CL) + runs-on: ubuntu-24.04-arm + timeout-minutes: 15 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: compile-qcomcl + deps: testing_unit + tinydreno: 'true' + python-version: '3.12' + - name: Set env + shell: bash + run: printf "NULL=1\nNULL_ALLOW_COPYOUT=1\nNULL_QCOMCL=1" >> $GITHUB_ENV + - name: Run test_ops + shell: bash + run: | + python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" + DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add + python -m pytest -n=auto test/backend/test_ops.py --durations=20 diff --git a/test/backend/test_ops.py b/test/backend/test_ops.py index e4e910b246..a507a0830f 100644 --- a/test/backend/test_ops.py +++ b/test/backend/test_ops.py @@ -6,6 +6,7 @@ from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, CPU_LLVM, AMD_LL from tinygrad import Tensor, Device, dtypes from tinygrad.tensor import _to_np_dtype from tinygrad.device import is_dtype_supported +from tinygrad.renderer.cstyle import QCOMCLRenderer from tinygrad.renderer.nir import NIRRenderer TINY_BACKEND = getenv("TINY_BACKEND") @@ -436,7 +437,7 @@ class TestOps(unittest.TestCase): helper_test_op([(45,35), (45,35), (45,35)], lambda x,y,z: x.lerp(y,z)) helper_test_op(None, lambda x,y,z: x.lerp(y,z), vals=[[1.,2.,3.], [4.,5.,6.], 0.5]) - @unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)") + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_tril(self): helper_test_op([(3,3)], lambda x: x.tril()) helper_test_op([(3,3)], lambda x: x.tril(1)) @@ -454,7 +455,7 @@ class TestOps(unittest.TestCase): helper_test_op([(5,3,3)], lambda x: x.tril(1)) helper_test_op(None, lambda x: x.tril(), vals=[[[True] * 3] * 3], forward_only=True) - @unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)") + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_triu(self): helper_test_op([(3,3)], lambda x: x.triu()) helper_test_op([(3,3)], lambda x: x.triu(1)) @@ -765,6 +766,7 @@ class TestOps(unittest.TestCase): self.helper_test_exception([(4), (4)], lambda x,y: x.bitwise_xor(y), expected=RuntimeError) + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_and(self): data = [[1,-8,1],[32,1,6]] tor = torch.tensor(data, dtype=torch.int) @@ -782,6 +784,7 @@ class TestOps(unittest.TestCase): self.helper_test_exception([(4), (4)], lambda x,y: x.bitwise_and(y), expected=RuntimeError) + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_or(self): data = [[1,-8,1],[32,1,6]] tor = torch.tensor(data, dtype=torch.int) @@ -1170,6 +1173,7 @@ class TestOps(unittest.TestCase): helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[False, True]]) helper_test_op(None, lambda x: x.type(torch.int32).argmax().type(torch.int32), lambda x: x.argmax(), forward_only=True, vals=[[True, False]]) + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_argmin(self): # check if it returns the first index for multiple occurrences helper_test_op(None, lambda x: x.argmin().type(torch.int32), lambda x: x.argmin(), forward_only=True, vals=[[2, 2]]) @@ -1475,6 +1479,7 @@ class TestOps(unittest.TestCase): def test_prod_dtype_arg(self): with self.assertRaises(AttributeError): Tensor([1.0, 2.0]).prod(dtype="") + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_min(self): helper_test_op([(3,3)], lambda x: x.min()) helper_test_op([(45,3)], lambda x: x.min()) @@ -1503,7 +1508,6 @@ class TestOps(unittest.TestCase): helper_test_op([(3,3)], lambda x: torch.full_like(x, 2).prod(), lambda x: (x.full_like(2)).prod(), forward_only=True) helper_test_op([(3,3)], lambda x: torch.full_like(x, 2).max(), lambda x: (x.full_like(2)).max(), forward_only=True) - @unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)") def test_any(self): helper_test_op([(3,4,5,6)], lambda x: x.any(), forward_only=True) helper_test_op(None, lambda x: x.any(), vals=[[True, True]], forward_only=True) @@ -1515,7 +1519,7 @@ class TestOps(unittest.TestCase): def test_any_zero_axis(self): helper_test_op([(1,0,3,0,5)], lambda x: x.any(axis=(1,3)), forward_only=True) - @unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)") + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_all(self): helper_test_op([(3,4,5,6)], lambda x: x.all(), forward_only=True) helper_test_op(None, lambda x: x.all(), vals=[[True, True]], forward_only=True) @@ -2889,6 +2893,7 @@ class TestOps(unittest.TestCase): helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p]) @slow_test + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_slice_fancy_indexing_dim_collapse_int(self): a,b,c,d,e,i,j,k,o,p = self._get_index_randoms() # dim collapse from int @@ -2899,6 +2904,7 @@ class TestOps(unittest.TestCase): helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2]) @slow_test + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_slice_fancy_indexing_dim_inject_none(self): a,b,c,d,e,i,j,k,o,p = self._get_index_randoms() # dim injection from None @@ -2933,6 +2939,7 @@ class TestOps(unittest.TestCase): lambda x: x[Tensor([[0,1,-1],[-1,-2,0]]), Tensor([2,1,-1])]) @slow_test + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_slice_fancy_indexing_list_indices(self): a,b,c,d,e,i,j,k,o,p = self._get_index_randoms() helper_test_op([(2,5,6,5,3,4)], lambda x: x[((0,),)]) @@ -2944,6 +2951,7 @@ class TestOps(unittest.TestCase): helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,(2,1,0),c,(-2,1,0),e], lambda x: x[i,(2,1,0),k,(-2,1,0),p]) @slow_test + @unittest.skipIf(isinstance(Device[Device.DEFAULT].renderer, QCOMCLRenderer), "QCOM CL vectorized bool bug") def test_slice_fancy_indexing_tuple_indices(self): a,b,c,d,e,i,j,k,o,p = self._get_index_randoms() helper_test_op([(2,5,6,5,3,4)], lambda x: x[(((0,),),)], lambda x: x[(((0,),),)]) @@ -3285,7 +3293,6 @@ class TestOps(unittest.TestCase): helper_test_op([(20,)], lambda x: (x>0.5).nonzero().int(), lambda x: (x>0.5).nonzero(), forward_only=True) helper_test_op([(10, 5, 3)], lambda x: (x>0.5).nonzero().int(), lambda x: (x>0.5).nonzero(), forward_only=True) - @unittest.skipIf(Device.DEFAULT == "QCOM", "OpenCL fails to compile this (both on GPU(qcom)/QCOM backends)") def test_cast(self): helper_test_op([(3, 3)], lambda x: x.float()) helper_test_op(None, lambda x: x.float(), vals=[[0, 1, 2, 3]], forward_only=True) diff --git a/tinygrad/device.py b/tinygrad/device.py index aaf83131a2..3e1d5f4ac3 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -6,7 +6,7 @@ import importlib, inspect, functools, pathlib, os, platform, contextlib, sys, re from tinygrad.helpers import CI, OSX, LRU, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, PROFILE, temp, colored from tinygrad.helpers import Context, CCACHE, ALLOW_DEVICE_USAGE, MAX_BUFFER_SIZE, cpu_events, ProfileEvent, ProfilePointEvent, dedup, ContextVar from tinygrad.helpers import unwrap_class_type, suppress_finalizing, select_first_inited, VIZ, CPU_LLVM, CPU_LVP, NV_PTX, CUDA_PTX, NV_NAK -from tinygrad.helpers import EMULATED_DTYPES, TracingKey +from tinygrad.helpers import EMULATED_DTYPES, NULL_IR3, NULL_QCOMCL, TracingKey from tinygrad.dtype import DType, ImageDType, PtrDType, dtypes, _to_np_dtype if TYPE_CHECKING: from tinygrad.renderer import Renderer @@ -371,7 +371,7 @@ def is_dtype_supported(dtype:DType, device:str|None=None) -> bool: if device in ["CUDA", "NV"]: return not CI if device == "CPU" and CPU_LLVM: return OSX if device == "PYTHON": return sys.version_info >= (3, 12) - if dtype == dtypes.float64: return (device not in {"METAL", "QCOM"} and not (OSX and device == "CL") and not getenv("NULL_IR3") + if dtype == dtypes.float64: return (device not in {"METAL", "QCOM"} and not (OSX and device == "CL") and not NULL_IR3 and not NULL_QCOMCL and dtypes.long not in EMULATED_DTYPES.tolist(dtypes)) return True diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py index 4f38b13094..748844ab6d 100644 --- a/tinygrad/helpers.py +++ b/tinygrad/helpers.py @@ -195,7 +195,8 @@ CPU_COUNT = ContextVar("CPU_COUNT", max(1, len(os.sched_getaffinity(0)) if hasat CPU_CC, CPU_LLVM, CPU_LVP = ContextVar("CPU_CC", ""), ContextVar("CPU_LLVM", 0), ContextVar("CPU_LVP", 0) NV_CC, NV_PTX, NV_NAK, NV_NVCC = ContextVar("NV_CC", ""), ContextVar("NV_PTX", 0), ContextVar("NV_NAK", 0), ContextVar("NV_NVCC", 0) CUDA_CC, CUDA_PTX, CUDA_NVCC = ContextVar("CUDA_CC", ""), ContextVar("CUDA_PTX", 0), ContextVar("CUDA_NVCC", 0) -NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT = ContextVar("NULL_IR3", 0), ContextVar("NULL_NAK", 0), ContextVar("NULL_ALLOW_COPYOUT", 0) +NULL_QCOMCL, NULL_IR3, NULL_NAK = ContextVar("NULL_QCOMCL", 0), ContextVar("NULL_IR3", 0), ContextVar("NULL_NAK", 0) +NULL_ALLOW_COPYOUT = ContextVar("NULL_ALLOW_COPYOUT", 0) AMD_CC, AMD_LLVM, AMD_HIPCC = ContextVar("AMD_CC", ""), ContextVar("AMD_LLVM", 0), ContextVar("AMD_HIPCC", 0) QCOM_CC, QCOM_IR3 = ContextVar("QCOM_CC", ""), ContextVar("QCOM_IR3", 0) # VIZ implies PROFILE, but you can run PROFILE without VIZ diff --git a/tinygrad/renderer/cstyle.py b/tinygrad/renderer/cstyle.py index 624305aec1..9e2a40d950 100644 --- a/tinygrad/renderer/cstyle.py +++ b/tinygrad/renderer/cstyle.py @@ -566,4 +566,9 @@ class AMDHIPCCRenderer(AMDHIPRenderer): super().__init__(arch) self.compiler = HIPCCCompiler(arch) -class QCOMRenderer(OpenCLRenderer): device = "QCOM" +class QCOMCLRenderer(OpenCLRenderer): + device = "QCOM" + + def __init__(self, chip_id): + from tinygrad.runtime.support.compiler_qcom import QCOMCompiler + self.compiler = QCOMCompiler(chip_id) diff --git a/tinygrad/runtime/ops_null.py b/tinygrad/runtime/ops_null.py index 529204562c..8436d75f23 100644 --- a/tinygrad/runtime/ops_null.py +++ b/tinygrad/runtime/ops_null.py @@ -1,9 +1,9 @@ import functools from tinygrad.device import Compiled, Allocator, CompilerSet from tinygrad.engine.jit import MultiGraphRunner -from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer +from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer, QCOMCLRenderer from tinygrad.uop.ops import Ops -from tinygrad.helpers import cpu_profile, EMULATE, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT +from tinygrad.helpers import cpu_profile, EMULATE, NULL_QCOMCL, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT from tinygrad.renderer.nir import IR3Renderer, NAKRenderer class NullRenderer(CStyleLanguage): @@ -39,6 +39,7 @@ class NullDevice(Compiled): case "AMD_CDNA4": renderer = functools.partial(AMDHIPRenderer, "gfx950") case "": renderer = NullRenderer case _: raise RuntimeError(f"can't EMULATE device: {EMULATE.value}") - compilers = CompilerSet([(renderer, None), (functools.partial(IR3Renderer, 0x6030001), NULL_IR3), # adreno 630 + compilers = CompilerSet([(renderer, None), (functools.partial(QCOMCLRenderer, 0x6030001), NULL_QCOMCL), # adreno 630 + (functools.partial(IR3Renderer, 0x6030001), NULL_IR3), # adreno 630 (functools.partial(NAKRenderer, "sm_120", 48), NULL_NAK)]) # 5090 super().__init__(device, NullAllocator(self), compilers, functools.partial(NullProgram, device), NullGraph) diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index 632d1d79fd..8a75a2099d 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -6,11 +6,10 @@ from tinygrad.device import BufferSpec, CompilerSet, Device from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface from tinygrad.runtime.autogen import kgsl, mesa -from tinygrad.runtime.ops_cl import CLDevice -from tinygrad.renderer.cstyle import QCOMRenderer +from tinygrad.renderer.cstyle import QCOMCLRenderer from tinygrad.renderer.nir import IR3Renderer -from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, fromimport, cpu_profile, lo32, suppress_finalizing -from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE, DEBUG +from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, cpu_profile, lo32, suppress_finalizing +from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE from tinygrad.dtype import ImageDType, dtypes from tinygrad.runtime.support.system import System if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import @@ -248,9 +247,7 @@ class QCOMProgram(HCQProgram): self.tex_off, self.ibo_off, self.samp_off = 2048, 2048 + 0x40 * self.tex_cnt, 2048 + 0x40 * (self.tex_cnt + self.ibo_cnt) self.fregs, self.hregs = v.info.max_reg + 1, v.info.max_half_reg + 1 self.consts_info:list[tuple] = [] - else: - self._parse_lib(lib:=self.dev.cl_dev.cl_compiler.compile_cached(lib.decode())) - if DEBUG >= 7: fromimport('tinygrad.runtime.support.compiler_mesa', 'disas_adreno')(lib[(ofs:=_read_lib(lib, 0xc0)):ofs+_read_lib(lib, 0x100)]) + else: self._parse_lib(lib) self.lib_gpu: HCQBuffer = self.dev.allocator.alloc(self.image_size, buf_spec:=BufferSpec(cpu_access=True, nolru=True)) to_mv(self.lib_gpu.va_addr, self.image_size)[:] = self.image @@ -384,8 +381,8 @@ class QCOMDevice(HCQCompiled): if PROFILE and self.gpu_id[:2] < (7, 3): System.write_sysfs("/sys/class/kgsl/kgsl-3d0/idle_timer", value="4000000000", msg="Failed to disable suspend mode", expected="4294967276") - self.cl_dev = CLDevice(device) - compilers = CompilerSet(ctrl_var=QCOM_CC, cset=[(QCOMRenderer, None), (functools.partial(IR3Renderer, info.chip_id), QCOM_IR3)]) + compilers = CompilerSet(ctrl_var=QCOM_CC, cset=[(functools.partial(QCOMCLRenderer, info.chip_id), None), + (functools.partial(IR3Renderer, info.chip_id), QCOM_IR3)]) super().__init__(device, QCOMAllocator(self), compilers, functools.partial(QCOMProgram, self), QCOMSignal, functools.partial(QCOMComputeQueue, self), None) diff --git a/tinygrad/runtime/support/compiler_qcom.py b/tinygrad/runtime/support/compiler_qcom.py new file mode 100644 index 0000000000..a2878315e6 --- /dev/null +++ b/tinygrad/runtime/support/compiler_qcom.py @@ -0,0 +1,57 @@ +import ctypes, struct +from tinygrad.device import Compiler +from tinygrad.runtime.support.c import DLL +from tinygrad.runtime.support.compiler_mesa import disas_adreno + +# see https://github.com/sirhcm/tinydreno +dll = DLL("llvm-qcom", ["llvm-qcom"]) + +(create_llvm_instance:=dll.cl_compiler_create_llvm_instance).restype, create_llvm_instance.argtypes = ctypes.c_void_p, [] + +(compile_source:=dll.cl_compiler_compile_source).restype = ctypes.c_void_p +compile_source.argtypes = [ctypes.c_void_p, ctypes.c_uint64, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_uint64, ctypes.c_uint64, + ctypes.c_char_p, ctypes.c_uint64, ctypes.c_uint64, ctypes.c_void_p] + +(link_program:=dll.cl_compiler_link_program).restype = ctypes.c_void_p +link_program.argtypes = [ctypes.c_void_p, ctypes.c_uint64, ctypes.c_int, ctypes.c_char_p, ctypes.c_int, ctypes.c_void_p] + +(get_error_code:=dll.cl_compiler_get_error_code).restype, get_error_code.argtypes = ctypes.c_int, [ctypes.c_void_p] +(get_build_log:=dll.cl_compiler_get_build_log).restype, get_build_log.argtypes = ctypes.c_char_p, [ctypes.c_void_p] + +(handle_create_binary:=dll.cl_compiler_handle_create_binary).restype = None +handle_create_binary.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t)] + +(free_handle:=dll.cl_compiler_free_handle).restype, free_handle.argtypes = None, [ctypes.c_void_p] +(free_assembly:=dll.cl_compiler_free_assembly).restype, free_assembly.argtypes = None, [ctypes.c_void_p] +(destroy_llvm_instance:=dll.cl_compiler_destroy_llvm_instance).restype, destroy_llvm_instance.argtypes = None, [ctypes.c_void_p] + +MODE_32BIT, MODE_64BIT, SRC_STR, SRC_BLOB = 0, 1, 0, 1 + +def _read_lib(lib, off) -> int: return struct.unpack("I", lib[off:off+4])[0] + +class QCOMCompiler(Compiler): + def __init__(self, chip_id): + self.chip_id, self.llvm_inst = chip_id, create_llvm_instance() + super().__init__(f"compile_qcomcl_{chip_id}") + + def __del__(self): destroy_llvm_instance(self.llvm_inst) + + def __reduce__(self): return QCOMCompiler, (self.chip_id,) + + def checked(self, handle): + if handle is None or get_error_code(handle) != 0: + destroy_llvm_instance(self.llvm_inst) + self.llvm_inst = create_llvm_instance() + raise RuntimeError("QCOM Compilation Error" + ("" if handle is None else f": {get_build_log(handle)}")) + return handle + + def compile(self, src) -> bytes: + ch = self.checked(compile_source(self.llvm_inst, self.chip_id, MODE_64BIT, b"", 0, 0, 0, src.encode(), 0, SRC_STR, None)) + lh = self.checked(link_program(self.llvm_inst, self.chip_id, MODE_64BIT, None, 1, ctypes.pointer(ctypes.c_void_p(ch)))) + handle_create_binary(lh, ctypes.byref(ptr:=ctypes.c_void_p()), ctypes.byref(sz:=ctypes.c_size_t())) + for h in [ch, lh]: free_handle(h) + ret = ctypes.string_at(ptr, sz.value) + free_assembly(ptr) + return ret + + def disassemble(self, lib: bytes): disas_adreno(lib[(ofs:=_read_lib(lib, 0xc0)):ofs+_read_lib(lib, 0x100)], self.chip_id)