diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index ac28f4385a..af29200786 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -291,10 +291,10 @@ def cpu_time_execution(cb, enable):
   cb()
   if enable: return time.perf_counter()-st
 
-def cpu_objdump(lib):
+def cpu_objdump(lib, objdump_tool='objdump'):
   with tempfile.NamedTemporaryFile(delete=True) as f:
     pathlib.Path(f.name).write_bytes(lib)
-    print(subprocess.check_output(['objdump', '-d', f.name]).decode('utf-8'))
+    print(subprocess.check_output([objdump_tool, '-d', f.name]).decode('utf-8'))
 
 # *** ctypes helpers
 
diff --git a/tinygrad/runtime/ops_dsp.py b/tinygrad/runtime/ops_dsp.py
index d4c1481c3a..64f77aa6f7 100644
--- a/tinygrad/runtime/ops_dsp.py
+++ b/tinygrad/runtime/ops_dsp.py
@@ -2,13 +2,13 @@ from __future__ import annotations
 from typing import Tuple, Any
 import ctypes, os, mmap, tempfile, pathlib, array, functools, threading, contextlib
 from tinygrad.device import BufferOptions, Compiled, Allocator
-from tinygrad.helpers import from_mv, getenv, DEBUG, round_up, mv_address, to_mv
+from tinygrad.helpers import from_mv, getenv, DEBUG, round_up, mv_address, to_mv, cpu_objdump
 from tinygrad.runtime.ops_clang import ClangCompiler
 from tinygrad.renderer.cstyle import DSPRenderer
 from tinygrad.runtime.autogen import libc, qcom_dsp
 if getenv("IOCTL"): import extra.dsp.run # noqa: F401 # pylint: disable=unused-import
 
-def rpc_sc(method=0, ins=0, outs=0, fd=0): return (method << 24) | (ins << 16) | (outs << 8)
+def rpc_sc(method=0, ins=0, outs=0, fds=0): return (method << 24) | (ins << 16) | (outs << 8) | fds
 def rpc_prep_args(ins=None, outs=None, in_fds=None):
   ins, outs, in_fds = ins or list(), outs or list(), in_fds or list()
 
@@ -22,14 +22,7 @@ def rpc_prep_args(ins=None, outs=None, in_fds=None):
 class DSPProgram:
   def __init__(self, device:DSPDevice, name:str, lib:bytes):
     self.device, self.lib = device, lib
-
-    # TODO: Remove lib flush to FS.
-    with tempfile.NamedTemporaryFile(delete=False) as self.filepath:
-      self.filepath.write(lib)
-      self.filepath.flush()
-    if DEBUG >= 6: os.system(f"llvm-objdump -d {self.filepath.name}")
-
-  def __del__(self): os.remove(self.filepath.name)
+    if DEBUG >= 6: cpu_objdump(lib, objdump_tool='llvm-objdump')
 
   def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
     if len(bufs) >= 16: raise RuntimeError(f"Too many buffers to execute: {len(bufs)}")
@@ -37,7 +30,7 @@ class DSPProgram:
     pra, fds, attrs, _ = rpc_prep_args(ins=[var_vals_mv:=memoryview(bytearray((len(bufs) + len(vals)) * 4))],
                                        outs=[timer:=memoryview(bytearray(8)).cast('Q')], in_fds=[b.share_info.fd for b in bufs])
     var_vals_mv.cast('i')[:] = array.array('i', tuple(b.size for b in bufs) + vals)
-    self.device.exec_lib(self.filepath.name, (2<<24) | (1<<16) | (1<<8) | len(bufs), pra, fds, attrs)
+    self.device.exec_lib(self.lib, rpc_sc(method=2, ins=1, outs=1, fds=len(bufs)), pra, fds, attrs)
     return timer[0] / 1e6
 
 class DSPBuffer:
@@ -84,9 +77,10 @@ class DSPDevice(Compiled):
     self.init_dsp()
     RPCListner(self).start()
 
-  def open_lib(self, filepath):
-    fp = f"file:///{filepath}?entry&_modver=1.0&_dom=cdsp\0"
-    pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [len(fp), 0xff])), memoryview(bytearray(f"{fp}".encode()))],
+  def open_lib(self, lib):
+    self.binded_lib, self.binded_lib_off = lib, 0
+    fp = "file:///tinylib?entry&_modver=1.0&_dom=cdsp\0"
+    pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [len(fp), 0xff])), memoryview(bytearray(fp.encode()))],
                                  outs=[o1:=memoryview(bytearray(0x8)), o2:=memoryview(bytearray(0xff))])
     qcom_dsp.FASTRPC_IOCTL_INVOKE(self.rpc_fd, handle=0, sc=rpc_sc(method=0, ins=2, outs=2), pra=pra)
     if o1.cast('i')[1] < 0: raise RuntimeError(f"Cannot open lib: {o2.tobytes().decode()}")
@@ -96,9 +90,9 @@ class DSPDevice(Compiled):
     pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [handle, 0xff]))], outs=[memoryview(bytearray(0x8)), memoryview(bytearray(0xff))])
     qcom_dsp.FASTRPC_IOCTL_INVOKE(self.rpc_fd, handle=0, sc=rpc_sc(method=1, ins=1, outs=2), pra=pra)
 
-  def exec_lib(self, filepath, sc, args, fds, attrs):
+  def exec_lib(self, lib, sc, args, fds, attrs):
     def _exec_lib():
-      handle = self.open_lib(filepath)
+      handle = self.open_lib(lib)
       qcom_dsp.FASTRPC_IOCTL_INVOKE_ATTRS(self.rpc_fd, fds=fds, attrs=attrs, inv=qcom_dsp.struct_fastrpc_ioctl_invoke(handle=handle, sc=sc, pra=args))
       self.close_lib(handle)
     try: _exec_lib()
@@ -126,7 +120,7 @@ class RPCListner(threading.Thread):
 
   def run(self):
     # Setup initial request arguments.
-    context, status = 0, 0xffffffff
+    context, status, TINYFD = 0, 0xffffffff, 0xffff
     req_args, _, _, _ = rpc_prep_args(ins=[msg_send:=memoryview(bytearray(0x10)).cast('I'), out_buf:=memoryview(bytearray(0x10000)).cast('I')],
                                       outs=[msg_recv:=memoryview(bytearray(0x10)).cast('I'), in_buf:=memoryview(bytearray(0x10000)).cast('I')])
     req_args[1].buf.len = 0
@@ -156,14 +150,21 @@ class RPCListner(threading.Thread):
       status = 0 # reset status, will set if error
       if sc == 0x20200: pass # greating
       elif sc == 0x13050100: # open
-        try: out_args[0].cast('I')[0] = os.open(in_args[3].tobytes()[:-1].decode(), os.O_RDONLY)
+        try: out_args[0].cast('I')[0] = TINYFD if (name:=in_args[3].tobytes()[:-1].decode()) == "tinylib" else os.open(name, os.O_RDONLY)
         except OSError: status = 1
-      elif sc == 0x3010000: os.close(in_args[0].cast('I')[0])
+      elif sc == 0x3010000:
+        if (fd:=in_args[0].cast('I')[0]) != TINYFD: os.close(fd)
       elif sc == 0x9010000: # seek
-        res = os.lseek(in_args[0].cast('I')[0], in_args[0].cast('I')[1], in_args[0].cast('I')[2])
+        if (fd:=in_args[0].cast('I')[0]) == TINYFD:
+          assert in_args[0].cast('I')[2] == qcom_dsp.APPS_STD_SEEK_SET, "Supported only SEEK_SET"
+          res, self.device.binded_lib_off = 0, in_args[0].cast('I')[1]
+        else: res = os.lseek(fd, in_args[0].cast('I')[1], in_args[0].cast('I')[2])
         status = 0 if res >= 0 else res
       elif sc == 0x4010200: # read
-        buf = os.read(in_args[0].cast('I')[0], in_args[0].cast('I')[1])
+        if (fd:=in_args[0].cast('I')[0]) == TINYFD:
+          buf = self.device.binded_lib[self.device.binded_lib_off:self.device.binded_lib_off+in_args[0].cast('I')[1]]
+          self.device.binded_lib_off += len(buf)
+        else: buf = os.read(fd, in_args[0].cast('I')[1])
         out_args[1][:len(buf)] = buf
         out_args[0].cast('I')[0:2] = array.array('I', [len(buf), int(len(buf) == 0)])
       elif sc == 0x1f020100: # stat