dsp do not flush libs to fs (#6531)

* dsp use sc

* no flush to fs

* ruff

* tiny nit

* shorter
This commit is contained in:
nimlgen
2024-09-16 16:42:15 +08:00
committed by GitHub
parent dae3615008
commit 25d8f3046a
2 changed files with 24 additions and 23 deletions

View File

@@ -291,10 +291,10 @@ def cpu_time_execution(cb, enable):
cb()
if enable: return time.perf_counter()-st
def cpu_objdump(lib):
def cpu_objdump(lib, objdump_tool='objdump'):
with tempfile.NamedTemporaryFile(delete=True) as f:
pathlib.Path(f.name).write_bytes(lib)
print(subprocess.check_output(['objdump', '-d', f.name]).decode('utf-8'))
print(subprocess.check_output([objdump_tool, '-d', f.name]).decode('utf-8'))
# *** ctypes helpers

View File

@@ -2,13 +2,13 @@ from __future__ import annotations
from typing import Tuple, Any
import ctypes, os, mmap, tempfile, pathlib, array, functools, threading, contextlib
from tinygrad.device import BufferOptions, Compiled, Allocator
from tinygrad.helpers import from_mv, getenv, DEBUG, round_up, mv_address, to_mv
from tinygrad.helpers import from_mv, getenv, DEBUG, round_up, mv_address, to_mv, cpu_objdump
from tinygrad.runtime.ops_clang import ClangCompiler
from tinygrad.renderer.cstyle import DSPRenderer
from tinygrad.runtime.autogen import libc, qcom_dsp
if getenv("IOCTL"): import extra.dsp.run # noqa: F401 # pylint: disable=unused-import
def rpc_sc(method=0, ins=0, outs=0, fd=0): return (method << 24) | (ins << 16) | (outs << 8)
def rpc_sc(method=0, ins=0, outs=0, fds=0): return (method << 24) | (ins << 16) | (outs << 8) | fds
def rpc_prep_args(ins=None, outs=None, in_fds=None):
ins, outs, in_fds = ins or list(), outs or list(), in_fds or list()
@@ -22,14 +22,7 @@ def rpc_prep_args(ins=None, outs=None, in_fds=None):
class DSPProgram:
def __init__(self, device:DSPDevice, name:str, lib:bytes):
self.device, self.lib = device, lib
# TODO: Remove lib flush to FS.
with tempfile.NamedTemporaryFile(delete=False) as self.filepath:
self.filepath.write(lib)
self.filepath.flush()
if DEBUG >= 6: os.system(f"llvm-objdump -d {self.filepath.name}")
def __del__(self): os.remove(self.filepath.name)
if DEBUG >= 6: cpu_objdump(lib, objdump_tool='llvm-objdump')
def __call__(self, *bufs, vals:Tuple[int, ...]=(), wait=False):
if len(bufs) >= 16: raise RuntimeError(f"Too many buffers to execute: {len(bufs)}")
@@ -37,7 +30,7 @@ class DSPProgram:
pra, fds, attrs, _ = rpc_prep_args(ins=[var_vals_mv:=memoryview(bytearray((len(bufs) + len(vals)) * 4))],
outs=[timer:=memoryview(bytearray(8)).cast('Q')], in_fds=[b.share_info.fd for b in bufs])
var_vals_mv.cast('i')[:] = array.array('i', tuple(b.size for b in bufs) + vals)
self.device.exec_lib(self.filepath.name, (2<<24) | (1<<16) | (1<<8) | len(bufs), pra, fds, attrs)
self.device.exec_lib(self.lib, rpc_sc(method=2, ins=1, outs=1, fds=len(bufs)), pra, fds, attrs)
return timer[0] / 1e6
class DSPBuffer:
@@ -84,9 +77,10 @@ class DSPDevice(Compiled):
self.init_dsp()
RPCListner(self).start()
def open_lib(self, filepath):
fp = f"file:///{filepath}?entry&_modver=1.0&_dom=cdsp\0"
pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [len(fp), 0xff])), memoryview(bytearray(f"{fp}".encode()))],
def open_lib(self, lib):
self.binded_lib, self.binded_lib_off = lib, 0
fp = "file:///tinylib?entry&_modver=1.0&_dom=cdsp\0"
pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [len(fp), 0xff])), memoryview(bytearray(fp.encode()))],
outs=[o1:=memoryview(bytearray(0x8)), o2:=memoryview(bytearray(0xff))])
qcom_dsp.FASTRPC_IOCTL_INVOKE(self.rpc_fd, handle=0, sc=rpc_sc(method=0, ins=2, outs=2), pra=pra)
if o1.cast('i')[1] < 0: raise RuntimeError(f"Cannot open lib: {o2.tobytes().decode()}")
@@ -96,9 +90,9 @@ class DSPDevice(Compiled):
pra, _, _, _ = rpc_prep_args(ins=[memoryview(array.array('I', [handle, 0xff]))], outs=[memoryview(bytearray(0x8)), memoryview(bytearray(0xff))])
qcom_dsp.FASTRPC_IOCTL_INVOKE(self.rpc_fd, handle=0, sc=rpc_sc(method=1, ins=1, outs=2), pra=pra)
def exec_lib(self, filepath, sc, args, fds, attrs):
def exec_lib(self, lib, sc, args, fds, attrs):
def _exec_lib():
handle = self.open_lib(filepath)
handle = self.open_lib(lib)
qcom_dsp.FASTRPC_IOCTL_INVOKE_ATTRS(self.rpc_fd, fds=fds, attrs=attrs, inv=qcom_dsp.struct_fastrpc_ioctl_invoke(handle=handle, sc=sc, pra=args))
self.close_lib(handle)
try: _exec_lib()
@@ -126,7 +120,7 @@ class RPCListner(threading.Thread):
def run(self):
# Setup initial request arguments.
context, status = 0, 0xffffffff
context, status, TINYFD = 0, 0xffffffff, 0xffff
req_args, _, _, _ = rpc_prep_args(ins=[msg_send:=memoryview(bytearray(0x10)).cast('I'), out_buf:=memoryview(bytearray(0x10000)).cast('I')],
outs=[msg_recv:=memoryview(bytearray(0x10)).cast('I'), in_buf:=memoryview(bytearray(0x10000)).cast('I')])
req_args[1].buf.len = 0
@@ -156,14 +150,21 @@ class RPCListner(threading.Thread):
status = 0 # reset status, will set if error
if sc == 0x20200: pass # greating
elif sc == 0x13050100: # open
try: out_args[0].cast('I')[0] = os.open(in_args[3].tobytes()[:-1].decode(), os.O_RDONLY)
try: out_args[0].cast('I')[0] = TINYFD if (name:=in_args[3].tobytes()[:-1].decode()) == "tinylib" else os.open(name, os.O_RDONLY)
except OSError: status = 1
elif sc == 0x3010000: os.close(in_args[0].cast('I')[0])
elif sc == 0x3010000:
if (fd:=in_args[0].cast('I')[0]) != TINYFD: os.close(fd)
elif sc == 0x9010000: # seek
res = os.lseek(in_args[0].cast('I')[0], in_args[0].cast('I')[1], in_args[0].cast('I')[2])
if (fd:=in_args[0].cast('I')[0]) == TINYFD:
assert in_args[0].cast('I')[2] == qcom_dsp.APPS_STD_SEEK_SET, "Supported only SEEK_SET"
res, self.device.binded_lib_off = 0, in_args[0].cast('I')[1]
else: res = os.lseek(fd, in_args[0].cast('I')[1], in_args[0].cast('I')[2])
status = 0 if res >= 0 else res
elif sc == 0x4010200: # read
buf = os.read(in_args[0].cast('I')[0], in_args[0].cast('I')[1])
if (fd:=in_args[0].cast('I')[0]) == TINYFD:
buf = self.device.binded_lib[self.device.binded_lib_off:self.device.binded_lib_off+in_args[0].cast('I')[1]]
self.device.binded_lib_off += len(buf)
else: buf = os.read(fd, in_args[0].cast('I')[1])
out_args[1][:len(buf)] = buf
out_args[0].cast('I')[0:2] = array.array('I', [len(buf), int(len(buf) == 0)])
elif sc == 0x1f020100: # stat