diff --git a/extra/dsp/hook.py b/extra/dsp/hook.py
new file mode 100644
index 0000000000..31d2b4e920
--- /dev/null
+++ b/extra/dsp/hook.py
@@ -0,0 +1,104 @@
+# ioctl tracer for a non-Python process: preload.c imports this module from its LD_PRELOAD constructor
+# and calls handle_ioctl() after every ioctl the process makes
+import os
+print("from import")
+# pop() instead of del: importing this module without LD_PRELOAD set must not raise KeyError
+os.environ.pop("LD_PRELOAD", None)
+import ctypes, ctypes.util
+from extra.dsp.run import install_hook, ioctl, libc, get_struct, qcom_dsp, format_struct, to_mv, hexdump
+
+# logging replacement for libc's mmap. NOTE: not installed, and orig_mmap_mv only exists once the
+# commented-out install_hook(libc.mmap, _mmap) line below is enabled
+@ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long)
+def _mmap(addr, length, prot, flags, fd, offset):
+  mmap_type = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long)
+  orig_mmap = mmap_type(ctypes.addressof(orig_mmap_mv))
+  ret = orig_mmap(addr, length, prot, flags, fd, offset)
+  # ll = os.readlink(f"/proc/self/fd/{fd}") if fd >= 0 else ""
+  print(f"mmap {addr=}, {length=}, {prot=}, {flags=}, {fd=}, {offset=} {ret=}")
+  return ret
+
+#install_hook(libc.ioctl, ioctl)
+#orig_mmap_mv = install_hook(libc.mmap, _mmap)
+print("import done")
+import mmap
+
+alloc_sizes = {}  # ion handle -> len from ION_IOC_ALLOC
+mmaped = {}  # ion handle -> mmap.mmap created on ION_IOC_MAP
+
+def _print_scalars(sc):
+  # print the fields packed into the sc word of a FastRPC invoke:
+  # 0xFF000000 = Method index and attribute (the highest byte)
+  # 0x00FF0000 = Number of input arguments
+  # 0x0000FF00 = Number of output arguments
+  # 0x000000F0 = Number of input handles
+  # 0x0000000F = Number of output handles
+  method, in_args, out_args = (sc>>24) & 0xFF, (sc>>16) & 0xFF, (sc>>8) & 0xFF
+  in_h, out_h = (sc>>4) & 0xF, (sc>>0) & 0xF
+  print(f"\tm:{method} ia:{in_args} oa:{out_args} ih:{in_h} oh:{out_h}")
+
+def handle_ioctl(fd, request, argp, ret):
+  """Decode and print one ioctl. preload.c calls this after the real ioctl has returned.
+
+  fd, request, argp: the ioctl's arguments (argp as an integer address); ret: the real ioctl's return value.
+  """
+  try: fn = os.readlink(f"/proc/self/fd/{fd}")
+  except OSError: fn = ""  # fd is not open (e.g. ioctl on a bad descriptor): fall through to the generic log line
+  request &= 0xFFFFFFFF  # the _IOC request encoding is 32 bits wide, ignore anything above it
+  idir, size, itype, nr = (request>>30), (request>>16)&0x3FFF, (request>>8)&0xFF, request&0xFF
+
+  if fn == "/dev/ion":
+    if nr == 0:
+      st = get_struct(argp, qcom_dsp.struct_ion_allocation_data)
+      print(ret, "ION_IOC_ALLOC", format_struct(st))
+      if ret == 0: alloc_sizes[st.handle] = st.len  # do not track handles of failed allocations
+    elif nr == 1:
+      st = get_struct(argp, qcom_dsp.struct_ion_handle_data)
+      print(ret, "ION_IOC_FREE", format_struct(st))
+      alloc_sizes.pop(st.handle, None)
+      if (mapped:=mmaped.pop(st.handle, None)) is not None: mapped.close()
+    elif nr == 2:
+      st = get_struct(argp, qcom_dsp.struct_ion_fd_data)
+      print(ret, "ION_IOC_MAP", format_struct(st))
+      # the size is only known for handles whose ION_IOC_ALLOC was seen
+      if ret == 0 and st.handle in alloc_sizes: mmaped[st.handle] = mmap.mmap(st.fd, alloc_sizes[st.handle])
+  elif fn == "/dev/adsprpc-smd":
+    assert chr(itype) == 'R'
+    if nr == 8:
+      st = ctypes.c_uint32.from_address(argp)
+      print(ret, "FASTRPC_IOCTL_GETINFO", st.value)
+    elif nr == 2:
+      st = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_mmap)
+      print(ret, "FASTRPC_IOCTL_MMAP", format_struct(st))
+    elif nr == 1:
+      # https://research.checkpoint.com/2021/pwn2own-qualcomm-dsp/
+      st = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke)
+      print(ret, "FASTRPC_IOCTL_INVOKE", format_struct(st))
+      _print_scalars(st.sc)
+      # disabled: dump the first 0x40 bytes of every in/out argument
+      # for arg in range(((st.sc>>16)&0xFF) + ((st.sc>>8)&0xFF)):
+      #   print(arg, format_struct(st.pra[arg]))
+      #   if st.pra[arg].buf.pv is not None: hexdump(to_mv(st.pra[arg].buf.pv, st.pra[arg].buf.len)[:0x40])
+    elif nr == 6:
+      print(ret, "FASTRPC_IOCTL_INIT", format_struct(ini:=get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_init)))
+      try: print(os.readlink(f"/proc/self/fd/{ini.filefd}"))
+      except OSError as e: print(f"filefd {ini.filefd}: {e}")
+      # print(bytearray(to_mv(ini.file, ini.filelen)))
+    elif nr == 7:
+      print(ret, "FASTRPC_IOCTL_INVOKE_ATTRS", format_struct(get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke_attrs)))
+    elif nr == 12: print(ret, "FASTRPC_IOCTL_CONTROL", format_struct(get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_control)))
+    elif nr == 4:
+      st_fd = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke_fd)
+      st = st_fd.inv
+      print(ret, "FASTRPC_IOCTL_INVOKE_FD", format_struct(st))
+      _print_scalars(st.sc)
+      # for these two sc values, write every currently mapped ion buffer to /tmp
+      if st.sc in [0x2030200, 0x3040300]:
+        for handle, mapped in mmaped.items():
+          print(f" buffer {handle} {alloc_sizes[handle]:X}")
+          with open(f"/tmp/buf_{st.sc:X}_{handle}_{alloc_sizes[handle]:X}", "wb") as f: f.write(mapped)
+    else:
+      print(f"{ret} UNPARSED {nr}")
+  else:
+    print("ioctl", f"{idir=} {size=} {itype=} {nr=} {fd=} {ret=}", fn)
+
diff --git a/extra/dsp/preload.c b/extra/dsp/preload.c
new file mode 100644
index 0000000000..9428dc4e2b
--- /dev/null
+++ b/extra/dsp/preload.c
@@ -0,0 +1,86 @@
+// LD_PRELOAD shim: starts a Python interpreter inside the target process and reports every
+// ioctl() the process makes to extra.dsp.hook.handle_ioctl
+#define _GNU_SOURCE // Must be defined before any includes for RTLD_NEXT
+#include <Python.h> // Include Python header (the Python docs ask for it before any standard header)
+#include <dlfcn.h>
+#include <errno.h>
+#include <stdio.h>
+//#include <unistd.h>
+
+// Runs when the library is loaded: start Python and import the hook module
+__attribute__((constructor))
+void preload_init() {
+  Py_Initialize();
+  PyRun_SimpleString("print('hello from c'); import extra.dsp.hook");
+  // Py_Initialize leaves this thread holding the GIL; release it so PyGILState_Ensure in ioctl()
+  // can take it from whichever thread makes the call
+  PyEval_SaveThread();
+}
+
+// Define the original ioctl function pointer
+static int (*real_ioctl)(int fd, unsigned long request, void *arg) = NULL;
+
+// Our custom ioctl hook
+int ioctl(int fd, unsigned long request, void *arg) {
+  // Initialize the real ioctl function pointer on first call
+  if (!real_ioctl) {
+    real_ioctl = dlsym(RTLD_NEXT, "ioctl");
+    if (!real_ioctl) {
+      fprintf(stderr, "Error: Could not find real ioctl\n");
+      return -1;
+    }
+  }
+
+  // Log the call
+  //printf("Hooked ioctl: tid=%d fd=%d, request=0x%lx, arg=%p\n", gettid(), fd, request, arg);
+
+  // Call the original ioctl and remember its errno: the Python code below makes system calls
+  // of its own, and the caller must still see the errno of the ioctl it asked for
+  int ret = real_ioctl(fd, request, arg);
+  int saved_errno = errno;
+
+  // ioctls made before the interpreter is initialized are passed through without reporting
+  if (!Py_IsInitialized()) return ret;
+
+  // Call a Python function from extra.dsp.hook
+  PyObject *pModule, *pFunc, *pArgs, *pValue;
+
+  // Ensure the GIL is held (required for Python calls in multi-threaded apps)
+  PyGILState_STATE gstate = PyGILState_Ensure();
+
+  // Import the module
+  pModule = PyImport_ImportModule("extra.dsp.hook");
+
+  if (pModule != NULL) {
+    // Get the function (assume it’s called "handle_ioctl")
+    pFunc = PyObject_GetAttrString(pModule, "handle_ioctl");
+
+    if (pFunc && PyCallable_Check(pFunc)) {
+      // Build the (fd, request, arg, ret) tuple; "N" gives the tuple our reference to the pointer
+      // object, so nothing is leaked (PyTuple_Pack does not take over the references passed to it)
+      pArgs = Py_BuildValue("(ikNi)", fd, request, PyLong_FromVoidPtr(arg), ret);
+      pValue = pArgs ? PyObject_CallObject(pFunc, pArgs) : NULL;
+      Py_XDECREF(pArgs);
+
+      if (pValue != NULL) {
+        Py_DECREF(pValue);
+      } else {
+        PyErr_Print(); // Print Python error if call fails
+      }
+    } else {
+      if (PyErr_Occurred()) PyErr_Print();
+      fprintf(stderr, "Cannot find function 'handle_ioctl'\n");
+    }
+    Py_XDECREF(pFunc); // pFunc may be NULL, or non-NULL but not callable
+    Py_DECREF(pModule);
+  } else {
+    PyErr_Print();
+    fprintf(stderr, "Failed to load 'extra.dsp.hook'\n");
+  }
+
+  // Release the GIL and hand the ioctl's own errno back to the caller
+  PyGILState_Release(gstate);
+  errno = saved_errno;
+  return ret;
+}
+
diff --git a/extra/dsp/run.py b/extra/dsp/run.py
index 32f81ca956..4293a0372e 100755
--- a/extra/dsp/run.py
+++ b/extra/dsp/run.py
@@ -112,7 +112,7 @@ def install_hook(c_function, python_function):
   return orig_func
 
 libc = ctypes.CDLL(ctypes.util.find_library("libc"))
-install_hook(libc.ioctl, ioctl)
+#install_hook(libc.ioctl, ioctl)
 adsp = ctypes.CDLL(ctypes.util.find_library("adsprpc"))
 
 def send_rpc_invoke(filename):
diff --git a/extra/dsp/snpe.sh b/extra/dsp/snpe.sh
new file mode 100755
index 0000000000..e714a47171
--- /dev/null
+++ b/extra/dsp/snpe.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+echo "building"
+gcc -shared -fPIC -o preload_python.so preload.c -L/usr/local/pyenv/versions/3.11.4/lib -lpython3.11 -I/usr/local/pyenv/versions/3.11.4/include/python3.11
+echo "compiled"
+export LD_LIBRARY_PATH="/usr/local/pyenv/versions/3.11.4/lib;/data/snpe"
+export LD_PRELOAD="$PWD/preload_python.so"
+export PYTHONPATH="/data/tinygrad"
+cd /data/snpe
+#ADSP_LIBRARY_PATH="." strace -f -e ioctl ./snpe-net-run --container MobileNetV2.dlc --input_list hello --use_dsp
+ADSP_LIBRARY_PATH="." ./snpe-net-run --container MobileNetV2.dlc --input_list hello --use_dsp
+
diff --git a/extra/dsp/snpe_logs/dlc_info_2 b/extra/dsp/snpe_logs/dlc_info_2
new file mode 100644
index 0000000000..3f073a1937
--- /dev/null
+++ b/extra/dsp/snpe_logs/dlc_info_2
@@ -0,0 +1,715 @@
+DLC info for: /home/batman/xx/ml_tools/snpe/snpe-1.61.0.3358/mobilenetv2-7.dlc
+Model Version: N/A
+Model Copyright:N/A
+-----------------------------------------------------------------------------------------------------------------------------------------
+| Id | Name | Type | Inputs | Outputs | Out Dims | Runtimes | Parameters |
+-----------------------------------------------------------------------------------------------------------------------------------------
+| 0 | input | data | input | input | 1x224x224x3 | A D G C | input_preprocessing: passthrough |
+| | | | | | | | input_type: image |
+| 1 | Conv_0 | convolutional | input | 474 | 1x112x112x32 | A D G C | padding x: 1 |
+| | | | | | | | padding y: 1 |
+| | | | | | | | padding mode: zero |
+| | | | | | | | stride x: 2 |
+| | | | | | | | stride y: 2 |
+| | | | | | | | num filters: 32 |
+| | | | | | | | kernel: 3x3 |
+| | | | | | | | param count: 896 (0.0257%) |
+| | | | | | | | MACs per inference: 10M (3.6%) |
+| 2 | Clip_1 | neuron | 474 | 317 | 1x112x112x32 | A D G C | a: 0 |
+| | | | | | | | b: 0 |
+| | | | | | | | min_clamp: 0 |
+| | | | | | | | max_clamp: 6 |
+| | | | | | | | func: relu_min_max |
+| 3 | Conv_2 | convolutional | 317 | 477 | 1x112x112x32 | A D G C | padding x: 1 |
+| | | | | | | | padding y: 1 |
+| | | | | | | | padding mode: zero |
+| | | | | | | | stride x: 1 |
+| | | | | | | | stride y: 1 |
+| | | | | | | | num filters: 32 |
+| | | | | | | | kernel: 3x3 |
+| | | | | | | | groups: 32 |
+| | | | | | | | param count: 320 (0.00917%) |
+| | | | | | | | MACs per inference: 3M (1.2%) |
+| 4 | Clip_3 | neuron | 477 | 320 | 1x112x112x32 | A D G C | a: 0 |
+| | | | | | | | b: 0 |
+| | | | | | | | min_clamp: 0 |
+| | | | | | 
| | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 5 | Conv_4 | convolutional | 320 | 480 | 1x112x112x16 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 16 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 528 (0.0151%) | +| | | | | | | | MACs per inference: 6M (2.13%) | +| 6 | Conv_5 | convolutional | 480 | 483 | 1x112x112x96 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 96 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 1k (0.0468%) | +| | | | | | | | MACs per inference: 19M (6.4%) | +| 7 | Clip_6 | neuron | 483 | 325 | 1x112x112x96 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 8 | Conv_7 | convolutional | 325 | 486 | 1x56x56x96 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 2 | +| | | | | | | | stride y: 2 | +| | | | | | | | num filters: 96 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 96 | +| | | | | | | | param count: 960 (0.0275%) | +| | | | | | | | MACs per inference: 2M (0.9%) | +| 9 | Clip_8 | neuron | 486 | 328 | 1x56x56x96 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 10 | Conv_9 | convolutional | 328 | 489 | 1x56x56x24 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 24 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 2k (0.0667%) | +| | | | | | | | MACs per inference: 7M (2.4%) | +| 11 | Conv_10 | 
convolutional | 489 | 492 | 1x56x56x144 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 144 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 3k (0.103%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 12 | Clip_11 | neuron | 492 | 333 | 1x56x56x144 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 13 | Conv_12 | convolutional | 333 | 495 | 1x56x56x144 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 144 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 144 | +| | | | | | | | param count: 1k (0.0413%) | +| | | | | | | | MACs per inference: 4M (1.35%) | +| 14 | Clip_13 | neuron | 495 | 336 | 1x56x56x144 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 15 | Conv_14 | convolutional | 336 | 498 | 1x56x56x24 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 24 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 3k (0.0998%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 16 | Add_15 | elementwise_binary_op | 489 | 339 | 1x56x56x24 | A D G C | operation: sum | +| | | | 498 | | | | MACs per inference: 75k (0.025%) | +| 17 | Conv_16 | convolutional | 339 | 501 | 1x56x56x144 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 144 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param 
count: 3k (0.103%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 18 | Clip_17 | neuron | 501 | 342 | 1x56x56x144 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 19 | Conv_18 | convolutional | 342 | 504 | 1x28x28x144 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 2 | +| | | | | | | | stride y: 2 | +| | | | | | | | num filters: 144 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 144 | +| | | | | | | | param count: 1k (0.0413%) | +| | | | | | | | MACs per inference: 1M (0.338%) | +| 20 | Clip_19 | neuron | 504 | 345 | 1x28x28x144 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 21 | Conv_20 | convolutional | 345 | 507 | 1x28x28x32 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 32 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 4k (0.133%) | +| | | | | | | | MACs per inference: 3M (1.2%) | +| 22 | Conv_21 | convolutional | 507 | 510 | 1x28x28x192 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 6k (0.182%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 23 | Clip_22 | neuron | 510 | 350 | 1x28x28x192 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 24 | Conv_23 | convolutional | 350 | 513 | 1x28x28x192 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | 
stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 192 | +| | | | | | | | param count: 1k (0.055%) | +| | | | | | | | MACs per inference: 1M (0.45%) | +| 25 | Clip_24 | neuron | 513 | 353 | 1x28x28x192 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 26 | Conv_25 | convolutional | 353 | 516 | 1x28x28x32 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 32 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 6k (0.177%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 27 | Add_26 | elementwise_binary_op | 507 | 356 | 1x28x28x32 | A D G C | operation: sum | +| | | | 516 | | | | MACs per inference: 25k (0.00833%) | +| 28 | Conv_27 | convolutional | 356 | 519 | 1x28x28x192 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 6k (0.182%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 29 | Clip_28 | neuron | 519 | 359 | 1x28x28x192 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 30 | Conv_29 | convolutional | 359 | 522 | 1x28x28x192 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 192 | +| | | | | | | | param count: 1k (0.055%) | +| | | | | | | | MACs per inference: 1M (0.45%) | +| 31 | Clip_30 | neuron | 522 | 362 | 1x28x28x192 | A D G 
C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 32 | Conv_31 | convolutional | 362 | 525 | 1x28x28x32 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 32 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 6k (0.177%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 33 | Add_32 | elementwise_binary_op | 356 | 365 | 1x28x28x32 | A D G C | operation: sum | +| | | | 525 | | | | MACs per inference: 25k (0.00833%) | +| 34 | Conv_33 | convolutional | 365 | 528 | 1x28x28x192 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 6k (0.182%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 35 | Clip_34 | neuron | 528 | 368 | 1x28x28x192 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 36 | Conv_35 | convolutional | 368 | 531 | 1x14x14x192 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 2 | +| | | | | | | | stride y: 2 | +| | | | | | | | num filters: 192 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 192 | +| | | | | | | | param count: 1k (0.055%) | +| | | | | | | | MACs per inference: 338k (0.113%) | +| 37 | Clip_36 | neuron | 531 | 371 | 1x14x14x192 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 38 | Conv_37 | convolutional | 371 | 534 | 1x14x14x64 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | 
+| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 64 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 12k (0.354%) | +| | | | | | | | MACs per inference: 2M (0.8%) | +| 39 | Conv_38 | convolutional | 534 | 537 | 1x14x14x384 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.716%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 40 | Clip_39 | neuron | 537 | 376 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 41 | Conv_40 | convolutional | 376 | 540 | 1x14x14x384 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 384 | +| | | | | | | | param count: 3k (0.11%) | +| | | | | | | | MACs per inference: 677k (0.225%) | +| 42 | Clip_41 | neuron | 540 | 379 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 43 | Conv_42 | convolutional | 379 | 543 | 1x14x14x64 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 64 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.706%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 44 | Add_43 | elementwise_binary_op | 534 | 382 | 1x14x14x64 | A D G C | operation: sum | +| | | | 543 | | | | MACs per inference: 12k (0.00417%) | +| 45 | Conv_44 | convolutional | 382 | 546 | 1x14x14x384 | A D G C | 
padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.716%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 46 | Clip_45 | neuron | 546 | 385 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 47 | Conv_46 | convolutional | 385 | 549 | 1x14x14x384 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 384 | +| | | | | | | | param count: 3k (0.11%) | +| | | | | | | | MACs per inference: 677k (0.225%) | +| 48 | Clip_47 | neuron | 549 | 388 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 49 | Conv_48 | convolutional | 388 | 552 | 1x14x14x64 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 64 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.706%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 50 | Add_49 | elementwise_binary_op | 382 | 391 | 1x14x14x64 | A D G C | operation: sum | +| | | | 552 | | | | MACs per inference: 12k (0.00417%) | +| 51 | Conv_50 | convolutional | 391 | 555 | 1x14x14x384 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.716%) | +| | | | | | | | MACs per 
inference: 4M (1.6%) | +| 52 | Clip_51 | neuron | 555 | 394 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 53 | Conv_52 | convolutional | 394 | 558 | 1x14x14x384 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 384 | +| | | | | | | | param count: 3k (0.11%) | +| | | | | | | | MACs per inference: 677k (0.225%) | +| 54 | Clip_53 | neuron | 558 | 397 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 55 | Conv_54 | convolutional | 397 | 561 | 1x14x14x64 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 64 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.706%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 56 | Add_55 | elementwise_binary_op | 391 | 400 | 1x14x14x64 | A D G C | operation: sum | +| | | | 561 | | | | MACs per inference: 12k (0.00417%) | +| 57 | Conv_56 | convolutional | 400 | 564 | 1x14x14x384 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 24k (0.716%) | +| | | | | | | | MACs per inference: 4M (1.6%) | +| 58 | Clip_57 | neuron | 564 | 403 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 59 | Conv_58 | convolutional | 403 | 567 | 1x14x14x384 | A D G C | 
padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 384 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 384 | +| | | | | | | | param count: 3k (0.11%) | +| | | | | | | | MACs per inference: 677k (0.225%) | +| 60 | Clip_59 | neuron | 567 | 406 | 1x14x14x384 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 61 | Conv_60 | convolutional | 406 | 570 | 1x14x14x96 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 96 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 36k (1.06%) | +| | | | | | | | MACs per inference: 7M (2.4%) | +| 62 | Conv_61 | convolutional | 570 | 573 | 1x14x14x576 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 55k (1.6%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 63 | Clip_62 | neuron | 573 | 411 | 1x14x14x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 64 | Conv_63 | convolutional | 411 | 576 | 1x14x14x576 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 576 | +| | | | | | | | param count: 5k (0.165%) | +| | | | | | | | MACs per inference: 1M (0.338%) | +| 65 | Clip_64 | neuron | 576 | 414 | 1x14x14x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | 
| | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 66 | Conv_65 | convolutional | 414 | 579 | 1x14x14x96 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 96 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 55k (1.59%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 67 | Add_66 | elementwise_binary_op | 570 | 417 | 1x14x14x96 | A D G C | operation: sum | +| | | | 579 | | | | MACs per inference: 18k (0.00625%) | +| 68 | Conv_67 | convolutional | 417 | 582 | 1x14x14x576 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 55k (1.6%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 69 | Clip_68 | neuron | 582 | 420 | 1x14x14x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 70 | Conv_69 | convolutional | 420 | 585 | 1x14x14x576 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 576 | +| | | | | | | | param count: 5k (0.165%) | +| | | | | | | | MACs per inference: 1M (0.338%) | +| 71 | Clip_70 | neuron | 585 | 423 | 1x14x14x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 72 | Conv_71 | convolutional | 423 | 588 | 1x14x14x96 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | 
stride y: 1 | +| | | | | | | | num filters: 96 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 55k (1.59%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 73 | Add_72 | elementwise_binary_op | 417 | 426 | 1x14x14x96 | A D G C | operation: sum | +| | | | 588 | | | | MACs per inference: 18k (0.00625%) | +| 74 | Conv_73 | convolutional | 426 | 591 | 1x14x14x576 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 55k (1.6%) | +| | | | | | | | MACs per inference: 10M (3.6%) | +| 75 | Clip_74 | neuron | 591 | 429 | 1x14x14x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 76 | Conv_75 | convolutional | 429 | 594 | 1x7x7x576 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 2 | +| | | | | | | | stride y: 2 | +| | | | | | | | num filters: 576 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 576 | +| | | | | | | | param count: 5k (0.165%) | +| | | | | | | | MACs per inference: 254k (0.0844%) | +| 77 | Clip_76 | neuron | 594 | 432 | 1x7x7x576 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 78 | Conv_77 | convolutional | 432 | 597 | 1x7x7x160 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 160 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 92k (2.65%) | +| | | | | | | | MACs per inference: 4M (1.5%) | +| 79 | Conv_78 | convolutional | 597 | 600 | 1x7x7x960 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | 
| | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 154k (4.43%) | +| | | | | | | | MACs per inference: 7M (2.5%) | +| 80 | Clip_79 | neuron | 600 | 437 | 1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 81 | Conv_80 | convolutional | 437 | 603 | 1x7x7x960 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 960 | +| | | | | | | | param count: 9k (0.275%) | +| | | | | | | | MACs per inference: 423k (0.141%) | +| 82 | Clip_81 | neuron | 603 | 440 | 1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 83 | Conv_82 | convolutional | 440 | 606 | 1x7x7x160 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 160 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 153k (4.41%) | +| | | | | | | | MACs per inference: 7M (2.5%) | +| 84 | Add_83 | elementwise_binary_op | 597 | 443 | 1x7x7x160 | A D G C | operation: sum | +| | | | 606 | | | | MACs per inference: 7k (0.0026%) | +| 85 | Conv_84 | convolutional | 443 | 609 | 1x7x7x960 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 154k (4.43%) | +| | | | | | | | MACs per inference: 7M (2.5%) | +| 86 | Clip_85 | neuron | 609 | 446 | 
1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 87 | Conv_86 | convolutional | 446 | 612 | 1x7x7x960 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 960 | +| | | | | | | | param count: 9k (0.275%) | +| | | | | | | | MACs per inference: 423k (0.141%) | +| 88 | Clip_87 | neuron | 612 | 449 | 1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 89 | Conv_88 | convolutional | 449 | 615 | 1x7x7x160 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 160 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 153k (4.41%) | +| | | | | | | | MACs per inference: 7M (2.5%) | +| 90 | Add_89 | elementwise_binary_op | 443 | 452 | 1x7x7x160 | A D G C | operation: sum | +| | | | 615 | | | | MACs per inference: 7k (0.0026%) | +| 91 | Conv_90 | convolutional | 452 | 618 | 1x7x7x960 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 154k (4.43%) | +| | | | | | | | MACs per inference: 7M (2.5%) | +| 92 | Clip_91 | neuron | 618 | 455 | 1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 93 | Conv_92 | convolutional | 455 | 621 | 1x7x7x960 | A D G C | padding x: 1 | +| | | | | | | | padding y: 1 | +| | | | | | | | padding mode: 
zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 960 | +| | | | | | | | kernel: 3x3 | +| | | | | | | | groups: 960 | +| | | | | | | | param count: 9k (0.275%) | +| | | | | | | | MACs per inference: 423k (0.141%) | +| 94 | Clip_93 | neuron | 621 | 458 | 1x7x7x960 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 95 | Conv_94 | convolutional | 458 | 624 | 1x7x7x320 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 320 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 307k (8.82%) | +| | | | | | | | MACs per inference: 15M (5%) | +| 96 | Conv_95 | convolutional | 624 | 627 | 1x7x7x1280 | A D G C | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | padding mode: zero | +| | | | | | | | stride x: 1 | +| | | | | | | | stride y: 1 | +| | | | | | | | num filters: 1280 | +| | | | | | | | kernel: 1x1 | +| | | | | | | | param count: 410k (11.8%) | +| | | | | | | | MACs per inference: 20M (6.67%) | +| 97 | Clip_96 | neuron | 627 | 463 | 1x7x7x1280 | A D G C | a: 0 | +| | | | | | | | b: 0 | +| | | | | | | | min_clamp: 0 | +| | | | | | | | max_clamp: 6 | +| | | | | | | | func: relu_min_max | +| 98 | GlobalAveragePool_97 | pooling | 463 | 464 | 1x1x1x1280 | A D G C | pool size x: 7 | +| | | | | | | | pool size y: 7 | +| | | | | | | | stride x: 7 | +| | | | | | | | stride y: 7 | +| | | | | | | | padding x: 0 | +| | | | | | | | padding y: 0 | +| | | | | | | | pool_type: POOL_AVG | +| | | | | | | | MACs per inference: 62k (0.0208%) | +| 99 | 464.ncs | permute | 464 | 464.ncs | 1x1280x1x1 | A D G C | permute_order: [0, 3, 1, 2] | +| 100 | Gemm_104 | fully_connected | 464.ncs | output | 1x1000 | A D G C | param count: 1M (36.7%) | +| | | | | | | | MACs per inference: 1M 
(0.425%) | +----------------------------------------------------------------------------------------------------------------------------------------- +Note: The supported runtimes column assumes a processor target of Snapdragon 835 (8998) +Key : A:AIP + D:DSP + G:GPU + C:CPU + +Total parameters: 3487816 (13 MB assuming single precision float) +Total MACs per inference: 301M (100%) +Converter command: snpe-onnx-to-dlc adjust_nms_features_dims=False align_matmul_ranks=True copyright_file=None custom_op_config_paths=None debug=-1 disable_batchnorm_folding=False disable_chaining_eltwise_ops=False dry_run=None dumpIR=False dump_inferred_model=False dump_value_info=False enable_strict_validation=False extract_color_transform=False force_prune_cast_ops=True handle_gather_negative_indices=False inject_cast_for_gather=False input_dim=[['input', '1,3,224,224']] input_dtype=[] input_encoding=[] input_layout=[] input_type=[['input', 'image']] keep_disconnected_nodes=False keep_quant_nodes=False match_caffe_ssd_to_tf=False model_version=None no_simplification=False out_names=['output'] perform_axes_to_spatial_first_order=True prepare_inputs_as_params=True preprocess_lstm_ops=False preprocess_roi_pool_inputs=False quantization_overrides= squash_box_decoder=False unroll_lstm_time_steps=False use_convert_quantization_nodes=True validation_target=[] +Quantizer command: N/A +DLC created with converter version: 1.61.0.3358 +Layers used by DLC: CONVOLUTIONAL, DATA, ELEMENTWISE_BINARY_OP_SUM, FULLY_CONNECTED, NEURON_RELU_MIN_MAX, PERMUTE, POOLING +Est. 
Steady-State Memory Needed to Run: 164.3 MiB +----------------------------------------------------------------------------------------------------------------------------------------- + diff --git a/extra/dsp/snpe_logs/high_perf_2 b/extra/dsp/snpe_logs/high_perf_2 new file mode 100644 index 0000000000..b21ec979c2 --- /dev/null +++ b/extra/dsp/snpe_logs/high_perf_2 @@ -0,0 +1,131 @@ +Log File Created: Tue Mar 18 01:33:12 2025 +Time Scale: 1e-06 +Epoch Timestamp: 1742286792883569 Steady Clock Timestamp: 75586845756 +Software library version: 1.61.0.3358 + +Dnn Runtime Load/Deserialize/Create/De-Init Statistics: +-------------------------------------------------- +Load: 333 us +Deserialize: 32452 us +Create: 143084 us + +Init: 178071 us +De-Init: 16710 us + +Create Network(s): 86850 us +RPC Init Time: 43213 us +Snpe Accelerator Init Time: 42154 us +Accelerator Init Time: 39189 us + +Average SNPE Statistics: +------------------------------ +Total Inference Time: 11868 us +Forward Propagate Time: 11816 us +RPC Execute Time: 9810 us +Snpe Accelerator Time: 9129 us +Accelerator Time: 8701 us +Misc Accelerator Time: 10 us + +Layer Times: +--------------- +0: 42 us : DSP +1: 0 us : DSP +2: 254 us : DSP +3: 0 us : DSP +4: 153 us : DSP +5: 295 us : DSP +6: 0 us : DSP +7: 287 us : DSP +8: 0 us : DSP +9: 162 us : DSP +10: 210 us : DSP +11: 0 us : DSP +12: 138 us : DSP +13: 0 us : DSP +14: 176 us : DSP +15: 293 us : DSP +16: 60 us : DSP +17: 0 us : DSP +18: 157 us : DSP +19: 0 us : DSP +20: 112 us : DSP +21: 134 us : DSP +22: 0 us : DSP +23: 81 us : DSP +24: 0 us : DSP +25: 104 us : DSP +26: 130 us : DSP +27: 37 us : DSP +28: 0 us : DSP +29: 81 us : DSP +30: 0 us : DSP +31: 87 us : DSP +32: 124 us : DSP +33: 30 us : DSP +34: 0 us : DSP +35: 87 us : DSP +36: 0 us : DSP +37: 63 us : DSP +38: 74 us : DSP +39: 0 us : DSP +40: 102 us : DSP +41: 0 us : DSP +42: 82 us : DSP +43: 95 us : DSP +44: 29 us : DSP +45: 0 us : DSP +46: 112 us : DSP +47: 0 us : DSP +48: 88 us : DSP +49: 96 us : 
DSP +50: 25 us : DSP +51: 0 us : DSP +52: 103 us : DSP +53: 0 us : DSP +54: 80 us : DSP +55: 100 us : DSP +56: 26 us : DSP +57: 0 us : DSP +58: 102 us : DSP +59: 0 us : DSP +60: 85 us : DSP +61: 129 us : DSP +62: 0 us : DSP +63: 155 us : DSP +64: 0 us : DSP +65: 113 us : DSP +66: 194 us : DSP +67: 34 us : DSP +68: 0 us : DSP +69: 157 us : DSP +70: 0 us : DSP +71: 120 us : DSP +72: 198 us : DSP +73: 34 us : DSP +74: 0 us : DSP +75: 155 us : DSP +76: 0 us : DSP +77: 101 us : DSP +78: 121 us : DSP +79: 0 us : DSP +80: 256 us : DSP +81: 0 us : DSP +82: 134 us : DSP +83: 159 us : DSP +84: 31 us : DSP +85: 0 us : DSP +86: 199 us : DSP +87: 0 us : DSP +88: 142 us : DSP +89: 152 us : DSP +90: 26 us : DSP +91: 0 us : DSP +92: 202 us : DSP +93: 0 us : DSP +94: 143 us : DSP +95: 278 us : DSP +96: 0 us : DSP +97: 316 us : DSP +98: 40 us : DSP +99: 12 us : DSP +100: 199 us : DSP diff --git a/extra/dsp/snpe_logs/parse.py b/extra/dsp/snpe_logs/parse.py new file mode 100644 index 0000000000..b3d3513b21 --- /dev/null +++ b/extra/dsp/snpe_logs/parse.py @@ -0,0 +1,21 @@ +di = open("dlc_info_2").read().split("\n") +layers = {} +for l in di: + if not l.startswith("| "): continue + if l.startswith("| |"): continue + ll = [x.strip() for x in l.split("|")] + if ll[1] == "Id": continue + layers[int(ll[1])] = (ll[2], ll[6]) +hp = open("high_perf_2").read().split("Layer Times:")[1].strip().split("\n")[2:] +sl = 1 +tms = 0 +for l in hp: + kk, tm, _ = l.split(" ", 2) + tm = int(tm) + lnum = int(kk.strip(":")) + if int(tm) != 0: + print(f"{sl:2d} {tm:4d} us {layers[lnum]}") + tms += tm + sl += 1 +print(f"total time, {tms/1000:.2f} ms") + diff --git a/extra/onnx.py b/extra/onnx.py index 5a6a64534a..dc723ddfe1 100644 --- a/extra/onnx.py +++ b/extra/onnx.py @@ -728,9 +728,12 @@ def get_onnx_ops(): y_scale, y_zero_point = _prepare_quantize(x, y_scale, y_zero_point, axis, block_size) if out_dtype == dtypes.uchar: # this appears to work in practice, at least for uchar out_dtype. 
it folds with the quantize stuff - return _clamp_cast((x / y_scale + 0.4999999 + y_zero_point).int(), out_dtype).contiguous() + ret = _clamp_cast((x / y_scale + 0.4999999 + y_zero_point).int(), out_dtype) else: - return _clamp_cast(((x / y_scale).round() + y_zero_point), out_dtype).contiguous() + ret = _clamp_cast(((x / y_scale).round() + y_zero_point), out_dtype) + # you need both NHWC=1 DONT_GROUP_REDUCES=1 for this to work + if getenv("NHWC") and len(ret.shape) == 4: return ret.permute(0,2,3,1).contiguous().permute(0,3,1,2) + return ret.contiguous() def DynamicQuantizeLinear(x: Tensor): # only support uint8 diff --git a/extra/replay_pkl.py b/extra/replay_pkl.py index b22a851725..e550585f87 100644 --- a/extra/replay_pkl.py +++ b/extra/replay_pkl.py @@ -23,6 +23,8 @@ if __name__ == "__main__": p: ProgramSpec = ei.prg.p k = Kernel(p.ast, Device["DSP"].renderer) if not getenv("NOOPT"): + # only NCHW + """ if knum in [6,7,9,11]: k.apply_opt(Opt(OptOps.PADTO, 1, 128)) k.apply_opt(Opt(OptOps.UPCAST, 1, 128)) @@ -48,6 +50,15 @@ if __name__ == "__main__": k.apply_opt(Opt(OptOps.UPCAST, 1, 128)) else: k.hand_coded_optimizations() + """ + if knum == 3: + k.apply_opt(Opt(OptOps.UNROLL, 0, 0)) + k.apply_opt(Opt(OptOps.UPCAST, 1, 16)) + k.apply_opt(Opt(OptOps.UPCAST, 0, 128//16)) + #k.apply_opt(Opt(OptOps.UPCAST, 0, 8)) + pass + else: + k.hand_coded_optimizations() #if knum in [5]: k.apply_opt(Opt(OptOps.UPCAST, 1, 2)) p2 = k.to_program() new_ei = replace(ei, prg=CompiledRunner(p2), bufs=[Buffer("DSP", 1024+b.size*2, b.dtype).view(b.size, b.dtype, 512) for b in ei.bufs])