dsp stuff / sniff ioctls from snpe (#9490)

* sniff ioctls from snpe

* dump input buffers

* snpe logs from dsp

* NHWC support

* knum 3

* this run?

* revert those

---------

Co-authored-by: Comma Device <device@comma.ai>
This commit is contained in:
George Hotz
2025-03-20 10:38:23 +08:00
committed by GitHub
parent 2223b93338
commit 68053d0510
9 changed files with 1075 additions and 3 deletions

101
extra/dsp/hook.py Normal file
View File

@@ -0,0 +1,101 @@
import os
print("from import")
# Drop LD_PRELOAD from this process's environment before anything else is imported.
# NOTE(review): presumably so that processes spawned later (e.g. by ctypes.util.find_library)
# do not load the preload library again - confirm.
# pop() instead of del: the module must still import when LD_PRELOAD is not set.
os.environ.pop("LD_PRELOAD", None)
import ctypes, ctypes.util
from extra.dsp.run import install_hook, ioctl, libc, get_struct, qcom_dsp, format_struct, to_mv, hexdump
@ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long)
def _mmap(addr, length, prot, flags, fd, offset):
  """Replacement for libc mmap: forward the call to the saved original and log it.

  Relies on a module-level ``orig_mmap_mv`` whose address is callable as the
  original mmap. It is only assigned by the (currently commented-out)
  ``install_hook(libc.mmap, _mmap)`` line, so this callback is not usable
  until that hook is re-enabled.
  """
  arg_types = (ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long)
  prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, *arg_types)
  # Build a callable around the saved original entry point
  real_mmap = prototype(ctypes.addressof(orig_mmap_mv))
  call_args = (addr, length, prot, flags, fd, offset)
  ret = real_mmap(*call_args)
  # ll = os.readlink(f"/proc/self/fd/{fd}") if fd >= 0 else ""
  print(f"mmap {addr=}, {length=}, {prot=}, {flags=}, {fd=}, {offset=} {ret=}")
  return ret
# Python-side hook installation is disabled; handle_ioctl below is meant to be called
# with (fd, request, argp, ret) for each intercepted ioctl instead.
#install_hook(libc.ioctl, ioctl)
#orig_mmap_mv = install_hook(libc.mmap, _mmap)
print("import done")
import mmap
# ion handle -> allocation length, recorded by handle_ioctl on ION_IOC_ALLOC
alloc_sizes = {}
# ion handle -> mmap.mmap object over the buffer, created by handle_ioctl on ION_IOC_MAP
mmaped = {}
def _fd_path(fd):
  """Return the target of /proc/self/fd/<fd>, or a placeholder string when it cannot be read."""
  try:
    return os.readlink(f"/proc/self/fd/{fd}")
  except OSError:
    # closed/invalid descriptor (or no /proc): keep logging instead of raising
    return f"<unresolved fd {fd}>"

def _print_scalars(sc):
  """Print the fields packed into a FastRPC scalars word."""
  # 0xFF000000 = Method index and attribute (the highest byte)
  # 0x00FF0000 = Number of input arguments
  # 0x0000FF00 = Number of output arguments
  # 0x000000F0 = Number of input handles
  # 0x0000000F = Number of output handles
  method = (sc>>24) & 0xFF
  in_args = (sc>>16) & 0xFF
  out_args = (sc>>8) & 0xFF
  in_h = (sc>>4) & 0xF
  out_h = (sc>>0) & 0xF
  print(f"\tm:{method} ia:{in_args} oa:{out_args} ih:{in_h} oh:{out_h}")

def handle_ioctl(fd, request, argp, ret):
  """Log one intercepted ioctl and keep track of ion buffers.

  Args:
    fd: file descriptor the ioctl was issued on; resolved to a path through /proc/self/fd.
    request: ioctl request code; split into direction (bits 30-31), size (bits 16-29),
      type (bits 8-15) and number (bits 0-7).
    argp: integer address of the ioctl argument, read through ctypes.
    ret: result of the ioctl, printed in front of every decoded line.

  Side effects:
    Prints a decoded line per call. For /dev/ion it records allocation lengths in
    ``alloc_sizes`` and maps buffers into ``mmaped``. For FASTRPC_IOCTL_INVOKE_FD calls
    whose scalars equal 0x2030200 or 0x3040300 it writes every mapped buffer to
    /tmp/buf_<sc>_<handle>_<len>.
  """
  fn = _fd_path(fd)
  idir, size, itype, nr = (request>>30), (request>>16)&0x3FFF, (request>>8)&0xFF, request&0xFF

  if fn == "/dev/ion":
    if nr == 0:
      st = get_struct(argp, qcom_dsp.struct_ion_allocation_data)
      print(ret, "ION_IOC_ALLOC", format_struct(st))
      alloc_sizes[st.handle] = st.len
    elif nr == 1:
      st = get_struct(argp, qcom_dsp.struct_ion_handle_data)
      print(ret, "ION_IOC_FREE", format_struct(st))
      # forget the handle in both tables; either may be missing
      alloc_sizes.pop(st.handle, None)
      mmaped.pop(st.handle, None)
    elif nr == 2:
      st = get_struct(argp, qcom_dsp.struct_ion_fd_data)
      print(ret, "ION_IOC_MAP", format_struct(st))
      if st.handle in alloc_sizes:
        mmaped[st.handle] = mmap.mmap(st.fd, alloc_sizes[st.handle])
      else:
        # the length is only known for handles seen in ION_IOC_ALLOC
        print(f"\tunknown handle {st.handle}, buffer not mapped")
  elif fn == "/dev/adsprpc-smd":
    assert chr(itype) == 'R'
    if nr == 8:
      st = ctypes.c_uint32.from_address(argp)
      print(ret, "FASTRPC_IOCTL_GETINFO", st.value)
    elif nr == 2:
      st = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_mmap)
      print(ret, "FASTRPC_IOCTL_MMAP", format_struct(st))
    elif nr == 1:
      # https://research.checkpoint.com/2021/pwn2own-qualcomm-dsp/
      st = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke)
      print(ret, "FASTRPC_IOCTL_INVOKE", format_struct(st))
      _print_scalars(st.sc)
      # Disabled per-argument dump, kept for reference:
      #   for arg in range(in_args+out_args):
      #     print(arg, format_struct(st.pra[arg]))
      #     if st.pra[arg].buf.pv is not None:
      #       hexdump(to_mv(st.pra[arg].buf.pv, st.pra[arg].buf.len)[:0x40])
    elif nr == 6:
      ini = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_init)
      print(ret, "FASTRPC_IOCTL_INIT", format_struct(ini))
      print(_fd_path(ini.filefd))
      # print(bytearray(to_mv(ini.file, ini.filelen)))
    elif nr == 7:
      print(ret, "FASTRPC_IOCTL_INVOKE_ATTRS", format_struct(get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke_attrs)))
    elif nr == 12:
      print(ret, "FASTRPC_IOCTL_CONTROL", format_struct(get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_control)))
    elif nr == 4:
      st_fd = get_struct(argp, qcom_dsp.struct_fastrpc_ioctl_invoke_fd)
      st = st_fd.inv
      print(ret, "FASTRPC_IOCTL_INVOKE_FD", format_struct(st))
      _print_scalars(st.sc)
      # NOTE(review): these two scalars values are presumably the calls whose buffers are of interest - confirm
      if st.sc in [0x2030200, 0x3040300]:
        for handle, mapped in mmaped.items():
          print(f" buffer {handle} {alloc_sizes[handle]:X}")
          with open(f"/tmp/buf_{st.sc:X}_{handle}_{alloc_sizes[handle]:X}", "wb") as f: f.write(mapped)
    else:
      print(f"{ret} UNPARSED {nr}")
  else:
    print("ioctl", f"{idir=} {size=} {itype=} {nr=} {fd=} {ret=}", fn)

79
extra/dsp/preload.c Normal file
View File

@@ -0,0 +1,79 @@
// Library constructor: starts the embedded Python interpreter and imports the
// hook module so that extra.dsp.hook is loaded before any ioctl is reported.
__attribute__((constructor))
void preload_init() {
  static const char bootstrap[] = "print('hello from c'); import extra.dsp.hook";

  Py_Initialize();
  PyRun_SimpleString(bootstrap);
}
#define _GNU_SOURCE // Must be defined before any includes for RTLD_NEXT
#include <errno.h>
#include <stdio.h>
#include <dlfcn.h>
#include <Python.h> // Include Python header
//#include <sys/ioctl.h>
// Pointer to the next "ioctl" in symbol lookup order; resolved lazily on the
// first call of the hook below.
static int (*real_ioctl)(int fd, unsigned long request, void *arg) = NULL;

// Our custom ioctl hook.
//
// Forwards the call to the real ioctl, then reports (fd, request, arg, ret)
// to the Python function extra.dsp.hook.handle_ioctl. The return value and
// errno of the real ioctl are handed back to the caller unchanged, whatever
// happens on the Python side.
int ioctl(int fd, unsigned long request, void *arg) {
  // Initialize the real ioctl function pointer on first call
  if (!real_ioctl) {
    real_ioctl = dlsym(RTLD_NEXT, "ioctl");
    if (!real_ioctl) {
      fprintf(stderr, "Error: Could not find real ioctl\n");
      return -1;
    }
  }

  // No interpreter to report to yet (e.g. called before/while Py_Initialize runs)
  if (!Py_IsInitialized()) return real_ioctl(fd, request, arg);

  // Log the call
  //printf("Hooked ioctl: tid=%d fd=%d, request=0x%lx, arg=%p\n", gettid(), fd, request, arg);

  // NOTE(review): GIL acquisition is left disabled as in the original. CPython
  // requires the calling thread to hold the GIL, so calls arriving on threads
  // other than the one that initialized Python are presumably unsafe - confirm
  // before re-enabling (the initializing thread would also have to release it).
  //PyGILState_STATE gstate = PyGILState_Ensure();

  // Import the module (a cached lookup once the first import has succeeded)
  PyObject *pName = PyUnicode_FromString("extra.dsp.hook");
  PyObject *pModule = pName ? PyImport_Import(pName) : NULL;
  Py_XDECREF(pName);

  // Call the original ioctl and remember its errno: the Python calls below
  // may overwrite it, and callers inspect errno when ret is -1.
  int ret = real_ioctl(fd, request, arg);
  int saved_errno = errno;

  if (pModule != NULL) {
    // Get the function (assume its called "handle_ioctl")
    PyObject *pFunc = PyObject_GetAttrString(pModule, "handle_ioctl");
    if (pFunc && PyCallable_Check(pFunc)) {
      // Create arguments tuple (fd, request, arg, ret). Py_BuildValue owns the
      // values it creates and "N" takes over the reference to the pointer
      // object, so nothing is leaked per call.
      PyObject *pArgs = Py_BuildValue("(ikNi)", fd, request, PyLong_FromVoidPtr(arg), ret);
      PyObject *pValue = pArgs ? PyObject_CallObject(pFunc, pArgs) : NULL;
      Py_XDECREF(pArgs);
      if (pValue != NULL) {
        Py_DECREF(pValue);
      } else {
        PyErr_Print(); // Print Python error if call fails
      }
    } else {
      if (PyErr_Occurred()) PyErr_Print();
      fprintf(stderr, "Cannot find function 'handle_ioctl'\n");
    }
    // pFunc may be NULL (missing attribute) or a non-callable object
    Py_XDECREF(pFunc);
    Py_DECREF(pModule);
  } else {
    PyErr_Print();
    fprintf(stderr, "Failed to load 'extra.dsp.hook'\n");
  }

  // Release the GIL
  //PyGILState_Release(gstate);

  errno = saved_errno;
  return ret;
}

View File

@@ -112,7 +112,7 @@ def install_hook(c_function, python_function):
return orig_func
libc = ctypes.CDLL(ctypes.util.find_library("libc"))
install_hook(libc.ioctl, ioctl)
#install_hook(libc.ioctl, ioctl)
adsp = ctypes.CDLL(ctypes.util.find_library("adsprpc"))
def send_rpc_invoke(filename):

11
extra/dsp/snpe.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash -e
# Build the Python-embedding preload library next to this script, then run
# snpe-net-run on the DSP with the library preloaded.
set -e  # the shebang flag is ignored when the script is started as `bash snpe.sh`

# Resolve paths from the script location so the build works from any directory
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PY_PREFIX="/usr/local/pyenv/versions/3.11.4"

echo "building"
gcc -shared -fPIC -o "$SCRIPT_DIR/preload_python.so" "$SCRIPT_DIR/preload.c" -L"$PY_PREFIX/lib" -lpython3.11 -I"$PY_PREFIX/include/python3.11"
echo "compiled"

# ':' is the conventional separator for library search paths
export LD_LIBRARY_PATH="$PY_PREFIX/lib:/data/snpe"
export LD_PRELOAD="$SCRIPT_DIR/preload_python.so"
export PYTHONPATH="/data/tinygrad"
cd /data/snpe
#ADSP_LIBRARY_PATH="." strace -f -e ioctl ./snpe-net-run --container MobileNetV2.dlc --input_list hello --use_dsp
ADSP_LIBRARY_PATH="." ./snpe-net-run --container MobileNetV2.dlc --input_list hello --use_dsp

View File

@@ -0,0 +1,715 @@
DLC info for: /home/batman/xx/ml_tools/snpe/snpe-1.61.0.3358/mobilenetv2-7.dlc
Model Version: N/A
Model Copyright:N/A
-----------------------------------------------------------------------------------------------------------------------------------------
| Id | Name | Type | Inputs | Outputs | Out Dims | Runtimes | Parameters |
-----------------------------------------------------------------------------------------------------------------------------------------
| 0 | input | data | input | input | 1x224x224x3 | A D G C | input_preprocessing: passthrough |
| | | | | | | | input_type: image |
| 1 | Conv_0 | convolutional | input | 474 | 1x112x112x32 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 2 |
| | | | | | | | stride y: 2 |
| | | | | | | | num filters: 32 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | param count: 896 (0.0257%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 2 | Clip_1 | neuron | 474 | 317 | 1x112x112x32 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 3 | Conv_2 | convolutional | 317 | 477 | 1x112x112x32 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 32 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 32 |
| | | | | | | | param count: 320 (0.00917%) |
| | | | | | | | MACs per inference: 3M (1.2%) |
| 4 | Clip_3 | neuron | 477 | 320 | 1x112x112x32 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 5 | Conv_4 | convolutional | 320 | 480 | 1x112x112x16 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 16 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 528 (0.0151%) |
| | | | | | | | MACs per inference: 6M (2.13%) |
| 6 | Conv_5 | convolutional | 480 | 483 | 1x112x112x96 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 96 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 1k (0.0468%) |
| | | | | | | | MACs per inference: 19M (6.4%) |
| 7 | Clip_6 | neuron | 483 | 325 | 1x112x112x96 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 8 | Conv_7 | convolutional | 325 | 486 | 1x56x56x96 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 2 |
| | | | | | | | stride y: 2 |
| | | | | | | | num filters: 96 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 96 |
| | | | | | | | param count: 960 (0.0275%) |
| | | | | | | | MACs per inference: 2M (0.9%) |
| 9 | Clip_8 | neuron | 486 | 328 | 1x56x56x96 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 10 | Conv_9 | convolutional | 328 | 489 | 1x56x56x24 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 24 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 2k (0.0667%) |
| | | | | | | | MACs per inference: 7M (2.4%) |
| 11 | Conv_10 | convolutional | 489 | 492 | 1x56x56x144 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 144 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 3k (0.103%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 12 | Clip_11 | neuron | 492 | 333 | 1x56x56x144 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 13 | Conv_12 | convolutional | 333 | 495 | 1x56x56x144 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 144 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 144 |
| | | | | | | | param count: 1k (0.0413%) |
| | | | | | | | MACs per inference: 4M (1.35%) |
| 14 | Clip_13 | neuron | 495 | 336 | 1x56x56x144 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 15 | Conv_14 | convolutional | 336 | 498 | 1x56x56x24 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 24 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 3k (0.0998%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 16 | Add_15 | elementwise_binary_op | 489 | 339 | 1x56x56x24 | A D G C | operation: sum |
| | | | 498 | | | | MACs per inference: 75k (0.025%) |
| 17 | Conv_16 | convolutional | 339 | 501 | 1x56x56x144 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 144 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 3k (0.103%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 18 | Clip_17 | neuron | 501 | 342 | 1x56x56x144 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 19 | Conv_18 | convolutional | 342 | 504 | 1x28x28x144 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 2 |
| | | | | | | | stride y: 2 |
| | | | | | | | num filters: 144 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 144 |
| | | | | | | | param count: 1k (0.0413%) |
| | | | | | | | MACs per inference: 1M (0.338%) |
| 20 | Clip_19 | neuron | 504 | 345 | 1x28x28x144 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 21 | Conv_20 | convolutional | 345 | 507 | 1x28x28x32 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 32 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 4k (0.133%) |
| | | | | | | | MACs per inference: 3M (1.2%) |
| 22 | Conv_21 | convolutional | 507 | 510 | 1x28x28x192 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 6k (0.182%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 23 | Clip_22 | neuron | 510 | 350 | 1x28x28x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 24 | Conv_23 | convolutional | 350 | 513 | 1x28x28x192 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 192 |
| | | | | | | | param count: 1k (0.055%) |
| | | | | | | | MACs per inference: 1M (0.45%) |
| 25 | Clip_24 | neuron | 513 | 353 | 1x28x28x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 26 | Conv_25 | convolutional | 353 | 516 | 1x28x28x32 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 32 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 6k (0.177%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 27 | Add_26 | elementwise_binary_op | 507 | 356 | 1x28x28x32 | A D G C | operation: sum |
| | | | 516 | | | | MACs per inference: 25k (0.00833%) |
| 28 | Conv_27 | convolutional | 356 | 519 | 1x28x28x192 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 6k (0.182%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 29 | Clip_28 | neuron | 519 | 359 | 1x28x28x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 30 | Conv_29 | convolutional | 359 | 522 | 1x28x28x192 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 192 |
| | | | | | | | param count: 1k (0.055%) |
| | | | | | | | MACs per inference: 1M (0.45%) |
| 31 | Clip_30 | neuron | 522 | 362 | 1x28x28x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 32 | Conv_31 | convolutional | 362 | 525 | 1x28x28x32 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 32 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 6k (0.177%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 33 | Add_32 | elementwise_binary_op | 356 | 365 | 1x28x28x32 | A D G C | operation: sum |
| | | | 525 | | | | MACs per inference: 25k (0.00833%) |
| 34 | Conv_33 | convolutional | 365 | 528 | 1x28x28x192 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 6k (0.182%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 35 | Clip_34 | neuron | 528 | 368 | 1x28x28x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 36 | Conv_35 | convolutional | 368 | 531 | 1x14x14x192 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 2 |
| | | | | | | | stride y: 2 |
| | | | | | | | num filters: 192 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 192 |
| | | | | | | | param count: 1k (0.055%) |
| | | | | | | | MACs per inference: 338k (0.113%) |
| 37 | Clip_36 | neuron | 531 | 371 | 1x14x14x192 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 38 | Conv_37 | convolutional | 371 | 534 | 1x14x14x64 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 64 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 12k (0.354%) |
| | | | | | | | MACs per inference: 2M (0.8%) |
| 39 | Conv_38 | convolutional | 534 | 537 | 1x14x14x384 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.716%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 40 | Clip_39 | neuron | 537 | 376 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 41 | Conv_40 | convolutional | 376 | 540 | 1x14x14x384 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 384 |
| | | | | | | | param count: 3k (0.11%) |
| | | | | | | | MACs per inference: 677k (0.225%) |
| 42 | Clip_41 | neuron | 540 | 379 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 43 | Conv_42 | convolutional | 379 | 543 | 1x14x14x64 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 64 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.706%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 44 | Add_43 | elementwise_binary_op | 534 | 382 | 1x14x14x64 | A D G C | operation: sum |
| | | | 543 | | | | MACs per inference: 12k (0.00417%) |
| 45 | Conv_44 | convolutional | 382 | 546 | 1x14x14x384 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.716%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 46 | Clip_45 | neuron | 546 | 385 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 47 | Conv_46 | convolutional | 385 | 549 | 1x14x14x384 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 384 |
| | | | | | | | param count: 3k (0.11%) |
| | | | | | | | MACs per inference: 677k (0.225%) |
| 48 | Clip_47 | neuron | 549 | 388 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 49 | Conv_48 | convolutional | 388 | 552 | 1x14x14x64 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 64 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.706%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 50 | Add_49 | elementwise_binary_op | 382 | 391 | 1x14x14x64 | A D G C | operation: sum |
| | | | 552 | | | | MACs per inference: 12k (0.00417%) |
| 51 | Conv_50 | convolutional | 391 | 555 | 1x14x14x384 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.716%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 52 | Clip_51 | neuron | 555 | 394 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 53 | Conv_52 | convolutional | 394 | 558 | 1x14x14x384 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 384 |
| | | | | | | | param count: 3k (0.11%) |
| | | | | | | | MACs per inference: 677k (0.225%) |
| 54 | Clip_53 | neuron | 558 | 397 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 55 | Conv_54 | convolutional | 397 | 561 | 1x14x14x64 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 64 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.706%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 56 | Add_55 | elementwise_binary_op | 391 | 400 | 1x14x14x64 | A D G C | operation: sum |
| | | | 561 | | | | MACs per inference: 12k (0.00417%) |
| 57 | Conv_56 | convolutional | 400 | 564 | 1x14x14x384 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 24k (0.716%) |
| | | | | | | | MACs per inference: 4M (1.6%) |
| 58 | Clip_57 | neuron | 564 | 403 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 59 | Conv_58 | convolutional | 403 | 567 | 1x14x14x384 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 384 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 384 |
| | | | | | | | param count: 3k (0.11%) |
| | | | | | | | MACs per inference: 677k (0.225%) |
| 60 | Clip_59 | neuron | 567 | 406 | 1x14x14x384 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 61 | Conv_60 | convolutional | 406 | 570 | 1x14x14x96 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 96 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 36k (1.06%) |
| | | | | | | | MACs per inference: 7M (2.4%) |
| 62 | Conv_61 | convolutional | 570 | 573 | 1x14x14x576 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 55k (1.6%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 63 | Clip_62 | neuron | 573 | 411 | 1x14x14x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 64 | Conv_63 | convolutional | 411 | 576 | 1x14x14x576 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 576 |
| | | | | | | | param count: 5k (0.165%) |
| | | | | | | | MACs per inference: 1M (0.338%) |
| 65 | Clip_64 | neuron | 576 | 414 | 1x14x14x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 66 | Conv_65 | convolutional | 414 | 579 | 1x14x14x96 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 96 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 55k (1.59%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 67 | Add_66 | elementwise_binary_op | 570 | 417 | 1x14x14x96 | A D G C | operation: sum |
| | | | 579 | | | | MACs per inference: 18k (0.00625%) |
| 68 | Conv_67 | convolutional | 417 | 582 | 1x14x14x576 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 55k (1.6%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 69 | Clip_68 | neuron | 582 | 420 | 1x14x14x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 70 | Conv_69 | convolutional | 420 | 585 | 1x14x14x576 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 576 |
| | | | | | | | param count: 5k (0.165%) |
| | | | | | | | MACs per inference: 1M (0.338%) |
| 71 | Clip_70 | neuron | 585 | 423 | 1x14x14x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 72 | Conv_71 | convolutional | 423 | 588 | 1x14x14x96 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 96 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 55k (1.59%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 73 | Add_72 | elementwise_binary_op | 417 | 426 | 1x14x14x96 | A D G C | operation: sum |
| | | | 588 | | | | MACs per inference: 18k (0.00625%) |
| 74 | Conv_73 | convolutional | 426 | 591 | 1x14x14x576 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 55k (1.6%) |
| | | | | | | | MACs per inference: 10M (3.6%) |
| 75 | Clip_74 | neuron | 591 | 429 | 1x14x14x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 76 | Conv_75 | convolutional | 429 | 594 | 1x7x7x576 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 2 |
| | | | | | | | stride y: 2 |
| | | | | | | | num filters: 576 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 576 |
| | | | | | | | param count: 5k (0.165%) |
| | | | | | | | MACs per inference: 254k (0.0844%) |
| 77 | Clip_76 | neuron | 594 | 432 | 1x7x7x576 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 78 | Conv_77 | convolutional | 432 | 597 | 1x7x7x160 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 160 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 92k (2.65%) |
| | | | | | | | MACs per inference: 4M (1.5%) |
| 79 | Conv_78 | convolutional | 597 | 600 | 1x7x7x960 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 154k (4.43%) |
| | | | | | | | MACs per inference: 7M (2.5%) |
| 80 | Clip_79 | neuron | 600 | 437 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 81 | Conv_80 | convolutional | 437 | 603 | 1x7x7x960 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 960 |
| | | | | | | | param count: 9k (0.275%) |
| | | | | | | | MACs per inference: 423k (0.141%) |
| 82 | Clip_81 | neuron | 603 | 440 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 83 | Conv_82 | convolutional | 440 | 606 | 1x7x7x160 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 160 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 153k (4.41%) |
| | | | | | | | MACs per inference: 7M (2.5%) |
| 84 | Add_83 | elementwise_binary_op | 597 | 443 | 1x7x7x160 | A D G C | operation: sum |
| | | | 606 | | | | MACs per inference: 7k (0.0026%) |
| 85 | Conv_84 | convolutional | 443 | 609 | 1x7x7x960 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 154k (4.43%) |
| | | | | | | | MACs per inference: 7M (2.5%) |
| 86 | Clip_85 | neuron | 609 | 446 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 87 | Conv_86 | convolutional | 446 | 612 | 1x7x7x960 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 960 |
| | | | | | | | param count: 9k (0.275%) |
| | | | | | | | MACs per inference: 423k (0.141%) |
| 88 | Clip_87 | neuron | 612 | 449 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 89 | Conv_88 | convolutional | 449 | 615 | 1x7x7x160 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 160 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 153k (4.41%) |
| | | | | | | | MACs per inference: 7M (2.5%) |
| 90 | Add_89 | elementwise_binary_op | 443 | 452 | 1x7x7x160 | A D G C | operation: sum |
| | | | 615 | | | | MACs per inference: 7k (0.0026%) |
| 91 | Conv_90 | convolutional | 452 | 618 | 1x7x7x960 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 154k (4.43%) |
| | | | | | | | MACs per inference: 7M (2.5%) |
| 92 | Clip_91 | neuron | 618 | 455 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 93 | Conv_92 | convolutional | 455 | 621 | 1x7x7x960 | A D G C | padding x: 1 |
| | | | | | | | padding y: 1 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 960 |
| | | | | | | | kernel: 3x3 |
| | | | | | | | groups: 960 |
| | | | | | | | param count: 9k (0.275%) |
| | | | | | | | MACs per inference: 423k (0.141%) |
| 94 | Clip_93 | neuron | 621 | 458 | 1x7x7x960 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 95 | Conv_94 | convolutional | 458 | 624 | 1x7x7x320 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 320 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 307k (8.82%) |
| | | | | | | | MACs per inference: 15M (5%) |
| 96 | Conv_95 | convolutional | 624 | 627 | 1x7x7x1280 | A D G C | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | padding mode: zero |
| | | | | | | | stride x: 1 |
| | | | | | | | stride y: 1 |
| | | | | | | | num filters: 1280 |
| | | | | | | | kernel: 1x1 |
| | | | | | | | param count: 410k (11.8%) |
| | | | | | | | MACs per inference: 20M (6.67%) |
| 97 | Clip_96 | neuron | 627 | 463 | 1x7x7x1280 | A D G C | a: 0 |
| | | | | | | | b: 0 |
| | | | | | | | min_clamp: 0 |
| | | | | | | | max_clamp: 6 |
| | | | | | | | func: relu_min_max |
| 98 | GlobalAveragePool_97 | pooling | 463 | 464 | 1x1x1x1280 | A D G C | pool size x: 7 |
| | | | | | | | pool size y: 7 |
| | | | | | | | stride x: 7 |
| | | | | | | | stride y: 7 |
| | | | | | | | padding x: 0 |
| | | | | | | | padding y: 0 |
| | | | | | | | pool_type: POOL_AVG |
| | | | | | | | MACs per inference: 62k (0.0208%) |
| 99 | 464.ncs | permute | 464 | 464.ncs | 1x1280x1x1 | A D G C | permute_order: [0, 3, 1, 2] |
| 100 | Gemm_104 | fully_connected | 464.ncs | output | 1x1000 | A D G C | param count: 1M (36.7%) |
| | | | | | | | MACs per inference: 1M (0.425%) |
-----------------------------------------------------------------------------------------------------------------------------------------
Note: The supported runtimes column assumes a processor target of Snapdragon 835 (8998)
Key : A:AIP
D:DSP
G:GPU
C:CPU
Total parameters: 3487816 (13 MB assuming single precision float)
Total MACs per inference: 301M (100%)
Converter command: snpe-onnx-to-dlc adjust_nms_features_dims=False align_matmul_ranks=True copyright_file=None custom_op_config_paths=None debug=-1 disable_batchnorm_folding=False disable_chaining_eltwise_ops=False dry_run=None dumpIR=False dump_inferred_model=False dump_value_info=False enable_strict_validation=False extract_color_transform=False force_prune_cast_ops=True handle_gather_negative_indices=False inject_cast_for_gather=False input_dim=[['input', '1,3,224,224']] input_dtype=[] input_encoding=[] input_layout=[] input_type=[['input', 'image']] keep_disconnected_nodes=False keep_quant_nodes=False match_caffe_ssd_to_tf=False model_version=None no_simplification=False out_names=['output'] perform_axes_to_spatial_first_order=True prepare_inputs_as_params=True preprocess_lstm_ops=False preprocess_roi_pool_inputs=False quantization_overrides= squash_box_decoder=False unroll_lstm_time_steps=False use_convert_quantization_nodes=True validation_target=[]
Quantizer command: N/A
DLC created with converter version: 1.61.0.3358
Layers used by DLC: CONVOLUTIONAL, DATA, ELEMENTWISE_BINARY_OP_SUM, FULLY_CONNECTED, NEURON_RELU_MIN_MAX, PERMUTE, POOLING
Est. Steady-State Memory Needed to Run: 164.3 MiB
-----------------------------------------------------------------------------------------------------------------------------------------

View File

@@ -0,0 +1,131 @@
Log File Created: Tue Mar 18 01:33:12 2025
Time Scale: 1e-06
Epoch Timestamp: 1742286792883569 Steady Clock Timestamp: 75586845756
Software library version: 1.61.0.3358
Dnn Runtime Load/Deserialize/Create/De-Init Statistics:
--------------------------------------------------
Load: 333 us
Deserialize: 32452 us
Create: 143084 us
Init: 178071 us
De-Init: 16710 us
Create Network(s): 86850 us
RPC Init Time: 43213 us
Snpe Accelerator Init Time: 42154 us
Accelerator Init Time: 39189 us
Average SNPE Statistics:
------------------------------
Total Inference Time: 11868 us
Forward Propagate Time: 11816 us
RPC Execute Time: 9810 us
Snpe Accelerator Time: 9129 us
Accelerator Time: 8701 us
Misc Accelerator Time: 10 us
Layer Times:
---------------
0: 42 us : DSP
1: 0 us : DSP
2: 254 us : DSP
3: 0 us : DSP
4: 153 us : DSP
5: 295 us : DSP
6: 0 us : DSP
7: 287 us : DSP
8: 0 us : DSP
9: 162 us : DSP
10: 210 us : DSP
11: 0 us : DSP
12: 138 us : DSP
13: 0 us : DSP
14: 176 us : DSP
15: 293 us : DSP
16: 60 us : DSP
17: 0 us : DSP
18: 157 us : DSP
19: 0 us : DSP
20: 112 us : DSP
21: 134 us : DSP
22: 0 us : DSP
23: 81 us : DSP
24: 0 us : DSP
25: 104 us : DSP
26: 130 us : DSP
27: 37 us : DSP
28: 0 us : DSP
29: 81 us : DSP
30: 0 us : DSP
31: 87 us : DSP
32: 124 us : DSP
33: 30 us : DSP
34: 0 us : DSP
35: 87 us : DSP
36: 0 us : DSP
37: 63 us : DSP
38: 74 us : DSP
39: 0 us : DSP
40: 102 us : DSP
41: 0 us : DSP
42: 82 us : DSP
43: 95 us : DSP
44: 29 us : DSP
45: 0 us : DSP
46: 112 us : DSP
47: 0 us : DSP
48: 88 us : DSP
49: 96 us : DSP
50: 25 us : DSP
51: 0 us : DSP
52: 103 us : DSP
53: 0 us : DSP
54: 80 us : DSP
55: 100 us : DSP
56: 26 us : DSP
57: 0 us : DSP
58: 102 us : DSP
59: 0 us : DSP
60: 85 us : DSP
61: 129 us : DSP
62: 0 us : DSP
63: 155 us : DSP
64: 0 us : DSP
65: 113 us : DSP
66: 194 us : DSP
67: 34 us : DSP
68: 0 us : DSP
69: 157 us : DSP
70: 0 us : DSP
71: 120 us : DSP
72: 198 us : DSP
73: 34 us : DSP
74: 0 us : DSP
75: 155 us : DSP
76: 0 us : DSP
77: 101 us : DSP
78: 121 us : DSP
79: 0 us : DSP
80: 256 us : DSP
81: 0 us : DSP
82: 134 us : DSP
83: 159 us : DSP
84: 31 us : DSP
85: 0 us : DSP
86: 199 us : DSP
87: 0 us : DSP
88: 142 us : DSP
89: 152 us : DSP
90: 26 us : DSP
91: 0 us : DSP
92: 202 us : DSP
93: 0 us : DSP
94: 143 us : DSP
95: 278 us : DSP
96: 0 us : DSP
97: 316 us : DSP
98: 40 us : DSP
99: 12 us : DSP
100: 199 us : DSP

View File

@@ -0,0 +1,21 @@
"""Join an SNPE layer table with per-layer timings and print the non-zero ones.

Reads two files from the current directory:
  dlc_info_2  -- a '|'-delimited layer table (Id, Name, ... columns)
  high_perf_2 -- a timing log containing a "Layer Times:" section
and prints one line per layer whose time is non-zero, followed by the total.
"""

def parse_layers(info_text):
  """Return {layer id: (cell 2, cell 6)} for every layer row of the table text.

  A layer row starts with "| " and has an integer in its first cell. In the
  tables this script is used on, cell 2 is the layer name and cell 6 looks like
  the output shape (e.g. 1x7x7x576) -- the header row is what defines them.
  """
  layers = {}
  for row in info_text.split("\n"):
    if not row.startswith("| "): continue
    cells = [c.strip() for c in row.split("|")]
    # Continuation rows leave the Id cell blank and the header row says "Id".
    # Testing the stripped cell (rather than an exact "| |" prefix) keeps this
    # working no matter how wide the column padding is.
    if cells[1] in ("", "Id"): continue
    layers[int(cells[1])] = (cells[2], cells[6])
  return layers

def parse_layer_times(perf_text):
  """Yield (layer id, microseconds) for rows of the "Layer Times:" section.

  The first two lines of the section are dropped, exactly as the original
  script did: the dashed rule and the first timing row.
  Each remaining row must look like "<id>: <time> <rest>".
  """
  rows = perf_text.split("Layer Times:")[1].strip().split("\n")[2:]
  for row in rows:
    key, us, _ = row.split(" ", 2)
    yield int(key.strip(":")), int(us)

def report_lines(layers, times):
  """Yield the report: one line per non-zero layer time, then the total in ms.

  Raises KeyError if a timed layer id is missing from `layers`.
  """
  # NOTE(review): indentation was lost in the reviewed copy; the running index
  # is assumed to advance only for rows that are printed -- confirm.
  shown, total_us = 1, 0
  for lnum, us in times:
    if us == 0: continue
    yield f"{shown:2d} {us:4d} us {layers[lnum]}"
    total_us += us
    shown += 1
  yield f"total time, {total_us/1000:.2f} ms"

def main():
  """Read both input files and print the report."""
  # `with` closes the handles promptly instead of leaving them to the GC
  with open("dlc_info_2") as f: layers = parse_layers(f.read())
  with open("high_perf_2") as f: perf_text = f.read()
  for line in report_lines(layers, parse_layer_times(perf_text)): print(line)

if __name__ == "__main__":
  main()

View File

@@ -728,9 +728,12 @@ def get_onnx_ops():
y_scale, y_zero_point = _prepare_quantize(x, y_scale, y_zero_point, axis, block_size)
if out_dtype == dtypes.uchar:
# this appears to work in practice, at least for uchar out_dtype. it folds with the quantize stuff
return _clamp_cast((x / y_scale + 0.4999999 + y_zero_point).int(), out_dtype).contiguous()
ret = _clamp_cast((x / y_scale + 0.4999999 + y_zero_point).int(), out_dtype)
else:
return _clamp_cast(((x / y_scale).round() + y_zero_point), out_dtype).contiguous()
ret = _clamp_cast(((x / y_scale).round() + y_zero_point), out_dtype)
# you need both NHWC=1 DONT_GROUP_REDUCES=1 for this to work
if getenv("NHWC") and len(ret.shape) == 4: return ret.permute(0,2,3,1).contiguous().permute(0,3,1,2)
return ret.contiguous()
def DynamicQuantizeLinear(x: Tensor):
# only support uint8

View File

@@ -23,6 +23,8 @@ if __name__ == "__main__":
p: ProgramSpec = ei.prg.p
k = Kernel(p.ast, Device["DSP"].renderer)
if not getenv("NOOPT"):
# only NCHW
"""
if knum in [6,7,9,11]:
k.apply_opt(Opt(OptOps.PADTO, 1, 128))
k.apply_opt(Opt(OptOps.UPCAST, 1, 128))
@@ -48,6 +50,15 @@ if __name__ == "__main__":
k.apply_opt(Opt(OptOps.UPCAST, 1, 128))
else:
k.hand_coded_optimizations()
"""
if knum == 3:
k.apply_opt(Opt(OptOps.UNROLL, 0, 0))
k.apply_opt(Opt(OptOps.UPCAST, 1, 16))
k.apply_opt(Opt(OptOps.UPCAST, 0, 128//16))
#k.apply_opt(Opt(OptOps.UPCAST, 0, 8))
pass
else:
k.hand_coded_optimizations()
#if knum in [5]: k.apply_opt(Opt(OptOps.UPCAST, 1, 2))
p2 = k.to_program()
new_ei = replace(ei, prg=CompiledRunner(p2), bufs=[Buffer("DSP", 1024+b.size*2, b.dtype).view(b.size, b.dtype, 512) for b in ei.bufs])