Added a dispatch benchmarking tool (#441)

To produce benchmarks of individual dispatches, you can add --dispatch_benchmarks=All --dispatch_benchmarks_dir=<output_dir> to your command line argument. Co-authored-by: Elias Joseph <elias@nod-labs.com>
2026-04-03 03:00:17 -04:00 · 2022-10-28 14:31:03 -07:00
parent 77c9a2c5ea
commit 7f37599a60
6 changed files with 221 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -121,6 +121,33 @@ pytest tank/test_models.py -k "MiniLM"
 <details>
  <summary>Testing and Benchmarks</summary>

+## Benchmarking Dispatches
+
+To produce benchmarks of individual dispatches, you can add `--dispatch_benchmarks=All --dispatch_benchmarks_dir=<output_dir>` to your command-line arguments.  
+If you only want to compile specific dispatches, you can specify them with a space-separated string instead of `"All"`, e.g. `--dispatch_benchmarks="0 1 2 10"`.
+
+If you instead want to incorporate this into a Python script, you can pass the `dispatch_benchmark` and `dispatch_benchmark_dir` arguments when initializing `SharkInference`, and the benchmarks will be generated when the module is compiled.  E.g.:
+
+```
+shark_module = SharkInference(
+        mlir_model,
+        func_name,
+        device=args.device,
+        mlir_dialect="tm_tensor",
+        dispatch_benchmark="all",
+        dispatch_benchmark_dir="results"
+    )
+```
+
+Output will include:
+- Inside the specified directory, there will be a directory for each dispatch (there will be mlir files for all dispatches, but only compiled binaries and benchmark data for the specified dispatches)
+- An .mlir file containing the dispatch benchmark 
+- A compiled .vmfb file containing the dispatch benchmark
+- An .mlir file containing just the hal executable
+- A compiled .vmfb file of the hal executable
+- A .txt file containing benchmark output
+
+
 See tank/README.md for instructions on how to run model tests and benchmarks from the SHARK tank.

 </details>
@@ -175,7 +202,6 @@ result = shark_module.forward((arg0, arg1))
 ```
 </details>

-
 ## Supported and Validated Models

 SHARK is maintained to support the latest innovations in ML Models: 
--- a/shark/examples/shark_inference/resnet50_script.py
+++ b/shark/examples/shark_inference/resnet50_script.py
@@ -69,7 +69,7 @@ labels = load_labels()
 mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")

 shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
-# shark_module.compile()
+shark_module.compile()
 path = shark_module.save_module()
 shark_module.load_module(path)
 result = shark_module.forward((img.detach().numpy(),))
--- a/shark/iree_utils/benchmark_utils.py
+++ b/shark/iree_utils/benchmark_utils.py
@@ -78,6 +78,31 @@ def build_benchmark_args(
    return benchmark_cl


+def build_benchmark_args_non_tensor_input(
+    input_file: str,
+    device: str,
+    inputs: tuple,
+    mlir_dialect: str,
+    function_name: str,
+):
+    """
+    Build the iree-benchmark-module command line for non-tensor inputs.
+
+    Inputs: input_file is the path of the compiled vmfb, device is a key of
+    IREE_DEVICE_MAP, inputs are values forwarded verbatim (via str()) as
+    --function_input flags, and function_name is the entry function to run.
+    mlir_dialect is accepted but is not used when building the command.
+    Outputs: list of command-line tokens. The final token is a shell pipe
+    into awk whose END block prints fields 2 and 3.
+    """
+    path = benchmark_module.__path__[0]
+    benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
+    benchmark_cl = [
+        benchmarker_path,
+        f"--module_file={input_file}",
+        f"--entry_function={function_name}",
+        f"--device={IREE_DEVICE_MAP[device]}",
+    ]
+    # One flag per input; "value" avoids shadowing the builtin input().
+    benchmark_cl.extend(f"--function_input={value}" for value in inputs)
+    # Plain (non f-) string: the doubled braces reach awk exactly as written.
+    time_extractor = "| awk 'END{{print $2 $3}}'"
+    benchmark_cl.append(time_extractor)
+    return benchmark_cl
+
+
 def run_benchmark_module(benchmark_cl):
    """
    Run benchmark command, extract result and return iteration/seconds.
--- a/shark/iree_utils/compile_utils.py
+++ b/shark/iree_utils/compile_utils.py
@@ -14,8 +14,10 @@
 import iree.runtime as ireert
 import iree.compiler as ireec
 from shark.iree_utils._common import IREE_DEVICE_MAP, IREE_TARGET_MAP
+from shark.iree_utils.benchmark_utils import *
 import numpy as np
 import os
+import re

 # Get the iree-compile arguments given device.
 def get_iree_device_args(device, extra_args=[]):
@@ -62,6 +64,125 @@ def get_iree_common_args():
    ]


+def create_dispatch_dirs(bench_dir, device):
+    """
+    Sort dumped dispatch files into one sub-directory per dispatch.
+
+    Inputs: bench_dir is the directory whose files are regrouped; a second
+    directory, named like bench_dir but with "temp_" prefixed to its last
+    path component, is expected to hold the matching benchmark files.
+    device is accepted but is not used here.
+    Outputs: None. Each file in bench_dir is moved into a fresh directory
+    named after the file with everything from its first "." removed. Each
+    file in the temp directory whose name contains a dispatch directory name
+    (followed by a non-digit) is moved into that directory as
+    "<dispatch>_benchmark.mlir".
+    Raises OSError if a directory cannot be listed, created or written.
+    """
+    # Function-scope import so the block does not depend on file-level edits.
+    import shutil
+
+    bench_dir_path = bench_dir.split("/")
+    bench_dir_path[-1] = "temp_" + bench_dir_path[-1]
+    tmp_bench_dir = "/".join(bench_dir_path)
+
+    for f_ in os.listdir(bench_dir):
+        src = os.path.join(bench_dir, f_)
+        if not os.path.isfile(src):
+            continue
+        dir_name = re.sub(r"\.\S*$", "", f_)
+        # An empty name would target bench_dir itself, and an unchanged name
+        # would target the file being moved; clearing either destroys data.
+        if not dir_name or dir_name == f_:
+            continue
+        dispatch_dir = os.path.join(bench_dir, dir_name)
+        # Start from an empty directory so stale results never mix in.
+        if os.path.isdir(dispatch_dir):
+            shutil.rmtree(dispatch_dir)
+        elif os.path.exists(dispatch_dir):
+            os.remove(dispatch_dir)
+        os.mkdir(dispatch_dir)
+        shutil.move(src, os.path.join(dispatch_dir, f_))
+
+    # The set of dispatch directories does not change while benchmark files
+    # are being moved, so list it once.
+    dispatch_dirs = [
+        d_
+        for d_ in os.listdir(bench_dir)
+        if os.path.isdir(os.path.join(bench_dir, d_))
+    ]
+    for f_ in os.listdir(tmp_bench_dir):
+        src = os.path.join(tmp_bench_dir, f_)
+        if not os.path.isfile(src):
+            continue
+        dir_name = ""
+        for d_ in dispatch_dirs:
+            # The lookahead keeps e.g. "..._1" from matching "..._10...";
+            # escaping keeps directory names from being read as regex syntax.
+            if re.search(re.escape(d_) + r"(?=\D)", f_):
+                dir_name = d_
+        if dir_name:
+            shutil.move(
+                src,
+                os.path.join(
+                    bench_dir, dir_name, f"{dir_name}_benchmark.mlir"
+                ),
+            )
+
+
+def _parse_dispatch_selection(dispatch_benchmarks):
+    """
+    Parse the user's dispatch selection string.
+
+    Outputs: (True, []) for "all" in any letter case, otherwise
+    (False, list of int indices). Raises ValueError when an entry is not an
+    integer or when no index is given.
+    """
+    if dispatch_benchmarks.lower().strip() == "all":
+        return True, []
+    # split() without a separator tolerates repeated or surrounding spaces.
+    dispatch_list = [int(index) for index in dispatch_benchmarks.split()]
+    if not dispatch_list:
+        raise ValueError("no dispatch indices given")
+    return False, dispatch_list
+
+
+def _is_selected(dir_name, dispatch_list):
+    """Return True if dir_name contains a requested index as a whole number."""
+    # Digit boundaries stop index 1 from also selecting 10, 11, 21, ...
+    return any(
+        re.search(rf"(?<!\d){index}(?!\d)", dir_name)
+        for index in dispatch_list
+    )
+
+
+def _benchmark_dispatch(dispatch_dir, dispatch_name, mlir_name, device):
+    """Compile one dispatch benchmark, run it, and write vmfb/sh/txt files."""
+    with open(f"{dispatch_dir}/{mlir_name}", "r") as dispatch_file:
+        module = dispatch_file.read()
+
+    flatbuffer_blob = ireec.compile_str(
+        module, target_backends=[IREE_TARGET_MAP[device]]
+    )
+
+    vmfb_path = f"{dispatch_dir}/{dispatch_name}_benchmark.vmfb"
+    with open(vmfb_path, "wb") as vmfb_file:
+        vmfb_file.write(flatbuffer_blob)
+
+    # The module is loaded only to look up its first function name.
+    config = ireert.Config(IREE_DEVICE_MAP[device])
+    vm_module = ireert.VmModule.from_flatbuffer(
+        config.vm_instance, flatbuffer_blob
+    )
+
+    benchmark_cl = build_benchmark_args_non_tensor_input(
+        input_file=vmfb_path,
+        device=device,
+        inputs=(0,),
+        mlir_dialect="linalg",
+        function_name=vm_module.function_names[0],
+    )
+
+    # Keep the exact command next to the results so it can be re-run by hand.
+    script_path = f"{dispatch_dir}/{dispatch_name}_benchmark.sh"
+    with open(script_path, "w") as benchmark_bash:
+        benchmark_bash.write("#!/bin/bash\n")
+        benchmark_bash.write(" ".join(benchmark_cl))
+
+    benchmark_data = run_benchmark_module(benchmark_cl)
+
+    data_path = f"{dispatch_dir}/{dispatch_name}_data.txt"
+    with open(data_path, "w") as benchmark_file:
+        benchmark_file.write(f"DISPATCH: {dispatch_name}\n")
+        benchmark_file.write(str(benchmark_data) + "\n")
+        benchmark_file.write(
+            "SHARK BENCHMARK RESULT: "
+            + str(1 / (benchmark_data * 0.001))
+            + "\n"
+        )
+
+
+def _compile_hal_executable(dispatch_dir, dispatch_name, mlir_name, device):
+    """Compile a dumped executable source in hal-executable mode."""
+    with open(f"{dispatch_dir}/{mlir_name}", "r") as dispatch_file:
+        module = dispatch_file.read()
+
+    # Literal replacement: the executable is made public before compiling.
+    module = module.replace(
+        "hal.executable private", "hal.executable public"
+    )
+
+    flatbuffer_blob = ireec.compile_str(
+        module,
+        target_backends=[IREE_TARGET_MAP[device]],
+        extra_args=["--compile-mode=hal-executable"],
+    )
+
+    spirv_path = f"{dispatch_dir}/{dispatch_name}_spirv.vmfb"
+    with open(spirv_path, "wb") as spirv_file:
+        spirv_file.write(flatbuffer_blob)
+
+
+def compile_benchmark_dirs(bench_dir, device, dispatch_benchmarks):
+    """
+    Compile and benchmark the selected dispatches found under bench_dir.
+
+    Inputs: bench_dir contains one sub-directory per dispatch, device is a
+    key of IREE_TARGET_MAP / IREE_DEVICE_MAP, and dispatch_benchmarks is
+    either "all" (any letter case) or a space separated list of dispatch
+    indices such as "0 1 2 10".
+    Outputs: None. For every selected dispatch directory, files whose name
+    contains "benchmark.mlir" are compiled and benchmarked, with the vmfb,
+    the benchmark command and the results written next to them; other
+    ".mlir" files are compiled in hal-executable mode. An invalid selection
+    prints an error and returns without doing any work.
+    """
+    try:
+        all_dispatches, dispatch_list = _parse_dispatch_selection(
+            dispatch_benchmarks
+        )
+    except ValueError:
+        print("ERROR: Invalid dispatch benchmarks")
+        return None
+
+    for d_ in os.listdir(bench_dir):
+        dispatch_dir = f"{bench_dir}/{d_}"
+        # Stray files in bench_dir are not dispatches and cannot be listed.
+        if not os.path.isdir(dispatch_dir):
+            continue
+        if not (all_dispatches or _is_selected(d_, dispatch_list)):
+            continue
+        # listdir returns a snapshot, so files written below are not revisited.
+        for f_ in os.listdir(dispatch_dir):
+            if "benchmark.mlir" in f_:
+                _benchmark_dispatch(dispatch_dir, d_, f_, device)
+            elif ".mlir" in f_ and "benchmark" not in f_:
+                _compile_hal_executable(dispatch_dir, d_, f_, device)
+
+
 def compile_module_to_flatbuffer(
    module, device, frontend, func_name, model_config_path, extra_args
 ):
--- a/shark/parser.py
+++ b/shark/parser.py
@@ -93,4 +93,16 @@ parser.add_argument(
    help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
 )

+# Selects which dispatches get benchmarked; None (the default) disables it.
+parser.add_argument(
+    "--dispatch_benchmarks",
+    default=None,
+    help='dispatches to return benchmark data on.  use "All" for all, and None for none.',
+)
+
+# Output directory for the files produced by --dispatch_benchmarks.
+parser.add_argument(
+    "--dispatch_benchmarks_dir",
+    default="temp_dispatch_benchmarks",
+    help='directory where you want to store dispatch data generated with "--dispatch_benchmarks"',
+)
+
 shark_args, unknown = parser.parse_known_args()
--- a/shark/shark_inference.py
+++ b/shark/shark_inference.py
@@ -12,6 +12,8 @@
 from shark.iree_utils.compile_utils import (
    export_iree_module_to_vmfb,
    load_flatbuffer,
+    create_dispatch_dirs,
+    compile_benchmark_dirs,
 )
 import os
 from shark.shark_runner import SharkRunner
@@ -68,17 +70,41 @@ class SharkInference:
        device: str = "none",
        mlir_dialect: str = "linalg",
        is_benchmark: bool = False,
+        dispatch_benchmark: str = None,
+        dispatch_benchmark_dir: str = "temp_dispatch_benchmarks",
    ):
        self.mlir_module = mlir_module
        self.function_name = function_name
        self.device = shark_args.device if device == "none" else device
        self.mlir_dialect = mlir_dialect
        self.is_benchmark = is_benchmark
+        self.dispatch_benchmarks = (
+            shark_args.dispatch_benchmarks
+            if dispatch_benchmark is None
+            else dispatch_benchmark
+        )
+        self.dispatch_benchmarks_dir = (
+            shark_args.dispatch_benchmarks_dir
+            if dispatch_benchmark_dir == "temp_dispatch_benchmarks"
+            else dispatch_benchmark_dir
+        )

        self.shark_runner = None

    def compile(self, extra_args=[]):

+        if self.dispatch_benchmarks is not None:
+            extra_args.append(
+                f"--iree-hal-dump-executable-sources-to={self.dispatch_benchmarks_dir}"
+            )
+            temp_dir = self.dispatch_benchmarks_dir.split("/")
+            temp_dir[-1] = "temp_" + temp_dir[-1]
+            temp_dir = "/".join(temp_dir)
+            self.temp_dispatch_benchmarks_dir = temp_dir
+            extra_args.append(
+                f"--iree-hal-dump-executable-benchmarks-to={self.temp_dispatch_benchmarks_dir}"
+            )
+
        if self.is_benchmark == True:
            from shark.shark_benchmark_runner import SharkBenchmarkRunner

@@ -99,6 +125,15 @@ class SharkInference:
                extra_args=extra_args,
            )

+        if self.dispatch_benchmarks is not None:
+            create_dispatch_dirs(self.dispatch_benchmarks_dir, self.device)
+            compile_benchmark_dirs(
+                self.dispatch_benchmarks_dir,
+                self.device,
+                self.dispatch_benchmarks,
+            )
+            os.system(f"rm -rf {self.temp_dispatch_benchmarks_dir}")
+
    # inputs are considered to be tuple of np.array.
    def forward(self, inputs: tuple):
        return self.shark_runner.run(inputs)