Add stress test

Return dynamic model if specified when downloading from the tank
In get_iree_runtime_config get the specific device instead of the default
2026-01-11 14:58:11 -05:00 · 2022-12-12 17:47:49 -08:00 · 2022-12-12 17:47:49 -08:00 · 2022-12-12 17:47:49 -08:00 · 2022-12-12 17:47:46 -08:00
5 changed files with 337 additions and 64 deletions
--- a/shark/iree_utils/_common.py
+++ b/shark/iree_utils/_common.py
@@ -38,56 +38,11 @@ def run_cmd(cmd):


 def iree_device_map(device):
-
-    from iree.runtime import get_driver, get_device
-
-    def get_all_devices(driver_name):
-        driver = get_driver(driver_name)
-        device_list_src = driver.query_available_devices()
-        device_list = []
-        for device_dict in device_list_src:
-            device_list.append(f"{driver_name}://{device_dict['path']}")
-        device_list.sort()
-        return device_list
-
-    # only supported for vulkan as of now
-    if "vulkan://" in device:
-        device_list = get_all_devices("vulkan")
-        _, d_index = device.split("://")
-        matched_index = None
-        match_with_index = False
-        if 0 <= len(d_index) <= 2:
-            try:
-                d_index = int(d_index)
-            except:
-                print(
-                    f"{d_index} is not valid index or uri. Will choose device 0"
-                )
-                d_index = 0
-            match_with_index = True
-
-        if len(device_list) > 1:
-            print("List of available vulkan devices:")
-            for i, d in enumerate(device_list):
-                print(f"vulkan://{i} => {d}")
-                if (match_with_index and d_index == i) or (
-                    not match_with_index and d == device
-                ):
-                    matched_index = i
-            print(
-                f"Choosing device vulkan://{matched_index}\nTo choose another device please specify device index or uri accordingly."
-            )
-            return get_device(device_list[matched_index])
-        elif len(device_list) == 1:
-            print(f"Found one vulkan device: {device_list[0]}. Using this.")
-            return get_device(device_list[0])
-        else:
-            print(
-                f"No device found! returning device corresponding to driver name: vulkan"
-            )
-            return _IREE_DEVICE_MAP["vulkan"]
+    uri_parts = device.split("://", 2)
+    if len(uri_parts) == 1:
+        return _IREE_DEVICE_MAP[uri_parts[0]]
    else:
-        return _IREE_DEVICE_MAP[device]
+        return f"{_IREE_DEVICE_MAP[uri_parts[0]]}://{uri_parts[1]}"


 def get_supported_device_list():
@@ -119,6 +74,7 @@ _IREE_TARGET_MAP = {
    "intel-gpu": "opencl-spirv",
 }

+
 # Finds whether the required drivers are installed for the given device.
 def check_device_drivers(device):
    """Checks necessary drivers present for gpu and vulkan devices"""
--- a/shark/iree_utils/compile_utils.py
+++ b/shark/iree_utils/compile_utils.py
@@ -20,6 +20,7 @@ import numpy as np
 import os
 import re

+
 # Get the iree-compile arguments given device.
 def get_iree_device_args(device, extra_args=[]):
    if "://" in device:
@@ -366,9 +367,5 @@ def get_results(compiled_vm, input, config, frontend="torch"):

 def get_iree_runtime_config(device):
    device = iree_device_map(device)
-    if type(device) == ireert.HalDevice:
-        config = ireert.Config(device=device)
-    else:
-        driver_name = device.split("://")[0] if "://" in device else device
-        config = ireert.Config(driver_name=driver_name)
+    config = ireert.Config(device=ireert.get_device(device))
    return config
--- a/shark/shark_downloader.py
+++ b/shark/shark_downloader.py
@@ -70,7 +70,6 @@ input_type_to_np_dtype = {
    "int8": np.int8,
 }

-
 # Save the model in the home local so it needn't be fetched everytime in the CI.
 home = str(Path.home())
 alt_path = os.path.join(os.path.dirname(__file__), "../gen_shark_tank/")
@@ -93,6 +92,7 @@ else:
        f"shark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
    )

+
 # Checks whether the directory and files exists.
 def check_dir_exists(model_name, frontend="torch", dynamic=""):
    model_dir = os.path.join(WORKDIR, model_name)
@@ -174,16 +174,9 @@ def download_model(
                    )

    model_dir = os.path.join(WORKDIR, model_dir_name)
-    suffix = (
-        "_" + frontend + ".mlir"
-        if tuned is None
-        else "_" + frontend + "_" + tuned + ".mlir"
-    )
+    tuned_str = "" if tuned is None else "_" + tuned
+    suffix = f"{dyn_str}_{frontend}{tuned_str}.mlir"
    filename = os.path.join(model_dir, model_name + suffix)
-    if not os.path.isfile(filename):
-        filename = os.path.join(
-            model_dir, model_name + "_" + frontend + ".mlir"
-        )

    with open(filename, mode="rb") as f:
        mlir_file = f.read()
--- a/shark/stress_test.py
+++ b/shark/stress_test.py
@@ -0,0 +1,296 @@
+# Copyright 2022 The Nod Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from iree.runtime import query_available_drivers, get_driver
+from shark.shark_downloader import download_model
+from shark.shark_inference import SharkInference
+from typing import List, Optional, Tuple
+import numpy as np
+import argparse
+from shark.iree_utils._common import _IREE_DEVICE_MAP
+import multiprocessing
+from shark.shark_runner import supported_dialects
+import logging
+from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures.thread import ThreadPoolExecutor
+import time
+import numpy as np
+
+IREE_TO_SHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()}
+
+
+def stress_test_compiled_model(
+    shark_module_path: str,
+    function_name: str,
+    device: str,
+    inputs: List[np.ndarray],
+    golden_out: List[np.ndarray],
+    batch_size: int,
+    max_iterations: int,
+    max_duration_seconds: float,
+    inference_timeout_seconds: float,
+    tolerance_nulp: int,
+    stress_test_index: int,
+):
+    logging.info(
+        f"Running stress test {stress_test_index} on device {device}."
+    )
+    shark_module = SharkInference(
+        mlir_module=bytes(), function_name=function_name, device=device
+    )
+    shark_module.load_module(shark_module_path)
+    input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs]
+    golden_output_batches = np.repeat(golden_out, batch_size, axis=0)
+    report_interval_seconds = 10
+    start_time = time.time()
+    previous_report_time = start_time
+    executor = ThreadPoolExecutor(1)
+    first_iteration_output = None
+    for i in range(max_iterations):
+        inference_task = executor.submit(shark_module.forward, input_batches)
+        output = inference_task.result(inference_timeout_seconds)
+        if first_iteration_output is None:
+            np.testing.assert_array_almost_equal_nulp(
+                golden_output_batches, output, nulp=tolerance_nulp
+            )
+            first_iteration_output = output
+        else:
+            np.testing.assert_array_equal(output, first_iteration_output)
+        current_time = time.time()
+        if report_interval_seconds < current_time - previous_report_time:
+            logging.info(
+                f"Stress test {stress_test_index} on device "
+                f"{device} at iteration {i+1}"
+            )
+            previous_report_time = current_time
+        if max_duration_seconds < current_time - start_time:
+            return
+    logging.info(f"Stress test {stress_test_index} on device {device} done.")
+
+
+def get_device_type(device_name: str):
+    return device_name.split("://", 1)[0]
+
+
+def get_device_types(device_names: str):
+    return [get_device_type(device_name) for device_name in device_names]
+
+
+def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
+    devices = []
+    if device_types is None:
+        device_types = [
+            IREE_TO_SHARK_DRIVER_MAP[name]
+            for name in query_available_drivers()
+            if name in IREE_TO_SHARK_DRIVER_MAP
+        ]
+    for device_type in device_types:
+        driver = get_driver(_IREE_DEVICE_MAP[device_type])
+        device_infos = driver.query_available_devices()
+        for device_info in device_infos:
+            uri_path = (
+                device_info["path"]
+                if device_info["path"] != ""
+                else str(device_info["device_id"])
+            )
+            device_uri = f"{device_type}://{uri_path}"
+            devices.append(device_uri)
+    return devices
+
+
+def compile_stress_test_module(
+    device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str
+) -> List[str]:
+    shark_module_paths = []
+    for device_type in device_types:
+        logging.info(
+            f"Compiling stress test model for device type {device_type}."
+        )
+        shark_module = SharkInference(
+            mlir_model,
+            func_name,
+            mlir_dialect=mlir_dialect,
+            device=device_type,
+        )
+        shark_module_paths.append(shark_module.save_module())
+    return shark_module_paths
+
+
+def stress_test(
+    model_name: str,
+    dynamic_model: bool = False,
+    device_types: Optional[List[str]] = None,
+    device_names: Optional[List[str]] = None,
+    batch_size: int = 1,
+    max_iterations: int = 10**7,
+    max_duration_seconds: float = 3600,
+    inference_timeout_seconds: float = 60,
+    mlir_dialect: str = "linalg",
+    frontend: str = "torch",
+    oversubscription_factor: int = 1,
+    tolerance_nulp: int = 50000,
+):
+    logging.info(f"Downloading stress test model {model_name}.")
+    mlir_model, func_name, inputs, golden_out = download_model(
+        model_name=model_name, dynamic=dynamic_model, frontend=frontend
+    )
+
+    if device_names is None or device_types is not None:
+        device_names = [] if device_names is None else device_names
+        with ProcessPoolExecutor() as executor:
+            device_names.extend(
+                executor.submit(query_devices, device_types).result()
+            )
+
+    device_types_set = list(set(get_device_types(device_names)))
+    shark_module_paths_set = compile_stress_test_module(
+        device_types_set, mlir_model, func_name, mlir_dialect
+    )
+    device_type_shark_module_path_map = {
+        device_type: module_path
+        for device_type, module_path in zip(
+            device_types_set, shark_module_paths_set
+        )
+    }
+    device_name_shark_module_path_map = {
+        device_name: device_type_shark_module_path_map[
+            get_device_type(device_name)
+        ]
+        for device_name in device_names
+    }
+
+    # This needs to run in a separate process, because it uses the driver cache
+    # in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module`
+    # in a forked process will hang.
+    with multiprocessing.Pool(
+        len(device_name_shark_module_path_map) * oversubscription_factor
+    ) as process_pool:
+        process_pool.starmap(
+            stress_test_compiled_model,
+            [
+                (
+                    module_path,
+                    func_name,
+                    device_name,
+                    inputs,
+                    golden_out,
+                    batch_size,
+                    max_iterations,
+                    max_duration_seconds,
+                    inference_timeout_seconds,
+                    tolerance_nulp,
+                    stress_test_index,
+                )
+                for stress_test_index, (device_name, module_path) in enumerate(
+                    list(device_name_shark_module_path_map.items())
+                    * oversubscription_factor
+                )
+            ],
+        )
+
+
+if __name__ == "__main__":
+    logging.basicConfig(encoding="utf-8", level=logging.INFO)
+    parser = argparse.ArgumentParser(
+        description="Downloads, compiles and runs a model from the tank to stress test the system."
+    )
+    parser.add_argument(
+        "--model", type=str, help="Model name in the tank.", default="alexnet"
+    )
+    parser.add_argument(
+        "--dynamic",
+        help="Use dynamic version of the model.",
+        action="store_true",
+        default=False,
+    )
+    parser.add_argument(
+        "--frontend", type=str, help="Frontend of the model.", default="torch"
+    )
+    parser.add_argument(
+        "--mlir-dialect",
+        type=str,
+        help="MLIR dialect of the model.",
+        default="linalg",
+        choices=supported_dialects,
+    )
+    parser.add_argument(
+        "--device-types",
+        type=str,
+        nargs="*",
+        choices=_IREE_DEVICE_MAP.keys(),
+        help="Runs the stress test on all devices with that type. "
+        "If absent and no deveices are specified "
+        "will run against all available devices.",
+    )
+    parser.add_argument(
+        "--devices",
+        type=str,
+        nargs="*",
+        help="List of devices to run the stress test on. "
+        "If device-types is specified will run against the union of the two.",
+    )
+    parser.add_argument(
+        "--batch-size",
+        type=int,
+        help="Number of inputs to feed into the model",
+        default=1,
+    )
+    parser.add_argument(
+        "--oversubscription",
+        type=int,
+        help="Oversubscrption factor. Each device will execute the model simultaneously "
+        "this many number of times.",
+        default=1,
+    )
+    parser.add_argument(
+        "--max-iterations",
+        type=int,
+        help="Maximum number of iterations to run the stress test per device.",
+        default=10**7,
+    )
+    parser.add_argument(
+        "--max-duration",
+        type=float,
+        help="Maximum number of seconds to run the stress test.",
+        default=3600,
+    )
+    parser.add_argument(
+        "--inference-timeout",
+        type=float,
+        help="Timeout in seconds for a single model inference operation.",
+        default=60,
+    )
+    parser.add_argument(
+        "--tolerance-nulp",
+        type=int,
+        help="The maximum number of unit in the last place for tolerance "
+        "when verifing results with the golden reference output.",
+        default=50000,
+    )
+
+    args = parser.parse_known_args()[0]
+    stress_test(
+        model_name=args.model,
+        dynamic_model=args.dynamic,
+        frontend=args.frontend,
+        mlir_dialect=args.mlir_dialect,
+        device_types=args.device_types,
+        device_names=args.devices,
+        batch_size=args.batch_size,
+        oversubscription_factor=args.oversubscription,
+        max_iterations=args.max_iterations,
+        max_duration_seconds=args.max_duration,
+        inference_timeout_seconds=args.inference_timeout,
+        tolerance_nulp=args.tolerance_nulp,
+    )
--- a/shark/tests/test_stress_test.py
+++ b/shark/tests/test_stress_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 The Nod Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import subprocess
+import sys
+import importlib.util
+
+
+def test_stress_test():
+    subprocess.check_call(
+        [
+            sys.executable,
+            importlib.util.find_spec("shark.stress_test").origin,
+            "--model=squeezenet1_0",
+            "--devices",
+            "cpu",
+            "--max-iterations=1",
+        ]
+    )
Author	SHA1	Message	Date
Boian Petkantchin	dd2a5795a2	Add stress test	2022-12-12 17:47:49 -08:00
Boian Petkantchin	47d1f047b9	Return dynamic model if specified when downloading from the tank	2022-12-12 17:47:49 -08:00
Boian Petkantchin	7639b62d85	In get_iree_runtime_config get the specific device instead of the default	2022-12-12 17:47:49 -08:00
Boian Petkantchin	4c00e54f89	Simplify iree_device_map	2022-12-12 17:47:46 -08:00