mirror of
https://github.com/nod-ai/SHARK-Studio.git
synced 2026-01-11 14:58:11 -05:00
Compare commits
4 Commits
debug
...
boian-stre
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd2a5795a2 | ||
|
|
47d1f047b9 | ||
|
|
7639b62d85 | ||
|
|
4c00e54f89 |
@@ -38,56 +38,11 @@ def run_cmd(cmd):
|
||||
|
||||
|
||||
def iree_device_map(device):
|
||||
|
||||
from iree.runtime import get_driver, get_device
|
||||
|
||||
def get_all_devices(driver_name):
|
||||
driver = get_driver(driver_name)
|
||||
device_list_src = driver.query_available_devices()
|
||||
device_list = []
|
||||
for device_dict in device_list_src:
|
||||
device_list.append(f"{driver_name}://{device_dict['path']}")
|
||||
device_list.sort()
|
||||
return device_list
|
||||
|
||||
# only supported for vulkan as of now
|
||||
if "vulkan://" in device:
|
||||
device_list = get_all_devices("vulkan")
|
||||
_, d_index = device.split("://")
|
||||
matched_index = None
|
||||
match_with_index = False
|
||||
if 0 <= len(d_index) <= 2:
|
||||
try:
|
||||
d_index = int(d_index)
|
||||
except:
|
||||
print(
|
||||
f"{d_index} is not valid index or uri. Will choose device 0"
|
||||
)
|
||||
d_index = 0
|
||||
match_with_index = True
|
||||
|
||||
if len(device_list) > 1:
|
||||
print("List of available vulkan devices:")
|
||||
for i, d in enumerate(device_list):
|
||||
print(f"vulkan://{i} => {d}")
|
||||
if (match_with_index and d_index == i) or (
|
||||
not match_with_index and d == device
|
||||
):
|
||||
matched_index = i
|
||||
print(
|
||||
f"Choosing device vulkan://{matched_index}\nTo choose another device please specify device index or uri accordingly."
|
||||
)
|
||||
return get_device(device_list[matched_index])
|
||||
elif len(device_list) == 1:
|
||||
print(f"Found one vulkan device: {device_list[0]}. Using this.")
|
||||
return get_device(device_list[0])
|
||||
else:
|
||||
print(
|
||||
f"No device found! returning device corresponding to driver name: vulkan"
|
||||
)
|
||||
return _IREE_DEVICE_MAP["vulkan"]
|
||||
uri_parts = device.split("://", 2)
|
||||
if len(uri_parts) == 1:
|
||||
return _IREE_DEVICE_MAP[uri_parts[0]]
|
||||
else:
|
||||
return _IREE_DEVICE_MAP[device]
|
||||
return f"{_IREE_DEVICE_MAP[uri_parts[0]]}://{uri_parts[1]}"
|
||||
|
||||
|
||||
def get_supported_device_list():
|
||||
@@ -119,6 +74,7 @@ _IREE_TARGET_MAP = {
|
||||
"intel-gpu": "opencl-spirv",
|
||||
}
|
||||
|
||||
|
||||
# Finds whether the required drivers are installed for the given device.
|
||||
def check_device_drivers(device):
|
||||
"""Checks necessary drivers present for gpu and vulkan devices"""
|
||||
|
||||
@@ -20,6 +20,7 @@ import numpy as np
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
# Get the iree-compile arguments given device.
|
||||
def get_iree_device_args(device, extra_args=[]):
|
||||
if "://" in device:
|
||||
@@ -366,9 +367,5 @@ def get_results(compiled_vm, input, config, frontend="torch"):
|
||||
|
||||
def get_iree_runtime_config(device):
|
||||
device = iree_device_map(device)
|
||||
if type(device) == ireert.HalDevice:
|
||||
config = ireert.Config(device=device)
|
||||
else:
|
||||
driver_name = device.split("://")[0] if "://" in device else device
|
||||
config = ireert.Config(driver_name=driver_name)
|
||||
config = ireert.Config(device=ireert.get_device(device))
|
||||
return config
|
||||
|
||||
@@ -70,7 +70,6 @@ input_type_to_np_dtype = {
|
||||
"int8": np.int8,
|
||||
}
|
||||
|
||||
|
||||
# Save the model in the home local so it needn't be fetched everytime in the CI.
|
||||
home = str(Path.home())
|
||||
alt_path = os.path.join(os.path.dirname(__file__), "../gen_shark_tank/")
|
||||
@@ -93,6 +92,7 @@ else:
|
||||
f"shark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
|
||||
)
|
||||
|
||||
|
||||
# Checks whether the directory and files exists.
|
||||
def check_dir_exists(model_name, frontend="torch", dynamic=""):
|
||||
model_dir = os.path.join(WORKDIR, model_name)
|
||||
@@ -174,16 +174,9 @@ def download_model(
|
||||
)
|
||||
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
suffix = (
|
||||
"_" + frontend + ".mlir"
|
||||
if tuned is None
|
||||
else "_" + frontend + "_" + tuned + ".mlir"
|
||||
)
|
||||
tuned_str = "" if tuned is None else "_" + tuned
|
||||
suffix = f"{dyn_str}_{frontend}{tuned_str}.mlir"
|
||||
filename = os.path.join(model_dir, model_name + suffix)
|
||||
if not os.path.isfile(filename):
|
||||
filename = os.path.join(
|
||||
model_dir, model_name + "_" + frontend + ".mlir"
|
||||
)
|
||||
|
||||
with open(filename, mode="rb") as f:
|
||||
mlir_file = f.read()
|
||||
|
||||
296
shark/stress_test.py
Normal file
296
shark/stress_test.py
Normal file
@@ -0,0 +1,296 @@
|
||||
# Copyright 2022 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from iree.runtime import query_available_drivers, get_driver
|
||||
from shark.shark_downloader import download_model
|
||||
from shark.shark_inference import SharkInference
|
||||
from typing import List, Optional, Tuple
|
||||
import numpy as np
|
||||
import argparse
|
||||
from shark.iree_utils._common import _IREE_DEVICE_MAP
|
||||
import multiprocessing
|
||||
from shark.shark_runner import supported_dialects
|
||||
import logging
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from concurrent.futures.thread import ThreadPoolExecutor
|
||||
import time
|
||||
import numpy as np
|
||||
|
||||
IREE_TO_SHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()}
|
||||
|
||||
|
||||
def stress_test_compiled_model(
|
||||
shark_module_path: str,
|
||||
function_name: str,
|
||||
device: str,
|
||||
inputs: List[np.ndarray],
|
||||
golden_out: List[np.ndarray],
|
||||
batch_size: int,
|
||||
max_iterations: int,
|
||||
max_duration_seconds: float,
|
||||
inference_timeout_seconds: float,
|
||||
tolerance_nulp: int,
|
||||
stress_test_index: int,
|
||||
):
|
||||
logging.info(
|
||||
f"Running stress test {stress_test_index} on device {device}."
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
mlir_module=bytes(), function_name=function_name, device=device
|
||||
)
|
||||
shark_module.load_module(shark_module_path)
|
||||
input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs]
|
||||
golden_output_batches = np.repeat(golden_out, batch_size, axis=0)
|
||||
report_interval_seconds = 10
|
||||
start_time = time.time()
|
||||
previous_report_time = start_time
|
||||
executor = ThreadPoolExecutor(1)
|
||||
first_iteration_output = None
|
||||
for i in range(max_iterations):
|
||||
inference_task = executor.submit(shark_module.forward, input_batches)
|
||||
output = inference_task.result(inference_timeout_seconds)
|
||||
if first_iteration_output is None:
|
||||
np.testing.assert_array_almost_equal_nulp(
|
||||
golden_output_batches, output, nulp=tolerance_nulp
|
||||
)
|
||||
first_iteration_output = output
|
||||
else:
|
||||
np.testing.assert_array_equal(output, first_iteration_output)
|
||||
current_time = time.time()
|
||||
if report_interval_seconds < current_time - previous_report_time:
|
||||
logging.info(
|
||||
f"Stress test {stress_test_index} on device "
|
||||
f"{device} at iteration {i+1}"
|
||||
)
|
||||
previous_report_time = current_time
|
||||
if max_duration_seconds < current_time - start_time:
|
||||
return
|
||||
logging.info(f"Stress test {stress_test_index} on device {device} done.")
|
||||
|
||||
|
||||
def get_device_type(device_name: str):
|
||||
return device_name.split("://", 1)[0]
|
||||
|
||||
|
||||
def get_device_types(device_names: str):
|
||||
return [get_device_type(device_name) for device_name in device_names]
|
||||
|
||||
|
||||
def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
|
||||
devices = []
|
||||
if device_types is None:
|
||||
device_types = [
|
||||
IREE_TO_SHARK_DRIVER_MAP[name]
|
||||
for name in query_available_drivers()
|
||||
if name in IREE_TO_SHARK_DRIVER_MAP
|
||||
]
|
||||
for device_type in device_types:
|
||||
driver = get_driver(_IREE_DEVICE_MAP[device_type])
|
||||
device_infos = driver.query_available_devices()
|
||||
for device_info in device_infos:
|
||||
uri_path = (
|
||||
device_info["path"]
|
||||
if device_info["path"] != ""
|
||||
else str(device_info["device_id"])
|
||||
)
|
||||
device_uri = f"{device_type}://{uri_path}"
|
||||
devices.append(device_uri)
|
||||
return devices
|
||||
|
||||
|
||||
def compile_stress_test_module(
|
||||
device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str
|
||||
) -> List[str]:
|
||||
shark_module_paths = []
|
||||
for device_type in device_types:
|
||||
logging.info(
|
||||
f"Compiling stress test model for device type {device_type}."
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
mlir_model,
|
||||
func_name,
|
||||
mlir_dialect=mlir_dialect,
|
||||
device=device_type,
|
||||
)
|
||||
shark_module_paths.append(shark_module.save_module())
|
||||
return shark_module_paths
|
||||
|
||||
|
||||
def stress_test(
|
||||
model_name: str,
|
||||
dynamic_model: bool = False,
|
||||
device_types: Optional[List[str]] = None,
|
||||
device_names: Optional[List[str]] = None,
|
||||
batch_size: int = 1,
|
||||
max_iterations: int = 10**7,
|
||||
max_duration_seconds: float = 3600,
|
||||
inference_timeout_seconds: float = 60,
|
||||
mlir_dialect: str = "linalg",
|
||||
frontend: str = "torch",
|
||||
oversubscription_factor: int = 1,
|
||||
tolerance_nulp: int = 50000,
|
||||
):
|
||||
logging.info(f"Downloading stress test model {model_name}.")
|
||||
mlir_model, func_name, inputs, golden_out = download_model(
|
||||
model_name=model_name, dynamic=dynamic_model, frontend=frontend
|
||||
)
|
||||
|
||||
if device_names is None or device_types is not None:
|
||||
device_names = [] if device_names is None else device_names
|
||||
with ProcessPoolExecutor() as executor:
|
||||
device_names.extend(
|
||||
executor.submit(query_devices, device_types).result()
|
||||
)
|
||||
|
||||
device_types_set = list(set(get_device_types(device_names)))
|
||||
shark_module_paths_set = compile_stress_test_module(
|
||||
device_types_set, mlir_model, func_name, mlir_dialect
|
||||
)
|
||||
device_type_shark_module_path_map = {
|
||||
device_type: module_path
|
||||
for device_type, module_path in zip(
|
||||
device_types_set, shark_module_paths_set
|
||||
)
|
||||
}
|
||||
device_name_shark_module_path_map = {
|
||||
device_name: device_type_shark_module_path_map[
|
||||
get_device_type(device_name)
|
||||
]
|
||||
for device_name in device_names
|
||||
}
|
||||
|
||||
# This needs to run in a spearate process, because it uses the drvier chache
|
||||
# in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module`
|
||||
# in a forked process will hang.
|
||||
with multiprocessing.Pool(
|
||||
len(device_name_shark_module_path_map) * oversubscription_factor
|
||||
) as process_pool:
|
||||
process_pool.starmap(
|
||||
stress_test_compiled_model,
|
||||
[
|
||||
(
|
||||
module_path,
|
||||
func_name,
|
||||
device_name,
|
||||
inputs,
|
||||
golden_out,
|
||||
batch_size,
|
||||
max_iterations,
|
||||
max_duration_seconds,
|
||||
inference_timeout_seconds,
|
||||
tolerance_nulp,
|
||||
stress_test_index,
|
||||
)
|
||||
for stress_test_index, (device_name, module_path) in enumerate(
|
||||
list(device_name_shark_module_path_map.items())
|
||||
* oversubscription_factor
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(encoding="utf-8", level=logging.INFO)
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Downloads, compiles and runs a model from the tank to stress test the system."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model", type=str, help="Model name in the tank.", default="alexnet"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dynamic",
|
||||
help="Use dynamic version of the model.",
|
||||
action="store_true",
|
||||
default=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--frontend", type=str, help="Frontend of the model.", default="torch"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mlir-dialect",
|
||||
type=str,
|
||||
help="MLIR dialect of the model.",
|
||||
default="linalg",
|
||||
choices=supported_dialects,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--device-types",
|
||||
type=str,
|
||||
nargs="*",
|
||||
choices=_IREE_DEVICE_MAP.keys(),
|
||||
help="Runs the stress test on all devices with that type. "
|
||||
"If absent and no deveices are specified "
|
||||
"will run against all available devices.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--devices",
|
||||
type=str,
|
||||
nargs="*",
|
||||
help="List of devices to run the stress test on. "
|
||||
"If device-types is specified will run against the union of the two.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
type=int,
|
||||
help="Number of inputs to feed into the model",
|
||||
default=1,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--oversubscription",
|
||||
type=int,
|
||||
help="Oversubscrption factor. Each device will execute the model simultaneously "
|
||||
"this many number of times.",
|
||||
default=1,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-iterations",
|
||||
type=int,
|
||||
help="Maximum number of iterations to run the stress test per device.",
|
||||
default=10**7,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-duration",
|
||||
type=float,
|
||||
help="Maximum number of seconds to run the stress test.",
|
||||
default=3600,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--inference-timeout",
|
||||
type=float,
|
||||
help="Timeout in seconds for a single model inference operation.",
|
||||
default=60,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tolerance-nulp",
|
||||
type=int,
|
||||
help="The maximum number of unit in the last place for tolerance "
|
||||
"when verifing results with the golden reference output.",
|
||||
default=50000,
|
||||
)
|
||||
|
||||
args = parser.parse_known_args()[0]
|
||||
stress_test(
|
||||
model_name=args.model,
|
||||
dynamic_model=args.dynamic,
|
||||
frontend=args.frontend,
|
||||
mlir_dialect=args.mlir_dialect,
|
||||
device_types=args.device_types,
|
||||
device_names=args.devices,
|
||||
batch_size=args.batch_size,
|
||||
oversubscription_factor=args.oversubscription,
|
||||
max_iterations=args.max_iterations,
|
||||
max_duration_seconds=args.max_duration,
|
||||
inference_timeout_seconds=args.inference_timeout,
|
||||
tolerance_nulp=args.tolerance_nulp,
|
||||
)
|
||||
31
shark/tests/test_stress_test.py
Normal file
31
shark/tests/test_stress_test.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# Copyright 2022 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import pytest
|
||||
import subprocess
|
||||
import sys
|
||||
import importlib.util
|
||||
|
||||
|
||||
def test_stress_test():
|
||||
subprocess.check_call(
|
||||
[
|
||||
sys.executable,
|
||||
importlib.util.find_spec("shark.stress_test").origin,
|
||||
"--model=squeezenet1_0",
|
||||
"--devices",
|
||||
"cpu",
|
||||
"--max-iterations=1",
|
||||
]
|
||||
)
|
||||
Reference in New Issue
Block a user