mirror of
https://github.com/nod-ai/SHARK-Studio.git
synced 2026-01-10 14:27:58 -05:00
155 lines
5.8 KiB
Python
155 lines
5.8 KiB
Python
# Copyright 2020 The Nod Team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from amdshark.iree_utils._common import run_cmd, iree_device_map
|
|
from amdshark.iree_utils.cpu_utils import get_cpu_count
|
|
import numpy as np
|
|
import os
|
|
import re
|
|
import platform
|
|
|
|
# Maps a benchmark time-unit suffix ("us"/"ms"/"s") to its length in seconds.
UNIT_TO_SECOND_MAP = {"us": 1e-6, "ms": 0.001, "s": 1}
|
|
|
|
|
|
def tensor_to_type_str(input_tensors: tuple, mlir_dialect: str):
    """
    Convert a tuple of tensors into IREE-style type strings.

    Input: A tuple of input tensors i.e tuple(torch.tensor)
    Output: list of string that represent mlir types (i.e 1x24xf64)
    Raises: ValueError when mlir_dialect is not one of the supported
    dialects ("linalg", "tosa", "mhlo", "tflite").
    # TODO: Support more than floats, and ints
    """
    list_of_type = []
    for input_tensor in input_tensors:
        # Shape portion, e.g. "1x24" (empty string for 0-d tensors).
        type_string = "x".join([str(dim) for dim in input_tensor.shape])
        if mlir_dialect in ["linalg", "tosa"]:
            # torch dtypes stringify as "torch.float32" -> keep "float32".
            dtype_string = str(input_tensor.dtype).replace("torch.", "")
        elif mlir_dialect in ["mhlo", "tflite"]:
            dtype = input_tensor.dtype
            try:
                # Pull the quoted name out of reprs like "<class 'foo'>".
                dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
                    "'", ""
                )
            except IndexError:
                # No quoted segment present; use the plain string form.
                dtype_string = str(dtype)
        else:
            # Previously an unsupported dialect fell through with
            # dtype_string unbound, raising a confusing NameError below.
            raise ValueError(f"Unsupported mlir_dialect: {mlir_dialect}")
        # Split e.g. "float32" into ("float", "32") and abbreviate to "f32".
        regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
        match = regex_split.match(dtype_string)
        mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
        type_string += f"x{mlir_type_string}"
        list_of_type.append(type_string)
    return list_of_type
|
|
|
|
|
|
def build_benchmark_args(
    input_file: str,
    device: str,
    input_tensors: tuple,
    mlir_dialect: str,
    training=False,
):
    """
    Build the iree-benchmark-module command line for tensor inputs.

    Inputs: input_file leading to vmfb, input_tensor to function, target device,
    and whether it is training or not.
    Outputs: list of strings that execute benchmark-module on target model.
    """
    # The benchmarker binary lives in the active virtualenv's bin dir.
    path = os.path.join(os.environ["VIRTUAL_ENV"], "bin")
    if platform.system() == "Windows":
        benchmarker_path = os.path.join(path, "iree-benchmark-module.exe")
    else:
        benchmarker_path = os.path.join(path, "iree-benchmark-module")
    benchmark_cl = [benchmarker_path, f"--module={input_file}"]
    # TODO: The function named can be passed as one of the args.
    fn_name = "forward"
    if training:
        # TODO: Replace name of train with actual train fn name.
        fn_name = "train"
    benchmark_cl.append(f"--function={fn_name}")
    benchmark_cl.append(f"--device={iree_device_map(device)}")
    # One --input flag per tensor, in mlir type-string form (e.g. 1x24xf32).
    mlir_input_types = tensor_to_type_str(input_tensors, mlir_dialect)
    for mlir_input in mlir_input_types:
        benchmark_cl.append(f"--input={mlir_input}")
    if device == "cpu":
        # Pin the task topology to the detected core count when available.
        num_cpus = get_cpu_count()
        if num_cpus is not None:
            benchmark_cl.append(f"--task_topology_max_group_count={num_cpus}")
    benchmark_cl.append("--print_statistics=true")
    return benchmark_cl
|
|
|
|
|
|
def build_benchmark_args_non_tensor_input(
    input_file: str,
    device: str,
    inputs: tuple,
    mlir_dialect: str,
    function_name: str,
):
    """
    Build the iree-benchmark-module command line for pre-formatted inputs.

    Inputs: input_file leading to vmfb, already-formatted input values,
    target device, and the entry function name (omitted when falsy).
    Outputs: list of strings that execute benchmark-module on target model;
    on non-Windows hosts an awk pipe fragment is appended at the end.
    """
    # The benchmarker binary lives in the active virtualenv's bin dir.
    path = os.path.join(os.environ["VIRTUAL_ENV"], "bin")
    if platform.system() == "Windows":
        benchmarker_path = os.path.join(path, "iree-benchmark-module.exe")
    else:
        benchmarker_path = os.path.join(path, "iree-benchmark-module")
    benchmark_cl = [benchmarker_path, f"--module={input_file}"]
    # TODO: The function named can be passed as one of the args.
    if function_name:
        benchmark_cl.append(f"--function={function_name}")
    benchmark_cl.append(f"--device={iree_device_map(device)}")
    # Renamed from "input" to avoid shadowing the builtin.
    for input_value in inputs:
        benchmark_cl.append(f"--input={input_value}")
    if platform.system() != "Windows":
        # NOTE(review): the doubled braces look like a leftover f-string
        # escape, but this string is consumed at runtime so it is kept
        # byte-identical -- confirm against the command's consumer.
        time_extractor = "| awk 'END{{print $2 $3}}'"
        benchmark_cl.append(time_extractor)
    return benchmark_cl
|
|
|
|
|
|
def run_benchmark_module(benchmark_cl):
    """
    Run benchmark command, extract result and return iteration/seconds, host
    peak memory, and device peak memory.

    # TODO: Add an example of the benchmark command.
    Input: benchmark command.
    """
    benchmark_path = benchmark_cl[0]
    assert os.path.exists(
        benchmark_path
    ), "Cannot find iree_benchmark_module, Please contact AMDSHARK maintainer on discord."
    bench_stdout, bench_stderr = run_cmd(" ".join(benchmark_cl))
    # Parse "<number> <unit>" from stdout; raw strings avoid the invalid
    # "\d" escape warnings the original non-raw patterns produced.
    try:
        regex_split = re.compile(r"(\d+[.]*\d*)( *)([a-zA-Z]+)")
        match = regex_split.search(bench_stdout)
        time_value = float(match.group(1))
        unit = match.group(3)
    except AttributeError:
        # match was None above; retry without the optional-space group.
        regex_split = re.compile(r"(\d+[.]*\d*)([a-zA-Z]+)")
        match = regex_split.search(bench_stdout)
        time_value = float(match.group(1))
        unit = match.group(2)
    # Convert to seconds using the reported unit. The original code parsed
    # the unit but ignored it, unconditionally assuming milliseconds even
    # though UNIT_TO_SECOND_MAP exists for exactly this conversion. Unknown
    # units fall back to ms to preserve the previous behavior.
    seconds = time_value * UNIT_TO_SECOND_MAP.get(unit, 0.001)
    iter_per_second = 1.0 / seconds

    # Extract peak memory from the statistics printed on stderr.
    host_regex = re.compile(r".*HOST_LOCAL:\s*([0-9]+)B peak")
    host_peak_b = int(host_regex.search(bench_stderr).group(1))
    device_regex = re.compile(r".*DEVICE_LOCAL:\s*([0-9]+)B peak")
    device_peak_b = int(device_regex.search(bench_stderr).group(1))
    return iter_per_second, host_peak_b, device_peak_b
|