mirror of
https://github.com/nod-ai/SHARK-Studio.git
synced 2026-01-09 22:07:55 -05:00
Improvements to pytest benchmarks. (#267)
* Add ONNX env var flags for venv setup. * Setup arguments for ONNX benchmarking via pytest. * Enable ONNX benchmarking on MiniLM via pytest (experimental) * Fix sequence lengths to 128 for TF model creation and fix issue with benchmarks. * Disable CI CPU benchmarks on A100, change some default args. * add xfails for roberta TF model tests on GPU.
This commit is contained in:
3
.github/workflows/test-models.yml
vendored
3
.github/workflows/test-models.yml
vendored
@@ -84,8 +84,7 @@ jobs:
|
|||||||
cd $GITHUB_WORKSPACE
|
cd $GITHUB_WORKSPACE
|
||||||
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
||||||
source shark.venv/bin/activate
|
source shark.venv/bin/activate
|
||||||
pytest --benchmark -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
pytest -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||||
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
|
|
||||||
|
|
||||||
- name: Validate GPU Models
|
- name: Validate GPU Models
|
||||||
if: matrix.suite == 'gpu'
|
if: matrix.suite == 'gpu'
|
||||||
|
|||||||
19
conftest.py
19
conftest.py
@@ -1,5 +1,18 @@
|
|||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
# Attaches SHARK command-line arguments to the pytest machinery.
|
# Attaches SHARK command-line arguments to the pytest machinery.
|
||||||
|
parser.addoption(
|
||||||
|
"--benchmark",
|
||||||
|
action="store_true",
|
||||||
|
default="False",
|
||||||
|
help="Pass option to benchmark and write results.csv",
|
||||||
|
)
|
||||||
|
parser.addoption(
|
||||||
|
"--onnx_bench",
|
||||||
|
action="store_true",
|
||||||
|
default="False",
|
||||||
|
help="Add ONNX benchmark results to pytest benchmarks.",
|
||||||
|
)
|
||||||
|
# The following options are deprecated and pending removal.
|
||||||
parser.addoption(
|
parser.addoption(
|
||||||
"--save_mlir",
|
"--save_mlir",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -12,12 +25,6 @@ def pytest_addoption(parser):
|
|||||||
default="False",
|
default="False",
|
||||||
help="Pass option to save IREE output .vmfb",
|
help="Pass option to save IREE output .vmfb",
|
||||||
)
|
)
|
||||||
parser.addoption(
|
|
||||||
"--benchmark",
|
|
||||||
action="store_true",
|
|
||||||
default="False",
|
|
||||||
help="Pass option to benchmark and write results.csv",
|
|
||||||
)
|
|
||||||
parser.addoption(
|
parser.addoption(
|
||||||
"--save_temps",
|
"--save_temps",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
|
|||||||
@@ -118,6 +118,16 @@ if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ ! -z "${ONNX}" ]]; then
|
||||||
|
echo "${Yellow}Installing ONNX and onnxruntime for benchmarks..."
|
||||||
|
$PYTHON -m pip install onnx onnxruntime psutil
|
||||||
|
if [ $? -eq 0 ];then
|
||||||
|
echo "Successfully installed ONNX and ONNX runtime."
|
||||||
|
else
|
||||||
|
echo "Could not install ONNX." >&2
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ -z "${CONDA_PREFIX}" ]]; then
|
if [[ -z "${CONDA_PREFIX}" ]]; then
|
||||||
echo "${Green}Before running examples activate venv with:"
|
echo "${Green}Before running examples activate venv with:"
|
||||||
echo " ${Green}source $VENV_DIR/bin/activate"
|
echo " ${Green}source $VENV_DIR/bin/activate"
|
||||||
|
|||||||
@@ -61,14 +61,20 @@ parser.add_argument(
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num_warmup_iterations",
|
"--num_warmup_iterations",
|
||||||
type=int,
|
type=int,
|
||||||
default=2,
|
default=5,
|
||||||
help="Run the model for the specified number of warmup iterations.",
|
help="Run the model for the specified number of warmup iterations.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num_iterations",
|
"--num_iterations",
|
||||||
type=int,
|
type=int,
|
||||||
default=1,
|
default=100,
|
||||||
help="Run the model for the specified number of iterations.",
|
help="Run the model for the specified number of iterations.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--onnx_bench",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help="When enabled, pytest bench results will include ONNX benchmark results.",
|
||||||
|
)
|
||||||
|
|
||||||
shark_args, unknown = parser.parse_known_args()
|
shark_args, unknown = parser.parse_known_args()
|
||||||
|
|||||||
@@ -25,6 +25,20 @@ import csv
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class OnnxFusionOptions(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.disable_gelu = False
|
||||||
|
self.disable_layer_norm = False
|
||||||
|
self.disable_attention = False
|
||||||
|
self.disable_skip_layer_norm = False
|
||||||
|
self.disable_embed_layer_norm = False
|
||||||
|
self.disable_bias_skip_layer_norm = False
|
||||||
|
self.disable_bias_gelu = False
|
||||||
|
self.enable_gelu_approximation = False
|
||||||
|
self.use_mask_index = False
|
||||||
|
self.no_attention_mask = False
|
||||||
|
|
||||||
|
|
||||||
class SharkBenchmarkRunner(SharkRunner):
|
class SharkBenchmarkRunner(SharkRunner):
|
||||||
# SharkRunner derived class with Benchmarking capabilities.
|
# SharkRunner derived class with Benchmarking capabilities.
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -148,6 +162,80 @@ class SharkBenchmarkRunner(SharkRunner):
|
|||||||
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def benchmark_onnx(self, modelname, inputs):
|
||||||
|
if self.device == "gpu":
|
||||||
|
print(
|
||||||
|
"Currently GPU benchmarking on ONNX is not supported in SHARK."
|
||||||
|
)
|
||||||
|
return ["N/A", "N/A"]
|
||||||
|
else:
|
||||||
|
from onnxruntime.transformers.benchmark import run_onnxruntime
|
||||||
|
from onnxruntime.transformers.huggingface_models import MODELS
|
||||||
|
from onnxruntime.transformers.benchmark_helper import (
|
||||||
|
ConfigModifier,
|
||||||
|
Precision,
|
||||||
|
)
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
if modelname == "microsoft/MiniLM-L12-H384-uncased":
|
||||||
|
modelname = "bert-base-uncased"
|
||||||
|
if modelname not in MODELS:
|
||||||
|
print(
|
||||||
|
f"{modelname} is currently not supported in ORT's HF. Check \
|
||||||
|
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
|
||||||
|
for currently supported models. Exiting benchmark ONNX."
|
||||||
|
)
|
||||||
|
return ["N/A", "N/A"]
|
||||||
|
use_gpu = self.device == "gpu"
|
||||||
|
num_threads = psutil.cpu_count(logical=False)
|
||||||
|
batch_sizes = [1]
|
||||||
|
sequence_lengths = [128]
|
||||||
|
cache_dir = os.path.join(".", "cache_models")
|
||||||
|
onnx_dir = os.path.join(".", "onnx_models")
|
||||||
|
verbose = False
|
||||||
|
input_counts = [1]
|
||||||
|
optimize_onnx = True
|
||||||
|
validate_onnx = False
|
||||||
|
disable_ort_io_binding = False
|
||||||
|
use_raw_attention_mask = True
|
||||||
|
model_fusion_statistics = {}
|
||||||
|
overwrite = False
|
||||||
|
model_source = "pt" # Either "pt" or "tf"
|
||||||
|
provider = None
|
||||||
|
config_modifier = ConfigModifier(None)
|
||||||
|
onnx_args = OnnxFusionOptions()
|
||||||
|
result = run_onnxruntime(
|
||||||
|
use_gpu,
|
||||||
|
provider,
|
||||||
|
(modelname,),
|
||||||
|
None,
|
||||||
|
config_modifier,
|
||||||
|
Precision.FLOAT32,
|
||||||
|
num_threads,
|
||||||
|
batch_sizes,
|
||||||
|
sequence_lengths,
|
||||||
|
shark_args.num_iterations,
|
||||||
|
input_counts,
|
||||||
|
optimize_onnx,
|
||||||
|
validate_onnx,
|
||||||
|
cache_dir,
|
||||||
|
onnx_dir,
|
||||||
|
verbose,
|
||||||
|
overwrite,
|
||||||
|
disable_ort_io_binding,
|
||||||
|
use_raw_attention_mask,
|
||||||
|
model_fusion_statistics,
|
||||||
|
model_source,
|
||||||
|
onnx_args,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||||
|
)
|
||||||
|
return [
|
||||||
|
result[0]["QPS"],
|
||||||
|
result[0]["average_latency_ms"],
|
||||||
|
]
|
||||||
|
|
||||||
def benchmark_all_csv(
|
def benchmark_all_csv(
|
||||||
self, inputs: tuple, modelname, dynamic, device_str, frontend
|
self, inputs: tuple, modelname, dynamic, device_str, frontend
|
||||||
):
|
):
|
||||||
@@ -164,6 +252,8 @@ class SharkBenchmarkRunner(SharkRunner):
|
|||||||
"datetime",
|
"datetime",
|
||||||
]
|
]
|
||||||
engines = ["frontend", "shark_python", "shark_iree_c"]
|
engines = ["frontend", "shark_python", "shark_iree_c"]
|
||||||
|
if shark_args.onnx_bench == True:
|
||||||
|
engines.append("onnxruntime")
|
||||||
|
|
||||||
if not os.path.exists("bench_results.csv"):
|
if not os.path.exists("bench_results.csv"):
|
||||||
with open("bench_results.csv", mode="w", newline="") as f:
|
with open("bench_results.csv", mode="w", newline="") as f:
|
||||||
@@ -182,20 +272,29 @@ class SharkBenchmarkRunner(SharkRunner):
|
|||||||
for e in engines:
|
for e in engines:
|
||||||
if e == "frontend":
|
if e == "frontend":
|
||||||
bench_result["engine"] = frontend
|
bench_result["engine"] = frontend
|
||||||
bench_result["iter/sec"] = self.benchmark_frontend(
|
(
|
||||||
modelname
|
bench_result["iter/sec"],
|
||||||
)[0]
|
bench_result["ms/iter"],
|
||||||
bench_result["ms/iter"] = self.benchmark_frontend(
|
) = self.benchmark_frontend(modelname)
|
||||||
modelname
|
|
||||||
)[1]
|
|
||||||
elif e == "shark_python":
|
elif e == "shark_python":
|
||||||
bench_result["engine"] = "shark_python"
|
bench_result["engine"] = "shark_python"
|
||||||
bench_result["iter/sec"] = self.benchmark_python(inputs)[0]
|
(
|
||||||
bench_result["ms/iter"] = self.benchmark_python(inputs)[1]
|
bench_result["iter/sec"],
|
||||||
else:
|
bench_result["ms/iter"],
|
||||||
|
) = self.benchmark_python(inputs)
|
||||||
|
elif e == "shark_iree_c":
|
||||||
bench_result["engine"] = "shark_iree_c"
|
bench_result["engine"] = "shark_iree_c"
|
||||||
bench_result["iter/sec"] = self.benchmark_c()[0]
|
(
|
||||||
bench_result["ms/iter"] = self.benchmark_c()[1]
|
bench_result["iter/sec"],
|
||||||
|
bench_result["ms/iter"],
|
||||||
|
) = self.benchmark_c()
|
||||||
|
elif e == "onnxruntime":
|
||||||
|
bench_result["engine"] = "onnxruntime"
|
||||||
|
(
|
||||||
|
bench_result["iter/sec"],
|
||||||
|
bench_result["ms/iter"],
|
||||||
|
) = self.benchmark_onnx(modelname, inputs)
|
||||||
|
|
||||||
bench_result["dialect"] = self.mlir_dialect
|
bench_result["dialect"] = self.mlir_dialect
|
||||||
bench_result["iterations"] = shark_args.num_iterations
|
bench_result["iterations"] = shark_args.num_iterations
|
||||||
bench_result["datetime"] = str(datetime.now())
|
bench_result["datetime"] = str(datetime.now())
|
||||||
|
|||||||
@@ -13,14 +13,15 @@ class MiniLMModuleTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
benchmark=False,
|
benchmark=False,
|
||||||
|
onnx_bench=False,
|
||||||
):
|
):
|
||||||
self.benchmark = benchmark
|
self.benchmark = benchmark
|
||||||
|
self.onnx_bench = onnx_bench
|
||||||
|
|
||||||
def create_and_check_module(self, dynamic, device):
|
def create_and_check_module(self, dynamic, device):
|
||||||
model, func_name, inputs, golden_out = download_tf_model(
|
model, func_name, inputs, golden_out = download_tf_model(
|
||||||
"microsoft/MiniLM-L12-H384-uncased"
|
"microsoft/MiniLM-L12-H384-uncased"
|
||||||
)
|
)
|
||||||
shark_args.enable_tf32 = self.benchmark
|
|
||||||
|
|
||||||
shark_module = SharkInference(
|
shark_module = SharkInference(
|
||||||
model,
|
model,
|
||||||
@@ -32,8 +33,7 @@ class MiniLMModuleTester:
|
|||||||
if self.benchmark == True:
|
if self.benchmark == True:
|
||||||
shark_args.enable_tf32 = True
|
shark_args.enable_tf32 = True
|
||||||
shark_module.compile()
|
shark_module.compile()
|
||||||
rtol = 1e-01
|
shark_args.onnx_bench = self.onnx_bench
|
||||||
atol = 1e-02
|
|
||||||
shark_module.shark_runner.benchmark_all_csv(
|
shark_module.shark_runner.benchmark_all_csv(
|
||||||
(inputs),
|
(inputs),
|
||||||
"microsoft/MiniLM-L12-H384-uncased",
|
"microsoft/MiniLM-L12-H384-uncased",
|
||||||
@@ -42,6 +42,8 @@ class MiniLMModuleTester:
|
|||||||
"tensorflow",
|
"tensorflow",
|
||||||
)
|
)
|
||||||
shark_args.enable_tf32 = False
|
shark_args.enable_tf32 = False
|
||||||
|
rtol = 1e-01
|
||||||
|
atol = 1e-02
|
||||||
|
|
||||||
else:
|
else:
|
||||||
shark_module.compile()
|
shark_module.compile()
|
||||||
@@ -57,6 +59,7 @@ class MiniLMModuleTest(unittest.TestCase):
|
|||||||
def configure(self, pytestconfig):
|
def configure(self, pytestconfig):
|
||||||
self.module_tester = MiniLMModuleTester(self)
|
self.module_tester = MiniLMModuleTester(self)
|
||||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||||
|
|
||||||
def test_module_static_cpu(self):
|
def test_module_static_cpu(self):
|
||||||
dynamic = False
|
dynamic = False
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ class MiniLMModuleTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
benchmark=False,
|
benchmark=False,
|
||||||
|
onnx_bench=False,
|
||||||
):
|
):
|
||||||
self.benchmark = benchmark
|
self.benchmark = benchmark
|
||||||
|
self.onnx_bench = onnx_bench
|
||||||
|
|
||||||
def create_and_check_module(self, dynamic, device):
|
def create_and_check_module(self, dynamic, device):
|
||||||
model_mlir, func_name, input, act_out = download_torch_model(
|
model_mlir, func_name, input, act_out = download_torch_model(
|
||||||
@@ -30,6 +32,7 @@ class MiniLMModuleTester:
|
|||||||
if self.benchmark == True:
|
if self.benchmark == True:
|
||||||
shark_args.enable_tf32 = True
|
shark_args.enable_tf32 = True
|
||||||
shark_module.compile()
|
shark_module.compile()
|
||||||
|
shark_args.onnx_bench = self.onnx_bench
|
||||||
shark_module.shark_runner.benchmark_all_csv(
|
shark_module.shark_runner.benchmark_all_csv(
|
||||||
(input),
|
(input),
|
||||||
"microsoft/MiniLM-L12-H384-uncased",
|
"microsoft/MiniLM-L12-H384-uncased",
|
||||||
@@ -54,6 +57,7 @@ class MiniLMModuleTest(unittest.TestCase):
|
|||||||
def configure(self, pytestconfig):
|
def configure(self, pytestconfig):
|
||||||
self.module_tester = MiniLMModuleTester(self)
|
self.module_tester = MiniLMModuleTester(self)
|
||||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||||
|
|
||||||
def test_module_static_cpu(self):
|
def test_module_static_cpu(self):
|
||||||
dynamic = False
|
dynamic = False
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||||
from shark.shark_inference import SharkInference
|
from shark.shark_inference import SharkInference
|
||||||
from shark.shark_downloader import download_tf_model
|
from shark.shark_downloader import download_tf_model
|
||||||
|
from shark.parser import shark_args
|
||||||
|
|
||||||
import iree.compiler as ireec
|
|
||||||
import unittest
|
import unittest
|
||||||
import pytest
|
import pytest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -12,8 +12,10 @@ class BertBaseUncasedModuleTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
benchmark=False,
|
benchmark=False,
|
||||||
|
onnx_bench=False,
|
||||||
):
|
):
|
||||||
self.benchmark = benchmark
|
self.benchmark = benchmark
|
||||||
|
self.onnx_bench = onnx_bench
|
||||||
|
|
||||||
def create_and_check_module(self, dynamic, device):
|
def create_and_check_module(self, dynamic, device):
|
||||||
model, func_name, inputs, golden_out = download_tf_model(
|
model, func_name, inputs, golden_out = download_tf_model(
|
||||||
@@ -33,6 +35,7 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
|||||||
def configure(self, pytestconfig):
|
def configure(self, pytestconfig):
|
||||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
|
||||||
def test_module_static_cpu(self):
|
def test_module_static_cpu(self):
|
||||||
dynamic = False
|
dynamic = False
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from shark.shark_inference import SharkInference
|
|||||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||||
from tank.model_utils import compare_tensors
|
from tank.model_utils import compare_tensors
|
||||||
from shark.shark_downloader import download_torch_model
|
from shark.shark_downloader import download_torch_model
|
||||||
|
from shark.parser import shark_args
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import unittest
|
import unittest
|
||||||
@@ -12,29 +13,17 @@ import pytest
|
|||||||
class BertBaseUncasedModuleTester:
|
class BertBaseUncasedModuleTester:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
save_mlir=False,
|
|
||||||
save_vmfb=False,
|
|
||||||
benchmark=False,
|
benchmark=False,
|
||||||
|
onnx_bench=False,
|
||||||
):
|
):
|
||||||
self.save_mlir = save_mlir
|
|
||||||
self.save_vmfb = save_vmfb
|
|
||||||
self.benchmark = benchmark
|
self.benchmark = benchmark
|
||||||
|
self.onnx_bench = onnx_bench
|
||||||
|
|
||||||
def create_and_check_module(self, dynamic, device):
|
def create_and_check_module(self, dynamic, device):
|
||||||
model_mlir, func_name, input, act_out = download_torch_model(
|
model_mlir, func_name, input, act_out = download_torch_model(
|
||||||
"bert-base-uncased", dynamic
|
"bert-base-uncased", dynamic
|
||||||
)
|
)
|
||||||
|
|
||||||
# from shark.shark_importer import SharkImporter
|
|
||||||
# mlir_importer = SharkImporter(
|
|
||||||
# model,
|
|
||||||
# (input,),
|
|
||||||
# frontend="torch",
|
|
||||||
# )
|
|
||||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
|
||||||
# is_dynamic=dynamic, tracing_required=True
|
|
||||||
# )
|
|
||||||
|
|
||||||
shark_module = SharkInference(
|
shark_module = SharkInference(
|
||||||
model_mlir,
|
model_mlir,
|
||||||
func_name,
|
func_name,
|
||||||
@@ -47,6 +36,7 @@ class BertBaseUncasedModuleTester:
|
|||||||
assert True == compare_tensors(act_out, results)
|
assert True == compare_tensors(act_out, results)
|
||||||
|
|
||||||
if self.benchmark == True:
|
if self.benchmark == True:
|
||||||
|
shark_args.onnx_bench = self.onnx_bench
|
||||||
shark_module.shark_runner.benchmark_all_csv(
|
shark_module.shark_runner.benchmark_all_csv(
|
||||||
(input),
|
(input),
|
||||||
"bert-base-uncased",
|
"bert-base-uncased",
|
||||||
@@ -61,6 +51,7 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
|||||||
def configure(self, pytestconfig):
|
def configure(self, pytestconfig):
|
||||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||||
|
|
||||||
def test_module_static_cpu(self):
|
def test_module_static_cpu(self):
|
||||||
dynamic = False
|
dynamic = False
|
||||||
|
|||||||
@@ -85,9 +85,6 @@ class TFHuggingFaceLanguage(tf.Module):
|
|||||||
|
|
||||||
|
|
||||||
def get_TFhf_model(name):
|
def get_TFhf_model(name):
|
||||||
# gpus = tf.config.experimental.list_physical_devices("GPU")
|
|
||||||
# for gpu in gpus:
|
|
||||||
# tf.config.experimental.set_memory_growth(gpu, True)
|
|
||||||
model = TFHuggingFaceLanguage(name)
|
model = TFHuggingFaceLanguage(name)
|
||||||
tokenizer = BertTokenizer.from_pretrained(
|
tokenizer = BertTokenizer.from_pretrained(
|
||||||
"microsoft/MiniLM-L12-H384-uncased"
|
"microsoft/MiniLM-L12-H384-uncased"
|
||||||
@@ -123,37 +120,7 @@ def compare_tensors_tf(tf_tensor, numpy_tensor):
|
|||||||
|
|
||||||
##################### Tensorflow Hugging Face Masked LM Models ###################################
|
##################### Tensorflow Hugging Face Masked LM Models ###################################
|
||||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
||||||
|
import tensorflow as tf
|
||||||
# Create a set of input signature.
|
|
||||||
inputs_signature = [
|
|
||||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
|
||||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
|
||||||
]
|
|
||||||
|
|
||||||
# For supported models please see here:
|
|
||||||
# Utility function for comparing two tensors (tensorflow).
|
|
||||||
def compare_tensors_tf(tf_tensor, numpy_tensor):
|
|
||||||
# setting the absolute and relative tolerance
|
|
||||||
rtol = 1e-02
|
|
||||||
atol = 1e-03
|
|
||||||
tf_to_numpy = tf_tensor.numpy()
|
|
||||||
return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)
|
|
||||||
|
|
||||||
|
|
||||||
##################### Tensorflow Hugging Face Masked LM Models ###################################
|
|
||||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
|
||||||
|
|
||||||
visible_default = tf.config.list_physical_devices("GPU")
|
|
||||||
try:
|
|
||||||
tf.config.set_visible_devices([], "GPU")
|
|
||||||
visible_devices = tf.config.get_visible_devices()
|
|
||||||
for device in visible_devices:
|
|
||||||
assert device.device_type != "GPU"
|
|
||||||
except:
|
|
||||||
# Invalid device or cannot modify virtual devices once initialized.
|
|
||||||
pass
|
|
||||||
|
|
||||||
# The max_sequence_length is set small for testing purpose.
|
|
||||||
|
|
||||||
# Create a set of input signature.
|
# Create a set of input signature.
|
||||||
input_signature_maskedlm = [
|
input_signature_maskedlm = [
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
from shark.shark_inference import SharkInference
|
from shark.shark_inference import SharkInference
|
||||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||||
from shark.shark_downloader import download_tf_model
|
from shark.shark_downloader import download_tf_model
|
||||||
|
from shark.parser import shark_args
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -12,8 +13,10 @@ class Resnet50ModuleTester:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
benchmark=False,
|
benchmark=False,
|
||||||
|
onnx_bench=False,
|
||||||
):
|
):
|
||||||
self.benchmark = benchmark
|
self.benchmark = benchmark
|
||||||
|
self.onnx_bench = onnx_bench
|
||||||
|
|
||||||
def create_and_check_module(self, dynamic, device):
|
def create_and_check_module(self, dynamic, device):
|
||||||
model, func_name, inputs, golden_out = download_tf_model("resnet50")
|
model, func_name, inputs, golden_out = download_tf_model("resnet50")
|
||||||
@@ -30,6 +33,8 @@ class Resnet50ModuleTester:
|
|||||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||||
|
|
||||||
if self.benchmark == True:
|
if self.benchmark == True:
|
||||||
|
shark_args.enable_tf32 = True
|
||||||
|
shark_args.onnx_bench = self.onnx_bench
|
||||||
shark_module.shark_runner.benchmark_all_csv(
|
shark_module.shark_runner.benchmark_all_csv(
|
||||||
(inputs), "resnet50", dynamic, device, "tensorflow"
|
(inputs), "resnet50", dynamic, device, "tensorflow"
|
||||||
)
|
)
|
||||||
@@ -40,6 +45,7 @@ class Resnet50ModuleTest(unittest.TestCase):
|
|||||||
def configure(self, pytestconfig):
|
def configure(self, pytestconfig):
|
||||||
self.module_tester = Resnet50ModuleTester(self)
|
self.module_tester = Resnet50ModuleTester(self)
|
||||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||||
|
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||||
|
|
||||||
def test_module_static_cpu(self):
|
def test_module_static_cpu(self):
|
||||||
dynamic = False
|
dynamic = False
|
||||||
|
|||||||
@@ -28,7 +28,9 @@ class RobertaBaseModuleTester:
|
|||||||
)
|
)
|
||||||
shark_module.compile()
|
shark_module.compile()
|
||||||
result = shark_module.forward(inputs)
|
result = shark_module.forward(inputs)
|
||||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
np.testing.assert_allclose(
|
||||||
|
result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RobertaBaseModuleTest(unittest.TestCase):
|
class RobertaBaseModuleTest(unittest.TestCase):
|
||||||
@@ -42,6 +44,7 @@ class RobertaBaseModuleTest(unittest.TestCase):
|
|||||||
device = "cpu"
|
device = "cpu"
|
||||||
self.module_tester.create_and_check_module(dynamic, device)
|
self.module_tester.create_and_check_module(dynamic, device)
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -25,7 +25,9 @@ class XLMRobertaModuleTester:
|
|||||||
)
|
)
|
||||||
shark_module.compile()
|
shark_module.compile()
|
||||||
result = shark_module.forward(inputs)
|
result = shark_module.forward(inputs)
|
||||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
np.testing.assert_allclose(
|
||||||
|
result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class XLMRobertaModuleTest(unittest.TestCase):
|
class XLMRobertaModuleTest(unittest.TestCase):
|
||||||
@@ -39,6 +41,7 @@ class XLMRobertaModuleTest(unittest.TestCase):
|
|||||||
device = "cpu"
|
device = "cpu"
|
||||||
self.module_tester.create_and_check_module(dynamic, device)
|
self.module_tester.create_and_check_module(dynamic, device)
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user