mirror of
https://github.com/nod-ai/SHARK-Studio.git
synced 2026-01-12 07:18:27 -05:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c9a310842d | ||
|
|
67bdfda58c |
43
.github/workflows/nightly.yml
vendored
43
.github/workflows/nightly.yml
vendored
@@ -61,30 +61,8 @@ jobs:
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
|
||||
- name: Build and validate the IREE package
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
|
||||
source iree.venv/bin/activate
|
||||
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
|
||||
SHARK_PACKAGE_VERSION=${package_version} \
|
||||
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/iree-org/iree/releases
|
||||
# Install the built wheel
|
||||
pip install ./wheelhouse/nodai*
|
||||
# Validate the Models
|
||||
/bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
|
||||
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
rm -rf ./wheelhouse/nodai*
|
||||
|
||||
- name: Build and validate the SHARK Runtime package
|
||||
- name: Build and validate the package
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
./setup_venv.sh
|
||||
@@ -95,24 +73,7 @@ jobs:
|
||||
# Install the built wheel
|
||||
pip install ./wheelhouse/nodai*
|
||||
# Validate the Models
|
||||
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
if !(grep -Fxq " failed" pytest_results.txt)
|
||||
then
|
||||
export SHA=$(git log -1 --format='%h')
|
||||
gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
|
||||
gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
|
||||
fi
|
||||
rm pytest_results.txt
|
||||
rm -rf ./wheelhouse/nodai*
|
||||
|
||||
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
|
||||
|
||||
- name: Upload Release Assets
|
||||
id: upload-release-assets
|
||||
|
||||
33
.github/workflows/test-models.yml
vendored
33
.github/workflows/test-models.yml
vendored
@@ -15,12 +15,14 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [a100, MacStudio, ubuntu-latest]
|
||||
os: [a100, MI100, MacStudio, ubuntu-latest]
|
||||
suite: [cpu,gpu,vulkan]
|
||||
python-version: ["3.10"]
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
suite: lint
|
||||
- os: MI100
|
||||
suite: rocm
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
suite: vulkan
|
||||
@@ -30,21 +32,19 @@ jobs:
|
||||
suite: cpu
|
||||
- os: MacStudio
|
||||
suite: gpu
|
||||
- os: MacStudio
|
||||
suite: cpu
|
||||
- os: MacStudio
|
||||
suite: vulkan
|
||||
- os: MI100
|
||||
suite: gpu
|
||||
- os: MI100
|
||||
suite: vulkan
|
||||
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set Environment Variables
|
||||
run: |
|
||||
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
|
||||
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Python Version File ${{ matrix.python-version }}
|
||||
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
@@ -82,18 +82,17 @@ jobs:
|
||||
if: matrix.suite == 'cpu'
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
||||
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
|
||||
- name: Validate GPU Models
|
||||
- name: Validate GPU/CUDA Models
|
||||
if: matrix.suite == 'gpu'
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
||||
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest --benchmark -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_gpu_${SHORT_SHA}.csv
|
||||
pytest -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
|
||||
- name: Validate Vulkan Models
|
||||
if: matrix.suite == 'vulkan'
|
||||
@@ -102,3 +101,11 @@ jobs:
|
||||
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
|
||||
- name: Validate GPU/ROCM Models
|
||||
if: matrix.suite == 'rocm'
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest -k 'rocm' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
|
||||
28
README.md
28
README.md
@@ -121,40 +121,14 @@ pytest tank/<MODEL_NAME> -k "keyword"
|
||||
```
|
||||
|
||||
### Run benchmarks on SHARK tank pytests and generate bench_results.csv with results.
|
||||
|
||||
|
||||
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
|
||||
|
||||
```shell
|
||||
pytest --benchmark tank
|
||||
|
||||
# Just do static GPU benchmarks for PyTorch tests:
|
||||
pytest --benchmark tank --ignore-glob="_tf*" -k "static_gpu"
|
||||
```
|
||||
|
||||
### Benchmark Resnet50, MiniLM on CPU
|
||||
|
||||
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
|
||||
|
||||
```shell
|
||||
# We suggest running the following commands as root before running benchmarks on CPU:
|
||||
|
||||
cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | awk -F, '{print $2}' | sort -n | uniq | ( while read X ; do echo $X ; echo 0 > /sys/devices/system/cpu/cpu$X/online ; done )
|
||||
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
|
||||
|
||||
# Benchmark canonical Resnet50 on CPU via pytest
|
||||
pytest --benchmark tank/resnet50/ -k "cpu"
|
||||
|
||||
# Benchmark canonical MiniLM on CPU via pytest
|
||||
pytest --benchmark tank/MiniLM-L12-H384-uncased/ -k "cpu"
|
||||
|
||||
# Benchmark MiniLM on CPU via transformer-benchmarks:
|
||||
git clone --recursive https://github.com/nod-ai/transformer-benchmarks.git
|
||||
cd transformer-benchmarks
|
||||
./perf-ci.sh -n
|
||||
# Check detail.csv for MLIR/IREE results.
|
||||
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
IMPORTER=1 ./setup_venv.sh
|
||||
source $GITHUB_WORKSPACE/shark.venv/bin/activate
|
||||
python generate_sharktank.py --upload=False
|
||||
19
conftest.py
19
conftest.py
@@ -1,18 +1,5 @@
|
||||
def pytest_addoption(parser):
|
||||
# Attaches SHARK command-line arguments to the pytest machinery.
|
||||
parser.addoption(
|
||||
"--benchmark",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Pass option to benchmark and write results.csv",
|
||||
)
|
||||
parser.addoption(
|
||||
"--onnx_bench",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Add ONNX benchmark results to pytest benchmarks.",
|
||||
)
|
||||
# The following options are deprecated and pending removal.
|
||||
parser.addoption(
|
||||
"--save_mlir",
|
||||
action="store_true",
|
||||
@@ -25,6 +12,12 @@ def pytest_addoption(parser):
|
||||
default="False",
|
||||
help="Pass option to save IREE output .vmfb",
|
||||
)
|
||||
parser.addoption(
|
||||
"--benchmark",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Pass option to benchmark and write results.csv",
|
||||
)
|
||||
parser.addoption(
|
||||
"--save_temps",
|
||||
action="store_true",
|
||||
|
||||
@@ -13,7 +13,6 @@ import csv
|
||||
import argparse
|
||||
from shark.shark_importer import SharkImporter
|
||||
import tensorflow as tf
|
||||
import subprocess as sp
|
||||
import hashlib
|
||||
import numpy as np
|
||||
|
||||
@@ -94,12 +93,8 @@ def save_torch_model(torch_model_list):
|
||||
|
||||
|
||||
def save_tf_model(tf_model_list):
|
||||
from tank.model_utils_tf import (
|
||||
get_causal_image_model,
|
||||
get_causal_lm_model,
|
||||
get_keras_model,
|
||||
get_TFhf_model,
|
||||
)
|
||||
from tank.model_utils_tf import get_causal_lm_model
|
||||
from tank.model_utils_tf import get_causal_image_model
|
||||
|
||||
with open(tf_model_list) as csvfile:
|
||||
tf_reader = csv.reader(csvfile, delimiter=",")
|
||||
@@ -110,15 +105,11 @@ def save_tf_model(tf_model_list):
|
||||
|
||||
model = None
|
||||
input = None
|
||||
print(f"Generating artifacts for model {tf_model_name}")
|
||||
print(model_type)
|
||||
if model_type == "hf":
|
||||
model, input, _ = get_causal_lm_model(tf_model_name)
|
||||
if model_type == "img":
|
||||
model, input, _ = get_causal_image_model(tf_model_name)
|
||||
if model_type == "keras":
|
||||
model, input, _ = get_keras_model(tf_model_name)
|
||||
if model_type == "TFhf":
|
||||
model, input, _ = get_TFhf_model(tf_model_name)
|
||||
|
||||
tf_model_name = tf_model_name.replace("/", "_")
|
||||
tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
|
||||
@@ -228,8 +219,5 @@ if __name__ == "__main__":
|
||||
save_tflite_model(args.tflite_model_csv)
|
||||
|
||||
if args.upload:
|
||||
git_hash = sp.getoutput("git log -1 --format='%h'") + "/"
|
||||
print("uploading files to gs://shark_tank/" + git_hash)
|
||||
os.system(
|
||||
"gsutil cp -r ./gen_shark_tank/* gs://shark_tank/" + git_hash
|
||||
)
|
||||
print("uploading files to gs://shark_tank/")
|
||||
os.system("gsutil cp -r ./gen_shark_tank/* gs://shark_tank/")
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import torch
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"bert-base-uncased", dynamic
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"bert-base-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,95 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class DistilBertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"distilbert-base-uncased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"distilbert-base-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class DistilBertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = DistilBertModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class MobileNetV3ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"mobilenet_v3_small", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"alexnet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class MobileNetV3ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MobileNetV3ModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="stuck in the pipeline.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class Resnet101ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"resnet101", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"resnet101",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class Resnet101ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = Resnet101ModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import get_vision_model, compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class Resnet50ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"resnet50", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"resnet50",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class Resnet50ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = Resnet50ModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,91 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class UnetModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"unet", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"unet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class UnetModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = UnetModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -118,16 +118,6 @@ if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ! -z "${ONNX}" ]]; then
|
||||
echo "${Yellow}Installing ONNX and onnxruntime for benchmarks..."
|
||||
$PYTHON -m pip install onnx onnxruntime psutil
|
||||
if [ $? -eq 0 ];then
|
||||
echo "Successfully installed ONNX and ONNX runtime."
|
||||
else
|
||||
echo "Could not install ONNX." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${CONDA_PREFIX}" ]]; then
|
||||
echo "${Green}Before running examples activate venv with:"
|
||||
echo " ${Green}source $VENV_DIR/bin/activate"
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
import torch
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from iree.compiler import compile_str
|
||||
from iree import runtime as ireert
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
|
||||
class AlbertModule(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
|
||||
self.model.eval()
|
||||
|
||||
def forward(self, input_ids, attention_mask):
|
||||
return self.model(
|
||||
input_ids=input_ids, attention_mask=attention_mask
|
||||
).logits
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
|
||||
text = "This [MASK] is very tasty."
|
||||
encoded_inputs = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="pt",
|
||||
)
|
||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||
mlir_importer = SharkImporter(
|
||||
AlbertModule(),
|
||||
inputs,
|
||||
frontend="torch",
|
||||
)
|
||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
is_dynamic=False, tracing_required=True
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
minilm_mlir, func_name, mlir_dialect="linalg"
|
||||
)
|
||||
shark_module.compile()
|
||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
||||
mask_id = torch.where(
|
||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||
)[1]
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
while True:
|
||||
try:
|
||||
new_text = input("Give me a sentence with [MASK] to fill: ")
|
||||
encoded_inputs = tokenizer(
|
||||
new_text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="pt",
|
||||
)
|
||||
inputs = (
|
||||
encoded_inputs["input_ids"],
|
||||
encoded_inputs["attention_mask"],
|
||||
)
|
||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
||||
mask_id = torch.where(
|
||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||
)[1]
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = (
|
||||
torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||
)
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("Exiting program.")
|
||||
break
|
||||
@@ -1,100 +0,0 @@
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
||||
import tensorflow as tf
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from iree.compiler import tf as tfc
|
||||
from iree.compiler import compile_str
|
||||
from iree import runtime as ireert
|
||||
import os
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
# Create a set of inputs
|
||||
t5_inputs = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
]
|
||||
|
||||
|
||||
class AlbertModule(tf.Module):
|
||||
def __init__(self):
|
||||
super(AlbertModule, self).__init__()
|
||||
self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
|
||||
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
|
||||
|
||||
@tf.function(input_signature=t5_inputs)
|
||||
def forward(self, input_ids, attention_mask):
|
||||
return self.m.predict(input_ids, attention_mask)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
|
||||
# text = "This is a great [MASK]."
|
||||
text = "This [MASK] is very tasty."
|
||||
encoded_inputs = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="tf",
|
||||
)
|
||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||
mlir_importer = SharkImporter(
|
||||
AlbertModule(),
|
||||
inputs,
|
||||
frontend="tf",
|
||||
)
|
||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
is_dynamic=False, tracing_required=False
|
||||
)
|
||||
shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="mhlo")
|
||||
shark_module.compile()
|
||||
output_idx = 0
|
||||
data_idx = 1
|
||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
||||
mask_id = np.where(
|
||||
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
|
||||
)
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
while True:
|
||||
try:
|
||||
new_text = input("Give me a sentence with [MASK] to fill: ")
|
||||
encoded_inputs = tokenizer(
|
||||
new_text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="tf",
|
||||
)
|
||||
inputs = (
|
||||
encoded_inputs["input_ids"],
|
||||
encoded_inputs["attention_mask"],
|
||||
)
|
||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
||||
mask_id = np.where(
|
||||
tf.squeeze(encoded_inputs["input_ids"])
|
||||
== tokenizer.mask_token_id
|
||||
)
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[
|
||||
0:5
|
||||
]
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("Exiting program.")
|
||||
sys.exit()
|
||||
@@ -23,7 +23,7 @@ input = torch.randn(1, 3, 224, 224)
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
ResnestModule(),
|
||||
(input,),
|
||||
(input),
|
||||
frontend="torch",
|
||||
)
|
||||
|
||||
@@ -33,7 +33,9 @@ mlir_importer = SharkImporter(
|
||||
|
||||
print(golden_out)
|
||||
|
||||
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
|
||||
shark_module = SharkInference(
|
||||
vision_mlir, func_name, device="cpu", mlir_dialect="linalg"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((input,))
|
||||
result = shark_module.forward((input))
|
||||
print("Obtained result", result)
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from shark_runner import SharkInference
|
||||
|
||||
|
||||
# Currently not supported aten.transpose_conv2d missing.
|
||||
class UnetModule(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
@@ -15,7 +14,7 @@ class UnetModule(torch.nn.Module):
|
||||
init_features=32,
|
||||
pretrained=True,
|
||||
)
|
||||
self.model.eval()
|
||||
self.train(False)
|
||||
|
||||
def forward(self, input):
|
||||
return self.model(input)
|
||||
@@ -23,17 +22,10 @@ class UnetModule(torch.nn.Module):
|
||||
|
||||
input = torch.randn(1, 3, 224, 224)
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
print(input)
|
||||
shark_module = SharkInference(
|
||||
UnetModule(),
|
||||
(input,),
|
||||
frontend="torch",
|
||||
)
|
||||
|
||||
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
|
||||
tracing_required=False
|
||||
)
|
||||
|
||||
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((input,))
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
shark_module.benchmark_forward((input,))
|
||||
print(input)
|
||||
|
||||
@@ -48,8 +48,8 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
|
||||
def __init__(self, device: str):
|
||||
self.torch_device_str = device
|
||||
self.config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
self.raw_device_str = device
|
||||
self.iree_device_str = IREE_DEVICE_MAP[device]
|
||||
self.config = ireert.Config(self.iree_device_str)
|
||||
|
||||
def get_torch_metadata(
|
||||
self, tensor: DeviceArray, kwargs: Dict[str, Any]
|
||||
@@ -71,7 +71,7 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
"EagerMode",
|
||||
)
|
||||
callable, _ = get_iree_compiled_module(
|
||||
imported_module, self.raw_device_str, func_name=fn_name
|
||||
imported_module, self.iree_device_str, func_name=fn_name
|
||||
)
|
||||
return callable
|
||||
|
||||
|
||||
@@ -44,17 +44,15 @@ IREE_DEVICE_MAP = {
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm",
|
||||
"intel-gpu": "level_zero",
|
||||
}
|
||||
|
||||
IREE_TARGET_MAP = {
|
||||
"cpu": "llvm-cpu",
|
||||
"cpu": "dylib",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm",
|
||||
"intel-gpu": "opencl-spirv",
|
||||
}
|
||||
|
||||
# Finds whether the required drivers are installed for the given device.
|
||||
@@ -70,12 +68,6 @@ def check_device_drivers(device):
|
||||
subprocess.check_output("vulkaninfo")
|
||||
except Exception:
|
||||
return True
|
||||
elif device in ["intel-gpu"]:
|
||||
try:
|
||||
subprocess.check_output(["dpkg", "-L", "intel-level-zero-gpu"])
|
||||
return False
|
||||
except Exception:
|
||||
return True
|
||||
elif device == "cpu":
|
||||
return False
|
||||
# Unknown device.
|
||||
|
||||
@@ -34,12 +34,9 @@ def tensor_to_type_str(input_tensors: tuple, mlir_dialect: str):
|
||||
dtype_string = str(input_tensor.dtype).replace("torch.", "")
|
||||
elif mlir_dialect in ["mhlo", "tflite"]:
|
||||
dtype = input_tensor.dtype
|
||||
try:
|
||||
dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
|
||||
"'", ""
|
||||
)
|
||||
except IndexError:
|
||||
dtype_string = str(dtype)
|
||||
dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
|
||||
"'", ""
|
||||
)
|
||||
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
|
||||
match = regex_split.match(dtype_string)
|
||||
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
|
||||
|
||||
@@ -98,10 +98,8 @@ def compile_module_to_flatbuffer(
|
||||
|
||||
def get_iree_module(flatbuffer_blob, device, func_name):
|
||||
# Returns the compiled module and the configs.
|
||||
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
|
||||
config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
vm_module = ireert.VmModule.from_flatbuffer(
|
||||
config.vm_instance, flatbuffer_blob
|
||||
)
|
||||
ctx = ireert.SystemContext(config=config)
|
||||
ctx.add_vm_module(vm_module)
|
||||
ModuleCompiled = ctx.modules.module[func_name]
|
||||
@@ -126,15 +124,15 @@ def export_iree_module_to_vmfb(
|
||||
module,
|
||||
device: str,
|
||||
directory: str,
|
||||
mlir_dialect: str = "linalg",
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None,
|
||||
):
|
||||
# Compiles the module given specs and saves it as .vmfb file.
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(
|
||||
module, device, mlir_dialect, func_name, model_config_path
|
||||
module, device, frontend, func_name, model_config_path
|
||||
)
|
||||
module_name = f"{mlir_dialect}_{func_name}_{device}"
|
||||
module_name = f"{frontend}_{func_name}_{device}"
|
||||
filename = os.path.join(directory, module_name + ".vmfb")
|
||||
print(f"Saved vmfb in {filename}.")
|
||||
with open(filename, "wb") as f:
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
import iree.runtime as ireert
|
||||
import ctypes
|
||||
from shark.parser import shark_args
|
||||
|
||||
# Get the default gpu args given the architecture.
|
||||
def get_iree_gpu_args():
|
||||
@@ -24,9 +23,7 @@ def get_iree_gpu_args():
|
||||
ireert.flags.parse_flags("--cuda_allow_inline_execution")
|
||||
# TODO: Give the user_interface to pass the sm_arch.
|
||||
sm_arch = get_cuda_sm_cc()
|
||||
if (
|
||||
sm_arch in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86"]
|
||||
) and (shark_args.enable_tf32 == True):
|
||||
if sm_arch in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86"]:
|
||||
return [
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
|
||||
|
||||
@@ -26,12 +26,6 @@ def get_vulkan_triple_flag():
|
||||
elif vulkan_device == "M2":
|
||||
print("Found Apple M2 Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "Max":
|
||||
print("Found Apple M1 Max Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "Pro":
|
||||
print("Found Apple M1 Pro Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "M1":
|
||||
print("Found Apple M1 Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
|
||||
@@ -47,10 +47,16 @@ parser.add_argument(
|
||||
default="./shark_tmp",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable_tf32",
|
||||
type=bool,
|
||||
"--save_mlir",
|
||||
default=False,
|
||||
help="Enables TF32 precision calculations on supported GPUs.",
|
||||
action="store_true",
|
||||
help="Saves input MLIR module to /tmp/ directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save_vmfb",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Saves iree .vmfb module to /tmp/ directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model_config_path",
|
||||
@@ -61,20 +67,14 @@ parser.add_argument(
|
||||
parser.add_argument(
|
||||
"--num_warmup_iterations",
|
||||
type=int,
|
||||
default=5,
|
||||
default=2,
|
||||
help="Run the model for the specified number of warmup iterations.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--num_iterations",
|
||||
type=int,
|
||||
default=100,
|
||||
default=1,
|
||||
help="Run the model for the specified number of iterations.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--onnx_bench",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="When enabled, pytest bench results will include ONNX benchmark results.",
|
||||
)
|
||||
|
||||
shark_args, unknown = parser.parse_known_args()
|
||||
|
||||
@@ -19,26 +19,13 @@ from shark.iree_utils.benchmark_utils import (
|
||||
run_benchmark_module,
|
||||
)
|
||||
from shark.parser import shark_args
|
||||
from tank.model_utils import get_torch_model
|
||||
from datetime import datetime
|
||||
import time
|
||||
import csv
|
||||
import os
|
||||
|
||||
|
||||
class OnnxFusionOptions(object):
|
||||
def __init__(self):
|
||||
self.disable_gelu = False
|
||||
self.disable_layer_norm = False
|
||||
self.disable_attention = False
|
||||
self.disable_skip_layer_norm = False
|
||||
self.disable_embed_layer_norm = False
|
||||
self.disable_bias_skip_layer_norm = False
|
||||
self.disable_bias_gelu = False
|
||||
self.enable_gelu_approximation = False
|
||||
self.use_mask_index = False
|
||||
self.no_attention_mask = False
|
||||
|
||||
|
||||
class SharkBenchmarkRunner(SharkRunner):
|
||||
# SharkRunner derived class with Benchmarking capabilities.
|
||||
def __init__(
|
||||
@@ -47,21 +34,22 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
function_name: str = "forward",
|
||||
device: str = "none",
|
||||
mlir_dialect: str = "linalg",
|
||||
frontend: str = "torch",
|
||||
):
|
||||
self.device = shark_args.device if device == "none" else device
|
||||
self.frontend = frontend
|
||||
self.frontend_model = None
|
||||
self.vmfb_file = None
|
||||
self.mlir_dialect = mlir_dialect
|
||||
SharkRunner.__init__(
|
||||
self,
|
||||
mlir_module,
|
||||
function_name,
|
||||
device,
|
||||
self.mlir_dialect,
|
||||
mlir_dialect,
|
||||
)
|
||||
if self.vmfb_file == None:
|
||||
self.vmfb_file = export_iree_module_to_vmfb(
|
||||
mlir_module, device, shark_args.repro_dir, self.mlir_dialect
|
||||
mlir_module, device, shark_args.repro_dir, self.frontend
|
||||
)
|
||||
|
||||
def setup_cl(self, input_tensors):
|
||||
@@ -71,17 +59,15 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
input_tensors,
|
||||
mlir_dialect=self.mlir_dialect,
|
||||
)
|
||||
# print(self.benchmark_cl)
|
||||
|
||||
def benchmark_frontend(self, modelname):
|
||||
if self.mlir_dialect in ["linalg", "torch"]:
|
||||
def benchmark_frontend(self, inputs, modelname):
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
return self.benchmark_torch(modelname)
|
||||
elif self.mlir_dialect in ["mhlo", "tf"]:
|
||||
return self.benchmark_tf(modelname)
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
return self.benchmark_tf(inputs, modelname)
|
||||
|
||||
def benchmark_torch(self, modelname):
|
||||
import torch
|
||||
from tank.model_utils import get_torch_model
|
||||
|
||||
if self.device == "gpu":
|
||||
torch.set_default_tensor_type(torch.cuda.FloatTensor)
|
||||
@@ -90,7 +76,7 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
torch_device = torch.device(
|
||||
"cuda:0" if self.device == "gpu" else "cpu"
|
||||
)
|
||||
HFmodel, input = get_torch_model(modelname)[:2]
|
||||
HFmodel, input, act_out = get_torch_model(modelname)
|
||||
frontend_model = HFmodel.model
|
||||
frontend_model.to(torch_device)
|
||||
input.to(torch_device)
|
||||
@@ -112,21 +98,13 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
||||
]
|
||||
|
||||
def benchmark_tf(self, modelname):
|
||||
import tensorflow as tf
|
||||
from tank.model_utils_tf import get_tf_model
|
||||
|
||||
model, input, = get_tf_model(
|
||||
modelname
|
||||
)[:2]
|
||||
frontend_model = model
|
||||
|
||||
def benchmark_tf(self, frontend_model, inputs):
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
frontend_model.forward(*input)
|
||||
frontend_model.forward(*inputs)
|
||||
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = frontend_model.forward(*input)
|
||||
out = frontend_model.forward(*inputs)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
break
|
||||
@@ -139,9 +117,8 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
]
|
||||
|
||||
def benchmark_c(self):
|
||||
print(self.benchmark_cl)
|
||||
result = run_benchmark_module(self.benchmark_cl)
|
||||
print(f"Shark-IREE-C benchmark:{result} iter/second")
|
||||
print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")
|
||||
return [f"{result}", f"{1000/result}"]
|
||||
|
||||
def benchmark_python(self, inputs):
|
||||
@@ -155,105 +132,32 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
print(
|
||||
f"Shark-IREE Python benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
return [
|
||||
f"{shark_args.num_iterations/(end-begin)}",
|
||||
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
||||
]
|
||||
|
||||
def benchmark_onnx(self, modelname, inputs):
|
||||
if self.device == "gpu":
|
||||
print(
|
||||
"Currently GPU benchmarking on ONNX is not supported in SHARK."
|
||||
)
|
||||
return ["N/A", "N/A"]
|
||||
else:
|
||||
from onnxruntime.transformers.benchmark import run_onnxruntime
|
||||
from onnxruntime.transformers.huggingface_models import MODELS
|
||||
from onnxruntime.transformers.benchmark_helper import (
|
||||
ConfigModifier,
|
||||
Precision,
|
||||
)
|
||||
import psutil
|
||||
|
||||
if modelname == "microsoft/MiniLM-L12-H384-uncased":
|
||||
modelname = "bert-base-uncased"
|
||||
if modelname not in MODELS:
|
||||
print(
|
||||
f"{modelname} is currently not supported in ORT's HF. Check \
|
||||
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
|
||||
for currently supported models. Exiting benchmark ONNX."
|
||||
)
|
||||
return ["N/A", "N/A"]
|
||||
use_gpu = self.device == "gpu"
|
||||
num_threads = psutil.cpu_count(logical=False)
|
||||
batch_sizes = [1]
|
||||
sequence_lengths = [128]
|
||||
cache_dir = os.path.join(".", "cache_models")
|
||||
onnx_dir = os.path.join(".", "onnx_models")
|
||||
verbose = False
|
||||
input_counts = [1]
|
||||
optimize_onnx = True
|
||||
validate_onnx = False
|
||||
disable_ort_io_binding = False
|
||||
use_raw_attention_mask = True
|
||||
model_fusion_statistics = {}
|
||||
overwrite = False
|
||||
model_source = "pt" # Either "pt" or "tf"
|
||||
provider = None
|
||||
config_modifier = ConfigModifier(None)
|
||||
onnx_args = OnnxFusionOptions()
|
||||
result = run_onnxruntime(
|
||||
use_gpu,
|
||||
provider,
|
||||
(modelname,),
|
||||
None,
|
||||
config_modifier,
|
||||
Precision.FLOAT32,
|
||||
num_threads,
|
||||
batch_sizes,
|
||||
sequence_lengths,
|
||||
shark_args.num_iterations,
|
||||
input_counts,
|
||||
optimize_onnx,
|
||||
validate_onnx,
|
||||
cache_dir,
|
||||
onnx_dir,
|
||||
verbose,
|
||||
overwrite,
|
||||
disable_ort_io_binding,
|
||||
use_raw_attention_mask,
|
||||
model_fusion_statistics,
|
||||
model_source,
|
||||
onnx_args,
|
||||
)
|
||||
print(
|
||||
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
return [
|
||||
result[0]["QPS"],
|
||||
result[0]["average_latency_ms"],
|
||||
]
|
||||
def benchmark_all(self, inputs: tuple):
|
||||
self.benchmark_frontend(inputs)
|
||||
self.benchmark_python(inputs)
|
||||
self.benchmark_c()
|
||||
|
||||
def benchmark_all_csv(
|
||||
self, inputs: tuple, modelname, dynamic, device_str, frontend
|
||||
):
|
||||
self.setup_cl(inputs)
|
||||
field_names = [
|
||||
"platform",
|
||||
"model",
|
||||
"engine",
|
||||
"dynamic",
|
||||
"dialect",
|
||||
"device",
|
||||
"iter/sec",
|
||||
"ms/iter",
|
||||
"iterations",
|
||||
"datetime",
|
||||
]
|
||||
engines = ["frontend", "shark_python", "shark_iree_c"]
|
||||
if shark_args.onnx_bench == True:
|
||||
engines.append("onnxruntime")
|
||||
platforms = ["frontend", "shark_python", "shark_iree_c"]
|
||||
|
||||
if not os.path.exists("bench_results.csv"):
|
||||
with open("bench_results.csv", mode="w", newline="") as f:
|
||||
@@ -269,33 +173,22 @@ for currently supported models. Exiting benchmark ONNX."
|
||||
else:
|
||||
bench_result["dynamic"] = "False"
|
||||
bench_result["device"] = device_str
|
||||
for e in engines:
|
||||
if e == "frontend":
|
||||
bench_result["engine"] = frontend
|
||||
(
|
||||
bench_result["iter/sec"],
|
||||
bench_result["ms/iter"],
|
||||
) = self.benchmark_frontend(modelname)
|
||||
elif e == "shark_python":
|
||||
bench_result["engine"] = "shark_python"
|
||||
(
|
||||
bench_result["iter/sec"],
|
||||
bench_result["ms/iter"],
|
||||
) = self.benchmark_python(inputs)
|
||||
elif e == "shark_iree_c":
|
||||
bench_result["engine"] = "shark_iree_c"
|
||||
(
|
||||
bench_result["iter/sec"],
|
||||
bench_result["ms/iter"],
|
||||
) = self.benchmark_c()
|
||||
elif e == "onnxruntime":
|
||||
bench_result["engine"] = "onnxruntime"
|
||||
(
|
||||
bench_result["iter/sec"],
|
||||
bench_result["ms/iter"],
|
||||
) = self.benchmark_onnx(modelname, inputs)
|
||||
|
||||
bench_result["dialect"] = self.mlir_dialect
|
||||
bench_result["iterations"] = shark_args.num_iterations
|
||||
for p in platforms:
|
||||
if p == "frontend":
|
||||
bench_result["platform"] = frontend
|
||||
bench_result["iter/sec"] = self.benchmark_frontend(
|
||||
inputs, modelname
|
||||
)[0]
|
||||
bench_result["ms/iter"] = self.benchmark_frontend(
|
||||
inputs, modelname
|
||||
)[1]
|
||||
elif p == "shark_python":
|
||||
bench_result["platform"] = "shark_python"
|
||||
bench_result["iter/sec"] = self.benchmark_python(inputs)[0]
|
||||
bench_result["ms/iter"] = self.benchmark_python(inputs)[1]
|
||||
else:
|
||||
bench_result["platform"] = "shark_iree_c"
|
||||
bench_result["iter/sec"] = self.benchmark_c()[0]
|
||||
bench_result["ms/iter"] = self.benchmark_c()[1]
|
||||
bench_result["datetime"] = str(datetime.now())
|
||||
writer.writerow(bench_result)
|
||||
|
||||
@@ -29,8 +29,6 @@ input_type_to_np_dtype = {
|
||||
"int8": np.int8,
|
||||
}
|
||||
|
||||
# default hash is updated when nightly populate_sharktank_ci is successful
|
||||
shark_default_sha = "latest"
|
||||
|
||||
# Save the model in the home local so it needn't be fetched everytime in the CI.
|
||||
home = str(Path.home())
|
||||
@@ -80,8 +78,7 @@ def download_torch_model(model_name, dynamic=False):
|
||||
|
||||
def gs_download_model():
|
||||
gs_command = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ " "
|
||||
@@ -96,8 +93,7 @@ def download_torch_model(model_name, dynamic=False):
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
|
||||
gs_hash = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ "/hash.npy"
|
||||
@@ -135,8 +131,7 @@ def download_tflite_model(model_name, dynamic=False):
|
||||
|
||||
def gs_download_model():
|
||||
gs_command = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ " "
|
||||
@@ -153,8 +148,7 @@ def download_tflite_model(model_name, dynamic=False):
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
|
||||
gs_hash = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ "/hash.npy"
|
||||
@@ -191,8 +185,7 @@ def download_tf_model(model_name):
|
||||
|
||||
def gs_download_model():
|
||||
gs_command = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ " "
|
||||
@@ -207,8 +200,7 @@ def download_tf_model(model_name):
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
|
||||
gs_hash = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ "/hash.npy"
|
||||
|
||||
@@ -199,11 +199,9 @@ class SharkImporter:
|
||||
)
|
||||
elif golden_out is tuple:
|
||||
golden_out = self.convert_to_numpy(golden_out)
|
||||
elif hasattr(golden_out, "logits"):
|
||||
else:
|
||||
# from transformers import TFSequenceClassifierOutput
|
||||
golden_out = golden_out.logits
|
||||
else:
|
||||
golden_out = golden_out.last_hidden_state
|
||||
# Save the artifacts in the directory dir.
|
||||
self.save_data(
|
||||
dir,
|
||||
|
||||
@@ -12,9 +12,26 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import torch
|
||||
import io
|
||||
import pickle
|
||||
|
||||
from torch_mlir.dialects.torch.importer.jit_ir import (
|
||||
ClassAnnotator,
|
||||
ModuleBuilder,
|
||||
)
|
||||
from torch_mlir_e2e_test.torchscript.serialization import (
|
||||
extract_serializable_annotations,
|
||||
apply_serializable_annotations,
|
||||
SerializableTest,
|
||||
)
|
||||
|
||||
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
||||
|
||||
from torch_mlir.passmanager import PassManager
|
||||
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
|
||||
from torch_mlir.ir import StringAttr
|
||||
import torch_mlir
|
||||
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
||||
|
||||
|
||||
def get_module_name_for_asm_dump(module):
|
||||
@@ -28,6 +45,22 @@ def get_module_name_for_asm_dump(module):
|
||||
).value
|
||||
|
||||
|
||||
def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
    """TODO: Include necessary documentation"""

    annotations_list = [None]
    for i in inputs:
        temp_list = []
        if dynamic:
            temp_list.append([-1 for i in range(len(i.shape))])
        else:
            temp_list.append(list(i.shape))
        temp_list.append(i.dtype)
        temp_list.append(True)
        annotations_list.append(tuple(temp_list))
    return annotations_list
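
# Illustrative sketch (assumption, not part of the original diff): for a single
# static input of shape (1, 128) with dtype torch.int64, the list built above is
# [None, ([1, 128], torch.int64, True)]; with dynamic=True the shape entry
# becomes [-1, -1] instead.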
|
||||
|
||||
|
||||
def run_on_refbackend(torch_module, inputs):
|
||||
backend = refbackend.RefBackendLinalgOnTensorsBackend()
|
||||
compiled = backend.compile(torch_module)
|
||||
@@ -36,16 +69,42 @@ def run_on_refbackend(torch_module, inputs):
|
||||
return jit_module.forward(np_inputs[0])
|
||||
|
||||
|
||||
# Creates dynamic dims for all dims.
|
||||
# TODO: Pass user specified dynamic dims.
|
||||
def create_dynamic_placeholders(inputs):
|
||||
placeholders = []
|
||||
for inp in inputs:
|
||||
placeholder = torch_mlir.TensorPlaceholder.like(
|
||||
inp, dynamic_axes=[i for i in range(len(inp.shape))]
|
||||
)
|
||||
placeholders.append(placeholder)
|
||||
return tuple(placeholders)
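
# Illustrative note (assumption, not part of the original diff): for an input of
# shape (1, 128), TensorPlaceholder.like(inp, dynamic_axes=[0, 1]) marks both
# dimensions dynamic, so the compiled module accepts any batch or sequence length.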
|
||||
def shark_jit_trace(
|
||||
module, input: tuple, dynamic: bool, tracing_required: bool
|
||||
):
|
||||
"""TODO: Include necessary documentation."""
|
||||
|
||||
if not tracing_required:
|
||||
return torch.jit.script(module)
|
||||
|
||||
traced_module = torch.jit.trace_module(module, {"forward": input})
|
||||
actual_script = traced_module._actual_script_module
|
||||
export(actual_script.forward)
|
||||
annotate_args_decorator = annotate_args(
|
||||
get_input_annotations(input, dynamic)
|
||||
)
|
||||
annotate_args_decorator(actual_script.forward)
|
||||
module = torch.jit.script(actual_script)
|
||||
|
||||
# TODO: remove saved annotations.pickle
|
||||
torchscript_module_bytes = module.save_to_buffer(
|
||||
{
|
||||
"annotations.pkl": pickle.dumps(
|
||||
extract_serializable_annotations(module)
|
||||
)
|
||||
}
|
||||
)
|
||||
serializable_test = SerializableTest(
|
||||
unique_name="", program=torchscript_module_bytes, trace=None
|
||||
)
|
||||
_extra_files = {"annotations.pkl": ""}
|
||||
module = torch.jit.load(
|
||||
io.BytesIO(serializable_test.program), _extra_files=_extra_files
|
||||
)
|
||||
# Load the pickled annotations.
|
||||
annotations = pickle.loads(_extra_files["annotations.pkl"])
|
||||
apply_serializable_annotations(module, annotations)
|
||||
return module
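
# Illustrative usage sketch (assumption, not part of the original diff):
#   scripted = shark_jit_trace(
#       MyModule().eval(), (example_input,), dynamic=False, tracing_required=True
#   )
# returns a torch.jit.ScriptModule that carries the shape/dtype annotations
# consumed by ModuleBuilder.import_module in get_torch_mlir_module below.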
|
||||
|
||||
|
||||
def get_torch_mlir_module(
|
||||
@@ -55,18 +114,39 @@ def get_torch_mlir_module(
|
||||
jit_trace: bool,
|
||||
from_torchscript: bool = False,
|
||||
):
|
||||
"""Get the MLIR's linalg-on-tensors module from torchscipt module."""
|
||||
ignore_traced_shapes = False
|
||||
if dynamic:
|
||||
input = create_dynamic_placeholders(input)
|
||||
if jit_trace:
|
||||
ignore_traced_shapes = True
|
||||
"""TODO: Include necessary documentation."""
|
||||
|
||||
module = torch_mlir.compile(
|
||||
module,
|
||||
input,
|
||||
output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
|
||||
use_tracing=jit_trace,
|
||||
ignore_traced_shapes=ignore_traced_shapes,
|
||||
# Static modules compiles well with the torch_mlir.compile API.
|
||||
# We will always jit_trace = True with the API since we always
|
||||
# want to propagate static shapes.
|
||||
if not dynamic:
|
||||
module = torch_mlir.compile(
|
||||
module,
|
||||
input,
|
||||
output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
|
||||
use_tracing=jit_trace,
|
||||
)
|
||||
return module
|
||||
|
||||
# Tracing is not required from the aot_module.
|
||||
if not from_torchscript:
|
||||
module = shark_jit_trace(module, input, dynamic, jit_trace)
|
||||
|
||||
mb = ModuleBuilder()
|
||||
class_annotator = ClassAnnotator()
|
||||
class_annotator.exportNone(module._c._type())
|
||||
class_annotator.exportPath(module._c._type(), ["forward"])
|
||||
class_annotator.annotateArgs(
|
||||
module._c._type(),
|
||||
["forward"],
|
||||
get_input_annotations(input, dynamic),
|
||||
)
|
||||
return module
|
||||
mb.import_module(module._c, class_annotator)
|
||||
|
||||
with mb.module.context:
|
||||
pm = PassManager.parse(
|
||||
"torchscript-module-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline"
|
||||
)
|
||||
pm.run(mb.module)
|
||||
|
||||
return mb.module
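
# Illustrative usage sketch (assumption, not part of the original diff): the
# static-shape path above reduces to a single torch_mlir.compile call. The
# Linear module and example input below are made up for illustration.
#
# import torch
# import torch_mlir
#
# example_model = torch.nn.Linear(4, 4).eval()
# example_input = torch.randn(1, 4)
# linalg_module = torch_mlir.compile(
#     example_model,
#     example_input,
#     output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
#     use_tracing=True,
# )
# print(linalg_module)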
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MiniLMModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="mhlo",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
if self.benchmark == True:
|
||||
shark_args.enable_tf32 = True
|
||||
shark_module.compile()
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(inputs),
|
||||
"microsoft/MiniLM-L12-H384-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"tensorflow",
|
||||
)
|
||||
shark_args.enable_tf32 = False
|
||||
rtol = 1e-01
|
||||
atol = 1e-02
|
||||
|
||||
else:
|
||||
shark_module.compile()
|
||||
rtol = 1e-02
|
||||
atol = 1e-03
|
||||
|
||||
# TODO: Remove catch once new MiniLM stable
|
||||
try:
|
||||
result = shark_module.forward(inputs)[0][1].to_host()
|
||||
|
||||
except:
|
||||
result = shark_module.forward(inputs)
|
||||
|
||||
np.testing.assert_allclose(golden_out, result, rtol=rtol, atol=atol)
|
||||
|
||||
|
||||
class MiniLMModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MiniLMModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -2,7 +2,6 @@ from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
@@ -13,15 +12,24 @@ class MiniLMModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
@@ -29,10 +37,11 @@ class MiniLMModuleTester:
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_args.enable_tf32 = True
|
||||
shark_module.compile()
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"microsoft/MiniLM-L12-H384-uncased",
|
||||
@@ -40,16 +49,6 @@ class MiniLMModuleTester:
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
shark_args.enable_tf32 = False
|
||||
rtol = 1e-01
|
||||
atol = 1e-02
|
||||
else:
|
||||
shark_module.compile()
|
||||
rtol = 1e-02
|
||||
atol = 1e-03
|
||||
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results, rtol, atol)
|
||||
|
||||
|
||||
class MiniLMModuleTest(unittest.TestCase):
|
||||
@@ -57,7 +56,6 @@ class MiniLMModuleTest(unittest.TestCase):
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MiniLMModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
@@ -100,14 +98,6 @@ class MiniLMModuleTest(unittest.TestCase):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,13 +0,0 @@
To run the fine-tuning example, from the root SHARK directory, run:

```shell
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pip install jupyter tf-models-nightly tf-datasets
jupyter-notebook
```
If running from a Google VM, you can view Jupyter notebooks on your local system with:
```shell
gcloud compute ssh <YOUR_INSTANCE_DETAILS> --ssh-flag="-N -L localhost:8888:localhost:8888"
```
|
||||
|
||||
@@ -55,15 +55,6 @@ class AlbertBaseModuleTest(unittest.TestCase):
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -99,15 +99,6 @@ class AlbertModuleTest(unittest.TestCase):
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@@ -101,14 +101,6 @@ class AlexnetModuleTest(unittest.TestCase):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,117 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import torch
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
benchmark=False,
|
||||
):
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"bert-base-cased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"bert-base-cased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,8 +1,8 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
@@ -12,10 +12,8 @@ class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
@@ -35,7 +33,6 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
@@ -57,14 +54,6 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -2,7 +2,6 @@ from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import torch
|
||||
import unittest
|
||||
@@ -13,17 +12,29 @@ import pytest
|
||||
class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"bert-base-uncased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
@@ -36,7 +47,6 @@ class BertBaseUncasedModuleTester:
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"bert-base-uncased",
|
||||
@@ -51,7 +61,6 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
@@ -94,14 +103,6 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,182 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from iree import runtime as ireert
|
||||
from iree.tf.support import module_utils
|
||||
from iree.compiler import tf as tfc
|
||||
from iree.compiler import compile_str
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
try:
|
||||
import tensorflow_datasets as tfds
|
||||
import tensorflow_models as tfm
|
||||
from official.nlp.modeling import layers
|
||||
from official.nlp.modeling import networks
|
||||
from official.nlp.modeling.models import bert_classifier
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"tensorflow models or datasets not found please run the following command with your virtual env active:\npip install tf-models-nightly tf-datasets"
|
||||
)
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
|
||||
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12"
|
||||
tf.io.gfile.listdir(gs_folder_bert)
|
||||
vocab_size = 100
|
||||
NUM_CLASSES = 2
|
||||
SEQUENCE_LENGTH = 128
|
||||
BATCH_SIZE = 1
|
||||
# Create a set of 2-dimensional inputs
|
||||
bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
]
|
||||
|
||||
|
||||
class BertModule(tf.Module):
|
||||
def __init__(self):
|
||||
super(BertModule, self).__init__()
|
||||
dict_outputs = False
|
||||
|
||||
bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
|
||||
|
||||
config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
|
||||
encoder_config = tfm.nlp.encoders.EncoderConfig(
|
||||
{"type": "bert", "bert": config_dict}
|
||||
)
|
||||
bert_encoder = tfm.nlp.encoders.build_encoder(encoder_config)
|
||||
|
||||
# Create a BERT trainer with the created network.
|
||||
bert_trainer_model = bert_classifier.BertClassifier(
|
||||
bert_encoder, num_classes=NUM_CLASSES
|
||||
)
|
||||
bert_trainer_model.summary()
|
||||
checkpoint = tf.train.Checkpoint(encoder=bert_encoder)
|
||||
checkpoint.read(
|
||||
os.path.join(gs_folder_bert, "bert_model.ckpt")
|
||||
).assert_consumed()
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m = bert_trainer_model
|
||||
self.m.predict = lambda x: self.m.call(x, training=False)
|
||||
self.predict = tf.function(input_signature=[bert_input])(
|
||||
self.m.predict
|
||||
)
|
||||
self.m.learn = lambda x, y: self.m.call(x, training=False)
|
||||
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
|
||||
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
|
||||
|
||||
@tf.function(
|
||||
input_signature=[
|
||||
bert_input, # inputs
|
||||
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
|
||||
]
|
||||
)
|
||||
def learn(self, inputs, labels):
|
||||
with tf.GradientTape() as tape:
|
||||
# Capture the gradients from forward prop...
|
||||
probs = self.m.call(inputs, training=True)
|
||||
loss = self.loss(labels, probs)
|
||||
|
||||
# ...and use them to update the model's weights.
|
||||
variables = self.m.trainable_variables
|
||||
gradients = tape.gradient(loss, variables)
|
||||
self.optimizer.apply_gradients(zip(gradients, variables))
|
||||
return loss
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
glue, info = tfds.load("glue/mrpc", with_info=True, batch_size=BATCH_SIZE)
|
||||
|
||||
tokenizer = tfm.nlp.layers.FastWordpieceBertTokenizer(
|
||||
vocab_file=os.path.join(gs_folder_bert, "vocab.txt"), lower_case=True
|
||||
)
|
||||
|
||||
max_seq_length = SEQUENCE_LENGTH
|
||||
|
||||
packer = tfm.nlp.layers.BertPackInputs(
|
||||
seq_length=max_seq_length,
|
||||
special_tokens_dict=tokenizer.get_special_tokens_dict(),
|
||||
)
|
||||
|
||||
class BertInputProcessor(tf.keras.layers.Layer):
|
||||
def __init__(self, tokenizer, packer):
|
||||
super().__init__()
|
||||
self.tokenizer = tokenizer
|
||||
self.packer = packer
|
||||
|
||||
def call(self, inputs):
|
||||
tok1 = self.tokenizer(inputs["sentence1"])
|
||||
tok2 = self.tokenizer(inputs["sentence2"])
|
||||
|
||||
packed = self.packer([tok1, tok2])
|
||||
|
||||
if "label" in inputs:
|
||||
return packed, inputs["label"]
|
||||
else:
|
||||
return packed
|
||||
|
||||
bert_inputs_processor = BertInputProcessor(tokenizer, packer)
|
||||
glue_train = glue["train"].map(bert_inputs_processor).prefetch(1)
|
||||
glue_validation = glue["validation"].map(bert_inputs_processor).prefetch(1)
|
||||
glue_test = glue["test"].map(bert_inputs_processor).prefetch(1)
|
||||
|
||||
# base tensorflow model
|
||||
bert_model = BertModule()
|
||||
|
||||
# Compile the model using IREE
|
||||
compiler_module = tfc.compile_module(
|
||||
bert_model, exported_names=["learn"], import_only=True
|
||||
)
|
||||
|
||||
# choose from dylib-llvm-aot or cuda
|
||||
backend = "dylib-llvm-aot"
|
||||
if backend == "dylib-llvm-aot":
|
||||
args = [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32",
|
||||
]
|
||||
backend_config = "dylib"
|
||||
|
||||
else:
|
||||
backend_config = "cuda"
|
||||
args = [
|
||||
"--iree-cuda-llvm-target-arch=sm_80",
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
"--iree-enable-fusion-with-reduction-ops",
|
||||
]
|
||||
|
||||
flatbuffer_blob = compile_str(
|
||||
compiler_module,
|
||||
target_backends=[backend],
|
||||
extra_args=args,
|
||||
input_type="mhlo",
|
||||
)
|
||||
|
||||
# Save module as MLIR file in a directory
|
||||
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
|
||||
tracer = ireert.Tracer(os.getcwd())
|
||||
config = ireert.Config("local-sync", tracer)
|
||||
ctx = ireert.SystemContext(config=config)
|
||||
ctx.add_vm_module(vm_module)
|
||||
BertCompiled = ctx.modules.module
|
||||
|
||||
# compare output losses:
|
||||
|
||||
iterations = 10
|
||||
for i in range(iterations):
|
||||
example_inputs, example_labels = next(iter(glue_train))
|
||||
example_labels = tf.cast(example_labels, tf.int32)
|
||||
example_inputs = [value for key, value in example_inputs.items()]
|
||||
|
||||
# iree version
|
||||
iree_loss = BertCompiled.learn(
|
||||
example_inputs, example_labels
|
||||
).to_host()
|
||||
|
||||
# base tensorflow
|
||||
tf_loss = np.array(bert_model.learn(example_inputs, example_labels))
|
||||
print(np.allclose(iree_loss, tf_loss))
|
||||
@@ -54,14 +54,6 @@ class CamemBertModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -57,14 +57,6 @@ class ConvBertModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -58,14 +58,6 @@ class DebertaBaseModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -34,13 +34,11 @@ class DistilBertModuleTest(unittest.TestCase):
|
||||
self.module_tester = DistilBertModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
@@ -49,7 +47,6 @@ class DistilBertModuleTest(unittest.TestCase):
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
@@ -57,14 +54,6 @@ class DistilBertModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -113,15 +113,6 @@ class DistilBertModuleTest(unittest.TestCase):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -54,14 +54,6 @@ class ElectraModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -63,14 +63,6 @@ class ConvNextTinyModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -60,14 +60,6 @@ class FunnelModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -60,14 +60,6 @@ class VitBaseModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -54,14 +54,6 @@ class LayoutLMModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -55,14 +55,6 @@ class LongformerModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class MobileNetV3ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"mobilenet_v3_small", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"alexnet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class MobileNetV3ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MobileNetV3ModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="stuck in the pipeline.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,5 +1,4 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.parser import shark_args
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
@@ -14,7 +13,6 @@ vision_models = [
|
||||
"resnet50",
|
||||
"squeezenet1_0",
|
||||
"wide_resnet50_2",
|
||||
"mobilenet_v3_small",
|
||||
]
|
||||
|
||||
|
||||
@@ -86,7 +84,6 @@ def get_vision_model(torch_model):
|
||||
"resnet101": models.resnet101(pretrained=True),
|
||||
"squeezenet1_0": models.squeezenet1_0(pretrained=True),
|
||||
"wide_resnet50_2": models.wide_resnet50_2(pretrained=True),
|
||||
"mobilenet_v3_small": models.mobilenet_v3_small(pretrained=True),
|
||||
}
|
||||
if isinstance(torch_model, str):
|
||||
torch_model = vision_models_dict[torch_model]
|
||||
@@ -99,6 +96,9 @@ def get_vision_model(torch_model):
|
||||
################################################################################
|
||||
|
||||
# Utility function for comparing two tensors (torch).
def compare_tensors(torch_tensor, numpy_tensor, rtol=1e-02, atol=1e-03):
def compare_tensors(torch_tensor, numpy_tensor):
    # setting the absolute and relative tolerance
    rtol = 1e-02
    atol = 1e-03
    # torch_to_numpy = torch_tensor.detach().numpy()
    return np.allclose(torch_tensor, numpy_tensor, rtol, atol)
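
# Illustrative usage sketch (assumption, not part of the original diff),
# mirroring how the test modules above call this helper:
#   results = shark_module.forward(input)
#   assert compare_tensors(act_out, results, rtol=1e-02, atol=1e-03)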
|
||||
|
||||
@@ -16,50 +16,10 @@ except:
|
||||
# Invalid device or cannot modify virtual devices once initialized.
|
||||
pass
|
||||
|
||||
BATCH_SIZE = 1
|
||||
MAX_SEQUENCE_LENGTH = 128
|
||||
|
||||
################################## MHLO/TF models #########################################
|
||||
# TODO : Generate these lists or fetch model source from tank/tf/tf_model_list.csv
|
||||
keras_models = [
|
||||
"resnet50",
|
||||
]
|
||||
maskedlm_models = [
|
||||
"albert-base-v2",
|
||||
"bert-base-uncased",
|
||||
"camembert-base",
|
||||
"convbert-base-turkish-cased",
|
||||
"deberta-base",
|
||||
"distilbert-base-uncased",
|
||||
"electra-small-discriminator",
|
||||
"funnel-transformer",
|
||||
"layoutlm-base-uncased",
|
||||
"longformer-base-4096",
|
||||
"mobilebert-uncased",
|
||||
"mpnet-base",
|
||||
"rembert",
|
||||
"roberta-base",
|
||||
"tapas-base",
|
||||
"tiny-random-flaubert",
|
||||
"xlm-roberta",
|
||||
]
|
||||
tfhf_models = [
|
||||
"microsoft/MiniLM-L12-H384-uncased",
|
||||
]
|
||||
|
||||
|
||||
def get_tf_model(name):
|
||||
if name in keras_models:
|
||||
return get_keras_model(name)
|
||||
elif name in maskedlm_models:
|
||||
return get_causal_lm_model(name)
|
||||
elif name in tfhf_models:
|
||||
return get_TFhf_model(name)
|
||||
else:
|
||||
return get_causal_image_model(name)
|
||||
|
||||
|
||||
##################### Tensorflow Hugging Face LM Models ###################################
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
# Create a set of 2-dimensional inputs
|
||||
tf_bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
@@ -85,6 +45,9 @@ class TFHuggingFaceLanguage(tf.Module):
|
||||
|
||||
|
||||
def get_TFhf_model(name):
|
||||
# gpus = tf.config.experimental.list_physical_devices("GPU")
|
||||
# for gpu in gpus:
|
||||
# tf.config.experimental.set_memory_growth(gpu, True)
|
||||
model = TFHuggingFaceLanguage(name)
|
||||
tokenizer = BertTokenizer.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
@@ -122,8 +85,22 @@ def compare_tensors_tf(tf_tensor, numpy_tensor):
|
||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
||||
import tensorflow as tf
|
||||
|
||||
visible_default = tf.config.list_physical_devices("GPU")
|
||||
try:
|
||||
tf.config.set_visible_devices([], "GPU")
|
||||
visible_devices = tf.config.get_visible_devices()
|
||||
for device in visible_devices:
|
||||
assert device.device_type != "GPU"
|
||||
except:
|
||||
# Invalid device or cannot modify virtual devices once initialized.
|
||||
pass
|
||||
|
||||
# The max_sequence_length is set small for testing purpose.
|
||||
BATCH_SIZE = 1
|
||||
MAX_SEQUENCE_LENGTH = 16
|
||||
|
||||
# Create a set of input signature.
|
||||
input_signature_maskedlm = [
|
||||
inputs_signature = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
]
|
||||
@@ -154,12 +131,15 @@ class MaskedLM(tf.Module):
|
||||
)
|
||||
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]
|
||||
|
||||
@tf.function(input_signature=input_signature_maskedlm)
|
||||
@tf.function(input_signature=inputs_signature)
|
||||
def forward(self, input_ids, attention_mask):
|
||||
return self.m.predict(input_ids, attention_mask)
|
||||
|
||||
|
||||
def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
|
||||
# gpus = tf.config.experimental.list_physical_devices("GPU")
|
||||
# for gpu in gpus:
|
||||
# tf.config.experimental.set_memory_growth(gpu, True)
|
||||
model = MaskedLM(hf_name)
|
||||
encoded_input = preprocess_input(hf_name, text)
|
||||
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"])
|
||||
@@ -167,59 +147,16 @@ def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
|
||||
return model, test_input, actual_out
|
||||
|
||||
|
||||
##################### TensorFlow Keras Resnet Models #########################################################
|
||||
# Static shape, including batch size (1).
|
||||
# Can be dynamic once dynamic shape support is ready.
|
||||
INPUT_SHAPE = [1, 224, 224, 3]
|
||||
|
||||
tf_model = tf.keras.applications.resnet50.ResNet50(
|
||||
weights="imagenet", include_top=True, input_shape=tuple(INPUT_SHAPE[1:])
|
||||
)
|
||||
|
||||
|
||||
class ResNetModule(tf.Module):
|
||||
def __init__(self):
|
||||
super(ResNetModule, self).__init__()
|
||||
self.m = tf_model
|
||||
self.m.predict = lambda x: self.m.call(x, training=False)
|
||||
|
||||
@tf.function(input_signature=[tf.TensorSpec(INPUT_SHAPE, tf.float32)])
|
||||
def forward(self, inputs):
|
||||
return self.m.predict(inputs)
|
||||
|
||||
|
||||
def load_image(path_to_image):
|
||||
image = tf.io.read_file(path_to_image)
|
||||
image = tf.image.decode_image(image, channels=3)
|
||||
image = tf.image.resize(image, (224, 224))
|
||||
image = image[tf.newaxis, :]
|
||||
return image
|
||||
|
||||
|
||||
def get_keras_model(modelname):
|
||||
model = ResNetModule()
|
||||
content_path = tf.keras.utils.get_file(
|
||||
"YellowLabradorLooking_new.jpg",
|
||||
"https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg",
|
||||
)
|
||||
content_image = load_image(content_path)
|
||||
input_tensor = tf.keras.applications.resnet50.preprocess_input(
|
||||
content_image
|
||||
)
|
||||
input_data = tf.expand_dims(input_tensor, 0)
|
||||
actual_out = model.forward(*input_data)
|
||||
return model, input_data, actual_out
|
||||
|
||||
|
||||
##################### Tensorflow Hugging Face Image Classification Models ###################################
|
||||
from transformers import TFAutoModelForImageClassification
|
||||
from transformers import ConvNextFeatureExtractor, ViTFeatureExtractor
|
||||
from transformers import BeitFeatureExtractor, AutoFeatureExtractor
|
||||
import tensorflow as tf
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
||||
# Create a set of input signature.
|
||||
input_signature_img_cls = [
|
||||
inputs_signature = [
|
||||
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
|
||||
]
|
||||
|
||||
@@ -232,7 +169,7 @@ class AutoModelImageClassfication(tf.Module):
|
||||
)
|
||||
self.m.predict = lambda x: self.m(x)
|
||||
|
||||
@tf.function(input_signature=input_signature_img_cls)
|
||||
@tf.function(input_signature=inputs_signature)
|
||||
def forward(self, inputs):
|
||||
return self.m.predict(inputs)
|
||||
|
||||
|
||||
@@ -34,13 +34,11 @@ class MpNetModuleTest(unittest.TestCase):
|
||||
self.module_tester = MpNetModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
@pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/203")
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/203")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
@@ -49,7 +47,6 @@ class MpNetModuleTest(unittest.TestCase):
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/203")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
@@ -57,14 +54,6 @@ class MpNetModuleTest(unittest.TestCase):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -2,7 +2,6 @@ model_name, use_tracing, model_type
microsoft/MiniLM-L12-H384-uncased,True,hf
albert-base-v2,True,hf
bert-base-uncased,True,hf
bert-base-cased,True,hf
google/mobilebert-uncased,True,hf
alexnet,False,vision
resnet18,False,vision
@@ -10,4 +9,3 @@ resnet50,False,vision
resnet101,False,vision
squeezenet1_0,False,vision
wide_resnet50_2,False,vision
mobilenet_v3_small,False,vision
|
||||
|
||||
|
@@ -17,17 +17,16 @@ python -m pip install --upgrade pip
### Install v-diffusion model and its dependencies

```shell
cd tank/pytorch/v_diffusion/
Run the script setup_v_diffusion_pytorch.sh
./setup_diffusion.sh
```

### Run v-diffusion-pytorch model

```shell
./v-diffusion-pytorch/cfg_sample.py "New York City, oil on canvas":5 -n 5 -bs 5
./v-diffusion-pytorch/cfg_sample.py "the rise of consciousness":5 -n 5 -bs 5 --seed 0
```

### Run the v-diffusion model via torch-mlir
### Compile v-diffusion model via torch-mlir
```shell
./cfg_sample.py "New York City, oil on canvas":5 -n 1 -bs 1 --steps 2
python v_diffusion.py 2> v_diffusion_ir.mlir
```
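
A possible next step (a sketch, not part of this README; whether `SharkInference` accepts the dumped textual IR unchanged, and the `"forward"` entry point name, are assumptions) is to load `v_diffusion_ir.mlir` back and compile it with SHARK:

```python
from shark.shark_inference import SharkInference

# Read the linalg-on-tensors IR emitted by v_diffusion.py above.
with open("v_diffusion_ir.mlir", "r") as f:
    model_mlir = f.read()

# Compile for CPU; other devices follow the same pattern used in the test suite.
shark_module = SharkInference(
    model_mlir, "forward", device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
```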
|
||||
116  tank/pytorch/v_diffusion/cfg_sample.py  Normal file
@@ -0,0 +1,116 @@
|
||||
import argparse
|
||||
import os
|
||||
from functools import partial
|
||||
|
||||
import clip
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
from tqdm import trange
|
||||
|
||||
try:
|
||||
from diffusion import get_model, sampling, utils
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"You need to download v-diffusion source from https://github.com/crowsonkb/v-diffusion-pytorch"
|
||||
)
|
||||
raise
|
||||
|
||||
torch.manual_seed(0)
|
||||
|
||||
|
||||
def parse_prompt(prompt, default_weight=3.0):
|
||||
if prompt.startswith("http://") or prompt.startswith("https://"):
|
||||
vals = prompt.rsplit(":", 2)
|
||||
vals = [vals[0] + ":" + vals[1], *vals[2:]]
|
||||
else:
|
||||
vals = prompt.rsplit(":", 1)
|
||||
vals = vals + ["", default_weight][len(vals) :]
|
||||
return vals[0], float(vals[1])
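
# Illustrative examples (not part of the original file):
#   parse_prompt("New York City, oil on canvas:5")  -> ("New York City, oil on canvas", 5.0)
#   parse_prompt("New York City, oil on canvas")    -> ("New York City, oil on canvas", 3.0)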
|
||||
|
||||
|
||||
args = argparse.Namespace(
|
||||
prompts=["New York City, oil on canvas"],
|
||||
batch_size=1,
|
||||
device="cuda",
|
||||
model="cc12m_1_cfg",
|
||||
n=1,
|
||||
steps=10,
|
||||
)
|
||||
|
||||
device = torch.device(args.device)
|
||||
print("Using device:", device)
|
||||
|
||||
model = get_model(args.model)()
|
||||
_, side_y, side_x = model.shape
|
||||
checkpoint = f"{args.model}.pth"
|
||||
if os.path.exists(checkpoint):
|
||||
model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
|
||||
|
||||
model = model.to(device).eval().requires_grad_(False)
|
||||
clip_model_name = (
|
||||
model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
|
||||
)
|
||||
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
|
||||
clip_model.eval().requires_grad_(False)
|
||||
normalize = transforms.Normalize(
|
||||
mean=[0.48145466, 0.4578275, 0.40821073],
|
||||
std=[0.26862954, 0.26130258, 0.27577711],
|
||||
)
|
||||
|
||||
zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
|
||||
target_embeds, weights = [zero_embed], []
|
||||
|
||||
txt, weight = parse_prompt(args.prompts[0])
|
||||
target_embeds.append(
|
||||
clip_model.encode_text(clip.tokenize(txt).to(device)).float()
|
||||
)
|
||||
weights.append(weight)
|
||||
|
||||
weights = torch.tensor([1 - sum(weights), *weights], device=device)
|
||||
|
||||
|
||||
def cfg_model_fn(model, x, t):
|
||||
n = x.shape[0]
|
||||
n_conds = len(target_embeds)
|
||||
x_in = x.repeat([n_conds, 1, 1, 1])
|
||||
t_in = t.repeat([n_conds])
|
||||
clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
|
||||
vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
|
||||
v = vs.mul(weights[:, None, None, None, None]).sum(0)
|
||||
return v
|
||||
|
||||
|
||||
x = torch.randn([args.n, 3, side_y, side_x], device=device)
|
||||
t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]
|
||||
|
||||
|
||||
def repro(model):
|
||||
if device.type == "cuda":
|
||||
model = model.half()
|
||||
|
||||
steps = utils.get_spliced_ddpm_cosine_schedule(t)
|
||||
for i in trange(0, args.n, args.batch_size):
|
||||
cur_batch_size = min(args.n - i, args.batch_size)
|
||||
outs = sampling.plms_sample(
|
||||
partial(cfg_model_fn, model), x[i : i + cur_batch_size], steps, {}
|
||||
)
|
||||
for j, out in enumerate(outs):
|
||||
utils.to_pil_image(out).save(f"out_{i + j:05}.png")
|
||||
|
||||
|
||||
def trace(model, x, t):
|
||||
n = x.shape[0]
|
||||
n_conds = len(target_embeds)
|
||||
x_in = x.repeat([n_conds, 1, 1, 1])
|
||||
t_in = t.repeat([n_conds])
|
||||
clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
|
||||
ts_mod = torch.jit.trace(model, (x_in, t_in, clip_embed_in))
|
||||
print(ts_mod.graph)
|
||||
|
||||
clip_model = clip.load(clip_model_name, jit=True, device=device)[0]
|
||||
print(clip_model.graph)
|
||||
|
||||
|
||||
# You can't run both of these because repro will `.half()` the model
|
||||
# repro(model)
|
||||
trace(model, x, t[0])
|
||||
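In the script above, cfg_model_fn stacks the unconditional (zero) embedding and the prompt embedding into one batch and collapses the two predictions with the weight vector [1 - w, w]. For a single prompt this is the usual classifier-free guidance blend; a minimal, self-contained sketch of that equivalence (illustrative only, with made-up tensor sizes):

import torch

w = 3.0                                # default prompt weight from parse_prompt
v_uncond = torch.randn(1, 3, 8, 8)     # prediction for the zero embedding
v_cond = torch.randn(1, 3, 8, 8)       # prediction for the text embedding

vs = torch.stack([v_uncond, v_cond])   # [n_conds, n, C, H, W], as in cfg_model_fn
weights = torch.tensor([1 - w, w])
v_stacked = vs.mul(weights[:, None, None, None, None]).sum(0)
v_direct = (1 - w) * v_uncond + w * v_cond
assert torch.allclose(v_stacked, v_direct)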
BIN
tank/pytorch/v_diffusion/out_00000.png
Normal file
Binary file not shown. (new image, 145 KiB)
12
tank/pytorch/v_diffusion_pytorch/setup_v_diffusion_pytorch.sh → tank/pytorch/v_diffusion/setup_v_diffusion.sh
Normal file → Executable file
@@ -14,9 +14,11 @@ echo "Python: $PYTHON"
echo "Python version: $PYTHON_VERSION_X_Y"

git clone --recursive https://github.com/crowsonkb/v-diffusion-pytorch.git
pip install ftfy regex tqdm
pip uninstall -y torch torchvision
pip install -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html --pre torch torchvision

mkdir checkpoints
wget https://the-eye.eu/public/AI/models/v-diffusion/cc12m_1_cfg.pth -P checkpoints/
pip install -r v-diffusion-pytorch/requirements.txt
pip install ftfy regex tqdm
pip install git+https://github.com/openai/CLIP.git


mkdir v-diffusion-pytorch/checkpoints
wget https://the-eye.eu/public/AI/models/v-diffusion/cc12m_1_cfg.pth -P v-diffusion-pytorch/checkpoints/
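The updated setup script clones v-diffusion-pytorch, installs its requirements plus CLIP, and places cc12m_1_cfg.pth under v-diffusion-pytorch/checkpoints/. A small sanity check (illustrative sketch, assuming it is run from the same directory as the setup script) that the layout the Python scripts expect is in place:

import os
import sys

sys.path.append("v-diffusion-pytorch")
# Checkpoint downloaded by setup_v_diffusion.sh
assert os.path.isfile("v-diffusion-pytorch/checkpoints/cc12m_1_cfg.pth")
from diffusion import get_model  # resolves once the checkout is on sys.path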
86
tank/pytorch/v_diffusion/v_diffusion.py
Normal file
@@ -0,0 +1,86 @@
# # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# # See https://llvm.org/LICENSE.txt for license information.
# # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# # Also available under a BSD-style license. See LICENSE.

import torch

from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
import tempfile

import math
import sys
import gc

from torchvision import utils as tv_utils
from torchvision.transforms import functional as TF
from tqdm.notebook import trange, tqdm

sys.path.append("v-diffusion-pytorch")

import clip
from diffusion import get_model, sampling, utils
import torch_mlir


# Load the models
model = get_model("cc12m_1_cfg")()
_, side_y, side_x = model.shape
model = model.eval().requires_grad_(False)
clip_model = clip.load(model.clip_model, jit=True, device="cpu")[0]

prompt = "New York City, oil on canvas"

weight = 1
n_images = 1
steps = 2

target_embed = clip_model.encode_text(clip.tokenize(prompt))
x = torch.randn([n_images, 3, side_y, side_x], device="cpu")
t = torch.linspace(1, 0, steps + 1, device="cpu")[:-1]

n = x.shape[0]
x_in = x.repeat([2, 1, 1, 1])
t_in = t
clip_embed_repeat = target_embed.repeat([n, 1])
clip_embed_in = torch.cat(
    [torch.zeros_like(clip_embed_repeat), clip_embed_repeat]
)


def model_inference(x_in, t_in, clip_embed_in):
    return model(x_in, t_in, clip_embed_in)


fx_g = make_fx(
    model_inference,
    decomposition_table=get_decompositions(
        [
            torch.ops.aten.embedding_dense_backward,
            torch.ops.aten.native_layer_norm_backward,
            torch.ops.aten.slice_backward,
            torch.ops.aten.select_backward,
            torch.ops.aten.norm.ScalarOpt_dim,
            torch.ops.aten.native_group_norm,
            torch.ops.aten.upsample_bilinear2d.vec,
            torch.ops.aten.split.Tensor,
        ]
    ),
)(x_in, t_in, clip_embed_in)

fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()

ts_g = torch.jit.trace(fx_g, (x_in, t_in, clip_embed_in))
temp = tempfile.NamedTemporaryFile(suffix="_shark_ts", prefix="temp_ts_")
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)

module = torch_mlir.compile(
    new_ts,
    [x_in, t_in, clip_embed_in],
    torch_mlir.OutputType.LINALG_ON_TENSORS,
    use_tracing=False,
)
module.dump()
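v_diffusion.py stops at module.dump(); the linalg-on-tensors module it produces is the same kind of artifact the removed script below feeds to the SHARK runtime. A sketch of that hand-off, following the SharkInference call pattern used elsewhere in this change (device and dialect arguments may need adjusting):

from shark.shark_inference import SharkInference

# Run the compiled linalg module through the SHARK runtime.
shark_module = SharkInference(
    module, "forward", device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(
    (x_in.numpy(), t_in.numpy(), clip_embed_in.numpy())
)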
@@ -1,283 +0,0 @@
#!/usr/bin/env python3

"""Classifier-free guidance sampling from a diffusion model."""

import argparse
from functools import partial
from pathlib import Path

from PIL import Image
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from tqdm import trange

from shark.shark_inference import SharkInference

import sys

sys.path.append("v-diffusion-pytorch")
from CLIP import clip
from diffusion import get_model, get_models, sampling, utils

MODULE_DIR = Path(__file__).resolve().parent


def parse_prompt(prompt, default_weight=3.0):
    if prompt.startswith("http://") or prompt.startswith("https://"):
        vals = prompt.rsplit(":", 2)
        vals = [vals[0] + ":" + vals[1], *vals[2:]]
    else:
        vals = prompt.rsplit(":", 1)
    vals = vals + ["", default_weight][len(vals) :]
    return vals[0], float(vals[1])


def resize_and_center_crop(image, size):
    fac = max(size[0] / image.size[0], size[1] / image.size[1])
    image = image.resize(
        (int(fac * image.size[0]), int(fac * image.size[1])), Image.LANCZOS
    )
    return TF.center_crop(image, size[::-1])


# def main():
p = argparse.ArgumentParser(
    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
p.add_argument(
    "prompts", type=str, default=[], nargs="*", help="the text prompts to use"
)
p.add_argument(
    "--images",
    type=str,
    default=[],
    nargs="*",
    metavar="IMAGE",
    help="the image prompts",
)
p.add_argument(
    "--batch-size",
    "-bs",
    type=int,
    default=1,
    help="the number of images per batch",
)
p.add_argument("--checkpoint", type=str, help="the checkpoint to use")
p.add_argument("--device", type=str, help="the device to use")
p.add_argument(
    "--eta",
    type=float,
    default=0.0,
    help="the amount of noise to add during sampling (0-1)",
)
p.add_argument("--init", type=str, help="the init image")
p.add_argument(
    "--method",
    type=str,
    default="plms",
    choices=["ddpm", "ddim", "prk", "plms", "pie", "plms2", "iplms"],
    help="the sampling method to use",
)
p.add_argument(
    "--model",
    type=str,
    default="cc12m_1_cfg",
    choices=["cc12m_1_cfg"],
    help="the model to use",
)
p.add_argument(
    "-n", type=int, default=1, help="the number of images to sample"
)
p.add_argument("--seed", type=int, default=0, help="the random seed")
p.add_argument("--size", type=int, nargs=2, help="the output image size")
p.add_argument(
    "--starting-timestep",
    "-st",
    type=float,
    default=0.9,
    help="the timestep to start at (used with init images)",
)
p.add_argument("--steps", type=int, default=50, help="the number of timesteps")
args = p.parse_args()

if args.device:
    device = torch.device(args.device)
else:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

model = get_model(args.model)()
_, side_y, side_x = model.shape
if args.size:
    side_x, side_y = args.size
checkpoint = args.checkpoint
if not checkpoint:
    checkpoint = MODULE_DIR / f"checkpoints/{args.model}.pth"
model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
if device.type == "cuda":
    model = model.half()
model = model.to(device).eval().requires_grad_(False)
clip_model_name = (
    model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
)
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
clip_model.eval().requires_grad_(False)
normalize = transforms.Normalize(
    mean=[0.48145466, 0.4578275, 0.40821073],
    std=[0.26862954, 0.26130258, 0.27577711],
)

if args.init:
    init = Image.open(utils.fetch(args.init)).convert("RGB")
    init = resize_and_center_crop(init, (side_x, side_y))
    init = (
        utils.from_pil_image(init).to(device)[None].repeat([args.n, 1, 1, 1])
    )

zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
target_embeds, weights = [zero_embed], []

for prompt in args.prompts:
    txt, weight = parse_prompt(prompt)
    target_embeds.append(
        clip_model.encode_text(clip.tokenize(txt).to(device)).float()
    )
    weights.append(weight)

for prompt in args.images:
    path, weight = parse_prompt(prompt)
    img = Image.open(utils.fetch(path)).convert("RGB")
    clip_size = clip_model.visual.input_resolution
    img = resize_and_center_crop(img, (clip_size, clip_size))
    batch = TF.to_tensor(img)[None].to(device)
    embed = F.normalize(
        clip_model.encode_image(normalize(batch)).float(), dim=-1
    )
    target_embeds.append(embed)
    weights.append(weight)

weights = torch.tensor([1 - sum(weights), *weights], device=device)

torch.manual_seed(args.seed)


def cfg_model_fn(x, t):
    n = x.shape[0]
    n_conds = len(target_embeds)
    x_in = x.repeat([n_conds, 1, 1, 1])
    t_in = t.repeat([n_conds])
    clip_embed_in = torch.cat([*target_embeds]).repeat([n, 1])
    vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
    v = vs.mul(weights[:, None, None, None, None]).sum(0)
    return v


x = torch.randn([args.n, 3, side_y, side_x], device=device)
t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]
steps = utils.get_spliced_ddpm_cosine_schedule(t)
min_batch_size = min(args.n, args.batch_size)
x_in = x[0:min_batch_size, :, :, :]
ts = x_in.new_ones([x_in.shape[0]])
t_in = t[0] * ts

from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
import torch_mlir

fx_g = make_fx(
    cfg_model_fn,
    decomposition_table=get_decompositions(
        [
            torch.ops.aten.embedding_dense_backward,
            torch.ops.aten.native_layer_norm_backward,
            torch.ops.aten.slice_backward,
            torch.ops.aten.select_backward,
            torch.ops.aten.norm.ScalarOpt_dim,
            torch.ops.aten.native_group_norm,
            torch.ops.aten.upsample_bilinear2d.vec,
            torch.ops.aten.split.Tensor,
            torch.ops.aten.split_with_sizes,
        ]
    ),
)(x_in, t_in)

fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()


def strip_overloads(gm):
    """
    Modifies the target of graph nodes in :attr:`gm` to strip overloads.
    Args:
        gm(fx.GraphModule): The input Fx graph module to be modified
    """
    for node in gm.graph.nodes:
        if isinstance(node.target, torch._ops.OpOverload):
            node.target = node.target.overloadpacket
    gm.recompile()


strip_overloads(fx_g)

ts_g = torch.jit.script(fx_g)

module = torch_mlir.compile(
    ts_g,
    [x_in, t_in],
    torch_mlir.OutputType.LINALG_ON_TENSORS,
    use_tracing=False,
)

mlir_model = module
func_name = "forward"


def compiled_cfg_model_fn(x, t):
    x_ny = x.detach().numpy()
    t_ny = t.detach().numpy()
    inputs = (x_ny, t_ny)
    shark_module = SharkInference(
        mlir_model, func_name, device="gpu", mlir_dialect="linalg"
    )
    shark_module.compile()
    result = shark_module.forward(inputs)
    return torch.from_numpy(result)


def run(x, steps):
    if args.method == "ddpm":
        return sampling.sample(compiled_cfg_model_fn, x, steps, 1.0, {})
    if args.method == "ddim":
        return sampling.sample(compiled_cfg_model_fn, x, steps, args.eta, {})
    if args.method == "prk":
        return sampling.prk_sample(compiled_cfg_model_fn, x, steps, {})
    if args.method == "plms":
        return sampling.plms_sample(compiled_cfg_model_fn, x, steps, {})
    if args.method == "pie":
        return sampling.pie_sample(compiled_cfg_model_fn, x, steps, {})
    if args.method == "plms2":
        return sampling.plms2_sample(compiled_cfg_model_fn, x, steps, {})
    if args.method == "iplms":
        return sampling.iplms_sample(compiled_cfg_model_fn, x, steps, {})
    assert False


def run_all(x, t, steps, n, batch_size):
    x = torch.randn([n, 3, side_y, side_x], device=device)
    t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]
    steps = utils.get_spliced_ddpm_cosine_schedule(t)
    if args.init:
        steps = steps[steps < args.starting_timestep]
        alpha, sigma = utils.t_to_alpha_sigma(steps[0])
        x = init * alpha + x * sigma
    for i in trange(0, n, batch_size):
        cur_batch_size = min(n - i, batch_size)
        outs = run(x[i : i + cur_batch_size], steps)
        for j, out in enumerate(outs):
            utils.to_pil_image(out).save(f"out_{i + j:05}.png")


run_all(x, t, steps, args.n, args.batch_size)
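One detail of the removed script worth noting: compiled_cfg_model_fn constructs and compiles a SharkInference module on every call, i.e. once per sampler step. A sketch of the same wrapper with compilation hoisted out of the hot path (same names and assumptions as the code above):

shark_module = SharkInference(
    mlir_model, func_name, device="gpu", mlir_dialect="linalg"
)
shark_module.compile()  # compile once, reuse for every sampler step


def compiled_cfg_model_fn(x, t):
    result = shark_module.forward((x.detach().numpy(), t.detach().numpy()))
    return torch.from_numpy(result)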
@@ -55,14 +55,6 @@ class RemBertModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -100,14 +100,6 @@ class Resnet101ModuleTest(unittest.TestCase):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -99,14 +99,7 @@ class Resnet18ModuleTest(unittest.TestCase):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
@@ -1,81 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_downloader import download_tf_model
from shark.parser import shark_args

import unittest
import numpy as np
import pytest
import numpy as np


class Resnet50ModuleTester:
    def __init__(
        self,
        benchmark=False,
        onnx_bench=False,
    ):
        self.benchmark = benchmark
        self.onnx_bench = onnx_bench

    def create_and_check_module(self, dynamic, device):
        model, func_name, inputs, golden_out = download_tf_model("resnet50")

        shark_module = SharkInference(
            model,
            func_name,
            device=device,
            mlir_dialect="mhlo",
            is_benchmark=self.benchmark,
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)

        if self.benchmark == True:
            shark_args.enable_tf32 = True
            shark_args.onnx_bench = self.onnx_bench
            shark_module.shark_runner.benchmark_all_csv(
                (inputs), "resnet50", dynamic, device, "tensorflow"
            )


class Resnet50ModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = Resnet50ModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
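The TF test modules touched in this change all follow the same shape as the deleted Resnet50 file above: download a model from the tank, run it through SharkInference, and compare against the golden output, with per-device skip guards. A condensed sketch of that shared pattern (names and tolerances taken from the code above, expressed with pytest parametrization for brevity):

import numpy as np
import pytest

from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_downloader import download_tf_model


@pytest.mark.parametrize("device", ["cpu", "gpu", "vulkan"])
def test_resnet50_static(device):
    if check_device_drivers(device):
        pytest.skip(device_driver_info(device))
    model, func_name, inputs, golden_out = download_tf_model("resnet50")
    shark_module = SharkInference(
        model, func_name, device=device, mlir_dialect="mhlo"
    )
    shark_module.compile()
    result = shark_module.forward(inputs)
    np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)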
@@ -100,14 +100,6 @@ class Resnet50ModuleTest(unittest.TestCase):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -28,9 +28,7 @@ class RobertaBaseModuleTester:
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(
            result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
        )
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class RobertaBaseModuleTest(unittest.TestCase):
@@ -44,7 +42,6 @@ class RobertaBaseModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
@@ -60,14 +57,6 @@ class RobertaBaseModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -57,14 +57,6 @@ class TapasBaseModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -8,7 +8,7 @@ import pytest
import numpy as np


class DistilBertModuleTester:
class MiniLMModuleTester:
    def __init__(
        self,
        benchmark=False,
@@ -17,7 +17,7 @@ class DistilBertModuleTester:

    def create_and_check_module(self, dynamic, device):
        model, func_name, inputs, golden_out = download_tf_model(
            "distilbert-base-uncased"
            "microsoft/MiniLM-L12-H384-uncased"
        )

        shark_module = SharkInference(
@@ -28,19 +28,18 @@ class DistilBertModuleTester:
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class DistilBertModuleTest(unittest.TestCase):
class MiniLMModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = DistilBertModuleTester(self)
        self.module_tester = MiniLMModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    @pytest.mark.skip(reason="MiniLM numerics issues on gpu")
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
@@ -49,7 +48,6 @@ class DistilBertModuleTest(unittest.TestCase):
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
@@ -57,14 +55,6 @@ class DistilBertModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -11,9 +11,8 @@ microsoft/layoutlm-base-uncased,hf
google/mobilebert-uncased,hf
microsoft/mpnet-base,hf
roberta-base,hf
resnet50,keras
xlm-roberta-base,hf
microsoft/MiniLM-L12-H384-uncased,TFhf
microsoft/MiniLM-L12-H384-uncased,hf
funnel-transformer/small,hf
microsoft/mpnet-base,hf
facebook/convnext-tiny-224,img
@@ -55,14 +55,6 @@ class FlauBertModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -100,14 +100,6 @@ class WideResnet50ModuleTest(unittest.TestCase):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
@@ -25,9 +25,7 @@ class XLMRobertaModuleTester:
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(
            result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
        )
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class XLMRobertaModuleTest(unittest.TestCase):
@@ -41,7 +39,6 @@ class XLMRobertaModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
@@ -57,14 +54,6 @@ class XLMRobertaModuleTest(unittest.TestCase):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":