Mirror of https://github.com/nod-ai/SHARK-Studio.git (synced 2026-01-11 23:08:19 -05:00)

Compare commits: RefVideo...minilmLoad (1 commit)

Commit: 14a56ca9b0
.github/workflows/nightly.yml (vendored, 52 lines changed)
@@ -11,7 +11,7 @@ on:
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: a100
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install flake8 pytest toml
|
||||
python -m pip install flake8 pytest yapf toml
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
@@ -61,58 +61,20 @@ jobs:
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
|
||||
- name: Build and validate the IREE package
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
|
||||
source iree.venv/bin/activate
|
||||
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
|
||||
SHARK_PACKAGE_VERSION=${package_version} \
|
||||
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/iree-org/iree/releases
|
||||
# Install the built wheel
|
||||
pip install ./wheelhouse/nodai*
|
||||
# Validate the Models
|
||||
/bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
|
||||
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
rm -rf ./wheelhouse/nodai*
|
||||
yapf -i --style .style.yapf shark/*.py
|
||||
|
||||
- name: Build and validate the SHARK Runtime package
|
||||
- name: Build and validate the package
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
./setup_venv.sh
|
||||
IMPORTER=1 ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
|
||||
SHARK_PACKAGE_VERSION=${package_version} \
|
||||
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
|
||||
pip wheel -v -w wheelhouse . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
|
||||
# Install the built wheel
|
||||
pip install ./wheelhouse/nodai*
|
||||
# Validate the Models
|
||||
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
|
||||
tail -n 1 |
|
||||
tee -a pytest_results.txt
|
||||
if !(grep -Fxq " failed" pytest_results.txt)
|
||||
then
|
||||
export SHA=$(git log -1 --format='%h')
|
||||
gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
|
||||
gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
|
||||
fi
|
||||
rm pytest_results.txt
|
||||
rm -rf ./wheelhouse/nodai*
|
||||
|
||||
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
|
||||
|
||||
- name: Upload Release Assets
|
||||
id: upload-release-assets
|
||||
|
||||
.github/workflows/test-models.yml (vendored, 128 lines changed)
@@ -1,7 +1,7 @@
|
||||
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
|
||||
|
||||
name: Validate Models on Shark Runtime
|
||||
name: Validate torch-models on Shark Runtime
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -11,94 +11,92 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-validate:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [a100, MacStudio, ubuntu-latest]
|
||||
suite: [cpu,gpu,vulkan]
|
||||
python-version: ["3.10"]
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
suite: lint
|
||||
exclude:
|
||||
- os: ubuntu-latest
|
||||
suite: vulkan
|
||||
- os: ubuntu-latest
|
||||
suite: gpu
|
||||
- os: ubuntu-latest
|
||||
suite: cpu
|
||||
- os: MacStudio
|
||||
suite: gpu
|
||||
- os: MacStudio
|
||||
suite: cpu
|
||||
- os: MacStudio
|
||||
suite: vulkan
|
||||
build-linux:
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set Environment Variables
|
||||
run: |
|
||||
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
|
||||
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
|
||||
|
||||
- name: Set up Python Version File ${{ matrix.python-version }}
|
||||
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
# See https://github.com/actions/setup-python/issues/433
|
||||
echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: '${{ matrix.python-version }}'
|
||||
#cache: 'pip'
|
||||
#cache-dependency-path: |
|
||||
# **/requirements-importer.txt
|
||||
# **/requirements.txt
|
||||
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Setup pip cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Install dependencies
|
||||
if: matrix.suite == 'lint'
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install flake8 pytest toml black
|
||||
|
||||
python -m pip install flake8 pytest yapf toml
|
||||
|
||||
- name: Lint with flake8
|
||||
if: matrix.suite == 'lint'
|
||||
run: |
|
||||
# black format check
|
||||
black --version
|
||||
black --line-length 79 --check .
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
|
||||
yapf -i --style .style.yapf shark/*.py
|
||||
|
||||
- name: Validate CPU Models
|
||||
if: matrix.suite == 'cpu'
|
||||
- name: Validate Models
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
||||
IMPORTER=1 ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
pytest -k 'not benchmark' --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
|
||||
|
||||
perf-macOS:
|
||||
runs-on: MacStudio
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
|
||||
- name: Validate GPU Models
|
||||
if: matrix.suite == 'gpu'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Validate Models dependencies
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
|
||||
PYTHON=python3.10 IMPORTER=1 ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest --benchmark -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_gpu_${SHORT_SHA}.csv
|
||||
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
|
||||
|
||||
perf-linux:
|
||||
runs-on: a100
|
||||
timeout-minutes: 45
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
|
||||
- name: Validate Vulkan Models
|
||||
if: matrix.suite == 'vulkan'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Setup pip cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Validate Models
|
||||
run: |
|
||||
cd $GITHUB_WORKSPACE
|
||||
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
|
||||
IMPORTER=1 ./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
pytest -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
|
||||
pytest --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
|
||||
|
||||
.gitignore (vendored, 1 line changed)
@@ -162,7 +162,6 @@ cython_debug/
|
||||
|
||||
# Shark related artefacts
|
||||
*venv/
|
||||
shark_tmp/
|
||||
|
||||
# ORT related artefacts
|
||||
cache_models/
|
||||
|
||||
README.md (282 lines changed)
@@ -15,11 +15,11 @@ High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerator
|
||||
|
||||
<details>
|
||||
<summary>Installation (Linux and macOS)</summary>
|
||||
|
||||
|
||||
### Setup a new pip Virtual Environment
|
||||
|
||||
This step sets up a new VirtualEnv for Python
|
||||
|
||||
|
||||
```shell
|
||||
python --version #Check you have 3.7->3.10 on Linux or 3.10 on macOS
|
||||
python -m venv shark_venv
|
||||
@@ -31,10 +31,10 @@ source shark_venv/bin/activate
|
||||
python -m pip install --upgrade pip
|
||||
```
|
||||
|
||||
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"
|
||||
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg
|
||||
|
||||
### Install SHARK
|
||||
|
||||
|
||||
This step installs SHARK and related packages via pip for Python 3.7, 3.8, 3.9, and 3.10 on Linux, and Python 3.10 on macOS
|
||||
|
||||
```shell
|
||||
@@ -43,20 +43,20 @@ pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://g
|
||||
If you are on an Intel macOS machine you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.
|
||||
|
||||
### Download and run Resnet50 sample
|
||||
|
||||
|
||||
```shell
|
||||
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
|
||||
#Install deps for test script
|
||||
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
|
||||
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
|
||||
```
|
||||
|
||||
|
||||
### Download and run BERT (MiniLM) sample
|
||||
```shell
|
||||
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
|
||||
#Install deps for test script
|
||||
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
|
||||
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
|
||||
```
|
||||
</details>
|
||||
|
||||
@@ -67,94 +67,34 @@ python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
|
||||
## Check out the code
|
||||
|
||||
```shell
|
||||
git clone https://github.com/nod-ai/SHARK.git
|
||||
git clone https://github.com/nod-ai/SHARK.git
|
||||
```
|
||||
|
||||
## Setup your Python VirtualEnvironment and Dependencies
|
||||
```shell
|
||||
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
|
||||
./setup_venv.sh
|
||||
source shark.venv/bin/activate
|
||||
# Please activate the venv after installation.
|
||||
```
|
||||
For example, if you want to use Python 3.10 and upstream IREE with the TF import tools, you can set the environment variables like this:
|
||||
```
|
||||
# PYTHON=python3.10 VENV_DIR=0617_venv IMPORTER=1 USE_IREE=1 ./setup_venv.sh
|
||||
```
|
||||
|
||||
If you are a Torch-MLIR or IREE developer and want to test local changes, you can uninstall the provided packages with `pip uninstall torch-mlir` and/or `pip uninstall iree-compiler iree-runtime`, then build locally with Python bindings and set your PYTHONPATH as described [here](https://google.github.io/iree/bindings/python/) for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages) for Torch-MLIR.
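A minimal sketch of that local-development flow, assuming prebuilt packages are being swapped for local builds; the PYTHONPATH entries are placeholders, so substitute the package paths produced by your own IREE and Torch-MLIR builds:

```shell
source shark.venv/bin/activate

# Remove the prebuilt packages you want to replace with local builds.
pip uninstall -y torch-mlir
pip uninstall -y iree-compiler iree-runtime

# After building your local checkouts with Python bindings enabled, point
# PYTHONPATH at the generated Python packages (placeholder paths below).
export PYTHONPATH=/path/to/iree-build/python_packages:/path/to/torch-mlir-build/python_packages:$PYTHONPATH
```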
|
||||
### Run a demo script
|
||||
```shell
|
||||
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
|
||||
# Or a pytest
|
||||
pytest tank/tf/hf_masked_lm/albert-base-v2_test.py::AlbertBaseModuleTest::test_module_static_cpu
|
||||
```
|
||||
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
<details>
|
||||
<summary>Testing</summary>
|
||||
|
||||
### Run all model tests on CPU/GPU/VULKAN/Metal
|
||||
```shell
|
||||
pytest tank
|
||||
pytest shark/tests/models
|
||||
|
||||
# If on Linux for multithreading on CPU (faster results):
|
||||
pytest tank -n auto
|
||||
# If on Linux for quicker results:
|
||||
pytest shark/tests/models -n auto
|
||||
```
|
||||
|
||||
### Running specific tests
|
||||
### Run all model benchmark tests on CPU/GPU/VULKAN/Metal
|
||||
```shell
|
||||
# Run tests for a specific model:
|
||||
pytest tank/<MODEL_NAME> #i.e., pytest tank/bert-base-uncased
|
||||
|
||||
# Run tests for a specific case:
|
||||
pytest tank/<MODEL_NAME> -k "keyword"
|
||||
# i.e., pytest tank/bert-base-uncased/bert-base-uncased_test.py -k "static_gpu"
|
||||
|
||||
pytest shark/tests/benchmarks
|
||||
```
|
||||
|
||||
### Run benchmarks on SHARK tank pytests and generate bench_results.csv with results.
|
||||
|
||||
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
|
||||
|
||||
```shell
|
||||
pytest --benchmark tank
|
||||
|
||||
# Just do static GPU benchmarks for PyTorch tests:
|
||||
pytest --benchmark tank --ignore-glob="_tf*" -k "static_gpu"
|
||||
```
|
||||
|
||||
### Benchmark Resnet50, MiniLM on CPU
|
||||
|
||||
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
|
||||
|
||||
```shell
|
||||
# We suggest running the following commands as root before running benchmarks on CPU:
|
||||
|
||||
cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | awk -F, '{print $2}' | sort -n | uniq | ( while read X ; do echo $X ; echo 0 > /sys/devices/system/cpu/cpu$X/online ; done )
|
||||
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
|
||||
|
||||
# Benchmark canonical Resnet50 on CPU via pytest
|
||||
pytest --benchmark tank/resnet50/ -k "cpu"
|
||||
|
||||
# Benchmark canonical MiniLM on CPU via pytest
|
||||
pytest --benchmark tank/MiniLM-L12-H384-uncased/ -k "cpu"
|
||||
|
||||
# Benchmark MiniLM on CPU via transformer-benchmarks:
|
||||
git clone --recursive https://github.com/nod-ai/transformer-benchmarks.git
|
||||
cd transformer-benchmarks
|
||||
./perf-ci.sh -n
|
||||
# Check detail.csv for MLIR/IREE results.
|
||||
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
@@ -164,25 +104,18 @@ cd transformer-benchmarks
|
||||
### Shark Inference API
|
||||
|
||||
```
|
||||
from shark_runner import SharkInference
|
||||
|
||||
from shark.shark_importer import SharkImporter
|
||||
|
||||
# SharkImporter produces an MLIR module from a torch, tensorflow, or tf-lite model.
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
torch_module,
|
||||
(input),
|
||||
frontend="torch", #tf, #tf-lite
|
||||
)
|
||||
torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
|
||||
|
||||
# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
|
||||
|
||||
from shark.shark_inference import SharkInference
|
||||
shark_module = SharkInference(torch_mlir, func_name, device="cpu", mlir_dialect="linalg")
|
||||
shark_module = SharkInference(
|
||||
module = model class.
|
||||
(input,) = inputs to model (must be a torch-tensor)
|
||||
dynamic (boolean) = Pass the input shapes as static or dynamic.
|
||||
device = `cpu`, `gpu` or `vulkan` is supported.
|
||||
tracing_required = (boolean) = Jit trace the module with the given input, useful in the case where jit.script doesn't work. )
|
||||
shark_module.set_frontend("pytorch") # Use tensorflow, mhlo, linalg, tosa
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((input))
|
||||
|
||||
result = shark_module.forward(inputs)
|
||||
```
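Putting the importer and inference pieces together, here is a minimal, self-contained sketch of the flow above. It assumes the newer `SharkImporter` / `SharkInference` signatures shown in this diff; the toy module, input shape, and device choice are illustrative only and not part of the repository.

```python
import torch
from shark.shark_importer import SharkImporter
from shark.shark_inference import SharkInference

# A toy model; any traceable torch.nn.Module follows the same flow.
class TinyModel(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x @ x.T)

model = TinyModel()
example_input = torch.randn(4, 4)

# Import the model to an MLIR module; the "tf" and "tflite" frontends
# follow the same pattern with their respective inputs.
mlir_importer = SharkImporter(model, (example_input,), frontend="torch")
mlir_module, func_name = mlir_importer.import_mlir(tracing_required=True)

# Compile and run on CPU; "gpu" or "vulkan" can be used where available.
shark_module = SharkInference(
    mlir_module, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
print(shark_module.forward((example_input,)))
```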
|
||||
|
||||
|
||||
@@ -202,9 +135,11 @@ mhlo_ir = r"""builtin.module {
|
||||
|
||||
arg0 = np.ones((1, 4)).astype(np.float32)
|
||||
arg1 = np.ones((4, 1)).astype(np.float32)
|
||||
shark_module = SharkInference(mhlo_ir, func_name="forward", device="cpu", mlir_dialect="mhlo")
|
||||
|
||||
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
|
||||
shark_module.set_frontend("mhlo")
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((arg0, arg1))
|
||||
print(shark_module.forward((arg0, arg1)))
|
||||
```
|
||||
</details>
|
||||
|
||||
@@ -218,153 +153,86 @@ result = shark_module.forward((arg0, arg1))
|
||||
|
||||
| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|---------------------|----------------------|----------|----------|-------------|
|
||||
| BERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Albert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| BigBird | :green_heart: (AOT) | | | |
|
||||
| DistilBERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| GPT2 | :broken_heart: (AOT) | | | |
|
||||
| MobileBert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| BERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
|
||||
| Albert | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
|
||||
| BigBird | :heavy_check_mark: (AOT) | | | |
|
||||
| DistilBERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
|
||||
| GPT2 | :x: (AOT) | | | |
|
||||
|
||||
### Torchvision Models
|
||||
|
||||
|
||||
| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|--------------------|----------------------|----------|----------|-------------|
|
||||
| AlexNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| DenseNet121 | :green_heart: (Script) | | | |
|
||||
| MNasNet1_0 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| MobileNetV2 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| MobileNetV3 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Unet | :broken_heart: (Script) | | | |
|
||||
| Resnet18 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Resnet50 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Resnet101 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Resnext50_32x4d | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| ShuffleNet_v2 | :broken_heart: (Script) | | | |
|
||||
| SqueezeNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| EfficientNet | :green_heart: (Script) | | | |
|
||||
| Regnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Resnest | :broken_heart: (Script) | | | |
|
||||
| Vision Transformer | :green_heart: (Script) | | | |
|
||||
| VGG 16 | :green_heart: (Script) | :green_heart: | :green_heart: | |
|
||||
| Wide Resnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
|
||||
| RAFT | :broken_heart: (JIT) | | | |
|
||||
| AlexNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| DenseNet121 | :heavy_check_mark: (Script) | | | |
|
||||
| MNasNet1_0 | :heavy_check_mark: (Script) | | | |
|
||||
| MobileNetV2 | :heavy_check_mark: (Script) | | | |
|
||||
| MobileNetV3 | :heavy_check_mark: (Script) | | | |
|
||||
| Unet | :x: (Script) | | | |
|
||||
| Resnet18 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| Resnet50 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| Resnet101 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| Resnext50_32x4d | :heavy_check_mark: (Script) | | | |
|
||||
| ShuffleNet_v2 | :x: (Script) | | | |
|
||||
| SqueezeNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| EfficientNet | :heavy_check_mark: (Script) | | | |
|
||||
| Regnet | :heavy_check_mark: (Script) | | | |
|
||||
| Resnest | :x: (Script) | | | |
|
||||
| Vision Transformer | :heavy_check_mark: (Script) | | | |
|
||||
| VGG 16 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| Wide Resnet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
|
||||
| RAFT | :x: (JIT) | | | |
|
||||
|
||||
For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0)
|
||||
|
||||
### PyTorch Training Models
|
||||
### PyTorch Training Models
|
||||
|
||||
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|---------------------|----------------------|----------|----------|-------------|
|
||||
| BERT | :broken_heart: | :broken_heart: | | |
|
||||
| FullyConnected | :green_heart: | :green_heart: | | |
|
||||
| BERT | :x: | :x: | | |
|
||||
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
<details>
|
||||
<summary>JAX Models</summary>
|
||||
|
||||
|
||||
### JAX Models
|
||||
### JAX Models
|
||||
|
||||
| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|---------------------|----------------------|----------|----------|-------------|
|
||||
| DALL-E | :broken_heart: | :broken_heart: | | |
|
||||
| FullyConnected | :green_heart: | :green_heart: | | |
|
||||
|
||||
| DALL-E | :x: | :x: | | |
|
||||
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
|
||||
|
||||
</details>
|
||||
|
||||
|
||||
<details>
|
||||
<summary>TFLite Models</summary>
|
||||
|
||||
### TFLite Models
|
||||
|
||||
### TFLite Models
|
||||
|
||||
| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|---------------------|----------------------|----------|----------|-------------|
|
||||
| BERT | :broken_heart: | :broken_heart: | | |
|
||||
| FullyConnected | :green_heart: | :green_heart: | | |
|
||||
| albert | :green_heart: | :green_heart: | | |
|
||||
| asr_conformer | :green_heart: | :green_heart: | | |
|
||||
| bird_classifier | :green_heart: | :green_heart: | | |
|
||||
| cartoon_gan | :green_heart: | :green_heart: | | |
|
||||
| craft_text | :green_heart: | :green_heart: | | |
|
||||
| deeplab_v3 | :green_heart: | :green_heart: | | |
|
||||
| densenet | :green_heart: | :green_heart: | | |
|
||||
| east_text_detector | :green_heart: | :green_heart: | | |
|
||||
| efficientnet_lite0_int8 | :green_heart: | :green_heart: | | |
|
||||
| efficientnet | :green_heart: | :green_heart: | | |
|
||||
| gpt2 | :green_heart: | :green_heart: | | |
|
||||
| image_stylization | :green_heart: | :green_heart: | | |
|
||||
| inception_v4 | :green_heart: | :green_heart: | | |
|
||||
| inception_v4_uint8 | :green_heart: | :green_heart: | | |
|
||||
| lightning_fp16 | :green_heart: | :green_heart: | | |
|
||||
| lightning_i8 | :green_heart: | :green_heart: | | |
|
||||
| lightning | :green_heart: | :green_heart: | | |
|
||||
| magenta | :green_heart: | :green_heart: | | |
|
||||
| midas | :green_heart: | :green_heart: | | |
|
||||
| mirnet | :green_heart: | :green_heart: | | |
|
||||
| mnasnet | :green_heart: | :green_heart: | | |
|
||||
| mobilebert_edgetpu_s_float | :green_heart: | :green_heart: | | |
|
||||
| mobilebert_edgetpu_s_quant | :green_heart: | :green_heart: | | |
|
||||
| mobilebert | :green_heart: | :green_heart: | | |
|
||||
| mobilebert_tf2_float | :green_heart: | :green_heart: | | |
|
||||
| mobilebert_tf2_quant | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_ssd_quant | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v1 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v2 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v2_uint8 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v3-large | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v3-large_uint8 | :green_heart: | :green_heart: | | |
|
||||
| mobilenet_v35-int8 | :green_heart: | :green_heart: | | |
|
||||
| nasnet | :green_heart: | :green_heart: | | |
|
||||
| person_detect | :green_heart: | :green_heart: | | |
|
||||
| posenet | :green_heart: | :green_heart: | | |
|
||||
| resnet_50_int8 | :green_heart: | :green_heart: | | |
|
||||
| rosetta | :green_heart: | :green_heart: | | |
|
||||
| spice | :green_heart: | :green_heart: | | |
|
||||
| squeezenet | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v1 | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v2_fpnlite | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v2_fpnlite_uint8 | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
|
||||
| ssd_mobilenet_v2 | :green_heart: | :green_heart: | | |
|
||||
| ssd_spaghettinet_large | :green_heart: | :green_heart: | | |
|
||||
| ssd_spaghettinet_large_uint8 | :green_heart: | :green_heart: | | |
|
||||
| visual_wake_words_i8 | :green_heart: | :green_heart: | | |
|
||||
|
||||
| BERT | :x: | :x: | | |
|
||||
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>TF Models</summary>
|
||||
|
||||
### Tensorflow Models
|
||||
|
||||
### Tensorflow Models (Inference)
|
||||
|
||||
| Hugging Face Models | tf-mhlo lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|
||||
|---------------------|----------------------|----------|----------|-------------|
|
||||
| BERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| albert-base-v2 | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| DistilBERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| CamemBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| ConvBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| Deberta | | | | |
|
||||
| electra | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| funnel | | | | |
|
||||
| layoutlm | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| longformer | | | | |
|
||||
| mobile-bert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| remembert | | | | |
|
||||
| tapas | | | | |
|
||||
| flaubert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| xlm-roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
| mpnet | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
|
||||
|
||||
| BERT | :x: | :x: | | |
|
||||
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
|
||||
|
||||
</details>
|
||||
|
||||
## Related Projects
|
||||
|
||||
|
||||
<details>
|
||||
<summary>IREE Project Channels</summary>
|
||||
|
||||
@@ -375,7 +243,7 @@ For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spr
|
||||
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
|
||||
Announcements, general and low-priority discussion
|
||||
</details>
|
||||
|
||||
|
||||
<details>
|
||||
<summary>MLIR and Torch-MLIR Project Channels</summary>
|
||||
|
||||
|
||||
@@ -6,16 +6,16 @@ parser.add_argument(
|
||||
"--model_name",
|
||||
type=str,
|
||||
required=True,
|
||||
help='Specifies name of HF model to benchmark. (For example "microsoft/MiniLM-L12-H384-uncased"',
|
||||
help=
|
||||
"Specifies name of HF model to benchmark. (For exmaple \"microsoft/MiniLM-L12-H384-uncased\""
|
||||
)
|
||||
load_args, unknown = parser.parse_known_args()
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_name = load_args.model_name
|
||||
test_input = torch.randint(2, (1, 128))
|
||||
shark_module = SharkHFBenchmarkRunner(
|
||||
model_name, (test_input,), jit_trace=True
|
||||
)
|
||||
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
|
||||
jit_trace=True)
|
||||
shark_module.benchmark_c()
|
||||
shark_module.benchmark_python((test_input,))
|
||||
shark_module.benchmark_torch(test_input)
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
import torch
|
||||
from shark.shark_benchmark_runner import SharkBenchmarkRunner
|
||||
from shark.shark_runner import SharkBenchmarkRunner
|
||||
from shark.parser import shark_args
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
from onnxruntime.transformers.benchmark import (
|
||||
run_pytorch,
|
||||
run_tensorflow,
|
||||
run_onnxruntime,
|
||||
)
|
||||
from onnxruntime.transformers.benchmark import run_pytorch, run_tensorflow, run_onnxruntime
|
||||
from onnxruntime.transformers.huggingface_models import MODELS
|
||||
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
|
||||
import os
|
||||
@@ -14,6 +10,7 @@ import psutil
|
||||
|
||||
|
||||
class OnnxFusionOptions(object):
|
||||
|
||||
def __init__(self):
|
||||
self.disable_gelu = False
|
||||
self.disable_layer_norm = False
|
||||
@@ -28,13 +25,17 @@ class OnnxFusionOptions(object):
|
||||
|
||||
|
||||
class HuggingFaceLanguage(torch.nn.Module):
|
||||
|
||||
def __init__(self, hf_model_name):
|
||||
super().__init__()
|
||||
self.model = AutoModelForSequenceClassification.from_pretrained(
|
||||
hf_model_name, # The pretrained model.
|
||||
num_labels=2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=False, # Whether the model returns all hidden-states.
|
||||
num_labels=
|
||||
2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=
|
||||
False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=
|
||||
False, # Whether the model returns all hidden-states.
|
||||
torchscript=True,
|
||||
)
|
||||
|
||||
@@ -61,16 +62,8 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
|
||||
)
|
||||
self.model_name = model_name
|
||||
model = HuggingFaceLanguage(model_name)
|
||||
SharkBenchmarkRunner.__init__(
|
||||
self,
|
||||
model,
|
||||
input,
|
||||
dynamic,
|
||||
self.device,
|
||||
jit_trace,
|
||||
from_aot,
|
||||
frontend,
|
||||
)
|
||||
SharkBenchmarkRunner.__init__(self, model, input, dynamic, self.device,
|
||||
jit_trace, from_aot, frontend)
|
||||
|
||||
def benchmark_torch(self, inputs):
|
||||
use_gpu = self.device == "gpu"
|
||||
@@ -81,20 +74,10 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
|
||||
sequence_lengths = [inputs.shape[-1]]
|
||||
cache_dir = os.path.join(".", "cache_models")
|
||||
verbose = False
|
||||
result = run_pytorch(
|
||||
use_gpu,
|
||||
[self.model_name],
|
||||
None,
|
||||
config_modifier,
|
||||
Precision.FLOAT32,
|
||||
num_threads,
|
||||
batch_sizes,
|
||||
sequence_lengths,
|
||||
shark_args.num_iterations,
|
||||
False,
|
||||
cache_dir,
|
||||
verbose,
|
||||
)
|
||||
result = run_pytorch(use_gpu, [self.model_name], None, config_modifier,
|
||||
Precision.FLOAT32, num_threads, batch_sizes,
|
||||
sequence_lengths, shark_args.num_iterations, False,
|
||||
cache_dir, verbose)
|
||||
print(
|
||||
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
@@ -109,19 +92,10 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
|
||||
sequence_lengths = [inputs.shape[-1]]
|
||||
cache_dir = os.path.join(".", "cache_models")
|
||||
verbose = False
|
||||
result = run_tensorflow(
|
||||
use_gpu,
|
||||
[self.model_name],
|
||||
None,
|
||||
config_modifier,
|
||||
Precision.FLOAT32,
|
||||
num_threads,
|
||||
batch_sizes,
|
||||
sequence_lengths,
|
||||
shark_args.num_iterations,
|
||||
cache_dir,
|
||||
verbose,
|
||||
)
|
||||
result = run_tensorflow(use_gpu, [self.model_name], None,
|
||||
config_modifier, Precision.FLOAT32, num_threads,
|
||||
batch_sizes, sequence_lengths,
|
||||
shark_args.num_iterations, cache_dir, verbose)
|
||||
print(
|
||||
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
@@ -131,8 +105,7 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
|
||||
print(
|
||||
f"{self.model_name} is currently not supported in ORT's HF. Check \
|
||||
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
|
||||
for currently supported models. Exiting benchmark ONNX."
|
||||
)
|
||||
for currently supported models. Exiting benchmark ONNX.")
|
||||
return
|
||||
use_gpu = self.device == "gpu"
|
||||
num_threads = psutil.cpu_count(logical=False)
|
||||
@@ -148,34 +121,17 @@ for currently supported models. Exiting benchmark ONNX."
|
||||
use_raw_attention_mask = True
|
||||
model_fusion_statistics = {}
|
||||
overwrite = False
|
||||
model_source = "pt" # Either "pt" or "tf"
|
||||
model_source = "pt" #Either "pt" or "tf"
|
||||
provider = None
|
||||
config_modifier = ConfigModifier(None)
|
||||
onnx_args = OnnxFusionOptions()
|
||||
result = run_onnxruntime(
|
||||
use_gpu,
|
||||
provider,
|
||||
[self.model_name],
|
||||
None,
|
||||
config_modifier,
|
||||
Precision.FLOAT32,
|
||||
num_threads,
|
||||
batch_sizes,
|
||||
sequence_lengths,
|
||||
shark_args.num_iterations,
|
||||
input_counts,
|
||||
optimize_onnx,
|
||||
validate_onnx,
|
||||
cache_dir,
|
||||
onnx_dir,
|
||||
verbose,
|
||||
overwrite,
|
||||
disable_ort_io_binding,
|
||||
use_raw_attention_mask,
|
||||
model_fusion_statistics,
|
||||
model_source,
|
||||
onnx_args,
|
||||
)
|
||||
use_gpu, provider, [self.model_name], None, config_modifier,
|
||||
Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
|
||||
shark_args.num_iterations, input_counts, optimize_onnx,
|
||||
validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
|
||||
disable_ort_io_binding, use_raw_attention_mask,
|
||||
model_fusion_statistics, model_source, onnx_args)
|
||||
print(
|
||||
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
|
||||
@@ -1,23 +1,19 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers
|
||||
from shark.iree_utils import check_device_drivers
|
||||
|
||||
import torch
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import torchvision.models as models
|
||||
from transformers import (
|
||||
AutoModelForSequenceClassification,
|
||||
BertTokenizer,
|
||||
TFBertModel,
|
||||
)
|
||||
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
|
||||
import importlib
|
||||
import pytest
|
||||
import unittest
|
||||
|
||||
torch.manual_seed(0)
|
||||
gpus = tf.config.experimental.list_physical_devices("GPU")
|
||||
gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_memory_growth(gpu, True)
|
||||
tf.config.experimental.set_memory_growth(gpu, True)
|
||||
|
||||
##################### Tensorflow Hugging Face LM Models ###################################
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
@@ -27,11 +23,12 @@ BATCH_SIZE = 1
|
||||
tf_bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
|
||||
]
|
||||
|
||||
|
||||
class TFHuggingFaceLanguage(tf.Module):
|
||||
|
||||
def __init__(self, hf_model_name):
|
||||
super(TFHuggingFaceLanguage, self).__init__()
|
||||
# Create a BERT trainer with the created network.
|
||||
@@ -39,8 +36,7 @@ class TFHuggingFaceLanguage(tf.Module):
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m.predict = lambda x, y, z: self.m.call(
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False
|
||||
)
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
|
||||
|
||||
@tf.function(input_signature=tf_bert_input)
|
||||
def forward(self, input_ids, attention_mask, token_type_ids):
|
||||
@@ -51,21 +47,15 @@ def get_TFhf_model(name):
|
||||
model = TFHuggingFaceLanguage(name)
|
||||
tokenizer = BertTokenizer.from_pretrained(name)
|
||||
text = "Replace me by any text you'd like."
|
||||
encoded_input = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
)
|
||||
encoded_input = tokenizer(text,
|
||||
padding='max_length',
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH)
|
||||
for key in encoded_input:
|
||||
encoded_input[key] = tf.expand_dims(
|
||||
tf.convert_to_tensor(encoded_input[key]), 0
|
||||
)
|
||||
test_input = (
|
||||
encoded_input["input_ids"],
|
||||
encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"],
|
||||
)
|
||||
tf.convert_to_tensor(encoded_input[key]), 0)
|
||||
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"])
|
||||
actual_out = model.forward(*test_input)
|
||||
return model, test_input, actual_out
|
||||
|
||||
@@ -74,13 +64,17 @@ def get_TFhf_model(name):
|
||||
|
||||
|
||||
class HuggingFaceLanguage(torch.nn.Module):
|
||||
|
||||
def __init__(self, hf_model_name):
|
||||
super().__init__()
|
||||
self.model = AutoModelForSequenceClassification.from_pretrained(
|
||||
hf_model_name, # The pretrained model.
|
||||
num_labels=2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=False, # Whether the model returns all hidden-states.
|
||||
num_labels=
|
||||
2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=
|
||||
False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=
|
||||
False, # Whether the model returns all hidden-states.
|
||||
torchscript=True,
|
||||
)
|
||||
|
||||
@@ -102,6 +96,7 @@ def get_hf_model(name):
|
||||
|
||||
|
||||
class VisionModule(torch.nn.Module):
|
||||
|
||||
def __init__(self, model):
|
||||
super().__init__()
|
||||
self.model = model
|
||||
@@ -122,56 +117,46 @@ def get_vision_model(torch_model):
|
||||
############################# Benchmark Tests ####################################
|
||||
|
||||
pytest_benchmark_param = pytest.mark.parametrize(
|
||||
("dynamic", "device"),
|
||||
('dynamic', 'device'),
|
||||
[
|
||||
pytest.param(False, "cpu"),
|
||||
pytest.param(False, 'cpu'),
|
||||
# TODO: Language models are failing for dynamic case..
|
||||
pytest.param(True, "cpu", marks=pytest.mark.skip),
|
||||
pytest.param(True, 'cpu', marks=pytest.mark.skip),
|
||||
pytest.param(False,
|
||||
'gpu',
|
||||
marks=pytest.mark.skipif(check_device_drivers("gpu"),
|
||||
reason="nvidia-smi not found")),
|
||||
pytest.param(True,
|
||||
'gpu',
|
||||
marks=pytest.mark.skip),
|
||||
pytest.param(
|
||||
False,
|
||||
"gpu",
|
||||
marks=pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason="nvidia-smi not found"
|
||||
),
|
||||
),
|
||||
pytest.param(True, "gpu", marks=pytest.mark.skip),
|
||||
pytest.param(
|
||||
False,
|
||||
"vulkan",
|
||||
'vulkan',
|
||||
marks=pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"),
|
||||
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
|
||||
),
|
||||
),
|
||||
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
|
||||
)),
|
||||
pytest.param(
|
||||
True,
|
||||
"vulkan",
|
||||
'vulkan',
|
||||
marks=pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"),
|
||||
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
|
||||
)),
|
||||
])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
importlib.util.find_spec("iree.tools") is None,
|
||||
reason="Cannot find tools to import TF",
|
||||
)
|
||||
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
|
||||
reason="Cannot find tools to import TF")
|
||||
@pytest_benchmark_param
|
||||
def test_bench_minilm_torch(dynamic, device):
|
||||
model, test_input, act_out = get_hf_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
model,
|
||||
(test_input,),
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True,
|
||||
)
|
||||
"microsoft/MiniLM-L12-H384-uncased")
|
||||
shark_module = SharkInference(model, (test_input,),
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True)
|
||||
try:
|
||||
# If benchmarking is successful, assert success/True.
|
||||
shark_module.compile()
|
||||
@@ -182,21 +167,17 @@ def test_bench_minilm_torch(dynamic, device):
|
||||
assert False
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
importlib.util.find_spec("iree.tools") is None,
|
||||
reason="Cannot find tools to import TF",
|
||||
)
|
||||
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
|
||||
reason="Cannot find tools to import TF")
|
||||
@pytest_benchmark_param
|
||||
def test_bench_distilbert(dynamic, device):
|
||||
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
|
||||
shark_module = SharkInference(
|
||||
model,
|
||||
test_input,
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True,
|
||||
)
|
||||
shark_module = SharkInference(model,
|
||||
test_input,
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True)
|
||||
try:
|
||||
# If benchmarking is successful, assert success/True.
|
||||
shark_module.set_frontend("tensorflow")
|
||||
@@ -212,14 +193,12 @@ def test_bench_distilbert(dynamic, device):
|
||||
@pytest_benchmark_param
|
||||
def test_bench_xlm_roberta(dynamic, device):
|
||||
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
|
||||
shark_module = SharkInference(
|
||||
model,
|
||||
test_input,
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True,
|
||||
)
|
||||
shark_module = SharkInference(model,
|
||||
test_input,
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True,
|
||||
benchmark_mode=True)
|
||||
try:
|
||||
# If benchmarking is successful, assert success/True.
|
||||
shark_module.set_frontend("tensorflow")
|
||||
|
||||
@@ -9,31 +9,25 @@ torch.manual_seed(0)
|
||||
|
||||
# Test running benchmark module without failing.
|
||||
pytest_benchmark_param = pytest.mark.parametrize(
|
||||
("dynamic", "device"),
|
||||
('dynamic', 'device'),
|
||||
[
|
||||
pytest.param(False, "cpu"),
|
||||
pytest.param(False, 'cpu'),
|
||||
# TODO: Language models are failing for dynamic case..
|
||||
pytest.param(True, "cpu", marks=pytest.mark.skip),
|
||||
],
|
||||
)
|
||||
pytest.param(True, 'cpu', marks=pytest.mark.skip),
|
||||
])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
importlib.util.find_spec("onnxruntime") is None,
|
||||
reason="Cannot find ONNXRUNTIME.",
|
||||
)
|
||||
@pytest.mark.skipif(importlib.util.find_spec("onnxruntime") is None,
|
||||
reason="Cannot find ONNXRUNTIME.")
|
||||
@pytest_benchmark_param
|
||||
def test_HFbench_minilm_torch(dynamic, device):
|
||||
model_name = "bert-base-uncased"
|
||||
test_input = torch.randint(2, (1, 128))
|
||||
try:
|
||||
shark_module = SharkHFBenchmarkRunner(
|
||||
model_name,
|
||||
(test_input,),
|
||||
jit_trace=True,
|
||||
dynamic=dynamic,
|
||||
device=device,
|
||||
)
|
||||
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
|
||||
jit_trace=True,
|
||||
dynamic=dynamic,
|
||||
device=device)
|
||||
shark_module.benchmark_c()
|
||||
shark_module.benchmark_python((test_input,))
|
||||
shark_module.benchmark_torch(test_input)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
IMPORTER=1 ./setup_venv.sh
|
||||
source $GITHUB_WORKSPACE/shark.venv/bin/activate
|
||||
python generate_sharktank.py --upload=False
|
||||
conftest.py (33 lines changed)
@@ -1,33 +0,0 @@
|
||||
def pytest_addoption(parser):
|
||||
# Attaches SHARK command-line arguments to the pytest machinery.
|
||||
parser.addoption(
|
||||
"--benchmark",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Pass option to benchmark and write results.csv",
|
||||
)
|
||||
parser.addoption(
|
||||
"--onnx_bench",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Add ONNX benchmark results to pytest benchmarks.",
|
||||
)
|
||||
# The following options are deprecated and pending removal.
|
||||
parser.addoption(
|
||||
"--save_mlir",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Pass option to save input MLIR",
|
||||
)
|
||||
parser.addoption(
|
||||
"--save_vmfb",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Pass option to save IREE output .vmfb",
|
||||
)
|
||||
parser.addoption(
|
||||
"--save_temps",
|
||||
action="store_true",
|
||||
default="False",
|
||||
help="Saves IREE reproduction artifacts for filing upstream issues.",
|
||||
)
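For context, a small sketch (not part of the repository) of how a test can consume the options registered above through pytest's built-in `pytestconfig` fixture, mirroring the pattern used by the tank tests later in this diff:

```python
import pytest


@pytest.fixture
def shark_options(pytestconfig):
    # With action="store_true", each value is True when the flag is passed on
    # the command line and otherwise falls back to the declared default
    # (note the defaults above are the string "False", not the boolean).
    return {
        "benchmark": pytestconfig.getoption("benchmark"),
        "onnx_bench": pytestconfig.getoption("onnx_bench"),
    }


def test_shark_options_are_registered(shark_options):
    # Example invocation: pytest --benchmark --onnx_bench tank/<MODEL_NAME>
    assert set(shark_options) == {"benchmark", "onnx_bench"}
```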
|
||||
@@ -1,235 +0,0 @@
|
||||
# Lint as: python3
|
||||
"""SHARK Tank"""
|
||||
# To run python generate_sharktank.py, you have to give a csv file with [model_name, model_download_url]
|
||||
# will generate local shark tank folder like this:
|
||||
# /SHARK
|
||||
# /gen_shark_tank
|
||||
# /albert_lite_base
|
||||
# /...model_name...
|
||||
#
|
||||
|
||||
import os
|
||||
import csv
|
||||
import argparse
|
||||
from shark.shark_importer import SharkImporter
|
||||
import tensorflow as tf
|
||||
import subprocess as sp
|
||||
import hashlib
|
||||
import numpy as np
|
||||
|
||||
visible_default = tf.config.list_physical_devices("GPU")
|
||||
try:
|
||||
tf.config.set_visible_devices([], "GPU")
|
||||
visible_devices = tf.config.get_visible_devices()
|
||||
for device in visible_devices:
|
||||
assert device.device_type != "GPU"
|
||||
except:
|
||||
# Invalid device or cannot modify virtual devices once initialized.
|
||||
pass
|
||||
|
||||
# All generated models and metadata will be saved under this directory.
|
||||
WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
|
||||
|
||||
|
||||
def create_hash(file_name):
|
||||
with open(file_name, "rb") as f:
|
||||
file_hash = hashlib.blake2b()
|
||||
while chunk := f.read(2**20):
|
||||
file_hash.update(chunk)
|
||||
|
||||
return file_hash.hexdigest()
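As an illustration only (not in the repository), a consumer could re-verify a generated artifact against the digest that the save_* functions below store with `np.save`, using the `<model>_<frontend>.mlir` plus `hash.npy` convention from this script:

```python
import os
import numpy as np


def verify_artifact(model_dir, mlir_file_name):
    # hash.npy holds a 0-d string array written via np.save(..., np.array(mlir_hash)).
    expected = str(np.load(os.path.join(model_dir, "hash.npy")))
    actual = create_hash(os.path.join(model_dir, mlir_file_name))
    return expected == actual
```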
|
||||
|
||||
|
||||
def save_torch_model(torch_model_list):
|
||||
from tank.model_utils import get_hf_model
|
||||
from tank.model_utils import get_vision_model
|
||||
|
||||
with open(torch_model_list) as csvfile:
|
||||
torch_reader = csv.reader(csvfile, delimiter=",")
|
||||
fields = next(torch_reader)
|
||||
for row in torch_reader:
|
||||
torch_model_name = row[0]
|
||||
tracing_required = row[1]
|
||||
model_type = row[2]
|
||||
|
||||
tracing_required = False if tracing_required == "False" else True
|
||||
|
||||
model = None
|
||||
input = None
|
||||
if model_type == "vision":
|
||||
model, input, _ = get_vision_model(torch_model_name)
|
||||
elif model_type == "hf":
|
||||
model, input, _ = get_hf_model(torch_model_name)
|
||||
|
||||
torch_model_name = torch_model_name.replace("/", "_")
|
||||
torch_model_dir = os.path.join(
|
||||
WORKDIR, str(torch_model_name) + "_torch"
|
||||
)
|
||||
os.makedirs(torch_model_dir, exist_ok=True)
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
model,
|
||||
(input,),
|
||||
frontend="torch",
|
||||
)
|
||||
mlir_importer.import_debug(
|
||||
is_dynamic=False,
|
||||
tracing_required=tracing_required,
|
||||
dir=torch_model_dir,
|
||||
model_name=torch_model_name,
|
||||
)
|
||||
mlir_hash = create_hash(
|
||||
os.path.join(
|
||||
torch_model_dir, torch_model_name + "_torch" + ".mlir"
|
||||
)
|
||||
)
|
||||
np.save(os.path.join(torch_model_dir, "hash"), np.array(mlir_hash))
|
||||
# Generate torch dynamic models.
|
||||
mlir_importer.import_debug(
|
||||
is_dynamic=True,
|
||||
tracing_required=tracing_required,
|
||||
dir=torch_model_dir,
|
||||
model_name=torch_model_name + "_dynamic",
|
||||
)
|
||||
|
||||
|
||||
def save_tf_model(tf_model_list):
|
||||
from tank.model_utils_tf import (
|
||||
get_causal_image_model,
|
||||
get_causal_lm_model,
|
||||
get_keras_model,
|
||||
get_TFhf_model,
|
||||
)
|
||||
|
||||
with open(tf_model_list) as csvfile:
|
||||
tf_reader = csv.reader(csvfile, delimiter=",")
|
||||
fields = next(tf_reader)
|
||||
for row in tf_reader:
|
||||
tf_model_name = row[0]
|
||||
model_type = row[1]
|
||||
|
||||
model = None
|
||||
input = None
|
||||
print(f"Generating artifacts for model {tf_model_name}")
|
||||
if model_type == "hf":
|
||||
model, input, _ = get_causal_lm_model(tf_model_name)
|
||||
if model_type == "img":
|
||||
model, input, _ = get_causal_image_model(tf_model_name)
|
||||
if model_type == "keras":
|
||||
model, input, _ = get_keras_model(tf_model_name)
|
||||
if model_type == "TFhf":
|
||||
model, input, _ = get_TFhf_model(tf_model_name)
|
||||
|
||||
tf_model_name = tf_model_name.replace("/", "_")
|
||||
tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
|
||||
os.makedirs(tf_model_dir, exist_ok=True)
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
model,
|
||||
input,
|
||||
frontend="tf",
|
||||
)
|
||||
mlir_importer.import_debug(
|
||||
dir=tf_model_dir,
|
||||
model_name=tf_model_name,
|
||||
)
|
||||
mlir_hash = create_hash(
|
||||
os.path.join(tf_model_dir, tf_model_name + "_tf" + ".mlir")
|
||||
)
|
||||
np.save(os.path.join(tf_model_dir, "hash"), np.array(mlir_hash))
|
||||
|
||||
|
||||
def save_tflite_model(tflite_model_list):
|
||||
from shark.tflite_utils import TFLitePreprocessor
|
||||
|
||||
with open(tflite_model_list) as csvfile:
|
||||
tflite_reader = csv.reader(csvfile, delimiter=",")
|
||||
for row in tflite_reader:
|
||||
print("\n")
|
||||
tflite_model_name = row[0]
|
||||
tflite_model_link = row[1]
|
||||
print("tflite_model_name", tflite_model_name)
|
||||
print("tflite_model_link", tflite_model_link)
|
||||
tflite_model_name_dir = os.path.join(
|
||||
WORKDIR, str(tflite_model_name) + "_tflite"
|
||||
)
|
||||
os.makedirs(tflite_model_name_dir, exist_ok=True)
|
||||
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
tflite_preprocessor = TFLitePreprocessor(str(tflite_model_name))
|
||||
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
|
||||
inputs = tflite_preprocessor.get_inputs()
|
||||
tflite_interpreter = tflite_preprocessor.get_interpreter()
|
||||
|
||||
# Use SharkImporter to get SharkInference input args
|
||||
my_shark_importer = SharkImporter(
|
||||
module=tflite_interpreter,
|
||||
inputs=inputs,
|
||||
frontend="tflite",
|
||||
raw_model_file=raw_model_file_path,
|
||||
)
|
||||
my_shark_importer.import_debug(
|
||||
dir=tflite_model_name_dir,
|
||||
model_name=tflite_model_name,
|
||||
func_name="main",
|
||||
)
|
||||
mlir_hash = create_hash(
|
||||
os.path.join(
|
||||
tflite_model_name_dir,
|
||||
tflite_model_name + "_tflite" + ".mlir",
|
||||
)
|
||||
)
|
||||
np.save(
|
||||
os.path.join(tflite_model_name_dir, "hash"),
|
||||
np.array(mlir_hash),
|
||||
)
|
||||
|
||||
|
||||
# Validates whether the file is present or not.
|
||||
def is_valid_file(arg):
|
||||
if not os.path.exists(arg):
|
||||
return None
|
||||
else:
|
||||
return arg
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--torch_model_csv",
|
||||
type=lambda x: is_valid_file(x),
|
||||
default="./tank/pytorch/torch_model_list.csv",
|
||||
help="""Contains the file with torch_model name and args.
|
||||
Please see: https://github.com/nod-ai/SHARK/blob/main/tank/pytorch/torch_model_list.csv""",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tf_model_csv",
|
||||
type=lambda x: is_valid_file(x),
|
||||
default="./tank/tf/tf_model_list.csv",
|
||||
help="Contains the file with tf model name and args.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tflite_model_csv",
|
||||
type=lambda x: is_valid_file(x),
|
||||
default="./tank/tflite/tflite_model_list.csv",
|
||||
help="Contains the file with tf model name and args.",
|
||||
)
|
||||
parser.add_argument("--upload", type=bool, default=False)
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.torch_model_csv:
|
||||
save_torch_model(args.torch_model_csv)
|
||||
|
||||
if args.tf_model_csv:
|
||||
save_tf_model(args.tf_model_csv)
|
||||
|
||||
if args.tflite_model_csv:
|
||||
save_tflite_model(args.tflite_model_csv)
|
||||
|
||||
if args.upload:
|
||||
git_hash = sp.getoutput("git log -1 --format='%h'") + "/"
|
||||
print("uploading files to gs://shark_tank/" + git_hash)
|
||||
os.system(
|
||||
"gsutil cp -r ./gen_shark_tank/* gs://shark_tank/" + git_hash
|
||||
)
|
||||
File diff suppressed because it is too large
@@ -1,109 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest


class BertBaseUncasedModuleTester:
    def __init__(
        self,
        benchmark=False,
        onnx_bench=False,
    ):
        self.benchmark = benchmark
        self.onnx_bench = onnx_bench

    def create_and_check_module(self, dynamic, device):
        model_mlir, func_name, input, act_out = download_torch_model(
            "bert-base-uncased", dynamic
        )

        shark_module = SharkInference(
            model_mlir,
            func_name,
            device=device,
            mlir_dialect="linalg",
            is_benchmark=self.benchmark,
        )
        shark_module.compile()
        results = shark_module.forward(input)
        assert True == compare_tensors(act_out, results)

        if self.benchmark == True:
            shark_args.onnx_bench = self.onnx_bench
            shark_module.shark_runner.benchmark_all_csv(
                (input),
                "bert-base-uncased",
                dynamic,
                device,
                "torch",
            )


class BertBaseUncasedModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = BertBaseUncasedModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    def test_module_dynamic_cpu(self):
        dynamic = True
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_dynamic_gpu(self):
        dynamic = True
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_dynamic_vulkan(self):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
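A sketch of how tests like the one above might be invoked, assuming the repository's conftest registers the --benchmark option that the fixture reads (the exact test path and option spelling are assumptions, following the nightly workflow's pytest -k filters):

    # run only the CPU variants of the bert tests, with benchmarking enabled
    pytest tank/pytorch -k 'bert and cpu' --benchmark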
@@ -1,71 +0,0 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model

import iree.compiler as ireec
import unittest
import pytest
import numpy as np


class DistilBertModuleTester:
    def __init__(
        self,
        benchmark=False,
    ):
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        model, func_name, inputs, golden_out = download_tf_model(
            "distilbert-base-uncased"
        )

        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class DistilBertModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = DistilBertModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
@@ -1,95 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.parser import shark_args
from shark.shark_downloader import download_torch_model

import unittest
import numpy as np
import pytest


class DistilBertModuleTester:
    def __init__(
        self,
        benchmark=False,
    ):
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        model_mlir, func_name, input, act_out = download_torch_model(
            "distilbert-base-uncased", dynamic
        )

        # from shark.shark_importer import SharkImporter
        # mlir_importer = SharkImporter(
        #     model,
        #     (input,),
        #     frontend="torch",
        # )
        # minilm_mlir, func_name = mlir_importer.import_mlir(
        #     is_dynamic=dynamic, tracing_required=True
        # )

        shark_module = SharkInference(
            model_mlir,
            func_name,
            device=device,
            mlir_dialect="linalg",
            is_benchmark=self.benchmark,
        )
        shark_module.compile()
        results = shark_module.forward(input)
        assert True == compare_tensors(act_out, results)

        if self.benchmark == True:
            shark_module.shark_runner.benchmark_all_csv(
                (input),
                "distilbert-base-uncased",
                dynamic,
                device,
                "torch",
            )


class DistilBertModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = DistilBertModuleTester(self)
        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class MobileNetV3ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"mobilenet_v3_small", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"alexnet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class MobileNetV3ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MobileNetV3ModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="golden results don't match.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="stuck in the pipeline.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class Resnet101ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"resnet101", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"resnet101",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class Resnet101ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = Resnet101ModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import get_vision_model, compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class Resnet50ModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"resnet50", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"resnet50",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class Resnet50ModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = Resnet50ModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,91 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class UnetModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"unet", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"unet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class UnetModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = UnetModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -20,7 +20,6 @@ tensorflow-metal
|
||||
#tf-models-nightly
|
||||
#tensorflow-text-nightly
|
||||
transformers==4.18.0
|
||||
tensorflow-probability
|
||||
#jax[cpu]
|
||||
|
||||
# tflitehub dependencies.
|
||||
|
||||
@@ -15,10 +15,9 @@ iree-tools-tf
|
||||
# TensorFlow and JAX.
|
||||
gin-config
|
||||
tensorflow
|
||||
#tf-models-nightly
|
||||
#tensorflow-text-nightly
|
||||
tf-models-nightly
|
||||
tensorflow-text-nightly
|
||||
transformers==4.18.0
|
||||
#tensorflow-probability
|
||||
#jax[cpu]
|
||||
|
||||
|
||||
@@ -30,11 +29,11 @@ lit
|
||||
pyyaml
|
||||
|
||||
#ONNX and ORT for benchmarking
|
||||
#--extra-index-url https://test.pypi.org/simple/
|
||||
#protobuf
|
||||
#coloredlogs
|
||||
#flatbuffers
|
||||
#sympy
|
||||
#psutil
|
||||
#onnx-weekly
|
||||
#ort-nightly
|
||||
--extra-index-url https://test.pypi.org/simple/
|
||||
protobuf
|
||||
coloredlogs
|
||||
flatbuffers
|
||||
sympy
|
||||
psutil
|
||||
onnx-weekly
|
||||
ort-nightly
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
setuptools
|
||||
wheel
|
||||
|
||||
# SHARK Runner
|
||||
#SHARK Runner
|
||||
tqdm
|
||||
|
||||
# SHARK Downloader
|
||||
gsutil
|
||||
|
||||
# Testing
|
||||
#Testing
|
||||
pytest
|
||||
pytest-xdist
|
||||
Pillow
|
||||
|
||||
2
setup.py
@@ -26,7 +26,7 @@ setup(
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
packages=find_packages(exclude=("examples")),
|
||||
packages=find_packages(exclude=('examples')),
|
||||
python_requires=">=3.7",
|
||||
install_requires=[
|
||||
"numpy",
|
||||
|
||||
@@ -98,7 +98,7 @@ if [[ ! -z "${IMPORTER}" ]]; then
|
||||
echo "${Yellow}Installing importer tools.."
|
||||
if [[ $(uname -s) = 'Linux' ]]; then
|
||||
echo "${Yellow}Linux detected.. installing Linux importer tools"
|
||||
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cu116
|
||||
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
elif [[ $(uname -s) = 'Darwin' ]]; then
|
||||
echo "${Yellow}macOS detected.. installing macOS importer tools"
|
||||
#Conda seems to have some problems installing these packages and hope they get resolved upstream.
|
||||
@@ -108,26 +108,6 @@ fi
|
||||
|
||||
$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases
|
||||
|
||||
if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
|
||||
$PYTHON -m pip uninstall -y torch torchvision
|
||||
$PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
|
||||
if [ $? -eq 0 ];then
|
||||
echo "Successfully Installed torch + cu116."
|
||||
else
|
||||
echo "Could not install torch + cu116." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ ! -z "${ONNX}" ]]; then
|
||||
echo "${Yellow}Installing ONNX and onnxruntime for benchmarks..."
|
||||
$PYTHON -m pip install onnx onnxruntime psutil
|
||||
if [ $? -eq 0 ];then
|
||||
echo "Successfully installed ONNX and ONNX runtime."
|
||||
else
|
||||
echo "Could not install ONNX." >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "${CONDA_PREFIX}" ]]; then
|
||||
echo "${Green}Before running examples activate venv with:"
|
||||
echo " ${Green}source $VENV_DIR/bin/activate"
|
||||
|
||||
@@ -18,10 +18,12 @@ from torch.fx.experimental.proxy_tensor import make_fx
|
||||
from torch.nn.utils import _stateless
|
||||
|
||||
from torch import fx
|
||||
import copy
|
||||
import tempfile
|
||||
|
||||
|
||||
class MakeFxModule:
|
||||
|
||||
def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
|
||||
self.model = model
|
||||
self.inputs = inputs
|
||||
@@ -51,28 +53,20 @@ class MakeFxModule:
|
||||
return fx_g
|
||||
|
||||
def generate_graph(self):
|
||||
fx_g = make_fx(
|
||||
self.custom_inference_fn,
|
||||
decomposition_table=get_decompositions(
|
||||
[
|
||||
torch.ops.aten.embedding_dense_backward,
|
||||
torch.ops.aten.native_layer_norm_backward,
|
||||
torch.ops.aten.slice_backward,
|
||||
torch.ops.aten.select_backward,
|
||||
]
|
||||
),
|
||||
)(
|
||||
dict(self.model.named_parameters()),
|
||||
dict(self.model.named_buffers()),
|
||||
self.inputs,
|
||||
)
|
||||
fx_g = make_fx(self.custom_inference_fn,
|
||||
decomposition_table=get_decompositions([
|
||||
torch.ops.aten.embedding_dense_backward,
|
||||
torch.ops.aten.native_layer_norm_backward,
|
||||
torch.ops.aten.slice_backward,
|
||||
torch.ops.aten.select_backward
|
||||
]))(dict(self.model.named_parameters()),
|
||||
dict(self.model.named_buffers()), self.inputs)
|
||||
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
|
||||
fx_g.recompile()
|
||||
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
|
||||
ts_g = torch.jit.script(fx_g)
|
||||
temp = tempfile.NamedTemporaryFile(
|
||||
suffix="_shark_ts", prefix="temp_ts_"
|
||||
)
|
||||
temp = tempfile.NamedTemporaryFile(suffix='_shark_ts',
|
||||
prefix='temp_ts_')
|
||||
ts_g.save(temp.name)
|
||||
new_ts = torch.jit.load(temp.name)
|
||||
self.training_graph = new_ts
|
||||
|
||||
@@ -12,30 +12,10 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# All the iree_gpu related functionalities go here.
|
||||
|
||||
import iree.runtime as ireert
|
||||
import sys
|
||||
import ctypes
|
||||
from shark.parser import shark_args
|
||||
|
||||
# Get the default gpu args given the architecture.
|
||||
def get_iree_gpu_args():
|
||||
ireert.flags.FUNCTION_INPUT_VALIDATION = False
|
||||
ireert.flags.parse_flags("--cuda_allow_inline_execution")
|
||||
# TODO: Give the user_interface to pass the sm_arch.
|
||||
sm_arch = get_cuda_sm_cc()
|
||||
if (
|
||||
sm_arch in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86"]
|
||||
) and (shark_args.enable_tf32 == True):
|
||||
return [
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
|
||||
]
|
||||
else:
|
||||
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
|
||||
|
||||
|
||||
# Some constants taken from cuda.h
|
||||
#Some constants taken from cuda.h
|
||||
CUDA_SUCCESS = 0
|
||||
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
|
||||
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
|
||||
@@ -44,7 +24,7 @@ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
|
||||
|
||||
|
||||
def get_cuda_sm_cc():
|
||||
libnames = ("libcuda.so", "libcuda.dylib", "cuda.dll")
|
||||
libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
|
||||
for libname in libnames:
|
||||
try:
|
||||
cuda = ctypes.CDLL(libname)
|
||||
@@ -53,10 +33,10 @@ def get_cuda_sm_cc():
|
||||
else:
|
||||
break
|
||||
else:
|
||||
raise OSError("could not load any of: " + " ".join(libnames))
|
||||
raise OSError("could not load any of: " + ' '.join(libnames))
|
||||
|
||||
nGpus = ctypes.c_int()
|
||||
name = b" " * 100
|
||||
name = b' ' * 100
|
||||
cc_major = ctypes.c_int()
|
||||
cc_minor = ctypes.c_int()
|
||||
|
||||
@@ -68,44 +48,31 @@ def get_cuda_sm_cc():
|
||||
result = cuda.cuInit(0)
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print(
|
||||
"cuInit failed with error code %d: %s"
|
||||
% (result, error_str.value.decode())
|
||||
)
|
||||
print("cuInit failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print(
|
||||
"cuDeviceGetCount failed with error code %d: %s"
|
||||
% (result, error_str.value.decode())
|
||||
)
|
||||
print("cuDeviceGetCount failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
print("Found %d device(s)." % nGpus.value)
|
||||
for i in range(nGpus.value):
|
||||
result = cuda.cuDeviceGet(ctypes.byref(device), i)
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print(
|
||||
"cuDeviceGet failed with error code %d: %s"
|
||||
% (result, error_str.value.decode())
|
||||
)
|
||||
print("cuDeviceGet failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
print("Device: %d" % i)
|
||||
if (
|
||||
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device)
|
||||
== CUDA_SUCCESS
|
||||
):
|
||||
print(" Name: %s" % (name.split(b"\0", 1)[0].decode()))
|
||||
if (
|
||||
cuda.cuDeviceComputeCapability(
|
||||
ctypes.byref(cc_major), ctypes.byref(cc_minor), device
|
||||
)
|
||||
== CUDA_SUCCESS
|
||||
):
|
||||
print(
|
||||
" Compute Capability: %d.%d"
|
||||
% (cc_major.value, cc_minor.value)
|
||||
)
|
||||
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
|
||||
device) == CUDA_SUCCESS:
|
||||
print(" Name: %s" % (name.split(b'\0', 1)[0].decode()))
|
||||
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
|
||||
ctypes.byref(cc_minor),
|
||||
device) == CUDA_SUCCESS:
|
||||
print(" Compute Capability: %d.%d" %
|
||||
(cc_major.value, cc_minor.value))
|
||||
sm = f"sm_{cc_major.value}{cc_minor.value}"
|
||||
return sm
|
||||
@@ -8,9 +8,7 @@ try:
|
||||
from torchdynamo.optimizations.backends import create_backend
|
||||
from torchdynamo.optimizations.subgraph import SubGraph
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo"
|
||||
)
|
||||
print("Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo")
|
||||
exit()
|
||||
|
||||
NUM_ITERS = 10
|
||||
@@ -26,9 +24,7 @@ def __torch_mlir(fx_graph, *args, **kwargs):
|
||||
|
||||
for node in fx_g.graph.nodes:
|
||||
if node.op == "output":
|
||||
assert (
|
||||
len(node.args) == 1
|
||||
), "Output node must have a single argument"
|
||||
assert len(node.args) == 1, "Output node must have a single argument"
|
||||
node_arg = node.args[0]
|
||||
if isinstance(node_arg, tuple) and len(node_arg) == 1:
|
||||
node.args = (node_arg[0],)
|
||||
@@ -45,12 +41,8 @@ def __torch_mlir(fx_graph, *args, **kwargs):
|
||||
if len(args) == 1 and isinstance(args[0], list):
|
||||
args = args[0]
|
||||
|
||||
linalg_module = compile(
|
||||
ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS
|
||||
)
|
||||
callable, _ = get_iree_compiled_module(
|
||||
linalg_module, "cuda", func_name="forward"
|
||||
)
|
||||
linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)
|
||||
callable, _ = get_iree_compiled_module(linalg_module, "cuda", func_name="forward")
|
||||
|
||||
def forward(*inputs):
|
||||
return callable(*inputs)
|
||||
|
||||
@@ -9,24 +9,23 @@ from shark.shark_inference import SharkInference
|
||||
clip_vit_inputs = [
|
||||
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
|
||||
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32)
|
||||
]
|
||||
|
||||
|
||||
class CLIPModule(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(CLIPModule, self).__init__()
|
||||
self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
|
||||
|
||||
self.m.predict = lambda x, y, z: self.m(
|
||||
input_ids=x, attention_mask=y, pixel_values=z
|
||||
)
|
||||
input_ids=x, attention_mask=y, pixel_values=z)
|
||||
|
||||
@tf.function(input_signature=clip_vit_inputs)
|
||||
def forward(self, input_ids, attention_mask, pixel_values):
|
||||
return self.m.predict(
|
||||
input_ids, attention_mask, pixel_values
|
||||
).logits_per_image
|
||||
return self.m.predict(input_ids, attention_mask,
|
||||
pixel_values).logits_per_image
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -36,30 +35,17 @@ if __name__ == "__main__":
|
||||
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
|
||||
image = Image.open(requests.get(url, stream=True).raw)
|
||||
|
||||
inputs = processor(
|
||||
text=["a photo of a cat", "a photo of a dog"],
|
||||
images=image,
|
||||
return_tensors="tf",
|
||||
padding=True,
|
||||
)
|
||||
inputs = processor(text=["a photo of a cat", "a photo of a dog"],
|
||||
images=image,
|
||||
return_tensors="tf",
|
||||
padding=True)
|
||||
|
||||
shark_module = SharkInference(
|
||||
CLIPModule(),
|
||||
(
|
||||
inputs["input_ids"],
|
||||
inputs["attention_mask"],
|
||||
inputs["pixel_values"],
|
||||
),
|
||||
)
|
||||
(inputs["input_ids"], inputs["attention_mask"], inputs["pixel_values"]))
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
|
||||
print(
|
||||
shark_module.forward(
|
||||
(
|
||||
inputs["input_ids"],
|
||||
inputs["attention_mask"],
|
||||
inputs["pixel_values"],
|
||||
)
|
||||
)
|
||||
)
|
||||
shark_module.forward((inputs["input_ids"], inputs["attention_mask"],
|
||||
inputs["pixel_values"])))
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||
import torch
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from iree.compiler import compile_str
|
||||
from iree import runtime as ireert
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
|
||||
class AlbertModule(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
|
||||
self.model.eval()
|
||||
|
||||
def forward(self, input_ids, attention_mask):
|
||||
return self.model(
|
||||
input_ids=input_ids, attention_mask=attention_mask
|
||||
).logits
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
|
||||
text = "This [MASK] is very tasty."
|
||||
encoded_inputs = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="pt",
|
||||
)
|
||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||
mlir_importer = SharkImporter(
|
||||
AlbertModule(),
|
||||
inputs,
|
||||
frontend="torch",
|
||||
)
|
||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
is_dynamic=False, tracing_required=True
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
minilm_mlir, func_name, mlir_dialect="linalg"
|
||||
)
|
||||
shark_module.compile()
|
||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
||||
mask_id = torch.where(
|
||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||
)[1]
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
while True:
|
||||
try:
|
||||
new_text = input("Give me a sentence with [MASK] to fill: ")
|
||||
encoded_inputs = tokenizer(
|
||||
new_text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="pt",
|
||||
)
|
||||
inputs = (
|
||||
encoded_inputs["input_ids"],
|
||||
encoded_inputs["attention_mask"],
|
||||
)
|
||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
||||
mask_id = torch.where(
|
||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||
)[1]
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = (
|
||||
torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
|
||||
)
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("Exiting program.")
|
||||
break
|
||||
@@ -1,100 +0,0 @@
|
||||
from PIL import Image
|
||||
import requests
|
||||
|
||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
||||
import tensorflow as tf
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from iree.compiler import tf as tfc
|
||||
from iree.compiler import compile_str
|
||||
from iree import runtime as ireert
|
||||
import os
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
# Create a set of inputs
|
||||
t5_inputs = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
]
|
||||
|
||||
|
||||
class AlbertModule(tf.Module):
|
||||
def __init__(self):
|
||||
super(AlbertModule, self).__init__()
|
||||
self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
|
||||
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
|
||||
|
||||
@tf.function(input_signature=t5_inputs)
|
||||
def forward(self, input_ids, attention_mask):
|
||||
return self.m.predict(input_ids, attention_mask)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
|
||||
# text = "This is a great [MASK]."
|
||||
text = "This [MASK] is very tasty."
|
||||
encoded_inputs = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="tf",
|
||||
)
|
||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||
mlir_importer = SharkImporter(
|
||||
AlbertModule(),
|
||||
inputs,
|
||||
frontend="tf",
|
||||
)
|
||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
is_dynamic=False, tracing_required=False
|
||||
)
|
||||
shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="mhlo")
|
||||
shark_module.compile()
|
||||
output_idx = 0
|
||||
data_idx = 1
|
||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
||||
mask_id = np.where(
|
||||
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
|
||||
)
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
while True:
|
||||
try:
|
||||
new_text = input("Give me a sentence with [MASK] to fill: ")
|
||||
encoded_inputs = tokenizer(
|
||||
new_text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
return_tensors="tf",
|
||||
)
|
||||
inputs = (
|
||||
encoded_inputs["input_ids"],
|
||||
encoded_inputs["attention_mask"],
|
||||
)
|
||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
||||
mask_id = np.where(
|
||||
tf.squeeze(encoded_inputs["input_ids"])
|
||||
== tokenizer.mask_token_id
|
||||
)
|
||||
mask_token_logits = token_logits[0, mask_id, :]
|
||||
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[
|
||||
0:5
|
||||
]
|
||||
for token in top_5_tokens:
|
||||
print(
|
||||
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
print("Exiting program.")
|
||||
sys.exit()
|
||||
@@ -13,6 +13,7 @@ gpt2_inputs = [
|
||||
|
||||
|
||||
class GPT2Module(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(GPT2Module, self).__init__()
|
||||
self.m = TFGPT2Model.from_pretrained("distilgpt2")
|
||||
@@ -29,12 +30,9 @@ if __name__ == "__main__":
|
||||
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
|
||||
text = "I love the distilled version of models."
|
||||
|
||||
inputs = tokenizer(text, return_tensors="tf")
|
||||
inputs = tokenizer(text, return_tensors='tf')
|
||||
shark_module = SharkInference(
|
||||
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
|
||||
)
|
||||
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]))
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
print(
|
||||
shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
|
||||
)
|
||||
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
|
||||
|
||||
@@ -12,26 +12,7 @@ mhlo_ir = r"""builtin.module {
|
||||
arg0 = np.ones((1, 4)).astype(np.float32)
|
||||
arg1 = np.ones((4, 1)).astype(np.float32)
|
||||
|
||||
print("Running shark on cpu backend")
|
||||
shark_module = SharkInference(
|
||||
mhlo_ir, function_name="forward", device="cpu", mlir_dialect="mhlo"
|
||||
)
|
||||
|
||||
# Generate the random inputs and feed into the graph.
|
||||
x = shark_module.generate_random_inputs()
|
||||
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
|
||||
shark_module.set_frontend("mhlo")
|
||||
shark_module.compile()
|
||||
print(shark_module.forward(x))
|
||||
|
||||
print("Running shark on cuda backend")
|
||||
shark_module = SharkInference(
|
||||
mhlo_ir, function_name="forward", device="cuda", mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
print(shark_module.forward(x))
|
||||
|
||||
print("Running shark on vulkan backend")
|
||||
shark_module = SharkInference(
|
||||
mhlo_ir, function_name="forward", device="vulkan", mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
print(shark_module.forward(x))
|
||||
print(shark_module.forward((arg0, arg1)))
|
||||
|
||||
@@ -7,13 +7,17 @@ tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
||||
|
||||
|
||||
class MiniLMSequenceClassification(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = AutoModelForSequenceClassification.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
|
||||
num_labels=2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=False, # Whether the model returns all hidden-states.
|
||||
num_labels=
|
||||
2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=
|
||||
False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=
|
||||
False, # Whether the model returns all hidden-states.
|
||||
torchscript=True,
|
||||
)
|
||||
|
||||
@@ -23,12 +27,9 @@ class MiniLMSequenceClassification(torch.nn.Module):
|
||||
|
||||
test_input = torch.randint(2, (1, 128))
|
||||
|
||||
shark_module = SharkInference(
|
||||
MiniLMSequenceClassification(),
|
||||
(test_input,),
|
||||
jit_trace=True,
|
||||
benchmark_mode=True,
|
||||
)
|
||||
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
|
||||
jit_trace=True,
|
||||
benchmark_mode=True)
|
||||
|
||||
shark_module.compile()
|
||||
shark_module.forward((test_input,))
|
||||
|
||||
@@ -2,6 +2,10 @@ import tensorflow as tf
|
||||
from transformers import BertModel, BertTokenizer, TFBertModel
|
||||
from shark.shark_inference import SharkInference
|
||||
|
||||
gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_memory_growth(gpu, True)
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
BATCH_SIZE = 1
|
||||
|
||||
@@ -9,22 +13,21 @@ BATCH_SIZE = 1
|
||||
bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
|
||||
]
|
||||
|
||||
|
||||
class BertModule(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(BertModule, self).__init__()
|
||||
# Create a BERT trainer with the created network.
|
||||
self.m = TFBertModel.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
|
||||
)
|
||||
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m.predict = lambda x, y, z: self.m.call(
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False
|
||||
)
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
|
||||
|
||||
@tf.function(input_signature=bert_input)
|
||||
def forward(self, input_ids, attention_mask, token_type_ids):
|
||||
@@ -34,28 +37,22 @@ class BertModule(tf.Module):
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = BertTokenizer.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
"microsoft/MiniLM-L12-H384-uncased")
|
||||
text = "Replace me by any text you'd like."
|
||||
encoded_input = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
)
|
||||
encoded_input = tokenizer(text,
|
||||
padding='max_length',
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH)
|
||||
for key in encoded_input:
|
||||
encoded_input[key] = tf.expand_dims(
|
||||
tf.convert_to_tensor(encoded_input[key]), 0
|
||||
)
|
||||
tf.convert_to_tensor(encoded_input[key]), 0)
|
||||
|
||||
test_input = (
|
||||
encoded_input["input_ids"],
|
||||
encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"],
|
||||
)
|
||||
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"])
|
||||
shark_module = SharkInference(
|
||||
BertModule(), test_input, benchmark_mode=True
|
||||
)
|
||||
BertModule(),
|
||||
test_input,
|
||||
benchmark_mode=True)
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
shark_module.benchmark_all(test_input)
|
||||
|
||||
@@ -1,24 +1,35 @@
|
||||
import torch
|
||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
torch.manual_seed(0)
|
||||
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
||||
|
||||
|
||||
mlir_model, func_name, inputs, golden_out = download_torch_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
class MiniLMSequenceClassification(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = AutoModelForSequenceClassification.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
|
||||
num_labels=
|
||||
2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=
|
||||
False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=
|
||||
False, # Whether the model returns all hidden-states.
|
||||
torchscript=True,
|
||||
)
|
||||
|
||||
def forward(self, tokens):
|
||||
return self.model.forward(tokens)[0]
|
||||
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_model, func_name, device="cpu", mlir_dialect="linalg"
|
||||
)
|
||||
test_input = torch.randint(2, (1, 128))
|
||||
|
||||
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
|
||||
jit_trace=True)
|
||||
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
print("The obtained result via shark is: ", result)
|
||||
print("The golden result is:", golden_out)
|
||||
|
||||
|
||||
# Let's generate random inputs, currently supported
|
||||
# for static models.
|
||||
rand_inputs = shark_module.generate_random_inputs()
|
||||
rand_results = shark_module.forward(rand_inputs)
|
||||
|
||||
print("Running shark_module with random_inputs is: ", rand_results)
|
||||
result = shark_module.forward((test_input,))
|
||||
print("Obtained result", result)
|
||||
|
||||
41
shark/examples/shark_inference/minilm_load_benchmark_tf.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import tensorflow as tf
|
||||
from transformers import BertModel, BertTokenizer, TFBertModel
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import shark_load
|
||||
from shark.parser import parser
|
||||
import os
|
||||
|
||||
gpus = tf.config.experimental.list_physical_devices('GPU')
|
||||
for gpu in gpus:
|
||||
tf.config.experimental.set_memory_growth(gpu, True)
|
||||
|
||||
parser.add_argument(
|
||||
"--download_mlir_path",
|
||||
type=str,
|
||||
default="minilm_tf_inference.mlir",
|
||||
help="Specifies path to target mlir file that will be loaded.")
|
||||
load_args, unknown = parser.parse_known_args()
|
||||
|
||||
MAX_SEQUENCE_LENGTH = 512
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = BertTokenizer.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased")
|
||||
text = "Replace me by any text you'd like."
|
||||
encoded_input = tokenizer(text,
|
||||
padding='max_length',
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH)
|
||||
for key in encoded_input:
|
||||
encoded_input[key] = tf.expand_dims(
|
||||
tf.convert_to_tensor(encoded_input[key]), 0)
|
||||
model_name = "minilm_tf_inference"
|
||||
minilm_mlir = shark_load(model_name, load_args.download_mlir_path)
|
||||
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"])
|
||||
shark_module = SharkInference(
|
||||
minilm_mlir, test_input, benchmark_mode=True)
|
||||
shark_module.set_frontend("mhlo")
|
||||
shark_module.compile()
|
||||
shark_module.benchmark_all(test_input)
|
||||
@@ -9,22 +9,21 @@ BATCH_SIZE = 1
|
||||
bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
|
||||
]
|
||||
|
||||
|
||||
class BertModule(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(BertModule, self).__init__()
|
||||
# Create a BERT trainer with the created network.
|
||||
self.m = TFBertModel.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
|
||||
)
|
||||
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m.predict = lambda x, y, z: self.m.call(
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False
|
||||
)
|
||||
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
|
||||
|
||||
@tf.function(input_signature=bert_input)
|
||||
def forward(self, input_ids, attention_mask, token_type_ids):
|
||||
@@ -34,37 +33,24 @@ class BertModule(tf.Module):
|
||||
if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = BertTokenizer.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
"microsoft/MiniLM-L12-H384-uncased")
|
||||
text = "Replace me by any text you'd like."
|
||||
encoded_input = tokenizer(
|
||||
text,
|
||||
padding="max_length",
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH,
|
||||
)
|
||||
encoded_input = tokenizer(text,
|
||||
padding='max_length',
|
||||
truncation=True,
|
||||
max_length=MAX_SEQUENCE_LENGTH)
|
||||
for key in encoded_input:
|
||||
encoded_input[key] = tf.expand_dims(
|
||||
tf.convert_to_tensor(encoded_input[key]), 0
|
||||
)
|
||||
tf.convert_to_tensor(encoded_input[key]), 0)
|
||||
|
||||
shark_module = SharkInference(
|
||||
BertModule(),
|
||||
(
|
||||
encoded_input["input_ids"],
|
||||
encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"],
|
||||
),
|
||||
)
|
||||
(encoded_input["input_ids"], encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"]))
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
|
||||
print(
|
||||
shark_module.forward(
|
||||
(
|
||||
encoded_input["input_ids"],
|
||||
encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"],
|
||||
)
|
||||
)
|
||||
)
|
||||
(encoded_input["input_ids"], encoded_input["attention_mask"],
|
||||
encoded_input["token_type_ids"])))
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import torch
|
||||
import torchvision.models as models
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
|
||||
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
|
||||
|
||||
|
||||
class ResnestModule(torch.nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = torch.hub.load(
|
||||
"zhanghang1989/ResNeSt", "resnest50", pretrained=True
|
||||
)
|
||||
self.model.eval()
|
||||
|
||||
def forward(self, input):
|
||||
return self.model.forward(input)
|
||||
|
||||
|
||||
input = torch.randn(1, 3, 224, 224)
|
||||
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
ResnestModule(),
|
||||
(input,),
|
||||
frontend="torch",
|
||||
)
|
||||
|
||||
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
|
||||
tracing_required=True
|
||||
)
|
||||
|
||||
print(golden_out)
|
||||
|
||||
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((input,))
|
||||
print("Obtained result", result)
|
||||
@@ -5,28 +5,24 @@ import torchvision.models as models
|
||||
from torchvision import transforms
|
||||
import sys
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
|
||||
################################## Preprocessing inputs and model ############
|
||||
def load_and_preprocess_image(url: str):
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
|
||||
}
|
||||
img = Image.open(
|
||||
requests.get(url, headers=headers, stream=True).raw
|
||||
).convert("RGB")
|
||||
img = Image.open(requests.get(url, headers=headers,
|
||||
stream=True).raw).convert("RGB")
|
||||
# preprocessing pipeline
|
||||
preprocess = transforms.Compose(
|
||||
[
|
||||
transforms.Resize(256),
|
||||
transforms.CenterCrop(224),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize(
|
||||
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
|
||||
),
|
||||
]
|
||||
)
|
||||
preprocess = transforms.Compose([
|
||||
transforms.Resize(256),
|
||||
transforms.CenterCrop(224),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
||||
std=[0.229, 0.224, 0.225]),
|
||||
])
|
||||
img_preprocessed = preprocess(img)
|
||||
return torch.unsqueeze(img_preprocessed, 0)
|
||||
|
||||
@@ -48,6 +44,7 @@ def top3_possibilities(res):
|
||||
|
||||
|
||||
class Resnet50Module(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.resnet = models.resnet50(pretrained=True)
|
||||
@@ -64,16 +61,18 @@ labels = load_labels()
|
||||
|
||||
##############################################################################
|
||||
|
||||
input = torch.randn(1, 3, 224, 224)
|
||||
print(input.shape)
|
||||
|
||||
## The img is passed to determine the input shape.
|
||||
shark_module = SharkInference(Resnet50Module(), (img,))
|
||||
shark_module.compile()
|
||||
|
||||
## Can pass any img or input to the forward module.
|
||||
mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
|
||||
|
||||
shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((img.detach().numpy(),))
|
||||
results = shark_module.forward((img,))
|
||||
|
||||
print("The top 3 results obtained via shark_runner is:")
|
||||
print(top3_possibilities(torch.from_numpy(result)))
|
||||
print(top3_possibilities(torch.from_numpy(results)))
|
||||
|
||||
print()
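# Example sketch (not from the commit itself): because the before/after lines
# of this hunk are interleaved above, this is one consolidated reading of the
# new download-based resnet50 path. It assumes `img` comes from
# load_and_preprocess_image and `top3_possibilities` is defined earlier in the
# same file.
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model

mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((img.detach().numpy(),))
print("The top 3 results obtained via shark_runner is:")
print(top3_possibilities(torch.from_numpy(result)))
# End of example sketch.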
|
||||
|
||||
|
||||
@@ -11,12 +11,12 @@ t5_inputs = [
|
||||
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
|
||||
]
|
||||
|
||||
|
||||
class T5Module(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(T5Module, self).__init__()
|
||||
self.m = TFT5Model.from_pretrained("t5-small")
|
||||
self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)
|
||||
self.m.predict = lambda x,y: self.m(input_ids=x, decoder_input_ids=y)
|
||||
|
||||
@tf.function(input_signature=t5_inputs)
|
||||
def forward(self, input_ids, decoder_input_ids):
|
||||
@@ -27,9 +27,12 @@ if __name__ == "__main__":
|
||||
# Prepping Data
|
||||
tokenizer = T5Tokenizer.from_pretrained("t5-small")
|
||||
text = "I love the distilled version of models."
|
||||
inputs = tokenizer(text, return_tensors="tf").input_ids
|
||||
inputs = tokenizer(
|
||||
text, return_tensors="tf"
|
||||
).input_ids
|
||||
|
||||
shark_module = SharkInference(T5Module(), (inputs, inputs))
|
||||
shark_module = SharkInference(
|
||||
T5Module(), (inputs, inputs))
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
print(shark_module.forward((inputs, inputs)))
|
||||
print(shark_module.forward((inputs,inputs)))
|
||||
|
||||
@@ -4,6 +4,7 @@ from shark.shark_inference import SharkInference
|
||||
|
||||
|
||||
class VisionModule(torch.nn.Module):
|
||||
|
||||
def __init__(self, model):
|
||||
super().__init__()
|
||||
self.model = model
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_importer import SharkImporter
|
||||
from shark_runner import SharkInference
|
||||
|
||||
|
||||
# Currently not supported aten.transpose_conv2d missing.
|
||||
class UnetModule(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = torch.hub.load(
|
||||
@@ -15,7 +15,7 @@ class UnetModule(torch.nn.Module):
|
||||
init_features=32,
|
||||
pretrained=True,
|
||||
)
|
||||
self.model.eval()
|
||||
self.train(False)
|
||||
|
||||
def forward(self, input):
|
||||
return self.model(input)
|
||||
@@ -23,17 +23,10 @@ class UnetModule(torch.nn.Module):
|
||||
|
||||
input = torch.randn(1, 3, 224, 224)
|
||||
|
||||
mlir_importer = SharkImporter(
|
||||
print(input)
|
||||
shark_module = SharkInference(
|
||||
UnetModule(),
|
||||
(input,),
|
||||
frontend="torch",
|
||||
)
|
||||
|
||||
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
|
||||
tracing_required=False
|
||||
)
|
||||
|
||||
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
|
||||
shark_module.compile()
|
||||
result = shark_module.forward((input,))
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
shark_module.benchmark_forward((input,))
|
||||
print(input)
|
||||
|
||||
@@ -1,13 +0,0 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model


mlir_model, func_name, inputs, golden_out = download_torch_model("v_diffusion")

shark_module = SharkInference(
    mlir_model, func_name, device="vulkan", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)
@@ -5,13 +5,17 @@ from shark.shark_runner import SharkTrainer
|
||||
|
||||
|
||||
class MiniLMSequenceClassification(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.model = AutoModelForSequenceClassification.from_pretrained(
|
||||
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
|
||||
num_labels=2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=False, # Whether the model returns all hidden-states.
|
||||
num_labels=
|
||||
2, # The number of output labels--2 for binary classification.
|
||||
output_attentions=
|
||||
False, # Whether the model returns attentions weights.
|
||||
output_hidden_states=
|
||||
False, # Whether the model returns all hidden-states.
|
||||
torchscript=True,
|
||||
)
|
||||
|
||||
@@ -33,9 +37,8 @@ inp = (torch.randint(2, (1, 128)),)
|
||||
|
||||
def forward(params, buffers, args):
|
||||
params_and_buffers = {**params, **buffers}
|
||||
_stateless.functional_call(
|
||||
mod, params_and_buffers, args, {}
|
||||
).sum().backward()
|
||||
_stateless.functional_call(mod, params_and_buffers, args,
|
||||
{}).sum().backward()
|
||||
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
|
||||
# optim.load_state_dict(optim_state)
|
||||
optim.step()
|
||||
|
||||
@@ -5,14 +5,13 @@ import tensorflow as tf
|
||||
|
||||
from shark.shark_trainer import SharkTrainer
|
||||
from shark.parser import parser
|
||||
from urllib import request
|
||||
from shark.shark_importer import shark_load
|
||||
|
||||
parser.add_argument(
|
||||
"--download_mlir_path",
|
||||
type=str,
|
||||
default="bert_tf_training.mlir",
|
||||
help="Specifies path to target mlir file that will be loaded.",
|
||||
)
|
||||
help="Specifies path to target mlir file that will be loaded.")
|
||||
load_args, unknown = parser.parse_known_args()
|
||||
|
||||
tf.random.set_seed(0)
|
||||
@@ -26,30 +25,16 @@ if __name__ == "__main__":
|
||||
predict_sample_input = [
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
]
|
||||
file_link = "https://storage.googleapis.com/shark_tank/users/stanley/bert_tf_training.mlir"
|
||||
response = request.urlretrieve(file_link, load_args.download_mlir_path)
|
||||
sample_input_tensors = [
|
||||
tf.convert_to_tensor(val, dtype=tf.int32)
|
||||
for val in predict_sample_input
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
|
||||
]
|
||||
model_name = "bert_tf_training"
|
||||
bert_mlir = shark_load(model_name, load_args.download_mlir_path)
|
||||
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
|
||||
num_iter = 10
|
||||
if not os.path.isfile(load_args.download_mlir_path):
|
||||
raise ValueError(
|
||||
f"Tried looking for target mlir in {load_args.download_mlir_path}, but cannot be found."
|
||||
)
|
||||
with open(load_args.download_mlir_path, "rb") as input_file:
|
||||
bert_mlir = input_file.read()
|
||||
shark_module = SharkTrainer(
|
||||
bert_mlir,
|
||||
(
|
||||
sample_input_tensors,
|
||||
tf.convert_to_tensor(
|
||||
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
|
||||
),
|
||||
),
|
||||
)
|
||||
(sample_input_tensors,
|
||||
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
|
||||
shark_module.set_frontend("mhlo")
|
||||
shark_module.compile()
|
||||
start = time.time()
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import sys
|
||||
from absl import app
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
import tempfile
|
||||
import tensorflow as tf
|
||||
|
||||
from official.nlp.modeling import layers
|
||||
@@ -25,35 +28,31 @@ bert_input = [
|
||||
|
||||
|
||||
class BertModule(tf.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(BertModule, self).__init__()
|
||||
dict_outputs = False
|
||||
test_network = networks.BertEncoder(
|
||||
vocab_size=vocab_size, num_layers=2, dict_outputs=dict_outputs
|
||||
)
|
||||
test_network = networks.BertEncoder(vocab_size=vocab_size,
|
||||
num_layers=2,
|
||||
dict_outputs=dict_outputs)
|
||||
|
||||
# Create a BERT trainer with the created network.
|
||||
bert_trainer_model = bert_classifier.BertClassifier(
|
||||
test_network, num_classes=NUM_CLASSES
|
||||
)
|
||||
test_network, num_classes=NUM_CLASSES)
|
||||
bert_trainer_model.summary()
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m = bert_trainer_model
|
||||
self.m.predict = lambda x: self.m.call(x, training=False)
|
||||
self.predict = tf.function(input_signature=[bert_input])(
|
||||
self.m.predict
|
||||
)
|
||||
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
|
||||
self.m.learn = lambda x, y: self.m.call(x, training=False)
|
||||
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
|
||||
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
|
||||
|
||||
@tf.function(
|
||||
input_signature=[
|
||||
bert_input, # inputs
|
||||
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
|
||||
]
|
||||
)
|
||||
@tf.function(input_signature=[
|
||||
bert_input, # inputs
|
||||
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
|
||||
])
|
||||
def forward(self, inputs, labels):
|
||||
with tf.GradientTape() as tape:
|
||||
# Capture the gradients from forward prop...
|
||||
@@ -71,22 +70,14 @@ if __name__ == "__main__":
|
||||
predict_sample_input = [
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||
]
|
||||
sample_input_tensors = [
|
||||
tf.convert_to_tensor(val, dtype=tf.int32)
|
||||
for val in predict_sample_input
|
||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
|
||||
]
|
||||
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
|
||||
num_iter = 10
|
||||
shark_module = SharkTrainer(
|
||||
BertModule(),
|
||||
(
|
||||
sample_input_tensors,
|
||||
tf.convert_to_tensor(
|
||||
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
|
||||
),
|
||||
),
|
||||
)
|
||||
(sample_input_tensors,
|
||||
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
|
||||
shark_module.set_frontend("tensorflow")
|
||||
shark_module.compile()
|
||||
start = time.time()
|
||||
|
||||
@@ -4,6 +4,7 @@ from shark.shark_trainer import SharkTrainer
|
||||
|
||||
|
||||
class Foo(torch.nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super(Foo, self).__init__()
|
||||
self.l1 = torch.nn.Linear(10, 16)
|
||||
@@ -27,9 +28,8 @@ def get_sorted_params(named_params):
|
||||
|
||||
def forward(params, buffers, args):
|
||||
params_and_buffers = {**params, **buffers}
|
||||
_stateless.functional_call(
|
||||
mod, params_and_buffers, args, {}
|
||||
).sum().backward()
|
||||
_stateless.functional_call(mod, params_and_buffers, args,
|
||||
{}).sum().backward()
|
||||
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
|
||||
optim.step()
|
||||
return params, buffers
|
||||
|
||||
@@ -28,14 +28,9 @@ from torch_mlir.eager_mode.torch_mlir_eager_backend import (
|
||||
TorchMLIREagerBackend,
|
||||
TensorMetaData,
|
||||
)
|
||||
from torch_mlir_e2e_test.eager_backends.refbackend import (
|
||||
NUMPY_TO_TORCH_DTYPE_DICT,
|
||||
)
|
||||
from torch_mlir_e2e_test.eager_backends.refbackend import NUMPY_TO_TORCH_DTYPE_DICT
|
||||
|
||||
from shark.iree_utils.compile_utils import (
|
||||
get_iree_compiled_module,
|
||||
IREE_DEVICE_MAP,
|
||||
)
|
||||
from shark.iree_utils import get_iree_compiled_module, IREE_DEVICE_MAP
|
||||
|
||||
|
||||
class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
@@ -48,19 +43,18 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
|
||||
def __init__(self, device: str):
|
||||
self.torch_device_str = device
|
||||
self.config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
self.raw_device_str = device
|
||||
self.iree_device_str = IREE_DEVICE_MAP[device]
|
||||
self.config = ireert.Config(self.iree_device_str)
|
||||
|
||||
def get_torch_metadata(
|
||||
self, tensor: DeviceArray, kwargs: Dict[str, Any]
|
||||
) -> TensorMetaData:
|
||||
def get_torch_metadata(self, tensor: DeviceArray,
|
||||
kwargs: Dict[str, Any]) -> TensorMetaData:
|
||||
return TensorMetaData(
|
||||
size=tensor.shape,
|
||||
dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
|
||||
device=torch.device(self.torch_device_str),
|
||||
requires_grad=tensor.dtype.type
|
||||
in {np.float, np.float32, np.float64}
|
||||
and kwargs.get("requires_grad", False),
|
||||
in {np.float, np.float32, np.float64} and
|
||||
kwargs.get("requires_grad", False),
|
||||
)
|
||||
|
||||
def compile(self, imported_module: Module):
|
||||
@@ -70,9 +64,9 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
"torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
|
||||
"EagerMode",
|
||||
)
|
||||
callable, _ = get_iree_compiled_module(
|
||||
imported_module, self.raw_device_str, func_name=fn_name
|
||||
)
|
||||
callable, _ = get_iree_compiled_module(imported_module,
|
||||
self.iree_device_str,
|
||||
func_name=fn_name)
|
||||
return callable
|
||||
|
||||
def copy_into(self, dst, src):
|
||||
@@ -82,7 +76,6 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
|
||||
def transfer_from_device_to_torch(self, e):
|
||||
return torch.from_numpy(e.to_host())
|
||||
|
||||
def transfer_from_torch_to_device(
|
||||
self, tensor: torch.Tensor
|
||||
) -> DeviceArray:
|
||||
def transfer_from_torch_to_device(self,
|
||||
tensor: torch.Tensor) -> DeviceArray:
|
||||
return iree.runtime.asdevicearray(self.config.device, tensor.numpy())
|
||||
|
||||
359 shark/iree_utils.py Normal file
@@ -0,0 +1,359 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import iree.runtime as ireert
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
import iree.compiler as ireec
from shark.torch_mlir_utils import get_module_name_for_asm_dump
from shark.cuda_utils import get_cuda_sm_cc
from shark.model_annotation import *
import subprocess
import numpy as np
import os
import re
import sys

IREE_DEVICE_MAP = {
    "cpu": "local-task",
    "gpu": "cuda",
    "cuda": "cuda",
    "vulkan": "vulkan",
    "metal": "vulkan",
    "rocm": "rocm"
}

IREE_TARGET_MAP = {
    "cpu": "dylib",
    "gpu": "cuda",
    "cuda": "cuda",
    "vulkan": "vulkan",
    "metal": "vulkan",
    "rocm": "rocm"
}

UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}


def check_device_drivers(device):
    """Checks necessary drivers present for gpu and vulkan devices"""
    if (device in ["gpu", "cuda"]):
        try:
            subprocess.check_output('nvidia-smi')
        except Exception:
            return True
    elif (device in ["metal", "vulkan"]):
        try:
            subprocess.check_output('vulkaninfo')
        except Exception:
            return True
    elif (device == "cpu"):
        return False
    # Unknown device.
    else:
        return True

    return False
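# Example sketch (not part of the commit): check_device_drivers returns True
# when a required driver is MISSING, so a caller can fall back to CPU like so.
if __name__ == "__main__":
    for candidate in ("cuda", "vulkan", "cpu"):
        if not check_device_drivers(candidate):
            print(f"using {candidate} -> iree driver {IREE_DEVICE_MAP[candidate]}")
            break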
|
||||
|
||||
|
||||
def get_iree_cpu_args():
|
||||
find_triple_cmd = "uname -s -m"
|
||||
os_name, proc_name = subprocess.run(
|
||||
find_triple_cmd, shell=True, stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8').split()
|
||||
if os_name == "Darwin":
|
||||
find_kernel_version_cmd = "uname -r"
|
||||
kernel_version = subprocess.run(find_kernel_version_cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8')
|
||||
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
|
||||
elif os_name == "Linux":
|
||||
target_triple = f"{proc_name}-linux-gnu"
|
||||
else:
|
||||
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
|
||||
raise Exception(error_message)
|
||||
print(f"Target triple found:{target_triple}")
|
||||
return [f"-iree-llvm-target-triple={target_triple}"]
|
||||
|
||||
|
||||
def get_iree_gpu_args():
|
||||
ireert.flags.FUNCTION_INPUT_VALIDATION = False
|
||||
ireert.flags.parse_flags("--cuda_allow_inline_execution")
|
||||
sm_arch = get_cuda_sm_cc()
|
||||
if sm_arch in ['sm_70', 'sm_72', 'sm_75', 'sm_80', 'sm_84', 'sm_86']:
|
||||
return [
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}"
|
||||
]
|
||||
else:
|
||||
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
|
||||
|
||||
|
||||
def get_vulkan_triple_flag():
|
||||
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk \'END{{print $NF}}\'"
|
||||
vulkan_device = run_cmd(vulkan_device_cmd).strip()
|
||||
if vulkan_device == "M1":
|
||||
print("Found Apple Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "A100-SXM4-40GB":
|
||||
print("Found Nvidia Device. Using ampere-rtx3080-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
|
||||
else:
|
||||
print(
|
||||
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] or pull up an issue."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def get_iree_vulkan_args():
|
||||
#vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
|
||||
vulkan_flag = []
|
||||
vulkan_triple_flag = get_vulkan_triple_flag()
|
||||
if vulkan_triple_flag is not None:
|
||||
vulkan_flag.append(vulkan_triple_flag)
|
||||
return vulkan_flag
|
||||
|
||||
|
||||
def get_iree_device_args(device):
|
||||
if device == "cpu":
|
||||
return get_iree_cpu_args()
|
||||
if device in ["gpu", "cuda"]:
|
||||
return get_iree_gpu_args()
|
||||
if device in ["metal", "vulkan"]:
|
||||
return get_iree_vulkan_args()
|
||||
return []
|
||||
|
||||
|
||||
def get_iree_frontend_args(frontend):
|
||||
if frontend in ["torch", "pytorch", "linalg"]:
|
||||
return ["--iree-llvm-target-cpu-features=host"]
|
||||
elif frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
return [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32"
|
||||
]
|
||||
else:
|
||||
# Frontend not found.
|
||||
return []
|
||||
|
||||
|
||||
def compile_module_to_flatbuffer(module, device, frontend, func_name,
|
||||
model_config_path):
|
||||
# Setup Compile arguments wrt to frontends.
|
||||
input_type = ""
|
||||
args = get_iree_frontend_args(frontend)
|
||||
args += get_iree_device_args(device)
|
||||
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_type = "mhlo"
|
||||
elif frontend in ["mhlo", "tosa"]:
|
||||
input_type = frontend
|
||||
elif frontend in ["tflite"]:
|
||||
input_type = "tosa"
|
||||
|
||||
# Annotate the input module with the configs
|
||||
if model_config_path != None:
|
||||
# Currently tuned model only works on tf frontend
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_module = module.decode('utf-8')
|
||||
elif frontend in ["pytorch", "torch"]:
|
||||
input_module = module.operation.get_asm()
|
||||
with create_context() as ctx:
|
||||
module = model_annotation(ctx,
|
||||
input_contents=input_module,
|
||||
config_path=model_config_path)
|
||||
module = str(module)
|
||||
|
||||
# Compile according to the input type, else just try compiling.
|
||||
if input_type not in ["mhlo", "tosa"]:
|
||||
module = str(module)
|
||||
if input_type != "":
|
||||
# Currently for MHLO/TOSA.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
module,
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args,
|
||||
input_type=input_type)
|
||||
else:
|
||||
# Currently for Torch.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
str(module),
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args)
|
||||
return flatbuffer_blob
|
||||
|
||||
|
||||
def get_iree_module(flatbuffer_blob, device, func_name):
|
||||
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
|
||||
config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
ctx = ireert.SystemContext(config=config)
|
||||
ctx.add_vm_module(vm_module)
|
||||
ModuleCompiled = ctx.modules.module[func_name]
|
||||
return ModuleCompiled, config
|
||||
|
||||
|
||||
def get_iree_compiled_module(module,
|
||||
device: str,
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None):
|
||||
"""Given a module returns the compiled .vmfb and configs"""
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
|
||||
func_name, model_config_path)
|
||||
return get_iree_module(flatbuffer_blob, device, func_name)
|
||||
|
||||
|
||||
def export_iree_module_to_vmfb(module,
|
||||
device: str,
|
||||
directory: str,
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None):
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
|
||||
func_name, model_config_path)
|
||||
module_name = f"{frontend}_{func_name}_{device}"
|
||||
filename = os.path.join(directory, module_name + ".vmfb")
|
||||
print(f"Saved vmfb in {filename}.")
|
||||
with open(filename, 'wb') as f:
|
||||
f.write(flatbuffer_blob)
|
||||
return filename
|
||||
|
||||
|
||||
def export_module_to_mlir_file(module, frontend, directory: str):
|
||||
mlir_str = module
|
||||
if frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
mlir_str = module.decode('utf-8')
|
||||
elif frontend in ["pytorch", "torch"]:
|
||||
mlir_str = module.operation.get_asm()
|
||||
filename = os.path.join(directory, "model.mlir")
|
||||
with open(filename, 'w') as f:
|
||||
f.write(mlir_str)
|
||||
print(f"Saved mlir in {filename}.")
|
||||
return filename
|
||||
|
||||
|
||||
def get_results(compiled_vm, input, config, frontend="torch"):
|
||||
"""Runs a .vmfb file given inputs and config and returns output."""
|
||||
device_inputs = input
|
||||
if frontend in ["torch", "pytorch"]:
|
||||
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
|
||||
if frontend in ["tensorflow", "tf", "tflite"]:
|
||||
device_inputs = []
|
||||
for a in input:
|
||||
if (isinstance(a, list)):
|
||||
device_inputs.append([
|
||||
ireert.asdevicearray(config.device, val, dtype=np.int32)
|
||||
for val in a
|
||||
])
|
||||
else:
|
||||
device_inputs.append(ireert.asdevicearray(config.device, a))
|
||||
result = compiled_vm(*device_inputs)
|
||||
result_tensors = []
|
||||
if (isinstance(result, tuple)):
|
||||
for val in result:
|
||||
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
|
||||
return result_tensors
|
||||
elif (isinstance(result, dict)):
|
||||
data = list(result.items())
|
||||
res = np.array(data, dtype=object)
|
||||
return np.copy(res)
|
||||
else:
|
||||
return np.copy(np.asarray(result, dtype=result.dtype))
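# Example sketch (not part of the commit): how the compile and run helpers
# above fit together. `mlir_module` stands for an imported torch-mlir module
# and `sample_input` for a matching numpy array; both are placeholders.
#   compiled_vm, config = get_iree_compiled_module(
#       mlir_module, "cpu", frontend="torch", func_name="forward")
#   outputs = get_results(compiled_vm, (sample_input,), config, frontend="torch")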
|
||||
|
||||
|
||||
######### Benchmark Related Tools ###########
|
||||
|
||||
|
||||
def tensor_to_type_str(input_tensors: tuple, frontend: str):
|
||||
"""
|
||||
Input: A tuple of input tensors i.e tuple(torch.tensor)
|
||||
Output: list of string that represent mlir types (i.e 1x24xf64)
|
||||
# TODO: Support more than floats, and ints
|
||||
"""
|
||||
list_of_type = []
|
||||
for input_tensor in input_tensors:
|
||||
type_string = "x".join([str(dim) for dim in input_tensor.shape])
|
||||
if frontend in ["torch", "pytorch"]:
|
||||
dtype_string = str(input_tensor.dtype).replace("torch.", "")
|
||||
elif frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
dtype = input_tensor.dtype
|
||||
dtype_string = re.findall('\'[^"]*\'',
|
||||
str(dtype))[0].replace("\'", "")
|
||||
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
|
||||
match = regex_split.match(dtype_string)
|
||||
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
|
||||
type_string += f"x{mlir_type_string}"
|
||||
list_of_type.append(type_string)
|
||||
return list_of_type
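# Example sketch (not part of the commit): a 1x24 float32 torch tensor maps to
# the "1x24xf32" style string mentioned in the docstring above.
if __name__ == "__main__":
    import torch
    example = torch.randn(1, 24, dtype=torch.float32)
    print(tensor_to_type_str((example,), frontend="torch"))  # ['1x24xf32']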
|
||||
|
||||
|
||||
def build_benchmark_args(input_file: str,
|
||||
device: str,
|
||||
input_tensors: tuple,
|
||||
frontend: str,
|
||||
training=False):
|
||||
"""
|
||||
Inputs: input_file leading to vmfb, input_tensor to function, target device, and whether it is training or not.
|
||||
Outputs: string that execute benchmark-module on target model.
|
||||
"""
|
||||
path = benchmark_module.__path__[0]
|
||||
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
|
||||
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
|
||||
fn_name = "forward"
|
||||
if training == True:
|
||||
# TODO: Replace name of train with actual train fn name.
|
||||
fn_name = "train"
|
||||
benchmark_cl.append(f"--entry_function={fn_name}")
|
||||
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
|
||||
mlir_input_types = tensor_to_type_str(input_tensors, frontend)
|
||||
for mlir_input in mlir_input_types:
|
||||
benchmark_cl.append(f"--function_input={mlir_input}")
|
||||
time_extractor = "| awk \'END{{print $2 $3}}\'"
|
||||
benchmark_cl.append(time_extractor)
|
||||
return benchmark_cl
|
||||
|
||||
|
||||
def run_cmd(cmd):
|
||||
"""
|
||||
Inputs: cli command string.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
check=True)
|
||||
result_str = result.stdout.decode()
|
||||
return result_str
|
||||
except Exception:
|
||||
sys.exit("Exiting program due to error running:", cmd)
|
||||
|
||||
|
||||
def run_benchmark_module(benchmark_cl):
|
||||
"""
|
||||
Run benchmark command, extract result and return iteration/seconds.
|
||||
|
||||
Input: benchmark command.
|
||||
"""
|
||||
benchmark_path = benchmark_cl[0]
|
||||
assert os.path.exists(
|
||||
benchmark_path
|
||||
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
|
||||
bench_result = run_cmd(' '.join(benchmark_cl))
|
||||
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
|
||||
match = regex_split.match(bench_result)
|
||||
time = float(match.group(1))
|
||||
unit = match.group(2)
|
||||
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
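# Example sketch (not part of the commit) tying the benchmark helpers together;
# `mlir_module` and `input_tensor` are placeholders for a real module and a
# matching sample input.
#   vmfb = export_iree_module_to_vmfb(mlir_module, "cpu", "/tmp", frontend="torch")
#   cl = build_benchmark_args(vmfb, "cpu", (input_tensor,), frontend="torch")
#   print(f"{run_benchmark_module(cl)} iterations/second")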
|
||||
@@ -1,95 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
## Common utilities to be shared by iree utilities.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
|
||||
def run_cmd(cmd):
|
||||
"""
|
||||
Inputs: cli command string.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
check=True,
|
||||
)
|
||||
result_str = result.stdout.decode()
|
||||
return result_str
|
||||
except Exception:
|
||||
sys.exit("Exiting program due to error running:", cmd)
|
||||
|
||||
|
||||
IREE_DEVICE_MAP = {
|
||||
"cpu": "local-task",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm",
|
||||
"intel-gpu": "level_zero",
|
||||
}
|
||||
|
||||
IREE_TARGET_MAP = {
|
||||
"cpu": "llvm-cpu",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm",
|
||||
"intel-gpu": "opencl-spirv",
|
||||
}
|
||||
|
||||
# Finds whether the required drivers are installed for the given device.
|
||||
def check_device_drivers(device):
|
||||
"""Checks necessary drivers present for gpu and vulkan devices"""
|
||||
if device in ["gpu", "cuda"]:
|
||||
try:
|
||||
subprocess.check_output("nvidia-smi")
|
||||
except Exception:
|
||||
return True
|
||||
elif device in ["metal", "vulkan"]:
|
||||
try:
|
||||
subprocess.check_output("vulkaninfo")
|
||||
except Exception:
|
||||
return True
|
||||
elif device in ["intel-gpu"]:
|
||||
try:
|
||||
subprocess.check_output(["dpkg", "-L", "intel-level-zero-gpu"])
|
||||
return False
|
||||
except Exception:
|
||||
return True
|
||||
elif device == "cpu":
|
||||
return False
|
||||
# Unknown device.
|
||||
else:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# Installation info for the missing device drivers.
|
||||
def device_driver_info(device):
|
||||
if device in ["gpu", "cuda"]:
|
||||
return "nvidia-smi not found, please install the required drivers from https://www.nvidia.in/Download/index.aspx?lang=en-in"
|
||||
elif device in ["metal", "vulkan"]:
|
||||
return "vulkaninfo not found, Install from https://vulkan.lunarg.com/sdk/home or your distribution"
|
||||
else:
|
||||
return f"{device} is not supported."
|
||||
@@ -1,97 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
|
||||
from shark.iree_utils._common import run_cmd, IREE_DEVICE_MAP
|
||||
import numpy as np
|
||||
import os
|
||||
import re
|
||||
|
||||
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
|
||||
|
||||
|
||||
def tensor_to_type_str(input_tensors: tuple, mlir_dialect: str):
|
||||
"""
|
||||
Input: A tuple of input tensors i.e tuple(torch.tensor)
|
||||
Output: list of string that represent mlir types (i.e 1x24xf64)
|
||||
# TODO: Support more than floats, and ints
|
||||
"""
|
||||
list_of_type = []
|
||||
for input_tensor in input_tensors:
|
||||
type_string = "x".join([str(dim) for dim in input_tensor.shape])
|
||||
if mlir_dialect in ["linalg", "tosa"]:
|
||||
dtype_string = str(input_tensor.dtype).replace("torch.", "")
|
||||
elif mlir_dialect in ["mhlo", "tflite"]:
|
||||
dtype = input_tensor.dtype
|
||||
try:
|
||||
dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
|
||||
"'", ""
|
||||
)
|
||||
except IndexError:
|
||||
dtype_string = str(dtype)
|
||||
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
|
||||
match = regex_split.match(dtype_string)
|
||||
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
|
||||
type_string += f"x{mlir_type_string}"
|
||||
list_of_type.append(type_string)
|
||||
return list_of_type
|
||||
|
||||
|
||||
def build_benchmark_args(
|
||||
input_file: str,
|
||||
device: str,
|
||||
input_tensors: tuple,
|
||||
mlir_dialect: str,
|
||||
training=False,
|
||||
):
|
||||
"""
|
||||
Inputs: input_file leading to vmfb, input_tensor to function, target device,
|
||||
and whether it is training or not.
|
||||
Outputs: string that execute benchmark-module on target model.
|
||||
"""
|
||||
path = benchmark_module.__path__[0]
|
||||
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
|
||||
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
|
||||
# TODO: The function named can be passed as one of the args.
|
||||
fn_name = "forward"
|
||||
if training == True:
|
||||
# TODO: Replace name of train with actual train fn name.
|
||||
fn_name = "train"
|
||||
benchmark_cl.append(f"--entry_function={fn_name}")
|
||||
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
|
||||
mlir_input_types = tensor_to_type_str(input_tensors, mlir_dialect)
|
||||
for mlir_input in mlir_input_types:
|
||||
benchmark_cl.append(f"--function_input={mlir_input}")
|
||||
time_extractor = "| awk 'END{{print $2 $3}}'"
|
||||
benchmark_cl.append(time_extractor)
|
||||
return benchmark_cl
|
||||
|
||||
|
||||
def run_benchmark_module(benchmark_cl):
|
||||
"""
|
||||
Run benchmark command, extract result and return iteration/seconds.
|
||||
|
||||
# TODO: Add an example of the benchmark command.
|
||||
Input: benchmark command.
|
||||
"""
|
||||
benchmark_path = benchmark_cl[0]
|
||||
assert os.path.exists(
|
||||
benchmark_path
|
||||
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
|
||||
bench_result = run_cmd(" ".join(benchmark_cl))
|
||||
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
|
||||
match = regex_split.match(bench_result)
|
||||
time = float(match.group(1))
|
||||
unit = match.group(2)
|
||||
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
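# Example sketch (not part of the commit), answering the TODO in the docstring
# above: for a CPU run with a single 1x128 int64 input, build_benchmark_args in
# this file assembles roughly the following command list (paths invented).
#   [".../iree-benchmark-module",
#    "--module_file=/tmp/linalg_forward_cpu.vmfb",
#    "--entry_function=forward",
#    "--device=local-task",
#    "--function_input=1x128xi64",
#    "| awk 'END{{print $2 $3}}'"]
# run_benchmark_module joins this with spaces, runs it through a shell, and
# converts the reported time into iterations per second.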
|
||||
@@ -1,173 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import iree.runtime as ireert
|
||||
import iree.compiler as ireec
|
||||
from shark.iree_utils._common import IREE_DEVICE_MAP, IREE_TARGET_MAP
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# Get the iree-compile arguments given device.
|
||||
def get_iree_device_args(device):
|
||||
if device == "cpu":
|
||||
from shark.iree_utils.cpu_utils import get_iree_cpu_args
|
||||
|
||||
return get_iree_cpu_args()
|
||||
if device in ["gpu", "cuda"]:
|
||||
from shark.iree_utils.gpu_utils import get_iree_gpu_args
|
||||
|
||||
return get_iree_gpu_args()
|
||||
if device in ["metal", "vulkan"]:
|
||||
from shark.iree_utils.vulkan_utils import get_iree_vulkan_args
|
||||
|
||||
return get_iree_vulkan_args()
|
||||
return []
|
||||
|
||||
|
||||
# Get the iree-compiler arguments given frontend.
|
||||
def get_iree_frontend_args(frontend):
|
||||
if frontend in ["torch", "pytorch", "linalg"]:
|
||||
return ["--iree-llvm-target-cpu-features=host"]
|
||||
elif frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
return [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32",
|
||||
]
|
||||
else:
|
||||
# Frontend not found.
|
||||
return []
|
||||
|
||||
|
||||
# Common args to be used given any frontend or device.
|
||||
def get_iree_common_args():
|
||||
return [
|
||||
"--iree-stream-resource-index-bits=64",
|
||||
"--iree-vm-target-index-bits=64",
|
||||
]
|
||||
|
||||
|
||||
def compile_module_to_flatbuffer(
|
||||
module, device, frontend, func_name, model_config_path
|
||||
):
|
||||
# Setup Compile arguments wrt to frontends.
|
||||
input_type = ""
|
||||
args = get_iree_frontend_args(frontend)
|
||||
args += get_iree_device_args(device)
|
||||
args += get_iree_common_args()
|
||||
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_type = "mhlo"
|
||||
elif frontend in ["mhlo", "tosa"]:
|
||||
input_type = frontend
|
||||
elif frontend in ["tflite", "tflite-tosa"]:
|
||||
input_type = "tosa"
|
||||
|
||||
# TODO: make it simpler.
|
||||
# Compile according to the input type, else just try compiling.
|
||||
if input_type not in ["mhlo", "tosa"]:
|
||||
module = str(module)
|
||||
if input_type != "":
|
||||
# Currently for MHLO/TOSA.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
module,
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args,
|
||||
input_type=input_type,
|
||||
)
|
||||
else:
|
||||
# Currently for Torch.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
str(module),
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args,
|
||||
)
|
||||
|
||||
return flatbuffer_blob
|
||||
|
||||
|
||||
def get_iree_module(flatbuffer_blob, device, func_name):
|
||||
# Returns the compiled module and the configs.
|
||||
config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
vm_module = ireert.VmModule.from_flatbuffer(
|
||||
config.vm_instance, flatbuffer_blob
|
||||
)
|
||||
ctx = ireert.SystemContext(config=config)
|
||||
ctx.add_vm_module(vm_module)
|
||||
ModuleCompiled = ctx.modules.module[func_name]
|
||||
return ModuleCompiled, config
|
||||
|
||||
|
||||
def get_iree_compiled_module(
|
||||
module,
|
||||
device: str,
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None,
|
||||
):
|
||||
"""Given a module returns the compiled .vmfb and configs"""
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(
|
||||
module, device, frontend, func_name, model_config_path
|
||||
)
|
||||
return get_iree_module(flatbuffer_blob, device, func_name)
|
||||
|
||||
|
||||
def export_iree_module_to_vmfb(
|
||||
module,
|
||||
device: str,
|
||||
directory: str,
|
||||
mlir_dialect: str = "linalg",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None,
|
||||
):
|
||||
# Compiles the module given specs and saves it as .vmfb file.
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(
|
||||
module, device, mlir_dialect, func_name, model_config_path
|
||||
)
|
||||
module_name = f"{mlir_dialect}_{func_name}_{device}"
|
||||
filename = os.path.join(directory, module_name + ".vmfb")
|
||||
print(f"Saved vmfb in {filename}.")
|
||||
with open(filename, "wb") as f:
|
||||
f.write(flatbuffer_blob)
|
||||
return filename
|
||||
|
||||
|
||||
def export_module_to_mlir_file(module, frontend, directory: str):
|
||||
# TODO: write proper documentation.
|
||||
mlir_str = module
|
||||
if frontend in ["tensorflow", "tf", "mhlo", "tflite"]:
|
||||
mlir_str = module.decode("utf-8")
|
||||
elif frontend in ["pytorch", "torch"]:
|
||||
mlir_str = module.operation.get_asm()
|
||||
filename = os.path.join(directory, "model.mlir")
|
||||
with open(filename, "w") as f:
|
||||
f.write(mlir_str)
|
||||
print(f"Saved mlir in {filename}.")
|
||||
return filename
|
||||
|
||||
|
||||
def get_results(compiled_vm, input, config, frontend="torch"):
|
||||
"""Runs a .vmfb file given inputs and config and returns output."""
|
||||
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
|
||||
result = compiled_vm(*device_inputs)
|
||||
result_tensors = []
|
||||
if isinstance(result, tuple):
|
||||
for val in result:
|
||||
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
|
||||
return result_tensors
|
||||
elif isinstance(result, dict):
|
||||
data = list(result.items())
|
||||
res = np.array(data, dtype=object)
|
||||
return np.copy(res)
|
||||
else:
|
||||
return np.copy(np.asarray(result, dtype=result.dtype))
|
||||
@@ -1,44 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# All the iree_cpu related functionalities go here.
|
||||
|
||||
import subprocess
|
||||
|
||||
# Get the default cpu args.
|
||||
def get_iree_cpu_args():
|
||||
find_triple_cmd = "uname -s -m"
|
||||
os_name, proc_name = (
|
||||
subprocess.run(
|
||||
find_triple_cmd, shell=True, stdout=subprocess.PIPE, check=True
|
||||
)
|
||||
.stdout.decode("utf-8")
|
||||
.split()
|
||||
)
|
||||
if os_name == "Darwin":
|
||||
find_kernel_version_cmd = "uname -r"
|
||||
kernel_version = subprocess.run(
|
||||
find_kernel_version_cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
check=True,
|
||||
).stdout.decode("utf-8")
|
||||
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
|
||||
elif os_name == "Linux":
|
||||
target_triple = f"{proc_name}-linux-gnu"
|
||||
else:
|
||||
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
|
||||
raise Exception(error_message)
|
||||
print(f"Target triple found:{target_triple}")
|
||||
return [f"-iree-llvm-target-triple={target_triple}"]
|
||||
@@ -1,60 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# All the iree_vulkan related functionalities go here.
|
||||
|
||||
from shark.iree_utils._common import run_cmd
|
||||
|
||||
|
||||
def get_vulkan_triple_flag():
|
||||
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk 'END{{print $NF}}'"
|
||||
vulkan_device = run_cmd(vulkan_device_cmd).strip()
|
||||
if vulkan_device == "Ultra":
|
||||
print("Found MacStudio M1 Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "M2":
|
||||
print("Found Apple M2 Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "Max":
|
||||
print("Found Apple M1 Max Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "Pro":
|
||||
print("Found Apple M1 Pro Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "M1":
|
||||
print("Found Apple M1 Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "A100-SXM4-40GB":
|
||||
print("Found Nvidia Device. Using ampere-rtx3080-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
|
||||
elif vulkan_device == "3090":
|
||||
print("Found Nvidia Device. Using ampere-rtx3090-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3090-linux"
|
||||
else:
|
||||
print(
|
||||
"""Optimized kernel for your target device is not added yet.
|
||||
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
|
||||
or pull up an issue."""
|
||||
)
|
||||
print(f"Target : {vulkan_device}")
|
||||
return None
|
||||
|
||||
|
||||
def get_iree_vulkan_args():
|
||||
# vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
|
||||
vulkan_flag = []
|
||||
vulkan_triple_flag = get_vulkan_triple_flag()
|
||||
if vulkan_triple_flag is not None:
|
||||
vulkan_flag.append(vulkan_triple_flag)
|
||||
return vulkan_flag
|
||||
@@ -21,14 +21,11 @@ from iree.compiler import ir
|
||||
from iree.compiler.transforms import ireec as ireec_trans
|
||||
|
||||
MATMUL_OP_NAMES = set(
|
||||
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"]
|
||||
)
|
||||
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
|
||||
idx = 0
|
||||
|
||||
|
||||
def model_annotation(
|
||||
ctx: ir.Context, *, input_contents: str, config_path: str
|
||||
):
|
||||
def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
|
||||
if os.path.isfile(input_contents):
|
||||
with open(input_contents, "rb") as f:
|
||||
input_contents = f.read()
|
||||
@@ -50,8 +47,7 @@ def model_annotation(
|
||||
# - Disables verification (already done above)
|
||||
# - Writes as binary, avoiding costly unicode conversions
|
||||
sys.stdout.buffer.write(
|
||||
module.operation.get_asm(assume_verified=True, binary=True)
|
||||
)
|
||||
module.operation.get_asm(assume_verified=True, binary=True))
|
||||
return module
|
||||
|
||||
|
||||
@@ -65,21 +61,14 @@ def walk_children(op: ir.Operation, configs: List[Dict]):
|
||||
child_op = child_op.operation
|
||||
if child_op.name in MATMUL_OP_NAMES:
|
||||
global idx
|
||||
(
|
||||
tile_sizes,
|
||||
pipeline,
|
||||
workgroup_size,
|
||||
split_k,
|
||||
pipeline_depth,
|
||||
) = parse_config(configs[idx])
|
||||
tile_sizes, pipeline, workgroup_size, \
|
||||
split_k, pipeline_depth = parse_config(configs[idx])
|
||||
|
||||
add_compilation_info(
|
||||
child_op,
|
||||
tile_sizes=tile_sizes,
|
||||
pipeline=pipeline,
|
||||
workgroup_size=workgroup_size,
|
||||
pipeline_depth=pipeline_depth,
|
||||
)
|
||||
add_compilation_info(child_op,
|
||||
tile_sizes=tile_sizes,
|
||||
pipeline=pipeline,
|
||||
workgroup_size=workgroup_size,
|
||||
pipeline_depth=pipeline_depth)
|
||||
|
||||
if split_k:
|
||||
add_split_k(child_op, split_k)
|
||||
@@ -91,11 +80,8 @@ def walk_children(op: ir.Operation, configs: List[Dict]):
|
||||
|
||||
def parse_config(config: Dict):
|
||||
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
|
||||
pipeline = (
|
||||
"LLVMGPUMatmulSimt"
|
||||
if config["pipeline"] == "GPU"
|
||||
else "LLVMGPUMatmulTensorCore"
|
||||
)
|
||||
pipeline = "LLVMGPUMatmulSimt" if config[
|
||||
"pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
|
||||
tile_sizes = [config["work_group_tile_sizes"]]
|
||||
workgroup_size = config["work_group_sizes"]
|
||||
try:
|
||||
@@ -109,9 +95,8 @@ def parse_config(config: Dict):
|
||||
else:
|
||||
pipeline = config["pipeline"]
|
||||
tile_sizes = [
|
||||
config["work_group_tile_sizes"],
|
||||
config["l1_tile_sizes"],
|
||||
config["vector_tile_sizes"],
|
||||
config["work_group_tile_sizes"], config["l1_tile_sizes"],
|
||||
config["vector_tile_sizes"]
|
||||
]
|
||||
workgroup_size = []
|
||||
split_k = None
|
||||
@@ -119,13 +104,9 @@ def parse_config(config: Dict):
|
||||
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
|
||||
|
||||
|
||||
def add_compilation_info(
|
||||
op: ir.Operation,
|
||||
tile_sizes: List[List[int]],
|
||||
pipeline: str,
|
||||
workgroup_size: List[int],
|
||||
pipeline_depth: int,
|
||||
):
|
||||
def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
|
||||
pipeline: str, workgroup_size: List[int],
|
||||
pipeline_depth: int):
|
||||
# We don't have a Python binding for CompilationInfo, so we just parse
|
||||
# its string form.
|
||||
if pipeline_depth:
|
||||
@@ -133,15 +114,13 @@ def add_compilation_info(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>"
|
||||
)
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
else:
|
||||
attr = ir.Attribute.parse(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>"
|
||||
)
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
op.attributes["compilation_info"] = attr
|
||||
|
||||
|
||||
@@ -159,6 +138,6 @@ def create_context() -> ir.Context:
|
||||
|
||||
if __name__ == "__main__":
|
||||
with create_context() as ctx:
|
||||
model_annotation(
|
||||
ctx, input_contents=sys.argv[1], config_path=sys.argv[2]
|
||||
)
|
||||
model_annotation(ctx,
|
||||
input_contents=sys.argv[1],
|
||||
config_path=sys.argv[2])
|
||||
|
||||
@@ -20,8 +20,8 @@ def dir_path(path):
|
||||
if os.path.isdir(path):
|
||||
return path
|
||||
else:
|
||||
os.mkdir(path)
|
||||
return path
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"readable_dir:{path} is not a valid path")
|
||||
|
||||
|
||||
def dir_file(path):
|
||||
@@ -29,52 +29,43 @@ def dir_file(path):
|
||||
return path
|
||||
else:
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"readable_file:{path} is not a valid file"
|
||||
)
|
||||
f"readable_file:{path} is not a valid file")
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description="SHARK runner.")
|
||||
parser = argparse.ArgumentParser(description='SHARK runner.')
|
||||
parser.add_argument(
|
||||
"--device",
|
||||
type=str,
|
||||
default="cpu",
|
||||
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan",
|
||||
)
|
||||
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan")
|
||||
parser.add_argument(
|
||||
"--repro_dir",
|
||||
help="Directory to which module files will be saved for reproduction or debugging.",
|
||||
help=
|
||||
"Directory to which module files will be saved for reproduction or debugging.",
|
||||
type=dir_path,
|
||||
default="./shark_tmp",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable_tf32",
|
||||
type=bool,
|
||||
default=False,
|
||||
help="Enables TF32 precision calculations on supported GPUs.",
|
||||
)
|
||||
default="/tmp/")
|
||||
parser.add_argument("--save_mlir",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Saves input MLIR module to /tmp/ directory.")
|
||||
parser.add_argument("--save_vmfb",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Saves iree .vmfb module to /tmp/ directory.")
|
||||
parser.add_argument(
|
||||
"--model_config_path",
|
||||
help="Directory to where the tuned model config file is located.",
|
||||
default=None,
|
||||
)
|
||||
default=None)
|
||||
|
||||
parser.add_argument(
|
||||
"--num_warmup_iterations",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Run the model for the specified number of warmup iterations.",
|
||||
)
|
||||
default=2,
|
||||
help="Run the model for the specified number of warmup iterations.")
|
||||
parser.add_argument(
|
||||
"--num_iterations",
|
||||
type=int,
|
||||
default=100,
|
||||
help="Run the model for the specified number of iterations.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--onnx_bench",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="When enabled, pytest bench results will include ONNX benchmark results.",
|
||||
)
|
||||
default=1,
|
||||
help="Run the model for the specified number of iterations.")
|
||||
|
||||
shark_args, unknown = parser.parse_known_args()
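# Example sketch (not part of the commit): since parse_known_args() is used,
# these flags can be appended to any SHARK script invocation, e.g.
#   python some_model_script.py --device=vulkan --num_iterations=10
# and read back anywhere via:
#   from shark.parser import shark_args
#   print(shark_args.device, shark_args.num_iterations)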
|
||||
|
||||
@@ -1,301 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from shark.shark_runner import SharkRunner
|
||||
from shark.iree_utils.compile_utils import export_iree_module_to_vmfb
|
||||
from shark.iree_utils.benchmark_utils import (
|
||||
build_benchmark_args,
|
||||
run_benchmark_module,
|
||||
)
|
||||
from shark.parser import shark_args
|
||||
from datetime import datetime
|
||||
import time
|
||||
import csv
|
||||
import os
|
||||
|
||||
|
||||
class OnnxFusionOptions(object):
|
||||
def __init__(self):
|
||||
self.disable_gelu = False
|
||||
self.disable_layer_norm = False
|
||||
self.disable_attention = False
|
||||
self.disable_skip_layer_norm = False
|
||||
self.disable_embed_layer_norm = False
|
||||
self.disable_bias_skip_layer_norm = False
|
||||
self.disable_bias_gelu = False
|
||||
self.enable_gelu_approximation = False
|
||||
self.use_mask_index = False
|
||||
self.no_attention_mask = False
|
||||
|
||||
|
||||
class SharkBenchmarkRunner(SharkRunner):
|
||||
# SharkRunner derived class with Benchmarking capabilities.
|
||||
def __init__(
|
||||
self,
|
||||
mlir_module: str,
|
||||
function_name: str = "forward",
|
||||
device: str = "none",
|
||||
mlir_dialect: str = "linalg",
|
||||
):
|
||||
self.device = shark_args.device if device == "none" else device
|
||||
self.frontend_model = None
|
||||
self.vmfb_file = None
|
||||
self.mlir_dialect = mlir_dialect
|
||||
SharkRunner.__init__(
|
||||
self,
|
||||
mlir_module,
|
||||
function_name,
|
||||
device,
|
||||
self.mlir_dialect,
|
||||
)
|
||||
if self.vmfb_file == None:
|
||||
self.vmfb_file = export_iree_module_to_vmfb(
|
||||
mlir_module, device, shark_args.repro_dir, self.mlir_dialect
|
||||
)
|
||||
|
||||
def setup_cl(self, input_tensors):
|
||||
self.benchmark_cl = build_benchmark_args(
|
||||
self.vmfb_file,
|
||||
self.device,
|
||||
input_tensors,
|
||||
mlir_dialect=self.mlir_dialect,
|
||||
)
|
||||
# print(self.benchmark_cl)
|
||||
|
||||
    def benchmark_frontend(self, modelname):
        if self.mlir_dialect in ["linalg", "torch"]:
            return self.benchmark_torch(modelname)
        elif self.mlir_dialect in ["mhlo", "tf"]:
            return self.benchmark_tf(modelname)

    def benchmark_torch(self, modelname):
        import torch
        from tank.model_utils import get_torch_model

        if self.device == "gpu":
            torch.set_default_tensor_type(torch.cuda.FloatTensor)
        else:
            torch.set_default_tensor_type(torch.FloatTensor)
        torch_device = torch.device(
            "cuda:0" if self.device == "gpu" else "cpu"
        )
        HFmodel, input = get_torch_model(modelname)[:2]
        frontend_model = HFmodel.model
        frontend_model.to(torch_device)
        input.to(torch_device)

        for i in range(shark_args.num_warmup_iterations):
            frontend_model.forward(input)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = frontend_model.forward(input)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
        return [
            f"{shark_args.num_iterations/(end-begin)}",
            f"{((end-begin)/shark_args.num_iterations)*1000}",
        ]

    def benchmark_tf(self, modelname):
        import tensorflow as tf
        from tank.model_utils_tf import get_tf_model

        model, input, = get_tf_model(
            modelname
        )[:2]
        frontend_model = model

        for i in range(shark_args.num_warmup_iterations):
            frontend_model.forward(*input)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = frontend_model.forward(*input)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
        return [
            f"{shark_args.num_iterations/(end-begin)}",
            f"{((end-begin)/shark_args.num_iterations)*1000}",
        ]

    def benchmark_c(self):
        print(self.benchmark_cl)
        result = run_benchmark_module(self.benchmark_cl)
        print(f"Shark-IREE-C benchmark:{result} iter/second")
        return [f"{result}", f"{1000/result}"]

    def benchmark_python(self, inputs):
        input_list = [x for x in inputs]
        for i in range(shark_args.num_warmup_iterations):
            self.run(input_list)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.run(input_list)
            if i == shark_args.num_iterations - 1:
                end = time.time()
        print(
            f"Shark-IREE Python benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
        return [
            f"{shark_args.num_iterations/(end-begin)}",
            f"{((end-begin)/shark_args.num_iterations)*1000}",
        ]

    def benchmark_onnx(self, modelname, inputs):
        if self.device == "gpu":
            print(
                "Currently GPU benchmarking on ONNX is not supported in SHARK."
            )
            return ["N/A", "N/A"]
        else:
            from onnxruntime.transformers.benchmark import run_onnxruntime
            from onnxruntime.transformers.huggingface_models import MODELS
            from onnxruntime.transformers.benchmark_helper import (
                ConfigModifier,
                Precision,
            )
            import psutil

            if modelname == "microsoft/MiniLM-L12-H384-uncased":
                modelname = "bert-base-uncased"
            if modelname not in MODELS:
                print(
                    f"{modelname} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX."
                )
                return ["N/A", "N/A"]
            use_gpu = self.device == "gpu"
            num_threads = psutil.cpu_count(logical=False)
            batch_sizes = [1]
            sequence_lengths = [128]
            cache_dir = os.path.join(".", "cache_models")
            onnx_dir = os.path.join(".", "onnx_models")
            verbose = False
            input_counts = [1]
            optimize_onnx = True
            validate_onnx = False
            disable_ort_io_binding = False
            use_raw_attention_mask = True
            model_fusion_statistics = {}
            overwrite = False
            model_source = "pt"  # Either "pt" or "tf"
            provider = None
            config_modifier = ConfigModifier(None)
            onnx_args = OnnxFusionOptions()
            result = run_onnxruntime(
                use_gpu,
                provider,
                (modelname,),
                None,
                config_modifier,
                Precision.FLOAT32,
                num_threads,
                batch_sizes,
                sequence_lengths,
                shark_args.num_iterations,
                input_counts,
                optimize_onnx,
                validate_onnx,
                cache_dir,
                onnx_dir,
                verbose,
                overwrite,
                disable_ort_io_binding,
                use_raw_attention_mask,
                model_fusion_statistics,
                model_source,
                onnx_args,
            )
            print(
                f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
            )
            return [
                result[0]["QPS"],
                result[0]["average_latency_ms"],
            ]

    def benchmark_all_csv(
        self, inputs: tuple, modelname, dynamic, device_str, frontend
    ):
        self.setup_cl(inputs)
        field_names = [
            "model",
            "engine",
            "dynamic",
            "dialect",
            "device",
            "iter/sec",
            "ms/iter",
            "iterations",
            "datetime",
        ]
        engines = ["frontend", "shark_python", "shark_iree_c"]
        if shark_args.onnx_bench == True:
            engines.append("onnxruntime")

        if not os.path.exists("bench_results.csv"):
            with open("bench_results.csv", mode="w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(field_names)

        with open("bench_results.csv", mode="a", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            bench_result = {}
            bench_result["model"] = modelname
            if dynamic == True:
                bench_result["dynamic"] = "True"
            else:
                bench_result["dynamic"] = "False"
            bench_result["device"] = device_str
            for e in engines:
                if e == "frontend":
                    bench_result["engine"] = frontend
                    (
                        bench_result["iter/sec"],
                        bench_result["ms/iter"],
                    ) = self.benchmark_frontend(modelname)
                elif e == "shark_python":
                    bench_result["engine"] = "shark_python"
                    (
                        bench_result["iter/sec"],
                        bench_result["ms/iter"],
                    ) = self.benchmark_python(inputs)
                elif e == "shark_iree_c":
                    bench_result["engine"] = "shark_iree_c"
                    (
                        bench_result["iter/sec"],
                        bench_result["ms/iter"],
                    ) = self.benchmark_c()
                elif e == "onnxruntime":
                    bench_result["engine"] = "onnxruntime"
                    (
                        bench_result["iter/sec"],
                        bench_result["ms/iter"],
                    ) = self.benchmark_onnx(modelname, inputs)

                bench_result["dialect"] = self.mlir_dialect
                bench_result["iterations"] = shark_args.num_iterations
                bench_result["datetime"] = str(datetime.now())
                writer.writerow(bench_result)

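# A minimal sketch (hypothetical `measure` helper, not taken from the SHARK
# sources) of the timing math every benchmark_* method above shares: throughput
# is num_iterations / elapsed seconds, latency is its inverse in milliseconds.

import time


def measure(run_fn, num_iterations, num_warmup_iterations=1):
    # Warm-up runs keep one-time setup cost out of the measurement.
    for _ in range(num_warmup_iterations):
        run_fn()
    begin = time.time()
    for _ in range(num_iterations):
        run_fn()
    end = time.time()
    iter_per_sec = num_iterations / (end - begin)
    ms_per_iter = ((end - begin) / num_iterations) * 1000
    return [f"{iter_per_sec}", f"{ms_per_iter}"]


# benchmark_all_csv then records one such [iter/sec, ms/iter] pair per engine
# as a row of bench_results.csv, alongside model, dialect, device and datetime.
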
@@ -1,236 +0,0 @@
# Lint as: python3
"""SHARK Downloader"""
# Requirements : Put shark_tank in SHARK directory
#   /SHARK
#     /gen_shark_tank
#       /tflite
#         /albert_lite_base
#         /...model_name...
#       /tf
#       /pytorch
#
#
#

import numpy as np
import os
import urllib.request
import json
import hashlib
from pathlib import Path

input_type_to_np_dtype = {
    "float32": np.float32,
    "float64": np.float64,
    "bool": np.bool_,
    "int32": np.int32,
    "int64": np.int64,
    "uint8": np.uint8,
    "int8": np.int8,
}

# default hash is updated when nightly populate_sharktank_ci is successful
shark_default_sha = "latest"

# Save the model in the home local so it needn't be fetched everytime in the CI.
home = str(Path.home())
WORKDIR = os.path.join(home, ".local/shark_tank/")
print(WORKDIR)


# Checks whether the directory and files exists.
def check_dir_exists(model_name, frontend="torch", dynamic=""):
    model_dir = os.path.join(WORKDIR, model_name)

    # Remove the _tf keyword from end.
    if frontend in ["tf", "tensorflow"]:
        model_name = model_name[:-3]
    elif frontend in ["tflite"]:
        model_name = model_name[:-7]
    elif frontend in ["torch", "pytorch"]:
        model_name = model_name[:-6]

    if os.path.isdir(model_dir):
        if (
            os.path.isfile(
                os.path.join(
                    model_dir,
                    model_name + dynamic + "_" + str(frontend) + ".mlir",
                )
            )
            and os.path.isfile(os.path.join(model_dir, "function_name.npy"))
            and os.path.isfile(os.path.join(model_dir, "inputs.npz"))
            and os.path.isfile(os.path.join(model_dir, "golden_out.npz"))
            and os.path.isfile(os.path.join(model_dir, "hash.npy"))
        ):
            print(
                f"""The models are present in the {WORKDIR}. If you want a fresh
                download, consider deleting the directory."""
            )
            return True
    return False


# Downloads the torch model from gs://shark_tank dir.
def download_torch_model(model_name, dynamic=False):
    model_name = model_name.replace("/", "_")
    dyn_str = "_dynamic" if dynamic else ""
    os.makedirs(WORKDIR, exist_ok=True)
    model_dir_name = model_name + "_torch"

    def gs_download_model():
        gs_command = (
            'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
            + shark_default_sha
            + "/"
            + model_dir_name
            + " "
            + WORKDIR
        )
        if os.system(gs_command) != 0:
            raise Exception("model not present in the tank. Contact Nod Admin")

    if not check_dir_exists(model_dir_name, frontend="torch", dynamic=dyn_str):
        gs_download_model()
    else:
        model_dir = os.path.join(WORKDIR, model_dir_name)
        local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
        gs_hash = (
            'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
            + shark_default_sha
            + "/"
            + model_dir_name
            + "/hash.npy"
            + " "
            + os.path.join(model_dir, "upstream_hash.npy")
        )
        if os.system(gs_hash) != 0:
            raise Exception("hash of the model not present in the tank.")
        upstream_hash = str(
            np.load(os.path.join(model_dir, "upstream_hash.npy"))
        )
        if local_hash != upstream_hash:
            gs_download_model()

    model_dir = os.path.join(WORKDIR, model_dir_name)
    with open(
        os.path.join(model_dir, model_name + dyn_str + "_torch.mlir")
    ) as f:
        mlir_file = f.read()

    function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
    inputs = np.load(os.path.join(model_dir, "inputs.npz"))
    golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))

    inputs_tuple = tuple([inputs[key] for key in inputs])
    golden_out_tuple = tuple([golden_out[key] for key in golden_out])
    return mlir_file, function_name, inputs_tuple, golden_out_tuple


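# A short usage sketch for the downloader above. The model name is illustrative
# and assumes the matching artifacts exist under gs://shark_tank; the module
# paths and SharkInference API are the ones shown elsewhere in this comparison.

from shark.shark_downloader import download_torch_model
from shark.shark_inference import SharkInference

mlir_module, func_name, inputs, golden_out = download_torch_model(
    "bert-base-uncased", dynamic=False
)
shark_module = SharkInference(
    mlir_module, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
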
# Downloads the tflite model from gs://shark_tank dir.
|
||||
def download_tflite_model(model_name, dynamic=False):
|
||||
dyn_str = "_dynamic" if dynamic else ""
|
||||
os.makedirs(WORKDIR, exist_ok=True)
|
||||
model_dir_name = model_name + "_tflite"
|
||||
|
||||
def gs_download_model():
|
||||
gs_command = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ " "
|
||||
+ WORKDIR
|
||||
)
|
||||
if os.system(gs_command) != 0:
|
||||
raise Exception("model not present in the tank. Contact Nod Admin")
|
||||
|
||||
if not check_dir_exists(
|
||||
model_dir_name, frontend="tflite", dynamic=dyn_str
|
||||
):
|
||||
gs_download_model()
|
||||
else:
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
|
||||
gs_hash = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ "/hash.npy"
|
||||
+ " "
|
||||
+ os.path.join(model_dir, "upstream_hash.npy")
|
||||
)
|
||||
if os.system(gs_hash) != 0:
|
||||
raise Exception("hash of the model not present in the tank.")
|
||||
upstream_hash = str(
|
||||
np.load(os.path.join(model_dir, "upstream_hash.npy"))
|
||||
)
|
||||
if local_hash != upstream_hash:
|
||||
gs_download_model()
|
||||
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
with open(
|
||||
os.path.join(model_dir, model_name + dyn_str + "_tflite.mlir")
|
||||
) as f:
|
||||
mlir_file = f.read()
|
||||
|
||||
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
|
||||
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
|
||||
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
|
||||
|
||||
inputs_tuple = tuple([inputs[key] for key in inputs])
|
||||
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
|
||||
return mlir_file, function_name, inputs_tuple, golden_out_tuple
|
||||
|
||||
|
||||
def download_tf_model(model_name):
|
||||
model_name = model_name.replace("/", "_")
|
||||
os.makedirs(WORKDIR, exist_ok=True)
|
||||
model_dir_name = model_name + "_tf"
|
||||
|
||||
def gs_download_model():
|
||||
gs_command = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ " "
|
||||
+ WORKDIR
|
||||
)
|
||||
if os.system(gs_command) != 0:
|
||||
raise Exception("model not present in the tank. Contact Nod Admin")
|
||||
|
||||
if not check_dir_exists(model_dir_name, frontend="tf"):
|
||||
gs_download_model()
|
||||
else:
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
|
||||
gs_hash = (
|
||||
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
|
||||
+ shark_default_sha
|
||||
+ "/"
|
||||
+ model_dir_name
|
||||
+ "/hash.npy"
|
||||
+ " "
|
||||
+ os.path.join(model_dir, "upstream_hash.npy")
|
||||
)
|
||||
if os.system(gs_hash) != 0:
|
||||
raise Exception("hash of the model not present in the tank.")
|
||||
upstream_hash = str(
|
||||
np.load(os.path.join(model_dir, "upstream_hash.npy"))
|
||||
)
|
||||
if local_hash != upstream_hash:
|
||||
gs_download_model()
|
||||
|
||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||
with open(os.path.join(model_dir, model_name + "_tf.mlir")) as f:
|
||||
mlir_file = f.read()
|
||||
|
||||
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
|
||||
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
|
||||
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
|
||||
|
||||
inputs_tuple = tuple([inputs[key] for key in inputs])
|
||||
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
|
||||
return mlir_file, function_name, inputs_tuple, golden_out_tuple
|
||||
@@ -1,236 +1,136 @@
|
||||
# Lint as: python3
|
||||
"""SHARK Importer"""
|
||||
|
||||
import sys
|
||||
import tempfile
|
||||
import iree.compiler.tflite as iree_tflite_compile
|
||||
import iree.runtime as iree_rt
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# List of the supported frontends.
|
||||
supported_frontends = {
|
||||
"tensorflow",
|
||||
"tf",
|
||||
"pytorch",
|
||||
"torch",
|
||||
"tf-lite",
|
||||
"tflite",
|
||||
}
|
||||
import sys
|
||||
import tensorflow.compat.v2 as tf
|
||||
import urllib.request
|
||||
from shark.shark_inference import SharkInference
|
||||
|
||||
|
||||
class SharkImporter:
|
||||
"""
|
||||
SharkImporter converts frontend modules into a
|
||||
mlir_module. The supported frameworks are tensorflow,
|
||||
pytorch, and tf-lite.
|
||||
|
||||
...
|
||||
def __init__(self,
|
||||
model_path,
|
||||
model_type: str = "tflite",
|
||||
model_source_hub: str = "tfhub",
|
||||
device: str = None,
|
||||
dynamic: bool = False,
|
||||
jit_trace: bool = False,
|
||||
benchmark_mode: bool = False):
|
||||
self.model_path = model_path
|
||||
self.model_type = model_type
|
||||
self.model_source_hub = model_source_hub
|
||||
self.device = device
|
||||
self.dynamic = dynamic
|
||||
self.jit_trace = jit_trace
|
||||
self.benchmark_mode = benchmark_mode
|
||||
self.inputs = None
|
||||
self.input_details = None
|
||||
self.output_details = None
|
||||
|
||||
Attributes
|
||||
----------
|
||||
module :
|
||||
torch, tensorflow or tf-lite module.
|
||||
inputs :
|
||||
inputs to the module, may be required for the shape
|
||||
information.
|
||||
frontend: str
|
||||
frontend to which the module belongs.
|
||||
raw_model_file: str
|
||||
temp tflite model path
|
||||
# create tmp model file directory
|
||||
if self.model_path is None:
|
||||
print("Error. No model_path, Please input model path.")
|
||||
return
|
||||
|
||||
Methods
|
||||
-------
|
||||
import_mlir(is_dynamic, tracing_required, func_name):
|
||||
is_dynamic: input shapes to be totally dynamic (pytorch specific).
|
||||
tracing_required: whether tracing is required (pytorch specific.
|
||||
func_name: The function to be traced out or imported to mlir.
|
||||
|
||||
import_debug(is_dynamic, tracing_required, func_name):
|
||||
returns the converted (mlir_module,func_name) with inputs and golden
|
||||
outputs.
|
||||
The inputs and outputs are converted into np array.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
module,
|
||||
inputs: tuple = (),
|
||||
frontend: str = "torch",
|
||||
raw_model_file: str = "",
|
||||
):
|
||||
self.module = module
|
||||
self.inputs = None if len(inputs) == 0 else inputs
|
||||
self.frontend = frontend
|
||||
if not self.frontend in supported_frontends:
|
||||
print(
|
||||
f"The frontend is not in the supported_frontends: {supported_frontends}"
|
||||
)
|
||||
sys.exit(1)
|
||||
self.raw_model_file = raw_model_file
|
||||
|
||||
# NOTE: The default function for torch is "forward" and tf-lite is "main".
|
||||
|
||||
def _torch_mlir(self, is_dynamic, tracing_required):
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module
|
||||
|
||||
return get_torch_mlir_module(
|
||||
self.module, self.inputs, is_dynamic, tracing_required
|
||||
)
|
||||
|
||||
def _tf_mlir(self, func_name):
|
||||
from iree.compiler import tf as tfc
|
||||
|
||||
return tfc.compile_module(
|
||||
self.module, exported_names=[func_name], import_only=True
|
||||
)
|
||||
|
||||
def _tflite_mlir(self, func_name):
|
||||
from iree.compiler import tflite as tflitec
|
||||
from shark.iree_utils._common import IREE_TARGET_MAP
|
||||
|
||||
self.mlir_model = tflitec.compile_file(
|
||||
self.raw_model_file, # in tflite, it is a path to .tflite file, not a tflite interpreter
|
||||
input_type="tosa",
|
||||
import_only=True,
|
||||
)
|
||||
return self.mlir_model
|
||||
|
||||
# Adds the conversion of the frontend with the private function.
|
||||
def import_mlir(
|
||||
self,
|
||||
is_dynamic=False,
|
||||
tracing_required=False,
|
||||
func_name="forward",
|
||||
):
|
||||
if self.frontend in ["torch", "pytorch"]:
|
||||
if self.inputs == None:
|
||||
print(
|
||||
"Please pass in the inputs, the inputs are required to determine the shape of the mlir_module"
|
||||
if self.model_source_hub == "tfhub":
|
||||
# compile and run tfhub tflite
|
||||
if self.model_type == "tflite":
|
||||
print("Setting up for TMP_DIR")
|
||||
exe_basename = os.path.basename(sys.argv[0])
|
||||
self.workdir = os.path.join(os.path.dirname(__file__), "tmp",
|
||||
exe_basename)
|
||||
print(f"TMP_DIR = {self.workdir}")
|
||||
os.makedirs(self.workdir, exist_ok=True)
|
||||
self.tflite_file = '/'.join([self.workdir, 'model.tflite'])
|
||||
print("Setting up local address for tflite model file: ",
|
||||
self.tflite_file)
|
||||
if os.path.exists(self.model_path):
|
||||
self.tflite_file = self.model_path
|
||||
else:
|
||||
print("Download tflite model")
|
||||
urllib.request.urlretrieve(self.model_path,
|
||||
self.tflite_file)
|
||||
print("Setting up tflite interpreter")
|
||||
self.tflite_interpreter = tf.lite.Interpreter(
|
||||
model_path=self.tflite_file)
|
||||
self.tflite_interpreter.allocate_tensors()
|
||||
# default input initialization
|
||||
self.input_details, self.output_details = self.get_model_details(
|
||||
)
|
||||
sys.exit(1)
|
||||
return self._torch_mlir(is_dynamic, tracing_required), func_name
|
||||
if self.frontend in ["tf", "tensorflow"]:
|
||||
return self._tf_mlir(func_name), func_name
|
||||
if self.frontend in ["tflite", "tf-lite"]:
|
||||
func_name = "main"
|
||||
return self._tflite_mlir(func_name), func_name
|
||||
inputs = self.generate_inputs(
|
||||
self.input_details) # device_inputs
|
||||
self.setup_inputs(inputs)
|
||||
|
||||
# Converts the frontend specific tensors into np array.
|
||||
def convert_to_numpy(self, array_tuple: tuple):
|
||||
if self.frontend in ["torch", "pytorch"]:
|
||||
return [x.detach().numpy() for x in array_tuple]
|
||||
if self.frontend in ["tf", "tensorflow"]:
|
||||
return [x.numpy() for x in array_tuple]
|
||||
def generate_inputs(self, input_details):
|
||||
args = []
|
||||
for input in input_details:
|
||||
print(str(input["shape"]), input["dtype"].__name__)
|
||||
args.append(np.zeros(shape=input["shape"], dtype=input["dtype"]))
|
||||
return args
|
||||
|
||||
# Saves `function_name.npy`, `inputs.npz`, `golden_out.npz` and `model_name.mlir` in the directory `dir`.
|
||||
def save_data(
|
||||
self, dir, model_name, mlir_data, func_name, inputs, outputs
|
||||
):
|
||||
import numpy as np
|
||||
def get_model_details(self):
|
||||
if self.model_type == "tflite":
|
||||
print("Get tflite input output details")
|
||||
self.input_details = self.tflite_interpreter.get_input_details()
|
||||
self.output_details = self.tflite_interpreter.get_output_details()
|
||||
return self.input_details, self.output_details
|
||||
|
||||
inputs_name = "inputs.npz"
|
||||
outputs_name = "golden_out.npz"
|
||||
func_file_name = "function_name"
|
||||
model_name_mlir = model_name + "_" + self.frontend + ".mlir"
|
||||
np.savez(os.path.join(dir, inputs_name), *inputs)
|
||||
np.savez(os.path.join(dir, outputs_name), *outputs)
|
||||
np.save(os.path.join(dir, func_file_name), np.array(func_name))
|
||||
def setup_inputs(self, inputs):
|
||||
print("Setting up inputs")
|
||||
self.inputs = inputs
|
||||
|
||||
mlir_str = mlir_data
|
||||
if self.frontend == "torch":
|
||||
mlir_str = mlir_data.operation.get_asm()
|
||||
elif self.frontend == "tf":
|
||||
mlir_str = mlir_data.decode("utf-8")
|
||||
elif self.frontend == "tflite":
|
||||
mlir_str = mlir_data.decode("utf-8")
|
||||
with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
|
||||
mlir_file.write(mlir_str)
|
||||
def compile(self, inputs=None):
|
||||
if inputs is not None:
|
||||
self.setup_inputs(inputs)
|
||||
# preprocess model_path to get model_type and Model Source Hub
|
||||
print("Shark Importer Intialize SharkInference and Do Compile")
|
||||
if self.model_source_hub == "tfhub":
|
||||
# compile and run tfhub tflite
|
||||
print("Inference tfhub model")
|
||||
self.shark_module = SharkInference(self.tflite_file,
|
||||
self.inputs,
|
||||
device=self.device,
|
||||
dynamic=self.dynamic,
|
||||
jit_trace=self.jit_trace)
|
||||
self.shark_module.set_frontend("tflite")
|
||||
self.shark_module.compile()
|
||||
elif self.model_source_hub == "huggingface":
|
||||
print("Inference", self.model_source_hub, " not implemented yet")
|
||||
elif self.model_source_hub == "jaxhub":
|
||||
print("Inference", self.model_source_hub, " not implemented yet")
|
||||
|
||||
return
|
||||
def forward(self, inputs=None):
|
||||
if inputs is not None:
|
||||
self.setup_inputs(inputs)
|
||||
# preprocess model_path to get model_type and Model Source Hub
|
||||
print("Shark Importer forward Model")
|
||||
if self.model_source_hub == "tfhub":
|
||||
shark_results = self.shark_module.forward(self.inputs)
|
||||
# Fix type information for unsigned cases.
|
||||
# for test compare result
|
||||
shark_results = list(shark_results)
|
||||
for i in range(len(self.output_details)):
|
||||
dtype = self.output_details[i]["dtype"]
|
||||
shark_results[i] = shark_results[i].astype(dtype)
|
||||
return shark_results
|
||||
elif self.model_source_hub == "huggingface":
|
||||
print("Inference", self.model_source_hub, " not implemented yet")
|
||||
elif self.model_source_hub == "jaxhub":
|
||||
print("Inference", self.model_source_hub, " not implemented yet")
|
||||
|
||||
def import_debug(
|
||||
self,
|
||||
is_dynamic=False,
|
||||
tracing_required=False,
|
||||
func_name="forward",
|
||||
dir=tempfile.gettempdir(),
|
||||
model_name="model",
|
||||
):
|
||||
if self.inputs == None:
|
||||
print(
|
||||
f"There is no input provided: {self.inputs}, please provide inputs or simply run import_mlir."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
imported_mlir = self.import_mlir(
|
||||
is_dynamic, tracing_required, func_name
|
||||
def shark_load(model_name, file_path):
|
||||
file_link = f"https://storage.googleapis.com/shark_tank/users/stanley/{model_name}.mlir"
|
||||
response = urllib.request.urlretrieve(file_link, file_path)
|
||||
if not os.path.isfile(file_path):
|
||||
raise ValueError(
|
||||
f"Tried looking for target mlir in {file_path}, but cannot be found."
|
||||
)
|
||||
# TODO: Make sure that any generic function name is accepted. Currently takes in the default function names.
|
||||
# TODO: Check for multiple outputs.
|
||||
if self.frontend in ["torch", "pytorch"]:
|
||||
import torch
|
||||
|
||||
golden_out = self.module(*self.inputs)
|
||||
if torch.is_tensor(golden_out):
|
||||
golden_out = tuple(
|
||||
golden_out.detach().numpy(),
|
||||
)
|
||||
else:
|
||||
golden_out = self.convert_to_numpy(golden_out)
|
||||
# Save the artifacts in the directory dir.
|
||||
self.save_data(
|
||||
dir,
|
||||
model_name,
|
||||
imported_mlir[0],
|
||||
imported_mlir[1],
|
||||
self.inputs,
|
||||
golden_out,
|
||||
)
|
||||
return (
|
||||
imported_mlir,
|
||||
self.convert_to_numpy(self.inputs),
|
||||
golden_out,
|
||||
)
|
||||
if self.frontend in ["tf", "tensorflow"]:
|
||||
import tensorflow as tf
|
||||
|
||||
golden_out = self.module.forward(*self.inputs)
|
||||
if tf.is_tensor(golden_out):
|
||||
golden_out = tuple(
|
||||
golden_out.numpy(),
|
||||
)
|
||||
elif golden_out is tuple:
|
||||
golden_out = self.convert_to_numpy(golden_out)
|
||||
elif hasattr(golden_out, "logits"):
|
||||
# from transformers import TFSequenceClassifierOutput
|
||||
golden_out = golden_out.logits
|
||||
else:
|
||||
golden_out = golden_out.last_hidden_state
|
||||
# Save the artifacts in the directory dir.
|
||||
self.save_data(
|
||||
dir,
|
||||
model_name,
|
||||
imported_mlir[0],
|
||||
imported_mlir[1],
|
||||
self.inputs,
|
||||
golden_out,
|
||||
)
|
||||
return (
|
||||
imported_mlir,
|
||||
self.convert_to_numpy(self.inputs),
|
||||
golden_out,
|
||||
)
|
||||
if self.frontend in ["tflite", "tf-lite"]:
|
||||
# TODO(Chi): Validate it for tflite models.
|
||||
golden_out = self.module.invoke_tflite(self.inputs)
|
||||
self.save_data(
|
||||
dir,
|
||||
model_name,
|
||||
imported_mlir[0],
|
||||
imported_mlir[1],
|
||||
self.inputs,
|
||||
golden_out,
|
||||
)
|
||||
return (
|
||||
imported_mlir,
|
||||
self.inputs,
|
||||
golden_out,
|
||||
)
|
||||
with open(file_path, "rb") as input_file:
|
||||
model_mlir = input_file.read()
|
||||
return model_mlir
|
||||
|
||||
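# A minimal usage sketch for the SharkImporter shown above, using its torch
# path. TinyModel and its input are hypothetical stand-ins for a real
# torch.nn.Module and example inputs.

import torch

from shark.shark_importer import SharkImporter


class TinyModel(torch.nn.Module):
    def forward(self, x):
        return x * 2 + 1


importer = SharkImporter(
    module=TinyModel(),
    inputs=(torch.randn(1, 4),),
    frontend="torch",
)
# import_mlir returns (mlir_module, func_name); import_debug additionally
# converts the inputs and golden outputs to numpy and saves them as artifacts.
mlir_module, func_name = importer.import_mlir(
    is_dynamic=False, tracing_required=False, func_name="forward"
)
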
@@ -9,129 +9,107 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from shark.shark_runner import SharkRunner
|
||||
import numpy as np
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
|
||||
import os
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
dtype_to_np_dtype = {
|
||||
"f32": np.float32,
|
||||
"f64": np.float64,
|
||||
"i32": np.int32,
|
||||
"i64": np.int64,
|
||||
"i1": np.bool_,
|
||||
}
|
||||
# Prints to stderr.
|
||||
def print_err(*a):
|
||||
print(*a, file=sys.stderr)
|
||||
|
||||
|
||||
class SharkInference:
|
||||
"""
|
||||
Runs prediction or inference on mlir_module.
|
||||
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
|
||||
|
||||
...
|
||||
def __init__(self,
|
||||
model,
|
||||
input: tuple,
|
||||
device: str = None,
|
||||
dynamic: bool = False,
|
||||
jit_trace: bool = False,
|
||||
benchmark_mode: bool = False):
|
||||
self.model = model
|
||||
self.input = input
|
||||
self.dynamic = dynamic
|
||||
self.jit_trace = jit_trace
|
||||
self.benchmark_mode = benchmark_mode
|
||||
|
||||
Attributes
|
||||
----------
|
||||
mlir_module : str
|
||||
mlir_module represented in string.
|
||||
function_name : str
|
||||
function to execute in the given mlir_module.
|
||||
device : str
|
||||
device to execute the mlir_module on.
|
||||
currently supports cpu, cuda, vulkan, and metal backends.
|
||||
mlir_dialect: str
|
||||
The dialect in which the given mlir_module is in.
|
||||
Refer to {https://mlir.llvm.org/docs/Dialects/}
|
||||
is_benchmark: bool
|
||||
Whether this SharkInference module should be benchmark-enabled.
|
||||
# By default it's torch frontend.
|
||||
self.frontend = "pytorch"
|
||||
|
||||
Methods
|
||||
-------
|
||||
run(inputs=None):
|
||||
Runs the mlir_module with the given inputs, if the inputs are not
|
||||
given it autogenerates the inputs. Also, the inputs should be a
|
||||
numpy array.
|
||||
input_info():
|
||||
Gives the information about the inputs required by the `function_name`.
|
||||
This can be expensive as it does string matching to do so.
|
||||
# Sets the device.
|
||||
self.device = device if device is not None else shark_args.device
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
mlir_module: str,
|
||||
function_name: str = "forward",
|
||||
device: str = "none",
|
||||
mlir_dialect: str = "linalg",
|
||||
is_benchmark: bool = False,
|
||||
):
|
||||
self.mlir_module = mlir_module
|
||||
self.function_name = function_name
|
||||
self.device = device
|
||||
self.mlir_dialect = mlir_dialect
|
||||
self.is_benchmark = is_benchmark
|
||||
self.model_config_path = shark_args.model_config_path
|
||||
|
||||
self.shark_runner = None
|
||||
|
||||
def compile(self):
|
||||
|
||||
if self.is_benchmark == True:
|
||||
from shark.shark_benchmark_runner import SharkBenchmarkRunner
|
||||
|
||||
self.shark_runner = SharkBenchmarkRunner(
|
||||
self.mlir_module,
|
||||
self.function_name,
|
||||
self.device,
|
||||
self.mlir_dialect,
|
||||
)
|
||||
|
||||
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
||||
def set_frontend(self, frontend: str):
|
||||
if frontend not in [
|
||||
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg",
|
||||
"tosa", "tflite"
|
||||
]:
|
||||
print_err("frontend not supported.")
|
||||
else:
|
||||
self.shark_runner = SharkRunner(
|
||||
self.mlir_module,
|
||||
self.function_name,
|
||||
self.device,
|
||||
self.mlir_dialect,
|
||||
)
|
||||
self.frontend = frontend
|
||||
|
||||
# inputs are considered to be tuple of np.array.
|
||||
def forward(self, inputs: tuple):
|
||||
return self.shark_runner.run(inputs)
|
||||
def compile(self):
|
||||
# Inference do not use AOT.
|
||||
from_aot = False
|
||||
if (self.benchmark_mode == True):
|
||||
self.shark_runner = SharkBenchmarkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend)
|
||||
else:
|
||||
self.shark_runner = SharkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend,
|
||||
self.model_config_path)
|
||||
|
||||
# Captures the static input information from the mlir_module.
|
||||
# TODO(pashu123): Generate the input information for dynamic shapes.
|
||||
def _input_info(self):
|
||||
# func_key to get the line which contains the function.
|
||||
func_key = "func.func @" + self.function_name
|
||||
func_header = None
|
||||
for line in str(self.mlir_module).splitlines():
|
||||
if func_key in line:
|
||||
func_header = line
|
||||
break
|
||||
if func_header is None:
|
||||
print(f"Function: {self.function_name} not found")
|
||||
# inputs are considered to be np.array.
|
||||
def forward(self, inputs):
|
||||
input_list = inputs
|
||||
# converts the inputs to numpy.
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
input_list = [x.detach().numpy() for x in inputs]
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
input_list = [x.numpy() for x in inputs]
|
||||
return self.shark_runner.forward(input_list, self.frontend)
|
||||
|
||||
import re
|
||||
# Saves the .vmfb module.
|
||||
def save_module(self, dir=None):
|
||||
if dir is None:
|
||||
return self.shark_runner.save_module()
|
||||
return self.shark_runner.save_module(dir)
|
||||
|
||||
inputs = re.findall("\(.*?\)", func_header)[0].split(",")
|
||||
shapes = []
|
||||
dtype = []
|
||||
for inp in inputs:
|
||||
shape_dtype = re.findall(r"<[^>]*>", inp)[0].split("x")
|
||||
shape_dtype[0], shape_dtype[-1] = (
|
||||
shape_dtype[0][1:],
|
||||
shape_dtype[-1][:-1],
|
||||
)
|
||||
shapes.append(tuple([int(x) for x in shape_dtype[:-1]]))
|
||||
dtype.append(shape_dtype[-1])
|
||||
######### Benchmark Related Functions #########
|
||||
def benchmark_mode(func):
|
||||
|
||||
return shapes, dtype
|
||||
def inner(self, *args, **kwargs):
|
||||
assert self.benchmark_mode, "SharkRunner needs to be in benchmark mode to run benchmark methods."
|
||||
return func(self, *args, **kwargs)
|
||||
|
||||
# Generates random input to be feed into the graph.
|
||||
def generate_random_inputs(self, low=0, high=1):
|
||||
shapes, dtype = self._input_info()
|
||||
inputs = []
|
||||
for i, j in zip(shapes, dtype):
|
||||
inputs.append(
|
||||
np.random.uniform(low, high, size=i).astype(
|
||||
dtype_to_np_dtype[j]
|
||||
)
|
||||
)
|
||||
return tuple(inputs)
|
||||
return inner
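# The benchmark_mode decorator above is a simple guard: the wrapped method only
# runs when the instance was created with benchmarking enabled. The same
# pattern as a self-contained sketch (the names here are illustrative):


def require_flag(func):
    def inner(self, *args, **kwargs):
        assert self.enabled, "enable benchmarking before calling this method"
        return func(self, *args, **kwargs)

    return inner


class Example:
    def __init__(self, enabled=False):
        self.enabled = enabled

    @require_flag
    def benchmark(self):
        return "ran"


assert Example(enabled=True).benchmark() == "ran"
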
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_all(self, inputs):
|
||||
self.shark_runner.benchmark_all(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_frontend(self, inputs):
|
||||
self.shark_runner.benchmark_frontend(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_python(self, inputs):
|
||||
self.shark_runner.benchmark_python(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_c(self):
|
||||
self.shark_runner.benchmark_c()
|
||||
|
||||
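# A small sketch of the input-introspection path in the SharkInference variant
# above that defines _input_info() and generate_random_inputs(). The MLIR
# signature string is illustrative.

from shark.shark_inference import SharkInference

example_mlir = (
    "func.func @forward(%arg0: tensor<1x128xi32>, %arg1: tensor<1x128xi32>) "
    "-> tensor<1x2xf32>"
)
module = SharkInference(
    example_mlir, "forward", device="cpu", mlir_dialect="linalg"
)
# Each "tensor<1x128xi32>" in the signature is parsed into the shape (1, 128)
# and element type "i32"; generate_random_inputs() then materializes matching
# numpy arrays that forward() could consume.
random_inputs = module.generate_random_inputs()
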
@@ -11,91 +11,195 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from iree.compiler import tf as tfc
|
||||
import iree.compiler.tflite as ireec_tflite
|
||||
from torch.utils._python_dispatch import enable_torch_dispatch_mode
|
||||
from torch_mlir.eager_mode import torch_mlir_tensor
|
||||
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
|
||||
from torch_mlir_e2e_test.eager_backends.refbackend import EagerModeRefBackend
|
||||
|
||||
from shark.iree_utils.compile_utils import (
|
||||
get_iree_compiled_module,
|
||||
get_results,
|
||||
export_iree_module_to_vmfb,
|
||||
)
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.parser import shark_args
|
||||
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
|
||||
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb, export_module_to_mlir_file, build_benchmark_args, run_benchmark_module
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
# supported dialects by the shark-runtime.
|
||||
supported_dialects = {"linalg", "mhlo", "tosa", "tf-lite"}
|
||||
from shark.parser import shark_args
|
||||
from tqdm import tqdm
|
||||
import time
|
||||
|
||||
|
||||
class SharkRunner:
|
||||
"""
|
||||
Base class for SharkInference and SharkTrainer
|
||||
used to execute an mlir_module.
|
||||
|
||||
...
|
||||
|
||||
Attributes
|
||||
----------
|
||||
mlir_module : str
|
||||
mlir_module represented in string.
|
||||
function_name : str
|
||||
function to execute in the given mlir_module.
|
||||
device : str
|
||||
device to execute the mlir_module on.
|
||||
currently supports cpu, cuda, vulkan, and metal backends.
|
||||
mlir_dialect: str
|
||||
The dialect in which the given mlir_module is in.
|
||||
Refer to {https://mlir.llvm.org/docs/Dialects/}
|
||||
|
||||
Methods
|
||||
-------
|
||||
run(inputs=None):
|
||||
Runs the mlir_module with the given inputs, if the inputs are not
|
||||
given it autogenerates the inputs. Also, the inputs should be a
|
||||
numpy array.
|
||||
input_info():
|
||||
Gives the information about the inputs required by the `function_name`.
|
||||
This can be expensive as it does string matching to do so.
|
||||
"""
|
||||
"""Base class for Shark Inference and Shark Runner."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
mlir_module: str,
|
||||
function_name: str = "forward",
|
||||
device: str = "none",
|
||||
mlir_dialect: str = "linalg",
|
||||
model,
|
||||
input: tuple,
|
||||
dynamic: bool = False,
|
||||
device: str = None,
|
||||
jit_trace: bool = False,
|
||||
from_aot: bool = False,
|
||||
frontend: str = "torch",
|
||||
model_config_path: str = None,
|
||||
):
|
||||
self.mlir_module = mlir_module
|
||||
self.function_name = function_name
|
||||
self.device = shark_args.device if device == "none" else device
|
||||
self.mlir_dialect = mlir_dialect
|
||||
self.model = model
|
||||
self.frontend_model = model
|
||||
self.from_aot = from_aot
|
||||
self.input = input
|
||||
self.frontend = frontend
|
||||
self.vmfb_file = None
|
||||
func_name = "forward"
|
||||
self.device = device if device is not None else shark_args.device
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
# get torch-mlir dialect
|
||||
# self.model = torch.Module
|
||||
# TODO assert
|
||||
self.model = get_torch_mlir_module(self.model, input, dynamic,
|
||||
jit_trace, from_aot)
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
# get mhlo dialect
|
||||
# self.model = tf.Module
|
||||
# TODO assert
|
||||
self.model = tfc.compile_module(self.model,
|
||||
exported_names=[func_name],
|
||||
import_only=True)
|
||||
elif self.frontend in ["tflite"]:
|
||||
print("Setting up for IREE compiler tflite")
|
||||
# get tosa dialect
|
||||
# self.model = model.tflite
|
||||
# TODO assert
|
||||
self.model = ireec_tflite.compile_file(self.model,
|
||||
input_type="tosa",
|
||||
import_only=True)
|
||||
func_name = "main"
|
||||
|
||||
if check_device_drivers(self.device):
|
||||
device_driver_info(self.device)
|
||||
sys.exit(1)
|
||||
|
||||
# Compile the module to get the .vmfb.
|
||||
# TODO: We can capture the .vmfb module here and later use it for saving
|
||||
# rather than recompiling it again, if used for saving.
|
||||
(
|
||||
self.iree_compilation_module,
|
||||
self.iree_config,
|
||||
) = get_iree_compiled_module(
|
||||
self.mlir_module,
|
||||
self.device,
|
||||
self.mlir_dialect,
|
||||
func_name=self.function_name,
|
||||
)
|
||||
) = get_iree_compiled_module(self.model,
|
||||
self.device,
|
||||
self.frontend,
|
||||
func_name=func_name,
|
||||
model_config_path=model_config_path)
|
||||
|
||||
def run(self, inputs: tuple):
|
||||
return get_results(
|
||||
self.iree_compilation_module,
|
||||
inputs,
|
||||
self.iree_config,
|
||||
self.mlir_dialect,
|
||||
)
|
||||
# Debugging Options:
|
||||
if shark_args.save_mlir:
|
||||
export_module_to_mlir_file(self.model, self.frontend,
|
||||
shark_args.repro_dir)
|
||||
if shark_args.save_vmfb:
|
||||
self.vmfb_file = self.save_module(shark_args.repro_dir)
|
||||
|
||||
# All the timings and benchmarking can be done here.
|
||||
def forward(self, input, frontend):
|
||||
return get_results(self.iree_compilation_module, input,
|
||||
self.iree_config, frontend)
|
||||
|
||||
# TODO: Instead of passing directory and having names decided by the module
|
||||
# , user may want to save the module with manual names.
|
||||
def save_module(self, dir=os.getcwd()):
|
||||
return export_iree_module_to_vmfb(
|
||||
self.model, self.device, dir, self.mlir_dialect
|
||||
return export_iree_module_to_vmfb(self.model, self.device, dir,
|
||||
self.frontend)
|
||||
|
||||
# TODO: Load a module and directly use it, we will need to set the frontend
|
||||
# in this case.
|
||||
def load_module(self, name):
|
||||
pass
|
||||
|
||||
|
||||
class SharkEagerMode:
|
||||
|
||||
def __init__(self, device="cpu"):
|
||||
if device == "refbackend":
|
||||
torch_mlir_tensor.backend = EagerModeRefBackend()
|
||||
else:
|
||||
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(
|
||||
device)
|
||||
self.guard = enable_torch_dispatch_mode(TorchMLIRTensor)
|
||||
self.guard.__enter__()
|
||||
|
||||
def __del__(self):
|
||||
self.guard.__exit__(None, None, None)
|
||||
|
||||
|
||||
class SharkBenchmarkRunner(SharkRunner):
|
||||
# SharkRunner derived class with Benchmarking capabilities.
|
||||
def __init__(
|
||||
self,
|
||||
model,
|
||||
input: tuple,
|
||||
dynamic: bool = False,
|
||||
device: str = None,
|
||||
jit_trace: bool = False,
|
||||
from_aot: bool = False,
|
||||
frontend: str = "torch",
|
||||
):
|
||||
SharkRunner.__init__(self, model, input, dynamic, device, jit_trace,
|
||||
from_aot, frontend)
|
||||
if (self.vmfb_file == None):
|
||||
self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
|
||||
shark_args.repro_dir,
|
||||
frontend)
|
||||
self.benchmark_cl = build_benchmark_args(self.vmfb_file, device, input,
|
||||
frontend, from_aot)
|
||||
|
||||
def benchmark_frontend(self, inputs):
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
self.benchmark_torch(inputs)
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
self.benchmark_tf(inputs)
|
||||
|
||||
def benchmark_torch(self, inputs):
|
||||
inputs = self.input if self.from_aot else inputs
|
||||
inputs = inputs[0]
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
self.frontend_model.forward(inputs)
|
||||
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = self.frontend_model.forward(inputs)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
break
|
||||
print(
|
||||
f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
|
||||
def benchmark_tf(self, inputs):
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
self.frontend_model.forward(*inputs)
|
||||
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = self.frontend_model.forward(*inputs)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
break
|
||||
print(
|
||||
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
return
|
||||
|
||||
def benchmark_c(self):
|
||||
result = run_benchmark_module(self.benchmark_cl)
|
||||
print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")
|
||||
|
||||
def benchmark_python(self, inputs):
|
||||
inputs = self.input if self.from_aot else inputs
|
||||
input_list = [x for x in inputs]
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
self.forward(input_list, self.frontend)
|
||||
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = self.forward(input_list, self.frontend)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
print(
|
||||
f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
|
||||
def benchmark_all(self, inputs):
|
||||
self.benchmark_frontend(inputs)
|
||||
self.benchmark_python(inputs)
|
||||
self.benchmark_c()
|
||||
|
||||
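# A brief usage sketch for SharkEagerMode above: constructing it installs the
# refbackend or IREE eager backend and enters torch's dispatch mode, so
# ordinary torch ops issued afterwards are routed through TorchMLIRTensor.
# Illustrative only; shapes and device are arbitrary.

import torch

eager_mode = SharkEagerMode("cpu")  # or "refbackend"
a = torch.randn(2, 3)
b = torch.randn(2, 3)
c = a + b  # dispatched eagerly through torch-mlir while the mode is active
del eager_mode  # __del__ exits the dispatch mode
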
@@ -12,11 +12,15 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
|
||||
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb
|
||||
import os
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_runner import SharkRunner
|
||||
from shark.backward_makefx import MakeFxModule
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
@@ -54,13 +58,7 @@ class SharkTrainer:
|
||||
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
||||
def set_frontend(self, frontend: str):
|
||||
if frontend not in [
|
||||
"pytorch",
|
||||
"torch",
|
||||
"tensorflow",
|
||||
"tf",
|
||||
"mhlo",
|
||||
"linalg",
|
||||
"tosa",
|
||||
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg", "tosa"
|
||||
]:
|
||||
print_err("frontend not supported.")
|
||||
else:
|
||||
@@ -69,32 +67,22 @@ class SharkTrainer:
|
||||
# Training function is needed in the case of torch_fn.
|
||||
def compile(self, training_fn=None):
|
||||
if self.frontend in ["torch", "pytorch"]:
|
||||
aot_module = MakeFxModule(
|
||||
self.model, tuple(self.input), custom_inference_fn=training_fn
|
||||
)
|
||||
aot_module = MakeFxModule(self.model,
|
||||
tuple(self.input),
|
||||
custom_inference_fn=training_fn)
|
||||
aot_module.generate_graph()
|
||||
# Returns the backward graph.
|
||||
training_graph = aot_module.training_graph
|
||||
weights = self.get_torch_params()
|
||||
self.shark_runner = SharkRunner(
|
||||
training_graph,
|
||||
weights + self.input,
|
||||
self.dynamic,
|
||||
self.device,
|
||||
self.jit_trace,
|
||||
self.from_aot,
|
||||
self.frontend,
|
||||
)
|
||||
self.shark_runner = SharkRunner(training_graph,
|
||||
weights + self.input, self.dynamic,
|
||||
self.device, self.jit_trace,
|
||||
self.from_aot, self.frontend)
|
||||
elif self.frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
self.shark_runner = SharkRunner(
|
||||
self.model,
|
||||
self.input,
|
||||
self.dynamic,
|
||||
self.device,
|
||||
self.jit_trace,
|
||||
self.from_aot,
|
||||
self.frontend,
|
||||
)
|
||||
self.shark_runner = SharkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, self.from_aot,
|
||||
self.frontend)
|
||||
else:
|
||||
print_err("Unknown frontend")
|
||||
return
|
||||
@@ -112,9 +100,8 @@ class SharkTrainer:
|
||||
params = [x.numpy() for x in params]
|
||||
print(f"Training started for {num_iters} iterations:")
|
||||
for i in tqdm(range(num_iters)):
|
||||
params = self.shark_runner.forward(
|
||||
params + self.input, self.frontend
|
||||
)
|
||||
params = self.shark_runner.forward(params + self.input,
|
||||
self.frontend)
|
||||
|
||||
return params
|
||||
|
||||
@@ -124,15 +111,15 @@ class SharkTrainer:
|
||||
def _train_tf(self, num_iters):
|
||||
input_list = []
|
||||
for x in self.input:
|
||||
if isinstance(x, list):
|
||||
if (isinstance(x, list)):
|
||||
nested_list = []
|
||||
for val in x:
|
||||
if isinstance(val, np.ndarray):
|
||||
if (isinstance(val, np.ndarray)):
|
||||
nested_list.append(val)
|
||||
else:
|
||||
nested_list.append(val.numpy())
|
||||
input_list.append(nested_list)
|
||||
elif isinstance(x, np.ndarray):
|
||||
elif (isinstance(x, np.ndarray)):
|
||||
input_list.append(x)
|
||||
else:
|
||||
input_list.append(x.numpy())
|
||||
|
||||
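# The _train_tf helper above flattens a mix of tf.Tensors, numpy arrays and
# nested lists into plain numpy before calling the compiled module. The same
# conversion as a stand-alone sketch (to_numpy_inputs and the sample values
# are illustrative):

import numpy as np
import tensorflow as tf


def to_numpy_inputs(inputs):
    converted = []
    for x in inputs:
        if isinstance(x, list):
            converted.append(
                [v if isinstance(v, np.ndarray) else v.numpy() for v in x]
            )
        elif isinstance(x, np.ndarray):
            converted.append(x)
        else:
            converted.append(x.numpy())
    return converted


print(to_numpy_inputs([tf.constant([1.0, 2.0]), np.zeros(2), [tf.ones(2)]]))
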
@@ -2,143 +2,51 @@
|
||||
import numpy as np
|
||||
from shark.shark_importer import SharkImporter
|
||||
import pytest
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.tflite_utils import TFLitePreprocessor
|
||||
import sys
|
||||
|
||||
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
||||
model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
||||
|
||||
|
||||
# Inputs modified to be useful albert inputs.
|
||||
def generate_inputs(input_details):
|
||||
for input in input_details:
|
||||
print(str(input["shape"]), input["dtype"].__name__)
|
||||
print("\t%s, %s", str(input["shape"]), input["dtype"].__name__)
|
||||
|
||||
args = []
|
||||
args.append(
|
||||
np.random.randint(
|
||||
low=0,
|
||||
high=256,
|
||||
size=input_details[0]["shape"],
|
||||
dtype=input_details[0]["dtype"],
|
||||
)
|
||||
)
|
||||
np.random.randint(low=0,
|
||||
high=256,
|
||||
size=input_details[0]["shape"],
|
||||
dtype=input_details[0]["dtype"]))
|
||||
args.append(
|
||||
np.ones(
|
||||
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
|
||||
)
|
||||
)
|
||||
np.ones(shape=input_details[1]["shape"],
|
||||
dtype=input_details[1]["dtype"]))
|
||||
args.append(
|
||||
np.zeros(
|
||||
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
|
||||
)
|
||||
)
|
||||
np.zeros(shape=input_details[2]["shape"],
|
||||
dtype=input_details[2]["dtype"]))
|
||||
return args
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results, details):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(details)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class AlbertTfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
|
||||
|
||||
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
|
||||
inputs = tflite_preprocessor.get_inputs()
|
||||
tflite_interpreter = tflite_preprocessor.get_interpreter()
|
||||
|
||||
my_shark_importer = SharkImporter(
|
||||
module=tflite_interpreter,
|
||||
inputs=inputs,
|
||||
frontend="tflite",
|
||||
raw_model_file=raw_model_file_path,
|
||||
)
|
||||
mlir_model, func_name = my_shark_importer.import_mlir()
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name=func_name,
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
## post process results for compare
|
||||
input_details, output_details = tflite_preprocessor.get_model_details()
|
||||
mlir_results = list(mlir_results)
|
||||
for i in range(len(output_details)):
|
||||
dtype = output_details[i]["dtype"]
|
||||
mlir_results[i] = mlir_results[i].astype(dtype)
|
||||
tflite_results = tflite_preprocessor.get_golden_output()
|
||||
compare_results(mlir_results, tflite_results, output_details)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details, output_details = tflite_preprocessor.get_model_details()
|
||||
inputs = generate_inputs(input_details) # new inputs
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name=func_name,
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
## post process results for compare
|
||||
tflite_results = tflite_preprocessor.get_golden_output()
|
||||
compare_results(mlir_results, tflite_results, output_details)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
# A specific case can be run by commenting different cases. Runs all the test
|
||||
# across cpu, gpu and vulkan according to available drivers.
|
||||
pytest_param = pytest.mark.parametrize(
|
||||
("dynamic", "device"),
|
||||
('dynamic', 'device'),
|
||||
[
|
||||
pytest.param(False, "cpu"),
|
||||
pytest.param(False, 'cpu'),
|
||||
# TODO: Language models are failing for dynamic case..
|
||||
pytest.param(True, "cpu", marks=pytest.mark.skip),
|
||||
],
|
||||
)
|
||||
pytest.param(True, 'cpu', marks=pytest.mark.skip),
|
||||
])
|
||||
|
||||
|
||||
@pytest_param
|
||||
@pytest.mark.xfail(
|
||||
sys.platform == "darwin", reason="known macos tflite install issue"
|
||||
)
|
||||
def test_albert(dynamic, device):
|
||||
module_tester = AlbertTfliteModuleTester(dynamic=dynamic, device=device)
|
||||
module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_albert(False, "cpu")
|
||||
my_shark_importer = SharkImporter(model_path=model_path,
|
||||
model_type="tflite",
|
||||
model_source_hub="tfhub",
|
||||
device=device,
|
||||
dynamic=dynamic,
|
||||
jit_trace=True)
|
||||
input_details, output_details = my_shark_importer.get_model_details()
|
||||
inputs = generate_inputs(input_details) # device_inputs
|
||||
my_shark_importer.compile(inputs)
|
||||
shark_results = my_shark_importer.forward(inputs)
|
||||
# print(shark_results)
|
||||
|
||||
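# compare_results above reduces every output pair to a single max-abs-error
# value. The same check as a tiny stand-alone sketch with toy arrays:

import numpy as np

mlir_result = np.array([0.10, 0.22, 0.31], dtype=np.single)
tflite_result = np.array([0.10, 0.20, 0.30], dtype=np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error: %f" % max_error)
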
@@ -1,208 +0,0 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import os
|
||||
import csv
|
||||
import urllib.request
|
||||
|
||||
|
||||
class TFLiteModelUtil:
|
||||
def __init__(self, raw_model_file):
|
||||
self.raw_model_file = str(raw_model_file)
|
||||
self.tflite_interpreter = None
|
||||
self.input_details = None
|
||||
self.output_details = None
|
||||
self.inputs = []
|
||||
|
||||
def setup_tflite_interpreter(self):
|
||||
self.tflite_interpreter = tf.lite.Interpreter(
|
||||
model_path=self.raw_model_file
|
||||
)
|
||||
self.tflite_interpreter.allocate_tensors()
|
||||
# default input initialization
|
||||
return self.get_model_details()
|
||||
|
||||
def get_model_details(self):
|
||||
print("Get tflite input output details")
|
||||
self.input_details = self.tflite_interpreter.get_input_details()
|
||||
self.output_details = self.tflite_interpreter.get_output_details()
|
||||
return self.input_details, self.output_details
|
||||
|
||||
def invoke_tflite(self, inputs):
|
||||
self.inputs = inputs
|
||||
print("invoke_tflite")
|
||||
for i, input in enumerate(self.inputs):
|
||||
self.tflite_interpreter.set_tensor(
|
||||
self.input_details[i]["index"], input
|
||||
)
|
||||
self.tflite_interpreter.invoke()
|
||||
|
||||
# post process tflite_result for compare with mlir_result,
|
||||
# for tflite the output is a list of numpy.tensor
|
||||
tflite_results = []
|
||||
for output_detail in self.output_details:
|
||||
tflite_results.append(
|
||||
np.array(
|
||||
self.tflite_interpreter.get_tensor(output_detail["index"])
|
||||
)
|
||||
)
|
||||
|
||||
for i in range(len(self.output_details)):
|
||||
# print("output_details ", i, "shape", self.output_details[i]["shape"].__name__,
|
||||
# ", dtype: ", self.output_details[i]["dtype"].__name__)
|
||||
out_dtype = self.output_details[i]["dtype"]
|
||||
tflite_results[i] = tflite_results[i].astype(out_dtype)
|
||||
return tflite_results
|
||||
|
||||
|
||||
class TFLitePreprocessor:
|
||||
def __init__(
|
||||
self,
|
||||
model_name,
|
||||
input_details=None,
|
||||
output_details=None,
|
||||
model_path=None,
|
||||
):
|
||||
self.model_name = model_name
|
||||
self.input_details = (
|
||||
input_details # used for tflite, optional for tf/pytorch
|
||||
)
|
||||
self.output_details = (
|
||||
output_details # used for tflite, optional for tf/pytorch
|
||||
)
|
||||
self.inputs = []
|
||||
self.model_path = model_path # url to download the model
|
||||
self.raw_model_file = (
|
||||
None # local address for raw tf/tflite/pytorch model
|
||||
)
|
||||
self.mlir_file = (
|
||||
None # local address for .mlir file of tf/tflite/pytorch model
|
||||
)
|
||||
self.mlir_model = None # read of .mlir file
|
||||
self.output_tensor = (
|
||||
None # the raw tf/pytorch/tflite_output_tensor, not mlir_tensor
|
||||
)
|
||||
self.interpreter = (
|
||||
None # could be tflite/tf/torch_interpreter in utils
|
||||
)
|
||||
self.input_file = None
|
||||
self.output_file = None
|
||||
|
||||
# create tmp model file directory
|
||||
if self.model_path is None and self.model_name is None:
|
||||
print(
|
||||
"Error. No model_path, No model name,Please input either one."
|
||||
)
|
||||
return
|
||||
|
||||
print("Setting up for TMP_WORK_DIR")
|
||||
self.workdir = os.path.join(
|
||||
os.path.dirname(__file__), "./../gen_shark_tank"
|
||||
)
|
||||
os.makedirs(self.workdir, exist_ok=True)
|
||||
print(f"TMP_WORK_DIR = {self.workdir}")
|
||||
|
||||
# compile and run tfhub tflite
|
||||
load_model_success = self.load_tflite_model()
|
||||
if not load_model_success:
|
||||
print("Error, load tflite model fail")
|
||||
return
|
||||
|
||||
if (self.input_details is None) or (self.output_details is None):
|
||||
# print("Setting up tflite interpreter to get model input details")
|
||||
self.setup_interpreter()
|
||||
|
||||
inputs = self.generate_inputs(self.input_details) # device_inputs
|
||||
self.setup_inputs(inputs)
|
||||
|
||||
def load_tflite_model(self):
|
||||
# use model name get dir.
|
||||
tflite_model_name_dir = os.path.join(
|
||||
self.workdir, str(self.model_name)
|
||||
)
|
||||
|
||||
os.makedirs(tflite_model_name_dir, exist_ok=True)
|
||||
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
|
||||
|
||||
self.raw_model_file = "/".join(
|
||||
[tflite_model_name_dir, str(self.model_name) + "_tflite.tflite"]
|
||||
)
|
||||
self.mlir_file = "/".join(
|
||||
[tflite_model_name_dir, str(self.model_name) + "_tflite.mlir"]
|
||||
)
|
||||
self.input_file = "/".join([tflite_model_name_dir, "inputs"])
|
||||
self.output_file = "/".join([tflite_model_name_dir, "golden_out"])
|
||||
# np.save("/".join([tflite_model_name_dir, "function_name"]), np.array("main"))
|
||||
|
||||
if os.path.exists(self.raw_model_file):
|
||||
print(
|
||||
"Local address for .tflite model file Exists: ",
|
||||
self.raw_model_file,
|
||||
)
|
||||
else:
|
||||
print("No local tflite file, Download tflite model")
|
||||
if self.model_path is None:
|
||||
# get model file from tflite_model_list.csv or download from gs://bucket
|
||||
print("No model_path, get from tflite_model_list.csv")
|
||||
tflite_model_list_path = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
"../tank/tflite/tflite_model_list.csv",
|
||||
)
|
||||
tflite_model_list = csv.reader(open(tflite_model_list_path))
|
||||
for row in tflite_model_list:
|
||||
if str(row[0]) == str(self.model_name):
|
||||
self.model_path = row[1]
|
||||
print("tflite_model_name", str(row[0]))
|
||||
print("tflite_model_link", self.model_path)
|
||||
if self.model_path is None:
|
||||
print("Error, No model path find in tflite_model_list.csv")
|
||||
return False
|
||||
urllib.request.urlretrieve(self.model_path, self.raw_model_file)
|
||||
return True
|
||||
|
||||
def setup_interpreter(self):
|
||||
self.interpreter = TFLiteModelUtil(self.raw_model_file)
|
||||
(
|
||||
self.input_details,
|
||||
self.output_details,
|
||||
) = self.interpreter.setup_tflite_interpreter()
|
||||
|
||||
def generate_inputs(self, input_details):
|
||||
self.inputs = []
|
||||
for tmp_input in input_details:
|
||||
print(
|
||||
"input_details shape:",
|
||||
str(tmp_input["shape"]),
|
||||
" type:",
|
||||
tmp_input["dtype"].__name__,
|
||||
)
|
||||
self.inputs.append(
|
||||
np.ones(shape=tmp_input["shape"], dtype=tmp_input["dtype"])
|
||||
)
|
||||
return self.inputs
|
||||
|
||||
def setup_inputs(self, inputs):
|
||||
# print("Setting up inputs")
|
||||
self.inputs = inputs
|
||||
|
||||
def get_mlir_model(self):
|
||||
return self.mlir_model
|
||||
|
||||
def get_mlir_file(self):
|
||||
return self.mlir_file
|
||||
|
||||
def get_inputs(self):
|
||||
return self.inputs
|
||||
|
||||
def get_golden_output(self):
|
||||
self.output_tensor = self.interpreter.invoke_tflite(self.inputs)
|
||||
np.savez(self.output_file, *self.output_tensor)
|
||||
return self.output_tensor
|
||||
|
||||
def get_model_details(self):
|
||||
return self.input_details, self.output_details
|
||||
|
||||
def get_raw_model_file(self):
|
||||
return self.raw_model_file
|
||||
|
||||
def get_interpreter(self):
|
||||
return self.interpreter
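Taken together, `TFLitePreprocessor` is meant to be driven end to end roughly as follows; this is a minimal sketch based only on the methods defined above, using the same `albert_lite_base` entry that the tflite tests further down pull from `tflite_model_list.csv`:

```python
# Sketch: fetch a TFLite model, build default inputs, and capture golden outputs.
preprocessor = TFLitePreprocessor(model_name="albert_lite_base")

raw_model_path = preprocessor.get_raw_model_file()  # local path of the downloaded .tflite
inputs = preprocessor.get_inputs()                   # all-ones tensors shaped from input_details
golden_outputs = preprocessor.get_golden_output()    # runs the TFLite interpreter and saves golden_out

input_details, output_details = preprocessor.get_model_details()
```

These are the same reference inputs and golden outputs that the SharkImporter/SharkInference tests below consume for comparison.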
@@ -12,10 +12,27 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from torch_mlir.ir import StringAttr
|
||||
import torch_mlir
|
||||
import torch
|
||||
import io
|
||||
import pickle
|
||||
import sys
|
||||
import os
|
||||
|
||||
from io import StringIO
|
||||
from torch_mlir.dialects.torch.importer.jit_ir import (
|
||||
ClassAnnotator,
|
||||
ModuleBuilder,
|
||||
)
|
||||
from torch_mlir_e2e_test.torchscript.serialization import (
|
||||
extract_serializable_annotations, apply_serializable_annotations,
|
||||
SerializableTest)
|
||||
|
||||
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
||||
|
||||
from torch_mlir.passmanager import PassManager
|
||||
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
|
||||
from torch_mlir.ir import StringAttr
|
||||
|
||||
|
||||
def get_module_name_for_asm_dump(module):
|
||||
"""Gets a name suitable for an assembly dump.
|
||||
@@ -24,8 +41,23 @@ def get_module_name_for_asm_dump(module):
|
||||
if not "torch.debug_module_name" in module.operation.attributes:
|
||||
return "UnnammedModule"
|
||||
return StringAttr(
    module.operation.attributes["torch.debug_module_name"]
).value
|
||||
|
||||
|
||||
def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
|
||||
"""TODO: Include necessary documentation"""
|
||||
|
||||
annotations_list = [None]
|
||||
for i in inputs:
|
||||
temp_list = []
|
||||
if dynamic:
|
||||
temp_list.append([-1 for i in range(len(i.shape))])
|
||||
else:
|
||||
temp_list.append(list(i.shape))
|
||||
temp_list.append(i.dtype)
|
||||
temp_list.append(True)
|
||||
annotations_list.append(tuple(temp_list))
|
||||
return annotations_list
|
||||
|
||||
|
||||
def run_on_refbackend(torch_module, inputs):
|
||||
@@ -36,37 +68,66 @@ def run_on_refbackend(torch_module, inputs):
|
||||
return jit_module.forward(np_inputs[0])
|
||||
|
||||
|
||||
# Creates dynamic dims for all dims.
|
||||
# TODO: Pass user specified dynamic dims.
|
||||
def create_dynamic_placeholders(inputs):
|
||||
placeholders = []
|
||||
for inp in inputs:
|
||||
placeholder = torch_mlir.TensorPlaceholder.like(
|
||||
inp, dynamic_axes=[i for i in range(len(inp.shape))]
|
||||
)
|
||||
placeholders.append(placeholder)
|
||||
return tuple(placeholders)
|
||||
def shark_jit_trace(module, input: tuple, dynamic: bool,
|
||||
tracing_required: bool):
|
||||
"""TODO: Include necessary documentation."""
|
||||
|
||||
if not tracing_required:
|
||||
return torch.jit.script(module)
|
||||
|
||||
traced_module = torch.jit.trace_module(module, {"forward": input})
|
||||
actual_script = traced_module._actual_script_module
|
||||
export(actual_script.forward)
|
||||
annotate_args_decorator = annotate_args(
|
||||
get_input_annotations(input, dynamic))
|
||||
annotate_args_decorator(actual_script.forward)
|
||||
module = torch.jit.script(actual_script)
|
||||
|
||||
# TODO: remove saved annotations.pickle
|
||||
torchscript_module_bytes = module.save_to_buffer({
|
||||
"annotations.pkl":
|
||||
pickle.dumps(extract_serializable_annotations(module))
|
||||
})
|
||||
serializable_test = SerializableTest(unique_name="",
|
||||
program=torchscript_module_bytes,
|
||||
trace=None)
|
||||
_extra_files = {"annotations.pkl": ""}
|
||||
module = torch.jit.load(io.BytesIO(serializable_test.program),
|
||||
_extra_files=_extra_files)
|
||||
# Load the pickled annotations.
|
||||
annotations = pickle.loads(_extra_files["annotations.pkl"])
|
||||
apply_serializable_annotations(module, annotations)
|
||||
return module
|
||||
|
||||
|
||||
def get_torch_mlir_module(
|
||||
module,
|
||||
input: tuple,
|
||||
dynamic: bool,
|
||||
jit_trace: bool,
|
||||
from_torchscript: bool = False,
|
||||
tracing_required: bool,
|
||||
from_aot: bool = False,
|
||||
):
|
||||
"""Get the MLIR's linalg-on-tensors module from torchscipt module."""
|
||||
ignore_traced_shapes = False
|
||||
if dynamic:
|
||||
input = create_dynamic_placeholders(input)
|
||||
if jit_trace:
|
||||
ignore_traced_shapes = True
|
||||
"""TODO: Include necessary documentation."""
|
||||
|
||||
module = torch_mlir.compile(
|
||||
module,
|
||||
input,
|
||||
output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
|
||||
use_tracing=jit_trace,
|
||||
ignore_traced_shapes=ignore_traced_shapes,
|
||||
# Tracing is not required from the aot_module.
|
||||
if not from_aot:
|
||||
module = shark_jit_trace(module, input, dynamic, tracing_required)
|
||||
|
||||
mb = ModuleBuilder()
|
||||
class_annotator = ClassAnnotator()
|
||||
class_annotator.exportNone(module._c._type())
|
||||
class_annotator.exportPath(module._c._type(), ["forward"])
|
||||
class_annotator.annotateArgs(
|
||||
module._c._type(),
|
||||
["forward"],
|
||||
get_input_annotations(input, dynamic),
|
||||
)
|
||||
return module
|
||||
mb.import_module(module._c, class_annotator)
|
||||
|
||||
with mb.module.context:
|
||||
pm = PassManager.parse(
|
||||
"torchscript-module-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline"
|
||||
)
|
||||
pm.run(mb.module)
|
||||
|
||||
return mb.module
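As a point of reference for this hunk, the `torch_mlir.compile` path can be exercised standalone on a toy module; a minimal sketch assuming the torch-mlir Python API used here (`OutputType.LINALG_ON_TENSORS` plus `use_tracing`):

```python
import torch
import torch_mlir


class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1


example_input = torch.ones(2, 3)

# Trace the toy module and lower it to the linalg-on-tensors form SHARK consumes.
linalg_module = torch_mlir.compile(
    AddOne(),
    example_input,
    output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
    use_tracing=True,
)
print(linalg_module)
```

The `get_torch_mlir_module` helper above wraps this same lowering, with the older ModuleBuilder/PassManager pipeline shown alongside it in the diff.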
@@ -1,101 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MiniLMModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="mhlo",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
if self.benchmark:
|
||||
shark_args.enable_tf32 = True
|
||||
shark_module.compile()
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(inputs),
|
||||
"microsoft/MiniLM-L12-H384-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"tensorflow",
|
||||
)
|
||||
shark_args.enable_tf32 = False
|
||||
rtol = 1e-01
|
||||
atol = 1e-02
|
||||
|
||||
else:
|
||||
shark_module.compile()
|
||||
rtol = 1e-02
|
||||
atol = 1e-03
|
||||
|
||||
# TODO: Remove catch once new MiniLM stable
|
||||
try:
|
||||
result = shark_module.forward(inputs)[0][1].to_host()
|
||||
|
||||
except Exception:
|
||||
result = shark_module.forward(inputs)
|
||||
|
||||
np.testing.assert_allclose(golden_out, result, rtol=rtol, atol=atol)
|
||||
|
||||
|
||||
class MiniLMModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MiniLMModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,114 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class MiniLMModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"microsoft/MiniLM-L12-H384-uncased", dynamic
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
if self.benchmark:
|
||||
shark_args.enable_tf32 = True
|
||||
shark_module.compile()
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"microsoft/MiniLM-L12-H384-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
shark_args.enable_tf32 = False
|
||||
rtol = 1e-01
|
||||
atol = 1e-02
|
||||
else:
|
||||
shark_module.compile()
|
||||
rtol = 1e-02
|
||||
atol = 1e-03
|
||||
|
||||
results = shark_module.forward(input)
|
||||
assert compare_tensors(act_out, results, rtol, atol)
|
||||
|
||||
|
||||
class MiniLMModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = MiniLMModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
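For quick debugging outside of pytest, the tester class above can also be driven directly; a minimal sketch (the import path is hypothetical and depends on where this test file lived in the tank):

```python
# Hypothetical import path for illustration only; adjust to the real test module location.
from tank.pytorch.minilm_test import MiniLMModuleTester

tester = MiniLMModuleTester(benchmark=False, onnx_bench=False)
tester.create_and_check_module(dynamic=False, device="cpu")
```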
@@ -1,13 +0,0 @@
To run the fine-tuning example, from the root SHARK directory, run:

```shell
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pip install jupyter tf-models-nightly tf-datasets
jupyter-notebook
```

If running from a Google Cloud VM, you can view the Jupyter notebook on your local system with:

```shell
gcloud compute ssh <YOUR_INSTANCE_DETAILS> --ssh-flag="-N -L localhost:8888:localhost:8888"
```
@@ -1,69 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class AlbertBaseModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"albert-base-v2"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class AlbertBaseModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = AlbertBaseModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,113 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class AlbertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"albert-base-v2", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"albert-base-v2",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class AlbertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = AlbertModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,177 +0,0 @@
|
||||
# import numpy as np
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# from shark.shark_inference import SharkInference
|
||||
# import pytest
|
||||
# import unittest
|
||||
# from shark.parser import shark_args
|
||||
# from shark.tflite_utils import TFLitePreprocessor
|
||||
#
|
||||
#
|
||||
# # model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
||||
# # model_path = model_path
|
||||
#
|
||||
# # Inputs modified to be useful albert inputs.
|
||||
# def generate_inputs(input_details):
|
||||
# for input in input_details:
|
||||
# print(str(input["shape"]), input["dtype"].__name__)
|
||||
# # [ 1 384] int32
|
||||
# # [ 1 384] int32
|
||||
# # [ 1 384] int32
|
||||
#
|
||||
# args = []
|
||||
# args.append(
|
||||
# np.random.randint(
|
||||
# low=0,
|
||||
# high=256,
|
||||
# size=input_details[0]["shape"],
|
||||
# dtype=input_details[0]["dtype"],
|
||||
# )
|
||||
# )
|
||||
# args.append(
|
||||
# np.ones(
|
||||
# shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
|
||||
# )
|
||||
# )
|
||||
# args.append(
|
||||
# np.zeros(
|
||||
# shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
|
||||
# )
|
||||
# )
|
||||
# return args
|
||||
#
|
||||
#
|
||||
# def compare_results(mlir_results, tflite_results):
|
||||
# print("Compare mlir_results VS tflite_results: ")
|
||||
# assert len(mlir_results) == len(
|
||||
# tflite_results
|
||||
# ), "Number of results do not match"
|
||||
# rtol = 1e-02
|
||||
# atol = 1e-03
|
||||
# print(
|
||||
# "numpy.allclose: ",
|
||||
# np.allclose(mlir_results, tflite_results, rtol, atol),
|
||||
# )
|
||||
# for i in range(len(mlir_results)):
|
||||
# mlir_result = mlir_results[i]
|
||||
# tflite_result = tflite_results[i]
|
||||
# mlir_result = mlir_result.astype(np.single)
|
||||
# tflite_result = tflite_result.astype(np.single)
|
||||
# assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
# max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
# print("Max error (%d): %f", i, max_error)
|
||||
#
|
||||
#
|
||||
# class AlbertTfliteModuleTester:
|
||||
# def __init__(
|
||||
# self,
|
||||
# dynamic=False,
|
||||
# device="cpu",
|
||||
# save_mlir=False,
|
||||
# save_vmfb=False,
|
||||
# ):
|
||||
# self.dynamic = dynamic
|
||||
# self.device = device
|
||||
# self.save_mlir = save_mlir
|
||||
# self.save_vmfb = save_vmfb
|
||||
#
|
||||
# def create_and_check_module(self):
|
||||
# shark_args.save_mlir = self.save_mlir
|
||||
# shark_args.save_vmfb = self.save_vmfb
|
||||
#
|
||||
# # Preprocess to get SharkImporter input args
|
||||
# tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
|
||||
# raw_model_file_path = tflite_preprocessor.get_raw_model_file()
|
||||
# inputs = tflite_preprocessor.get_inputs()
|
||||
# tflite_interpreter = tflite_preprocessor.get_interpreter()
|
||||
#
|
||||
# # Use SharkImporter to get SharkInference input args
|
||||
# my_shark_importer = SharkImporter(
|
||||
# module=tflite_interpreter,
|
||||
# inputs=inputs,
|
||||
# frontend="tflite",
|
||||
# raw_model_file=raw_model_file_path,
|
||||
# )
|
||||
# mlir_model, func_name = my_shark_importer.import_mlir()
|
||||
#
|
||||
# # Use SharkInference to get inference result
|
||||
# shark_module = SharkInference(
|
||||
# mlir_module=mlir_model,
|
||||
# function_name=func_name,
|
||||
# device=self.device,
|
||||
# mlir_dialect="tflite",
|
||||
# )
|
||||
#
|
||||
# # Case1: Use shark_importer default generate inputs
|
||||
# shark_module.compile()
|
||||
# mlir_results = shark_module.forward(inputs)
|
||||
# ## post process results for compare
|
||||
# # input_details, output_details = tflite_preprocessor.get_model_details()
|
||||
# # mlir_results = list(mlir_results)
|
||||
# # for i in range(len(output_details)):
|
||||
# # dtype = output_details[i]["dtype"]
|
||||
# # mlir_results[i] = mlir_results[i].astype(dtype)
|
||||
# tflite_results = tflite_preprocessor.get_golden_output()
|
||||
# compare_results(mlir_results, tflite_results)
|
||||
# # import pdb
|
||||
# # pdb.set_trace()
|
||||
#
|
||||
# # Case2: Use manually set inputs
|
||||
# # input_details, output_details = tflite_preprocessor.get_model_details()
|
||||
# input_details = [
|
||||
# {
|
||||
# "shape": [1, 384],
|
||||
# "dtype": np.int32,
|
||||
# },
|
||||
# {
|
||||
# "shape": [1, 384],
|
||||
# "dtype": np.int32,
|
||||
# },
|
||||
# {
|
||||
# "shape": [1, 384],
|
||||
# "dtype": np.int32,
|
||||
# },
|
||||
# ]
|
||||
# inputs = generate_inputs(input_details) # new inputs
|
||||
#
|
||||
# shark_module = SharkInference(
|
||||
# mlir_module=mlir_model,
|
||||
# function_name=func_name,
|
||||
# device=self.device,
|
||||
# mlir_dialect="tflite",
|
||||
# )
|
||||
# shark_module.compile()
|
||||
# mlir_results = shark_module.forward(inputs)
|
||||
# ## post process results for compare
|
||||
# tflite_results = tflite_preprocessor.get_golden_output()
|
||||
# compare_results(mlir_results, tflite_results)
|
||||
# # print(mlir_results)
|
||||
#
|
||||
#
|
||||
# class AlbertTfliteModuleTest(unittest.TestCase):
|
||||
# @pytest.fixture(autouse=True)
|
||||
# def configure(self, pytestconfig):
|
||||
# self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
# self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
#
|
||||
# def setUp(self):
|
||||
# self.module_tester = AlbertTfliteModuleTester(self)
|
||||
# self.module_tester.save_mlir = self.save_mlir
|
||||
#
|
||||
# import sys
|
||||
#
|
||||
# @pytest.mark.xfail(
|
||||
# sys.platform == "darwin", reason="known macos tflite install issue"
|
||||
# )
|
||||
# def test_module_static_cpu(self):
|
||||
# self.module_tester.dynamic = False
|
||||
# self.module_tester.device = "cpu"
|
||||
# self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# module_tester = AlbertTfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
# unittest.main()
|
||||
@@ -1,118 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
||||
# model_path = model_path
|
||||
|
||||
# Inputs modified to be useful albert inputs.
|
||||
def generate_inputs(input_details):
|
||||
for input in input_details:
|
||||
print(str(input["shape"]), input["dtype"].__name__)
|
||||
# [ 1 384] int32
|
||||
# [ 1 384] int32
|
||||
# [ 1 384] int32
|
||||
|
||||
args = []
|
||||
args.append(
|
||||
np.random.randint(
|
||||
low=0,
|
||||
high=256,
|
||||
size=input_details[0]["shape"],
|
||||
dtype=input_details[0]["dtype"],
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
np.ones(
|
||||
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
|
||||
)
|
||||
)
|
||||
args.append(
|
||||
np.zeros(
|
||||
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
|
||||
)
|
||||
)
|
||||
return args
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
rtol = 1e-02
|
||||
atol = 1e-03
|
||||
print(
|
||||
"numpy.allclose: ",
|
||||
np.allclose(mlir_results, tflite_results, rtol, atol),
|
||||
)
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape does not match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class AlbertTfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(model_name="albert_lite_base")
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
# print(shark_results)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
|
||||
class AlbertTfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = AlbertTfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
# module_tester = AlbertTfliteModuleTester()
|
||||
# module_tester.create_and_check_module()
|
||||
@@ -1,115 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class AlexnetModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"alexnet", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"alexnet",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class AlexnetModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = AlexnetModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
@pytest.mark.xfail(
|
||||
reason="Issue known, WIP",
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,97 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://tfhub.dev/google/lite-model/magenta/arbitrary-image-stylization-v1-256/int8/prediction/1?lite-format=tflite"
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape does not match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class ArbitraryImageStylizationV1TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(
|
||||
model_name="arbitrary-image-stylization-v1-256"
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
# print(shark_results)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
|
||||
class ArbitraryImageStylizationV1TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = ArbitraryImageStylizationV1TfliteModuleTester(
|
||||
self
|
||||
)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
import sys
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
|
||||
)
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = ArbitraryImageStylizationV1TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,117 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import torch
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class BertBaseCasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
benchmark=False,
|
||||
):
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"bert-base-cased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"bert-base-cased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class BertBaseCasedModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseCasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,71 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"bert-base-uncased"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,108 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.shark_downloader import download_torch_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import torch
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
onnx_bench=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
self.onnx_bench = onnx_bench
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"bert-base-uncased", dynamic
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark:
|
||||
shark_args.onnx_bench = self.onnx_bench
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"bert-base-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class BertBaseUncasedModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = BertBaseUncasedModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,182 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from iree import runtime as ireert
|
||||
from iree.tf.support import module_utils
|
||||
from iree.compiler import tf as tfc
|
||||
from iree.compiler import compile_str
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
try:
|
||||
import tensorflow_datasets as tfds
|
||||
import tensorflow_models as tfm
|
||||
from official.nlp.modeling import layers
|
||||
from official.nlp.modeling import networks
|
||||
from official.nlp.modeling.models import bert_classifier
|
||||
except ModuleNotFoundError:
|
||||
print(
|
||||
"tensorflow models or datasets not found please run the following command with your virtual env active:\npip install tf-models-nightly tf-datasets"
|
||||
)
|
||||
import json
|
||||
import time
|
||||
import os
|
||||
|
||||
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12"
|
||||
tf.io.gfile.listdir(gs_folder_bert)
|
||||
vocab_size = 100
|
||||
NUM_CLASSES = 2
|
||||
SEQUENCE_LENGTH = 128
|
||||
BATCH_SIZE = 1
|
||||
# Create a set of 2-dimensional inputs
|
||||
bert_input = [
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
|
||||
]
|
||||
|
||||
|
||||
class BertModule(tf.Module):
|
||||
def __init__(self):
|
||||
super(BertModule, self).__init__()
|
||||
dict_outputs = False
|
||||
|
||||
bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
|
||||
|
||||
config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
|
||||
encoder_config = tfm.nlp.encoders.EncoderConfig(
|
||||
{"type": "bert", "bert": config_dict}
|
||||
)
|
||||
bert_encoder = tfm.nlp.encoders.build_encoder(encoder_config)
|
||||
|
||||
# Create a BERT trainer with the created network.
|
||||
bert_trainer_model = bert_classifier.BertClassifier(
|
||||
bert_encoder, num_classes=NUM_CLASSES
|
||||
)
|
||||
bert_trainer_model.summary()
|
||||
checkpoint = tf.train.Checkpoint(encoder=bert_encoder)
|
||||
checkpoint.read(
|
||||
os.path.join(gs_folder_bert, "bert_model.ckpt")
|
||||
).assert_consumed()
|
||||
|
||||
# Invoke the trainer model on the inputs. This causes the layer to be built.
|
||||
self.m = bert_trainer_model
|
||||
self.m.predict = lambda x: self.m.call(x, training=False)
|
||||
self.predict = tf.function(input_signature=[bert_input])(
|
||||
self.m.predict
|
||||
)
|
||||
self.m.learn = lambda x, y: self.m.call(x, training=False)
|
||||
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
|
||||
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
|
||||
|
||||
@tf.function(
|
||||
input_signature=[
|
||||
bert_input, # inputs
|
||||
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
|
||||
]
|
||||
)
|
||||
def learn(self, inputs, labels):
|
||||
with tf.GradientTape() as tape:
|
||||
# Capture the gradients from forward prop...
|
||||
probs = self.m.call(inputs, training=True)
|
||||
loss = self.loss(labels, probs)
|
||||
|
||||
# ...and use them to update the model's weights.
|
||||
variables = self.m.trainable_variables
|
||||
gradients = tape.gradient(loss, variables)
|
||||
self.optimizer.apply_gradients(zip(gradients, variables))
|
||||
return loss
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
glue, info = tfds.load("glue/mrpc", with_info=True, batch_size=BATCH_SIZE)
|
||||
|
||||
tokenizer = tfm.nlp.layers.FastWordpieceBertTokenizer(
|
||||
vocab_file=os.path.join(gs_folder_bert, "vocab.txt"), lower_case=True
|
||||
)
|
||||
|
||||
max_seq_length = SEQUENCE_LENGTH
|
||||
|
||||
packer = tfm.nlp.layers.BertPackInputs(
|
||||
seq_length=max_seq_length,
|
||||
special_tokens_dict=tokenizer.get_special_tokens_dict(),
|
||||
)
|
||||
|
||||
class BertInputProcessor(tf.keras.layers.Layer):
|
||||
def __init__(self, tokenizer, packer):
|
||||
super().__init__()
|
||||
self.tokenizer = tokenizer
|
||||
self.packer = packer
|
||||
|
||||
def call(self, inputs):
|
||||
tok1 = self.tokenizer(inputs["sentence1"])
|
||||
tok2 = self.tokenizer(inputs["sentence2"])
|
||||
|
||||
packed = self.packer([tok1, tok2])
|
||||
|
||||
if "label" in inputs:
|
||||
return packed, inputs["label"]
|
||||
else:
|
||||
return packed
|
||||
|
||||
bert_inputs_processor = BertInputProcessor(tokenizer, packer)
|
||||
glue_train = glue["train"].map(bert_inputs_processor).prefetch(1)
|
||||
glue_validation = glue["validation"].map(bert_inputs_processor).prefetch(1)
|
||||
glue_test = glue["test"].map(bert_inputs_processor).prefetch(1)
|
||||
|
||||
# base tensorflow model
|
||||
bert_model = BertModule()
|
||||
|
||||
# Compile the model using IREE
|
||||
compiler_module = tfc.compile_module(
|
||||
bert_model, exported_names=["learn"], import_only=True
|
||||
)
|
||||
|
||||
# choose from dylib-llvm-aot or cuda
|
||||
backend = "dylib-llvm-aot"
|
||||
if backend == "dylib-llvm-aot":
|
||||
args = [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32",
|
||||
]
|
||||
backend_config = "dylib"
|
||||
|
||||
else:
|
||||
backend_config = "cuda"
|
||||
args = [
|
||||
"--iree-cuda-llvm-target-arch=sm_80",
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
"--iree-enable-fusion-with-reduction-ops",
|
||||
]
|
||||
|
||||
flatbuffer_blob = compile_str(
|
||||
compiler_module,
|
||||
target_backends=[backend],
|
||||
extra_args=args,
|
||||
input_type="mhlo",
|
||||
)
|
||||
|
||||
# Save module as MLIR file in a directory
|
||||
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
|
||||
tracer = ireert.Tracer(os.getcwd())
|
||||
config = ireert.Config("local-sync", tracer)
|
||||
ctx = ireert.SystemContext(config=config)
|
||||
ctx.add_vm_module(vm_module)
|
||||
BertCompiled = ctx.modules.module
|
||||
|
||||
# compare output losses:
|
||||
|
||||
iterations = 10
|
||||
for i in range(iterations):
|
||||
example_inputs, example_labels = next(iter(glue_train))
|
||||
example_labels = tf.cast(example_labels, tf.int32)
|
||||
example_inputs = [value for key, value in example_inputs.items()]
|
||||
|
||||
# iree version
|
||||
iree_loss = BertCompiled.learn(
|
||||
example_inputs, example_labels
|
||||
).to_host()
|
||||
|
||||
# base tensorflow
|
||||
tf_loss = np.array(bert_model.learn(example_inputs, example_labels))
|
||||
print(np.allclose(iree_loss, tf_loss))
|
||||
@@ -1,131 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
from PIL import Image
|
||||
|
||||
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
|
||||
|
||||
|
||||
def generate_inputs(input_details):
|
||||
# input_details shape: [ 1 224 224 3] type: uint8
|
||||
exe_basename = os.path.basename(sys.argv[0])
|
||||
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
|
||||
os.makedirs(workdir, exist_ok=True)
|
||||
|
||||
img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
|
||||
local_path = "/".join([workdir, "bird.bmp"])
|
||||
urllib.request.urlretrieve(img_path, local_path)
|
||||
|
||||
shape = input_details[0]["shape"]
|
||||
im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
|
||||
args = [im.reshape(shape)]
|
||||
return args
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class BirdsV1TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(model_name="birds_V1")
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details = [
|
||||
{
|
||||
"shape": [1, 224, 224, 3],
|
||||
"dtype": np.uint8,
|
||||
}
|
||||
]
|
||||
inputs = generate_inputs(input_details) # device_inputs
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
class BirdsV1TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = BirdsV1TfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
import sys
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
|
||||
)
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = BirdsV1TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,68 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class CamemBertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"camembert-base"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class CamemBertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = CamemBertModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,88 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class CartoonganTfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(model_name="cartoongan")
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
|
||||
class CartoonganTfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = CartoonganTfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = CartoonganTfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,71 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ConvBertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"dbmdz/convbert-base-turkish-cased"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class ConvBertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = ConvBertModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
@pytest.mark.xfail(
|
||||
reason="Issue: https://github.com/iree-org/iree/issues/9971",
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,72 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
from shark.parser import shark_args
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
class DebertaBaseModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"microsoft/deberta-base"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class DebertaBaseModuleTest(unittest.TestCase):
|
||||
@pytest.skip(reason="Model can't be imported.", allow_module_level=True)
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = DebertaBaseModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,90 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class DeepLabV3TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# preprocess to get SharkImporter input args
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(model_name="deeplabv3")
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
|
||||
class DeepLabV3TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = DeepLabV3TfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = DeepLabV3TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,90 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class DensenetTfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
(
|
||||
mlir_model,
|
||||
function_name,
|
||||
inputs,
|
||||
tflite_results,
|
||||
) = download_tflite_model(model_name="densenet")
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
|
||||
class DensenetTfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = DensenetTfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = DensenetTfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,71 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class DistilBertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"distilbert-base-uncased"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class DistilBertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = DistilBertModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,128 +0,0 @@
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from tank.model_utils import compare_tensors
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_downloader import download_torch_model
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
class DistilBertModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model_mlir, func_name, input, act_out = download_torch_model(
|
||||
"distilbert-base-uncased", dynamic
|
||||
)
|
||||
|
||||
# from shark.shark_importer import SharkImporter
|
||||
# mlir_importer = SharkImporter(
|
||||
# model,
|
||||
# (input,),
|
||||
# frontend="torch",
|
||||
# )
|
||||
# minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||
# is_dynamic=dynamic, tracing_required=True
|
||||
# )
|
||||
|
||||
shark_module = SharkInference(
|
||||
model_mlir,
|
||||
func_name,
|
||||
device=device,
|
||||
mlir_dialect="linalg",
|
||||
is_benchmark=self.benchmark,
|
||||
)
|
||||
shark_module.compile()
|
||||
results = shark_module.forward(input)
|
||||
assert True == compare_tensors(act_out, results)
|
||||
|
||||
if self.benchmark == True:
|
||||
shark_module.shark_runner.benchmark_all_csv(
|
||||
(input),
|
||||
"distilbert-base-uncased",
|
||||
dynamic,
|
||||
device,
|
||||
"torch",
|
||||
)
|
||||
|
||||
|
||||
class DistilBertModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = DistilBertModuleTester(self)
|
||||
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
|
||||
)
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
|
||||
)
|
||||
def test_module_dynamic_cpu(self):
|
||||
dynamic = True
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_dynamic_gpu(self):
|
||||
dynamic = True
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_dynamic_vulkan(self):
|
||||
dynamic = True
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,122 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
import os
|
||||
import sys
|
||||
from tank.tflite import imagenet_data
|
||||
|
||||
|
||||
# # Source https://tfhub.dev/sayannath/lite-model/image-scene/1
|
||||
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_224_fp32.tflite"
|
||||
|
||||
|
||||
def generate_inputs(input_details):
|
||||
exe_basename = os.path.basename(sys.argv[0])
|
||||
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
|
||||
os.makedirs(workdir, exist_ok=True)
|
||||
inputs = imagenet_data.generate_input(workdir, input_details)
|
||||
# Normalize inputs to [-1, 1].
|
||||
inputs = (inputs.astype("float32") / 127.5) - 1
|
||||
return [inputs]
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class Efficientnet_224_fp32TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
|
||||
model_name="efficientnet_224_fp32"
|
||||
)
|
||||
|
||||
# Use SharkInference to get inference result
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name=func_name,
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details = [
|
||||
{
|
||||
"shape": [1, 224, 224, 3],
|
||||
"dtype": np.float32,
|
||||
}
|
||||
]
|
||||
inputs = generate_inputs(input_details) # new inputs
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name=func_name,
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
class Efficientnet_224_fp32TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = Efficientnet_224_fp32TfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = Efficientnet_224_fp32TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,121 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
import os
|
||||
import sys
|
||||
from tank.tflite import imagenet_data
|
||||
|
||||
|
||||
# # Source https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/fp32/2
|
||||
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_fp32_2.tflite"
|
||||
|
||||
|
||||
def generate_inputs(input_details):
|
||||
exe_basename = os.path.basename(sys.argv[0])
|
||||
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
|
||||
os.makedirs(workdir, exist_ok=True)
|
||||
inputs = imagenet_data.generate_input(workdir, input_details)
|
||||
# Normalize inputs to [-1, 1].
|
||||
inputs = (inputs.astype("float32") / 127.5) - 1
|
||||
return [inputs]
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class Efficientnet_lite0_fp32_2TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
|
||||
model_name="efficientnet_lite0_fp32_2"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details = [
|
||||
{
|
||||
"shape": [1, 224, 224, 3],
|
||||
"dtype": np.float32,
|
||||
}
|
||||
]
|
||||
inputs = generate_inputs(input_details) # new inputs
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
class Efficientnet_lite0_fp32_2TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = Efficientnet_lite0_fp32_2TfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = Efficientnet_lite0_fp32_2TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,125 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
import os
|
||||
import sys
|
||||
from tank.tflite import imagenet_data
|
||||
|
||||
|
||||
# # Source https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/int8/2
|
||||
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite"
|
||||
|
||||
|
||||
def generate_inputs(input_details):
|
||||
exe_basename = os.path.basename(sys.argv[0])
|
||||
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
|
||||
os.makedirs(workdir, exist_ok=True)
|
||||
|
||||
return [imagenet_data.generate_input(workdir, input_details)]
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class Efficientnet_lite0_int8_2TfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
|
||||
model_name="efficientnet_lite0_int8_2"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details = [
|
||||
{
|
||||
"shape": [1, 224, 224, 3],
|
||||
"dtype": np.uint8,
|
||||
}
|
||||
]
|
||||
inputs = generate_inputs(input_details) # new inputs
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
class Efficientnet_lite0_int8_2TfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = Efficientnet_lite0_int8_2TfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
import sys
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="known macos tflite install issue & "
|
||||
"'tosa.conv2d' op attribute 'quantization_info' failed "
|
||||
)
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = Efficientnet_lite0_int8_2TfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,68 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ElectraModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"google/electra-small-discriminator"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class ElectraModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = ElectraModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("intel-gpu"),
|
||||
reason=device_driver_info("intel-gpu"),
|
||||
)
|
||||
def test_module_static_intel_gpu(self):
|
||||
dynamic = False
|
||||
device = "intel-gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,81 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class ConvNextTinyModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"facebook/convnext-tiny-224"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
# result: array([['logits',
|
||||
# <IREE DeviceArray: shape=[1, 1000], dtype=<class 'numpy.float32'>>]],
|
||||
# dtype=object)
|
||||
|
||||
# post process of img output
|
||||
ir_device_array = result[0][1]
|
||||
logits = ir_device_array.astype(ir_device_array.dtype)
|
||||
logits = np.squeeze(logits, axis=0)
|
||||
print("logits: ", logits.shape)
|
||||
print("golden_out: ", golden_out[0].shape)
|
||||
print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
|
||||
|
||||
|
||||
class ConvNextTinyModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = ConvNextTinyModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# dynamic = False
|
||||
# device = "cpu"
|
||||
# module_tester = ConvNextTinyModuleTester()
|
||||
# module_tester.create_and_check_module(dynamic, device)
|
||||
unittest.main()
|
||||
@@ -1,74 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import iree.compiler as ireec
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class FunnelModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"funnel-transformer/small"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||
|
||||
|
||||
class FunnelModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = FunnelModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.xfail(
|
||||
reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
|
||||
)
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,78 +0,0 @@
|
||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
||||
from shark.shark_inference import SharkInference
|
||||
from shark.shark_downloader import download_tf_model
|
||||
|
||||
import unittest
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
|
||||
class VitBaseModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
benchmark=False,
|
||||
):
|
||||
self.benchmark = benchmark
|
||||
|
||||
def create_and_check_module(self, dynamic, device):
|
||||
model, func_name, inputs, golden_out = download_tf_model(
|
||||
"google/vit-base-patch16-224"
|
||||
)
|
||||
|
||||
shark_module = SharkInference(
|
||||
model, func_name, device=device, mlir_dialect="mhlo"
|
||||
)
|
||||
shark_module.compile()
|
||||
result = shark_module.forward(inputs)
|
||||
|
||||
# post process of img output
|
||||
ir_device_array = result[0][1]
|
||||
logits = ir_device_array.astype(ir_device_array.dtype)
|
||||
logits = np.squeeze(logits, axis=0)
|
||||
print("logits: ", logits.shape)
|
||||
print("golden_out: ", golden_out[0].shape)
|
||||
print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
|
||||
|
||||
|
||||
class VitBaseModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.module_tester = VitBaseModuleTester(self)
|
||||
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("gpu"), reason=device_driver_info("gpu")
|
||||
)
|
||||
def test_module_static_gpu(self):
|
||||
dynamic = False
|
||||
device = "gpu"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
|
||||
)
|
||||
def test_module_static_vulkan(self):
|
||||
dynamic = False
|
||||
device = "vulkan"
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
# @pytest.mark.skipif(
|
||||
# check_device_drivers("intel-gpu"),
|
||||
# reason=device_driver_info("intel-gpu"),
|
||||
# )
|
||||
# def test_module_static_intel_gpu(self):
|
||||
# dynamic = False
|
||||
# device = "intel-gpu"
|
||||
# self.module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dynamic = False
|
||||
device = "cpu"
|
||||
module_tester = VitBaseModuleTester()
|
||||
module_tester.create_and_check_module(dynamic, device)
|
||||
# unittest.main()
|
||||
@@ -1,119 +0,0 @@
|
||||
import numpy as np
|
||||
from shark.shark_downloader import download_tflite_model
|
||||
from shark.shark_inference import SharkInference
|
||||
import pytest
|
||||
import unittest
|
||||
from shark.parser import shark_args
|
||||
|
||||
|
||||
# model_path = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-64.tflite"
|
||||
|
||||
|
||||
def generate_inputs(input_details):
|
||||
args = []
|
||||
args.append(
|
||||
np.random.randint(
|
||||
low=0,
|
||||
high=256,
|
||||
size=input_details[0]["shape"],
|
||||
dtype=input_details[0]["dtype"],
|
||||
)
|
||||
)
|
||||
return args
|
||||
|
||||
|
||||
def compare_results(mlir_results, tflite_results):
|
||||
print("Compare mlir_results VS tflite_results: ")
|
||||
assert len(mlir_results) == len(
|
||||
tflite_results
|
||||
), "Number of results do not match"
|
||||
for i in range(len(mlir_results)):
|
||||
mlir_result = mlir_results[i]
|
||||
tflite_result = tflite_results[i]
|
||||
mlir_result = mlir_result.astype(np.single)
|
||||
tflite_result = tflite_result.astype(np.single)
|
||||
# mlir_result = np.expand_dims(mlir_result, axis=0)
|
||||
print("mlir_result.shape", mlir_result.shape)
|
||||
print("tflite_result.shape", tflite_result.shape)
|
||||
assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
|
||||
max_error = np.max(np.abs(mlir_result - tflite_result))
|
||||
print("Max error (%d): %f", i, max_error)
|
||||
|
||||
|
||||
class GptTfliteModuleTester:
|
||||
def __init__(
|
||||
self,
|
||||
dynamic=False,
|
||||
device="cpu",
|
||||
save_mlir=False,
|
||||
save_vmfb=False,
|
||||
):
|
||||
self.dynamic = dynamic
|
||||
self.device = device
|
||||
self.save_mlir = save_mlir
|
||||
self.save_vmfb = save_vmfb
|
||||
|
||||
def create_and_check_module(self):
|
||||
shark_args.save_mlir = self.save_mlir
|
||||
shark_args.save_vmfb = self.save_vmfb
|
||||
|
||||
# Preprocess to get SharkImporter input args
|
||||
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
|
||||
model_name="gpt2-64"
|
||||
)
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
|
||||
# Case1: Use shark_importer default generate inputs
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
|
||||
# Case2: Use manually set inputs
|
||||
input_details = [
|
||||
{
|
||||
"shape": [1, 64],
|
||||
"dtype": np.int32,
|
||||
}
|
||||
]
|
||||
inputs = generate_inputs(input_details) # new inputs
|
||||
|
||||
shark_module = SharkInference(
|
||||
mlir_module=mlir_model,
|
||||
function_name="main",
|
||||
device=self.device,
|
||||
mlir_dialect="tflite",
|
||||
)
|
||||
shark_module.compile()
|
||||
mlir_results = shark_module.forward(inputs)
|
||||
compare_results(mlir_results, tflite_results)
|
||||
# print(mlir_results)
|
||||
|
||||
|
||||
class GptTfliteModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
self.save_mlir = pytestconfig.getoption("save_mlir")
|
||||
self.save_vmfb = pytestconfig.getoption("save_vmfb")
|
||||
|
||||
def setUp(self):
|
||||
self.module_tester = GptTfliteModuleTester(self)
|
||||
self.module_tester.save_mlir = self.save_mlir
|
||||
|
||||
def test_module_static_cpu(self):
|
||||
self.module_tester.dynamic = False
|
||||
self.module_tester.device = "cpu"
|
||||
self.module_tester.create_and_check_module()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# module_tester = GptTfliteModuleTester()
|
||||
# module_tester.save_mlir = True
|
||||
# module_tester.save_vmfb = True
|
||||
# module_tester.create_and_check_module()
|
||||
|
||||
unittest.main()
|
||||
@@ -1,121 +0,0 @@
|
||||
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data


# Source https://tfhub.dev/tensorflow/lite-model/inception_v4/1/default/1
# model_path = "https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_fp32.tflite"


def generate_inputs(input_details):
    exe_basename = os.path.basename(sys.argv[0])
    workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
    os.makedirs(workdir, exist_ok=True)
    inputs = imagenet_data.generate_input(workdir, input_details)
    # Normalize inputs to [-1, 1].
    inputs = (inputs.astype("float32") / 127.5) - 1
    return [inputs]


def compare_results(mlir_results, tflite_results):
    print("Compare mlir_results VS tflite_results: ")
    assert len(mlir_results) == len(
        tflite_results
    ), "Number of results do not match"
    for i in range(len(mlir_results)):
        mlir_result = mlir_results[i]
        tflite_result = tflite_results[i]
        mlir_result = mlir_result.astype(np.single)
        tflite_result = tflite_result.astype(np.single)
        mlir_result = np.expand_dims(mlir_result, axis=0)
        print("mlir_result.shape", mlir_result.shape)
        print("tflite_result.shape", tflite_result.shape)
        assert mlir_result.shape == tflite_result.shape, "shape does not match"
        max_error = np.max(np.abs(mlir_result - tflite_result))
        print("Max error (%d): %f" % (i, max_error))


class Inception_v4_299_fp32TfliteModuleTester:
    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
        save_vmfb=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir
        self.save_vmfb = save_vmfb

    def create_and_check_module(self):
        shark_args.save_mlir = self.save_mlir
        shark_args.save_vmfb = self.save_vmfb

        # Preprocess to get SharkImporter input args
        mlir_model, func_name, inputs, tflite_results = download_tflite_model(
            model_name="inception_v4_299_fp32"
        )

        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )

        # Case 1: Use shark_importer's default generated inputs
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)

        # Case 2: Use manually set inputs
        input_details = [
            {
                "shape": [1, 299, 299, 3],
                "dtype": np.float32,
            }
        ]
        inputs = generate_inputs(input_details)  # new inputs

        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)
        # print(mlir_results)


class Inception_v4_299_fp32TfliteModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")
        self.save_vmfb = pytestconfig.getoption("save_vmfb")

    def setUp(self):
        self.module_tester = Inception_v4_299_fp32TfliteModuleTester()
        self.module_tester.save_mlir = self.save_mlir

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()


if __name__ == "__main__":
    # module_tester = Inception_v4_299_fp32TfliteModuleTester()
    # module_tester.save_mlir = True
    # module_tester.save_vmfb = True
    # module_tester.create_and_check_module()

    unittest.main()
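The compare_results helper above only prints the maximum absolute error; it never fails the test when outputs diverge numerically. A minimal sketch of a stricter variant, assuming numpy is available and using illustrative rtol/atol values that are not taken from the source (compare_results_strict is a hypothetical name):

import numpy as np


def compare_results_strict(mlir_results, tflite_results, rtol=1e-2, atol=1e-3):
    # Pair up outputs exactly as compare_results does above.
    assert len(mlir_results) == len(tflite_results), "Number of results do not match"
    for mlir_result, tflite_result in zip(mlir_results, tflite_results):
        mlir_result = np.expand_dims(mlir_result.astype(np.single), axis=0)
        tflite_result = tflite_result.astype(np.single)
        # Raises AssertionError with a per-element report when any value is out of tolerance.
        np.testing.assert_allclose(mlir_result, tflite_result, rtol=rtol, atol=atol)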
@@ -1,125 +0,0 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data


# Source https://tfhub.dev/tensorflow/lite-model/inception_v4_quant/1/default/1
# model_path = "https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_uint8.tflite"


def generate_inputs(input_details):
    exe_basename = os.path.basename(sys.argv[0])
    workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
    os.makedirs(workdir, exist_ok=True)
    return [imagenet_data.generate_input(workdir, input_details)]


def compare_results(mlir_results, tflite_results):
    print("Compare mlir_results VS tflite_results: ")
    assert len(mlir_results) == len(
        tflite_results
    ), "Number of results do not match"
    for i in range(len(mlir_results)):
        mlir_result = mlir_results[i]
        tflite_result = tflite_results[i]
        mlir_result = mlir_result.astype(np.single)
        tflite_result = tflite_result.astype(np.single)
        mlir_result = np.expand_dims(mlir_result, axis=0)
        print("mlir_result.shape", mlir_result.shape)
        print("tflite_result.shape", tflite_result.shape)
        assert mlir_result.shape == tflite_result.shape, "shape does not match"
        max_error = np.max(np.abs(mlir_result - tflite_result))
        print("Max error (%d): %f" % (i, max_error))


class Inception_v4_299_uint8TfliteModuleTester:
    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
        save_vmfb=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir
        self.save_vmfb = save_vmfb

    def create_and_check_module(self):
        shark_args.save_mlir = self.save_mlir
        shark_args.save_vmfb = self.save_vmfb

        # Preprocess to get SharkImporter input args
        mlir_model, func_name, inputs, tflite_results = download_tflite_model(
            model_name="inception_v4_299_uint8"
        )

        # Use SharkInference to get the inference result
        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )

        # Case 1: Use shark_importer's default generated inputs
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)

        # Case 2: Use manually set inputs
        input_details = [
            {
                "shape": [1, 299, 299, 3],
                "dtype": np.uint8,
            }
        ]
        inputs = generate_inputs(input_details)  # new inputs

        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)
        # print(mlir_results)


class Inception_v4_299_uint8TfliteModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")
        self.save_vmfb = pytestconfig.getoption("save_vmfb")

    def setUp(self):
        self.module_tester = Inception_v4_299_uint8TfliteModuleTester()
        self.module_tester.save_mlir = self.save_mlir

    @pytest.mark.xfail(
        reason="known macOS tflite install issue & "
        "'tosa.conv2d' op attribute 'quantization_info' failed"
    )
    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()


if __name__ == "__main__":
    # module_tester = Inception_v4_299_uint8TfliteModuleTester()
    # module_tester.save_mlir = True
    # module_tester.save_vmfb = True
    # module_tester.create_and_check_module()

    unittest.main()
@@ -1,68 +0,0 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model

import iree.compiler as ireec
import unittest
import pytest
import numpy as np


class LayoutLMModuleTester:
    def __init__(
        self,
        benchmark=False,
    ):
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        model, func_name, inputs, golden_out = download_tf_model(
            "microsoft/layoutlm-base-uncased"
        )

        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class LayoutLMModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = LayoutLMModuleTester()
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
@@ -1,69 +0,0 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model

import iree.compiler as ireec
import unittest
import pytest
import numpy as np


class LongformerModuleTester:
    def __init__(
        self,
        benchmark=False,
    ):
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        model, func_name, inputs, golden_out = download_tf_model(
            "allenai/longformer-base-4096"
        )

        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


class LongformerModuleTest(unittest.TestCase):
    # Evaluating this decorator at class-definition time calls pytest.skip with
    # allow_module_level=True, so the whole module is skipped during collection.
    @pytest.skip(reason="Model can't be imported.", allow_module_level=True)
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.module_tester = LongformerModuleTester()
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"),
        reason=device_driver_info("intel-gpu"),
    )
    def test_module_static_intel_gpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == "__main__":
    unittest.main()
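The TF test classes above repeat the same skipif-guarded method once per device. A possible consolidation into a single parametrized pytest function, sketched under the assumption that it lives in the same file as LongformerModuleTester and that the test name and device list are free choices (they are not part of the source files):

import pytest

from shark.iree_utils._common import check_device_drivers, device_driver_info


@pytest.mark.parametrize("device", ["cpu", "gpu", "vulkan", "intel-gpu"])
def test_longformer_static(device):
    # Skip at runtime when the device's driver is unavailable, mirroring the
    # skipif decorators used in the unittest-style classes above.
    if check_device_drivers(device):
        pytest.skip(device_driver_info(device))
    # LongformerModuleTester is defined in the file above.
    LongformerModuleTester().create_and_check_module(dynamic=False, device=device)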
Some files were not shown because too many files have changed in this diff.