Compare commits


3 Commits

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| Prashant Kumar | d8c9225af8 | Add unet_torch reference. (#283) (adds unet_torch reference; deletes distilbert-base-uncased_torch_test.py) | 2022-08-19 13:35:10 +05:30 |
| Prashant Kumar | 62f3573d43 | Add distilbert_torch reference. | 2022-08-19 13:34:26 +05:30 |
| Stanley Winata | b73f79be66 | Add Substantial testing for IntelGPU | 2022-08-18 21:21:43 -07:00 |
239 changed files with 22782 additions and 141 deletions


@@ -1,37 +0,0 @@
# See: https://github.com/llvm/torch-mlir/issues/1374
name: Publish releases page
on:
workflow_dispatch:
jobs:
scrape_and_publish_releases:
name: "Scrape and publish releases"
runs-on: ubuntu-latest
# Don't run this in everyone's forks.
if: github.repository == 'nod-ai/SHARK'
steps:
- name: Checking out repository
uses: actions/checkout@v2
with:
token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
- name: Run scrape releases script
run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
shell: bash
- run: git fetch --all
- run: git switch github-pages
- run: git config --global user.email "none@none.com"
- run: git config --global user.name "nod-team"
- run: mv /tmp/index.html package-index/index.html
- run: git add package-index/index.html
# Only try to make a commit if the file has changed.
- run: git diff --cached --exit-code || git commit -m "Update releases."
- name: GitHub Push
uses: ad-m/github-push-action@v0.6.0
with:
github_token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
branch: github-pages


@@ -16,7 +16,6 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.10"]
backend: [IREE, SHARK]
steps:
- uses: actions/checkout@v3
@@ -39,10 +38,6 @@ jobs:
tag_name="${package_version}"
echo "package_version=${package_version}" >> $GITHUB_ENV
echo "tag_name=${tag_name}" >> $GITHUB_ENV
- name: Set Environment Variables
run: |
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Create Release
id: create_release
uses: actions/create-release@v1
@@ -54,18 +49,12 @@ jobs:
body: |
Automatic snapshot release of nod.ai SHARK.
draft: true
prerelease: false
- name: Find Torch-MLIR Release
run: |
TM_HTML_URL="$(python3 -c "import urllib.request, json, sys; u=json.loads(urllib.request.urlopen('https://api.github.com/repos/llvm/torch-mlir/releases/latest').read().decode()).get('html_url', False); print(u) if u else sys.exit(1);")"
TM_RELEASE_DIR=${TM_HTML_URL/"tag"/"expanded_assets"}
echo "TM_RELEASE_DIR=${TM_RELEASE_DIR}" >> $GITHUB_ENV
prerelease: false
- name: Install dependencies
run: |
echo "Torch-MLIR Release DIR is ${{ env.TM_RELEASE_DIR }}"
python -m pip install --upgrade pip
python -m pip install flake8 pytest toml
if [ -f requirements.txt ]; then pip install -r requirements.txt -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -73,19 +62,46 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
- name: Build and validate the IREE package
if: ${{ matrix.backend == 'IREE' }}
run: |
cd $GITHUB_WORKSPACE
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
source iree.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/iree-org/iree/releases
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/iree-org/iree/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
/bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
rm -rf ./wheelhouse/nodai*
- name: Build and validate the SHARK Runtime package
run: |
cd $GITHUB_WORKSPACE
./setup_venv.sh
source shark.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
pytest -k 'cpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
pytest -k 'gpu' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
pytest -k 'vulkan' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/ |
tail -n 1 |
tee -a pytest_results.txt
if !(grep -Fxq " failed" pytest_results.txt)
@@ -94,36 +110,20 @@ jobs:
gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
fi
rm pytest_results.txt
rm -rf ./wheelhouse/nodai*
- name: Build and validate the SHARK Runtime package
if: ${{ matrix.backend == 'SHARK' }}
run: |
cd $GITHUB_WORKSPACE
./setup_venv.sh
source shark.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
tail -n 1 |
tee -a pytest_results.txt
- name: Upload Release Assets
if: ${{ matrix.backend == 'SHARK' }}
id: upload-release-assets
uses: dwenegar/upload-release-assets@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}
assets_path: ${GITHUB_WORKSPACE}/wheelhouse/nodai_*.whl
assets_path: ./wheelhouse/nodai_*.whl
- name: Publish Release
if: ${{ matrix.backend == 'SHARK' }}
id: publish_release
uses: eregon/publish-release@v1
env:


@@ -15,8 +15,8 @@ jobs:
strategy:
fail-fast: true
matrix:
os: [icelake, a100, MacStudio, ubuntu-latest]
suite: [cpu,cuda,vulkan]
os: [a100, MacStudio, ubuntu-latest]
suite: [cpu,gpu,vulkan]
python-version: ["3.10"]
include:
- os: ubuntu-latest
@@ -25,21 +25,15 @@ jobs:
- os: ubuntu-latest
suite: vulkan
- os: ubuntu-latest
suite: cuda
suite: gpu
- os: ubuntu-latest
suite: cpu
- os: MacStudio
suite: cuda
suite: gpu
- os: MacStudio
suite: cpu
- os: MacStudio
suite: vulkan
- os: icelake
suite: vulkan
- os: icelake
suite: cuda
- os: a100
suite: cpu
runs-on: ${{ matrix.os }}
@@ -52,13 +46,13 @@ jobs:
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Set up Python Version File ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
run: |
# See https://github.com/actions/setup-python/issues/433
echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
- name: Set up Python ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
uses: actions/setup-python@v4
with:
python-version: '${{ matrix.python-version }}'
@@ -84,30 +78,27 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
- name: Validate Models on CPU
- name: Validate CPU Models
if: matrix.suite == 'cpu'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
pytest -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
- name: Validate Models on NVIDIA GPU
if: matrix.suite == 'cuda'
- name: Validate GPU Models
if: matrix.suite == 'gpu'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
pytest --benchmark -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_gpu_${SHORT_SHA}.csv
- name: Validate Vulkan Models
if: matrix.suite == 'vulkan'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
pytest -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py

.gitmodules (vendored, new file, 4 lines)

@@ -0,0 +1,4 @@
[submodule "inference/thirdparty/shark-runtime"]
path = inference/thirdparty/shark-runtime
url = https://github.com/nod-ai/SHARK-Runtime.git
branch = shark-06032022

.style.yapf (new file, 3 lines)

@@ -0,0 +1,3 @@
[style]
based_on_style = google
column_limit = 80

README.md (new file, 392 lines)

@@ -0,0 +1,392 @@
# SHARK
High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerators and Heterogeneous Clusters
[![Nightly Release](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[![Validate torch-models on Shark Runtime](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)
## Communication Channels
* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real-time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs, etc.
## Installation
<details>
<summary>Installation (Linux and macOS)</summary>
### Set up a new pip virtual environment
This step sets up a new virtual environment for Python.
```shell
python --version # Check that you have Python 3.7-3.10 on Linux or 3.10 on macOS
python -m venv shark_venv
source shark_venv/bin/activate
# If you are using conda, create and activate a new conda env instead
# Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip
```
*macOS Metal* users: please install the Vulkan SDK from https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install".
### Install SHARK
This step pip-installs SHARK and related packages (Python 3.7-3.10 on Linux; Python 3.10 on macOS).
```shell
pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/shark-runtime/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```
If you are on an Intel macOS machine you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.
### Download and run Resnet50 sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
#Install deps for test script
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
```
### Download and run BERT (MiniLM) sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
#Install deps for test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
```
</details>
<details>
<summary>Source Installation</summary>
## Check out the code
```shell
git clone https://github.com/nod-ai/SHARK.git
```
## Set up your Python virtual environment and dependencies
```shell
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
source shark.venv/bin/activate
```
For example, if you want to use Python 3.10 and upstream IREE with the TF import tools, you can set environment variables like:
```shell
PYTHON=python3.10 VENV_DIR=0617_venv IMPORTER=1 USE_IREE=1 ./setup_venv.sh
```
If you are a Torch-MLIR or IREE developer and want to test local changes, you can uninstall
the provided packages with `pip uninstall torch-mlir` and/or `pip uninstall iree-compiler iree-runtime`, then build locally
with Python bindings and set your PYTHONPATH as described [here](https://google.github.io/iree/bindings/python/)
for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
for Torch-MLIR.
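A minimal sketch of that workflow (the build paths below are illustrative, not the actual locations; adjust them to wherever you built IREE and Torch-MLIR):
```shell
# Remove the pinned wheels so your local builds take precedence.
pip uninstall torch-mlir
pip uninstall iree-compiler iree-runtime
# Point Python at your local builds (hypothetical paths).
export PYTHONPATH=/path/to/iree-build/bindings/python:/path/to/torch-mlir/build/tools/torch-mlir/python_packages/torch_mlir:$PYTHONPATH
```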
### Run a demo script
```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
# Or a pytest
pytest tank/tf/hf_masked_lm/albert-base-v2_test.py::AlbertBaseModuleTest::test_module_static_cpu
```
</details>
<details>
<summary>Testing</summary>
### Run all model tests on CPU/GPU/VULKAN/Metal
```shell
pytest tank
# If on Linux, for multithreading on CPU (faster results):
pytest tank -n auto
```
### Running specific tests
```shell
# Run tests for a specific model:
pytest tank/<MODEL_NAME> # e.g., pytest tank/bert-base-uncased
# Run tests for a specific case:
pytest tank/<MODEL_NAME> -k "keyword"
# e.g., pytest tank/bert-base-uncased/bert-base-uncased_test.py -k "static_gpu"
```
### Run benchmarks on SHARK tank pytests and generate bench_results.csv
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
```shell
pytest --benchmark tank
# Just do static GPU benchmarks for PyTorch tests:
pytest --benchmark tank --ignore-glob="_tf*" -k "static_gpu"
```
### Benchmark Resnet50, MiniLM on CPU
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
```shell
# We suggest running the following commands as root before running benchmarks on CPU:
cat /sys/devices/system/cpu/cpu*/topology/thread_siblings_list | awk -F, '{print $2}' | sort -n | uniq | ( while read X ; do echo $X ; echo 0 > /sys/devices/system/cpu/cpu$X/online ; done )
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
# Benchmark canonical Resnet50 on CPU via pytest
pytest --benchmark tank/resnet50/ -k "cpu"
# Benchmark canonical MiniLM on CPU via pytest
pytest --benchmark tank/MiniLM-L12-H384-uncased/ -k "cpu"
# Benchmark MiniLM on CPU via transformer-benchmarks:
git clone --recursive https://github.com/nod-ai/transformer-benchmarks.git
cd transformer-benchmarks
./perf-ci.sh -n
# Check detail.csv for MLIR/IREE results.
```
</details>
<details>
<summary>API Reference</summary>
### Shark Inference API
```python
from shark.shark_importer import SharkImporter

# SharkImporter imports an MLIR module from a torch, tensorflow, or tf-lite model.
mlir_importer = SharkImporter(
    torch_module,
    (input,),
    frontend="torch",  # or "tf", "tf-lite"
)
torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)

# SharkInference accepts MLIR in the linalg, mhlo, or tosa dialect.
from shark.shark_inference import SharkInference

shark_module = SharkInference(torch_mlir, func_name, device="cpu", mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input,))
```
### Example demonstrating running MHLO IR
```python
from shark.shark_inference import SharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
shark_module = SharkInference(mhlo_ir, func_name="forward", device="cpu", mlir_dialect="mhlo")
shark_module.compile()
result = shark_module.forward((arg0, arg1))
```
</details>
## Supported and Validated Models
<details>
<summary>PyTorch Models</summary>
### Huggingface PyTorch Models
| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| Albert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| BigBird | :green_heart: (AOT) | | | |
| DistilBERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| GPT2 | :broken_heart: (AOT) | | | |
| MobileBert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
### Torchvision Models
| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|--------------------|----------------------|----------|----------|-------------|
| AlexNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| DenseNet121 | :green_heart: (Script) | | | |
| MNasNet1_0 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| MobileNetV2 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| MobileNetV3 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Unet | :broken_heart: (Script) | | | |
| Resnet18 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnet50 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnet101 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnext50_32x4d | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| ShuffleNet_v2 | :broken_heart: (Script) | | | |
| SqueezeNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| EfficientNet | :green_heart: (Script) | | | |
| Regnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnest | :broken_heart: (Script) | | | |
| Vision Transformer | :green_heart: (Script) | | | |
| VGG 16 | :green_heart: (Script) | :green_heart: | :green_heart: | |
| Wide Resnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| RAFT | :broken_heart: (JIT) | | | |
For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0)
### PyTorch Training Models
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>JAX Models</summary>
### JAX Models
| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| DALL-E | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>TFLite Models</summary>
### TFLite Models
| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
| albert | :green_heart: | :green_heart: | | |
| asr_conformer | :green_heart: | :green_heart: | | |
| bird_classifier | :green_heart: | :green_heart: | | |
| cartoon_gan | :green_heart: | :green_heart: | | |
| craft_text | :green_heart: | :green_heart: | | |
| deeplab_v3 | :green_heart: | :green_heart: | | |
| densenet | :green_heart: | :green_heart: | | |
| east_text_detector | :green_heart: | :green_heart: | | |
| efficientnet_lite0_int8 | :green_heart: | :green_heart: | | |
| efficientnet | :green_heart: | :green_heart: | | |
| gpt2 | :green_heart: | :green_heart: | | |
| image_stylization | :green_heart: | :green_heart: | | |
| inception_v4 | :green_heart: | :green_heart: | | |
| inception_v4_uint8 | :green_heart: | :green_heart: | | |
| lightning_fp16 | :green_heart: | :green_heart: | | |
| lightning_i8 | :green_heart: | :green_heart: | | |
| lightning | :green_heart: | :green_heart: | | |
| magenta | :green_heart: | :green_heart: | | |
| midas | :green_heart: | :green_heart: | | |
| mirnet | :green_heart: | :green_heart: | | |
| mnasnet | :green_heart: | :green_heart: | | |
| mobilebert_edgetpu_s_float | :green_heart: | :green_heart: | | |
| mobilebert_edgetpu_s_quant | :green_heart: | :green_heart: | | |
| mobilebert | :green_heart: | :green_heart: | | |
| mobilebert_tf2_float | :green_heart: | :green_heart: | | |
| mobilebert_tf2_quant | :green_heart: | :green_heart: | | |
| mobilenet_ssd_quant | :green_heart: | :green_heart: | | |
| mobilenet_v1 | :green_heart: | :green_heart: | | |
| mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
| mobilenet_v2 | :green_heart: | :green_heart: | | |
| mobilenet_v2_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v3-large | :green_heart: | :green_heart: | | |
| mobilenet_v3-large_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v35-int8 | :green_heart: | :green_heart: | | |
| nasnet | :green_heart: | :green_heart: | | |
| person_detect | :green_heart: | :green_heart: | | |
| posenet | :green_heart: | :green_heart: | | |
| resnet_50_int8 | :green_heart: | :green_heart: | | |
| rosetta | :green_heart: | :green_heart: | | |
| spice | :green_heart: | :green_heart: | | |
| squeezenet | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v1 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_fpnlite | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_fpnlite_uint8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2 | :green_heart: | :green_heart: | | |
| ssd_spaghettinet_large | :green_heart: | :green_heart: | | |
| ssd_spaghettinet_large_uint8 | :green_heart: | :green_heart: | | |
| visual_wake_words_i8 | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>TF Models</summary>
### Tensorflow Models (Inference)
| Hugging Face Models | tf-mhlo lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| albert-base-v2 | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| DistilBERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| CamemBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| ConvBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| Deberta | | | | |
| electra | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| funnel | | | | |
| layoutlm | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| longformer | | | | |
| mobile-bert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| remembert | | | | |
| tapas | | | | |
| flaubert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| xlm-roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| mpnet | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
</details>
## Related Projects
<details>
<summary>IREE Project Channels</summary>
* [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests,
bugs, and other work tracking
* [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development
discussions with the core team and collaborators
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
Announcements, general and low-priority discussion
</details>
<details>
<summary>MLIR and Torch-MLIR Project Channels</summary>
* `#torch-mlir` channel on the LLVM [Discord](https://discord.gg/xS7Z362) - this is the most active communication channel
* Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
* Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31)

SHARK and IREE are enabled by and heavily rely on [MLIR](https://mlir.llvm.org).
</details>
## License
nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
See [LICENSE](LICENSE) for more information.

benchmarks/__init__.py (new file, empty)


@@ -0,0 +1,22 @@
import torch
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
parser.add_argument(
"--model_name",
type=str,
required=True,
help='Specifies the name of the HF model to benchmark (for example, "microsoft/MiniLM-L12-H384-uncased").',
)
load_args, unknown = parser.parse_known_args()
if __name__ == "__main__":
model_name = load_args.model_name
test_input = torch.randint(2, (1, 128))
shark_module = SharkHFBenchmarkRunner(
model_name, (test_input,), jit_trace=True
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)
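# Example invocation (the script path here is assumed, not confirmed by the diff;
# adjust it to wherever this file lives in the repo):
#   python benchmarks/hf_benchmark.py --model_name "microsoft/MiniLM-L12-H384-uncased"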


@@ -0,0 +1,181 @@
import torch
from shark.shark_benchmark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import (
run_pytorch,
run_tensorflow,
run_onnxruntime,
)
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
import psutil
class OnnxFusionOptions(object):
def __init__(self):
self.disable_gelu = False
self.disable_layer_norm = False
self.disable_attention = False
self.disable_skip_layer_norm = False
self.disable_embed_layer_norm = False
self.disable_bias_skip_layer_norm = False
self.disable_bias_gelu = False
self.enable_gelu_approximation = False
self.use_mask_index = False
self.no_attention_mask = False
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
model_name: str,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
):
self.device = device if device is not None else shark_args.device
if self.device == "gpu":
raise ValueError(
"Currently GPU Benchmarking is not supported due to OOM from ORT."
)
self.model_name = model_name
model = HuggingFaceLanguage(model_name)
SharkBenchmarkRunner.__init__(
self,
model,
input,
dynamic,
self.device,
jit_trace,
from_aot,
frontend,
)
def benchmark_torch(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_pytorch(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
False,
cache_dir,
verbose,
)
print(
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
# TODO: Currently non-functional due to a TF runtime error. There might be some issue with initializing TF.
def benchmark_tf(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_tensorflow(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
cache_dir,
verbose,
)
print(
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_onnx(self, inputs):
if self.model_name not in MODELS:
print(
f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting ONNX benchmark."
)
return
use_gpu = self.device == "gpu"
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
onnx_dir = os.path.join(".", "onnx_models")
verbose = False
input_counts = [1]
optimize_onnx = True
validate_onnx = False
disable_ort_io_binding = False
use_raw_attention_mask = True
model_fusion_statistics = {}
overwrite = False
model_source = "pt" # Either "pt" or "tf"
provider = None
config_modifier = ConfigModifier(None)
onnx_args = OnnxFusionOptions()
result = run_onnxruntime(
use_gpu,
provider,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
input_counts,
optimize_onnx,
validate_onnx,
cache_dir,
onnx_dir,
verbose,
overwrite,
disable_ort_io_binding,
use_raw_attention_mask,
model_fusion_statistics,
model_source,
onnx_args,
)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)


@@ -0,0 +1,231 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers
import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import (
AutoModelForSequenceClassification,
BertTokenizer,
TFBertModel,
)
import importlib
import pytest
import unittest
torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(name)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
actual_out = model.forward(*test_input)
return model, test_input, actual_out
##################### Hugging Face LM Models ###################################
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
def get_hf_model(name):
model = HuggingFaceLanguage(name)
# TODO: Currently the test input is set to (1,128)
test_input = torch.randint(2, (1, 128))
actual_out = model(test_input)
return model, test_input, actual_out
################################################################################
##################### Torch Vision Models ###################################
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
def get_vision_model(torch_model):
model = VisionModule(torch_model)
# TODO: Currently the test input is set to (1, 3, 224, 224)
test_input = torch.randn(1, 3, 224, 224)
actual_out = model(test_input)
return model, test_input, actual_out
############################# Benchmark Tests ####################################
pytest_benchmark_param = pytest.mark.parametrize(
("dynamic", "device"),
[
pytest.param(False, "cpu"),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, "cpu", marks=pytest.mark.skip),
pytest.param(
False,
"gpu",
marks=pytest.mark.skipif(
check_device_drivers("gpu"), reason="nvidia-smi not found"
),
),
pytest.param(True, "gpu", marks=pytest.mark.skip),
pytest.param(
False,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
pytest.param(
True,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
],
)
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
model,
(test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert True.
shark_module.compile()
shark_module.benchmark_all((test_input,))
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False
@pytest.mark.skip(reason="XLM Roberta too large to test.")
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False


@@ -0,0 +1,45 @@
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest
torch.manual_seed(0)
############################# HF Benchmark Tests ####################################
# Test running benchmark module without failing.
pytest_benchmark_param = pytest.mark.parametrize(
("dynamic", "device"),
[
pytest.param(False, "cpu"),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, "cpu", marks=pytest.mark.skip),
],
)
@pytest.mark.skipif(
importlib.util.find_spec("onnxruntime") is None,
reason="Cannot find ONNXRUNTIME.",
)
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
model_name = "bert-base-uncased"
test_input = torch.randint(2, (1, 128))
try:
shark_module = SharkHFBenchmarkRunner(
model_name,
(test_input,),
jit_trace=True,
dynamic=dynamic,
device=device,
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)
# If benchmarking is successful, assert True.
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False


@@ -0,0 +1,5 @@
#!/bin/bash
IMPORTER=1 ./setup_venv.sh
source $GITHUB_WORKSPACE/shark.venv/bin/activate
python generate_sharktank.py --upload=False

conftest.py (new file, 33 lines)

@@ -0,0 +1,33 @@
def pytest_addoption(parser):
# Attaches SHARK command-line arguments to the pytest machinery.
parser.addoption(
"--benchmark",
action="store_true",
default="False",
help="Pass option to benchmark and write results.csv",
)
parser.addoption(
"--onnx_bench",
action="store_true",
default="False",
help="Add ONNX benchmark results to pytest benchmarks.",
)
# The following options are deprecated and pending removal.
parser.addoption(
"--save_mlir",
action="store_true",
default="False",
help="Pass option to save input MLIR",
)
parser.addoption(
"--save_vmfb",
action="store_true",
default="False",
help="Pass option to save IREE output .vmfb",
)
parser.addoption(
"--save_temps",
action="store_true",
default="False",
help="Saves IREE reproduction artifacts for filing upstream issues.",
)

generate_sharktank.py (new file, 235 lines)

@@ -0,0 +1,235 @@
# Lint as: python3
"""SHARK Tank"""
# Running `python generate_sharktank.py` with a csv file of [model_name, model_download_url]
# will generate a local shark tank folder like this:
# /SHARK
# /gen_shark_tank
# /albert_lite_base
# /...model_name...
#
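# Example CSV rows (hypothetical; see the csv files under tank/ for the real lists):
#   torch_model_list.csv:   microsoft/MiniLM-L12-H384-uncased,True,hf
#   tflite_model_list.csv:  albert_lite_base,<model_download_url>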
import os
import csv
import argparse
from shark.shark_importer import SharkImporter
import tensorflow as tf
import subprocess as sp
import hashlib
import numpy as np
visible_default = tf.config.list_physical_devices("GPU")
try:
tf.config.set_visible_devices([], "GPU")
visible_devices = tf.config.get_visible_devices()
for device in visible_devices:
assert device.device_type != "GPU"
except:
# Invalid device or cannot modify virtual devices once initialized.
pass
# All generated models and metadata will be saved under this directory.
WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
def create_hash(file_name):
with open(file_name, "rb") as f:
file_hash = hashlib.blake2b()
while chunk := f.read(2**20):
file_hash.update(chunk)
return file_hash.hexdigest()
def save_torch_model(torch_model_list):
from tank.model_utils import get_hf_model
from tank.model_utils import get_vision_model
with open(torch_model_list) as csvfile:
torch_reader = csv.reader(csvfile, delimiter=",")
fields = next(torch_reader)
for row in torch_reader:
torch_model_name = row[0]
tracing_required = row[1]
model_type = row[2]
tracing_required = False if tracing_required == "False" else True
model = None
input = None
if model_type == "vision":
model, input, _ = get_vision_model(torch_model_name)
elif model_type == "hf":
model, input, _ = get_hf_model(torch_model_name)
torch_model_name = torch_model_name.replace("/", "_")
torch_model_dir = os.path.join(
WORKDIR, str(torch_model_name) + "_torch"
)
os.makedirs(torch_model_dir, exist_ok=True)
mlir_importer = SharkImporter(
model,
(input,),
frontend="torch",
)
mlir_importer.import_debug(
is_dynamic=False,
tracing_required=tracing_required,
dir=torch_model_dir,
model_name=torch_model_name,
)
mlir_hash = create_hash(
os.path.join(
torch_model_dir, torch_model_name + "_torch" + ".mlir"
)
)
np.save(os.path.join(torch_model_dir, "hash"), np.array(mlir_hash))
# Generate torch dynamic models.
mlir_importer.import_debug(
is_dynamic=True,
tracing_required=tracing_required,
dir=torch_model_dir,
model_name=torch_model_name + "_dynamic",
)
def save_tf_model(tf_model_list):
from tank.model_utils_tf import (
get_causal_image_model,
get_causal_lm_model,
get_keras_model,
get_TFhf_model,
)
with open(tf_model_list) as csvfile:
tf_reader = csv.reader(csvfile, delimiter=",")
fields = next(tf_reader)
for row in tf_reader:
tf_model_name = row[0]
model_type = row[1]
model = None
input = None
print(f"Generating artifacts for model {tf_model_name}")
if model_type == "hf":
model, input, _ = get_causal_lm_model(tf_model_name)
if model_type == "img":
model, input, _ = get_causal_image_model(tf_model_name)
if model_type == "keras":
model, input, _ = get_keras_model(tf_model_name)
if model_type == "TFhf":
model, input, _ = get_TFhf_model(tf_model_name)
tf_model_name = tf_model_name.replace("/", "_")
tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
os.makedirs(tf_model_dir, exist_ok=True)
mlir_importer = SharkImporter(
model,
input,
frontend="tf",
)
mlir_importer.import_debug(
dir=tf_model_dir,
model_name=tf_model_name,
)
mlir_hash = create_hash(
os.path.join(tf_model_dir, tf_model_name + "_tf" + ".mlir")
)
np.save(os.path.join(tf_model_dir, "hash"), np.array(mlir_hash))
def save_tflite_model(tflite_model_list):
from shark.tflite_utils import TFLitePreprocessor
with open(tflite_model_list) as csvfile:
tflite_reader = csv.reader(csvfile, delimiter=",")
for row in tflite_reader:
print("\n")
tflite_model_name = row[0]
tflite_model_link = row[1]
print("tflite_model_name", tflite_model_name)
print("tflite_model_link", tflite_model_link)
tflite_model_name_dir = os.path.join(
WORKDIR, str(tflite_model_name) + "_tflite"
)
os.makedirs(tflite_model_name_dir, exist_ok=True)
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
# Preprocess to get SharkImporter input args
tflite_preprocessor = TFLitePreprocessor(str(tflite_model_name))
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
inputs = tflite_preprocessor.get_inputs()
tflite_interpreter = tflite_preprocessor.get_interpreter()
# Use SharkImporter to get SharkInference input args
my_shark_importer = SharkImporter(
module=tflite_interpreter,
inputs=inputs,
frontend="tflite",
raw_model_file=raw_model_file_path,
)
my_shark_importer.import_debug(
dir=tflite_model_name_dir,
model_name=tflite_model_name,
func_name="main",
)
mlir_hash = create_hash(
os.path.join(
tflite_model_name_dir,
tflite_model_name + "_tflite" + ".mlir",
)
)
np.save(
os.path.join(tflite_model_name_dir, "hash"),
np.array(mlir_hash),
)
# Validates whether the file is present or not.
def is_valid_file(arg):
if not os.path.exists(arg):
return None
else:
return arg
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--torch_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/pytorch/torch_model_list.csv",
help="""Contains the file with torch_model name and args.
Please see: https://github.com/nod-ai/SHARK/blob/main/tank/pytorch/torch_model_list.csv""",
)
parser.add_argument(
"--tf_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tf/tf_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument(
"--tflite_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tflite/tflite_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument("--upload", type=bool, default=False)
args = parser.parse_args()
if args.torch_model_csv:
save_torch_model(args.torch_model_csv)
if args.tf_model_csv:
save_tf_model(args.tf_model_csv)
if args.tflite_model_csv:
save_tflite_model(args.tflite_model_csv)
if args.upload:
git_hash = sp.getoutput("git log -1 --format='%h'") + "/"
print("uploading files to gs://shark_tank/" + git_hash)
os.system(
"gsutil cp -r ./gen_shark_tank/* gs://shark_tank/" + git_hash
)

inference/CMakeLists.txt (new file, 192 lines)

@@ -0,0 +1,192 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(sharkbackend LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
# FetchContent requires us to include the transitive closure of all
# repos that we depend on so that we can override the tags.
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
repo-backend
GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
GIT_TAG ${TRITON_BACKEND_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)
#
# The backend must be built into a shared library. Use an ldscript to
# hide all symbols except for the TRITONBACKEND API.
#
configure_file(src/libtriton_dshark.ldscript libtriton_dshark.ldscript COPYONLY)
add_library(
triton-dshark-backend SHARED
src/dshark.cc
#src/dshark_driver_module.c
)
add_library(
SharkBackend::triton-dshark-backend ALIAS triton-dshark-backend
)
target_include_directories(
triton-dshark-backend
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_BINARY_DIR}/lib/cmake/mlir")
add_subdirectory(thirdparty/shark-runtime EXCLUDE_FROM_ALL)
target_link_libraries(triton-dshark-backend PRIVATE iree_base_base
iree_hal_hal
iree_hal_cuda_cuda
iree_hal_cuda_registration_registration
iree_hal_vmvx_registration_registration
iree_hal_dylib_registration_registration
iree_modules_hal_hal
iree_vm_vm
iree_vm_bytecode_module
iree_hal_local_loaders_system_library_loader
iree_hal_local_loaders_vmvx_module_loader
)
target_compile_features(triton-dshark-backend PRIVATE cxx_std_11)
target_link_libraries(
triton-dshark-backend
PRIVATE
triton-core-serverapi # from repo-core
triton-core-backendapi # from repo-core
triton-core-serverstub # from repo-core
triton-backend-utils # from repo-backend
)
if(WIN32)
set_target_properties(
triton-dshark-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME triton_dshark
)
else()
set_target_properties(
triton-dshark-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME triton_dshark
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dshark.ldscript
LINK_FLAGS "-Wl,--version-script libtriton_dshark.ldscript"
)
endif()
#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/SharkBackend)
install(
TARGETS
triton-dshark-backend
EXPORT
triton-dshark-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
)
install(
EXPORT
triton-dshark-backend-targets
FILE
SharkBackendTargets.cmake
NAMESPACE
SharkBackend::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/SharkBackendConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
DESTINATION ${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT triton-dshark-backend-targets
FILE ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendTargets.cmake
NAMESPACE SharkBackend::
)
export(PACKAGE SharkBackend)

inference/README.md (new file, 100 lines)

@@ -0,0 +1,100 @@
# SHARK Triton Backend
The Triton backend for SHARK.
# Build
Install SHARK
```
git clone https://github.com/nod-ai/SHARK.git
# skip above step if dshark is already installed
cd SHARK/inference
```
Install dependencies
```
apt-get install patchelf rapidjson-dev python3-dev
git submodule update --init
```
Update the submodules of IREE
```
cd thirdparty/shark-runtime
git submodule update --init
```
Next, make the backend and install it
```
cd ../..
mkdir build && cd build
cmake -DTRITON_ENABLE_GPU=ON \
-DIREE_HAL_DRIVER_CUDA=ON \
-DIREE_TARGET_BACKEND_CUDA=ON \
-DMLIR_ENABLE_CUDA_RUNNER=ON \
-DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
-DTRITON_BACKEND_REPO_TAG=r22.02 \
-DTRITON_CORE_REPO_TAG=r22.02 \
-DTRITON_COMMON_REPO_TAG=r22.02 ..
make install
```
# Incorporating into Triton
There are much more in-depth explanations of the following steps in Triton's documentation:
https://github.com/triton-inference-server/server/blob/main/docs/compose.md#triton-with-unsupported-and-custom-backends
There should be a file at `/build/install/backends/dshark/libtriton_dshark.so`. You will need to copy it into your Triton server image.
More documentation is in the link above, but to create the docker image you need to run the `compose.py` command in the Triton server repo.
To first build your image, clone the tritonserver repo.
```
git clone https://github.com/triton-inference-server/server.git
```
Then run `compose.py` to generate a `Dockerfile.compose`
```
cd server
python3 compose.py --repoagent checksum --dry-run
```
Because dshark is a third-party backend, you will need to manually modify the generated `Dockerfile.compose` to include the dshark backend. The dshark backend is located in the build folder from earlier, under `/build/install/backends`; copy it in with a line like this.
```
COPY /path/to/build/install/backends/dshark /opt/tritonserver/backends/dshark
```
Next run
```
docker build -t tritonserver_custom -f Dockerfile.compose .
docker run -it --gpus=1 --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```
where `path/to/model_repos` is where you are storing the models you want to run
If you're not using GPUs, omit `--gpus=1`
```
docker run -it --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```
# Setting up a model
To include a model in your backend, add a directory named after your model to your model-repository directory. Example models can be seen here: https://github.com/triton-inference-server/backend/tree/main/examples/model_repos/minimal_models
Make sure to adjust the inputs correctly in the `config.pbtxt` file, and save a compiled `.vmfb` file under `1/model.vmfb`, as sketched below.
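A minimal sketch of such a layout (the model name, tensor names, shapes, and data types below are hypothetical placeholders; the `backend` field is assumed to match the dshark backend built above):
```
mkdir -p model_repos/my_model/1
cp my_model.vmfb model_repos/my_model/1/model.vmfb
cat > model_repos/my_model/config.pbtxt <<'EOF'
name: "my_model"
backend: "dshark"
max_batch_size: 0
input [ { name: "input0", data_type: TYPE_FP32, dims: [ 1, 4 ] } ]
output [ { name: "output0", data_type: TYPE_FP32, dims: [ 4, 4 ] } ]
EOF
```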
# CUDA
If you're having issues with CUDA, make sure the correct drivers are installed, that `nvidia-smi` works, and that the `nvcc` compiler is on the PATH.
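A quick sanity check (assuming the NVIDIA driver and CUDA toolkit are already installed):
```
nvidia-smi       # the driver can see the GPU
nvcc --version   # the CUDA compiler is installed and on the PATH
```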


@@ -0,0 +1,39 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
SHARKBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${SHARKBACKEND_CMAKE_DIR})
if(NOT TARGET SharkBackend::triton-dshark-backend)
include("${SHARKBACKEND_CMAKE_DIR}/SharkBackendTargets.cmake")
endif()
set(SHARKBACKEND_LIBRARIES SharkBackend::triton-dshark-backend)

inference/src/dshark.cc (new file, 1409 lines): diff suppressed because it is too large.

@@ -0,0 +1,30 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};


@@ -1,45 +0,0 @@
<!DOCTYPE html>
<html>
<body>
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_20230130_481.exe'>shark_sd_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_cli_20230130_481.exe'>shark_sd_cli_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_20230129_479.exe'>shark_sd_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_cli_20230129_479.exe'>shark_sd_cli_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_20230129_480.exe'>shark_sd_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_cli_20230129_480.exe'>shark_sd_cli_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_20230129_478.exe'>shark_sd_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_cli_20230129_478.exe'>shark_sd_cli_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_20230128_477.exe'>shark_sd_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_cli_20230128_477.exe'>shark_sd_cli_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_20230127_476.exe'>shark_sd_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_cli_20230127_476.exe'>shark_sd_cli_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_20230126_475.exe'>shark_sd_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_cli_20230126_475.exe'>shark_sd_cli_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_20230125_474.exe'>shark_sd_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_cli_20230125_474.exe'>shark_sd_cli_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_20230125_473.exe'>shark_sd_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_cli_20230125_473.exe'>shark_sd_cli_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.472/shark_sd_20230125_472.exe'>shark_sd_20230125_472.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.471/shark_sd_20230125_471.exe'>shark_sd_20230125_471.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.468/shark_sd_20230125_468.exe'>shark_sd_20230125_468.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_20230124_470.exe'>shark_sd_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_cli_20230124_470.exe'>shark_sd_cli_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.469/shark_sd_20230124_469.exe'>shark_sd_20230124_469.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.467/shark_sd_20230124_467.exe'>shark_sd_20230124_467.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.466/shark_sd_20230124_466.exe'>shark_sd_20230124_466.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.462/shark_sd_20230124_462.exe'>shark_sd_20230124_462.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.461/shark_sd_20230123_461.exe'>shark_sd_20230123_461.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.460/shark_sd_20230123_460.exe'>shark_sd_20230123_460.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.459/shark_sd_20230122_459.exe'>shark_sd_20230122_459.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.458/shark_sd_20230122_458.exe'>shark_sd_20230122_458.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.457/shark_sd_20230122_457.exe'>shark_sd_20230122_457.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230121.456/shark_sd_20230121_456.exe'>shark_sd_20230121_456.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230120.455/shark_sd_20230120_455.exe'>shark_sd_20230120_455.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230119.454/shark_sd_20230119_454.exe'>shark_sd_20230119_454.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230118.453/shark_sd_20230118_453.exe'>shark_sd_20230118_453.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230117.452/shark_sd_20230117_452.exe'>shark_sd_20230117_452.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230116.451/shark_sd_20230116_451.exe'>shark_sd_20230116_451.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230115.450/shark_sd_20230115_450.exe'>shark_sd_20230115_450.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230114.449/shark_sd_20230114_449.exe'>shark_sd_20230114_449.exe</a><br />
</body>
</html>

pyproject.toml (new file, 12 lines)

@@ -0,0 +1,12 @@
[build-system]
requires = [
"setuptools>=42",
"wheel",
"packaging",
"numpy==1.22.4",
"torch-mlir>=20220428.420",
"iree-compiler>=20220427.13",
"iree-runtime>=20220427.13",
]
build-backend = "setuptools.build_meta"

pytest.ini (new file, 3 lines)

@@ -0,0 +1,3 @@
[pytest]
addopts = --verbose -p no:warnings
norecursedirs = inference tank/tflite
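With this configuration, a typical single-model run might look like the following (the test file path is hypothetical, and the `--benchmark` flag is assumed to be defined in the repo's pytest conftest, since the tests below read it via `pytestconfig.getoption`):
```
# --verbose and warning suppression come from the addopts above;
# the inference/ and tank/tflite directories are never collected
pytest tank/resnet50_test.py -k "static_cpu"
```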


@@ -0,0 +1,109 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import pytest
class BertBaseUncasedModuleTester:
def __init__(
self,
benchmark=False,
onnx_bench=False,
):
self.benchmark = benchmark
self.onnx_bench = onnx_bench
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"bert-base-uncased", dynamic
)
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_args.onnx_bench = self.onnx_bench
shark_module.shark_runner.benchmark_all_csv(
(input),
"bert-base-uncased",
dynamic,
device,
"torch",
)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,71 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class DistilBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"distilbert-base-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class DistilBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DistilBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(reason="shark_tank hash issues -- awaiting triage")
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,95 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.parser import shark_args
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class DistilBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"distilbert-base-uncased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"distilbert-base-uncased",
dynamic,
device,
"torch",
)
class DistilBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DistilBertModuleTester(self)
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,114 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class MobileNetV3ModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"mobilenet_v3_small", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"alexnet",
dynamic,
device,
"torch",
)
class MobileNetV3ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MobileNetV3ModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(reason="golden results don't match.")
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(reason="golden results don't match.")
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(reason="stuck in the pipeline.")
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,114 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class Resnet101ModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"resnet101", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"resnet101",
dynamic,
device,
"torch",
)
class Resnet101ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = Resnet101ModuleTester(self)
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,114 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import get_vision_model, compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class Resnet50ModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"resnet50", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"resnet50",
dynamic,
device,
"torch",
)
class Resnet50ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = Resnet50ModuleTester(self)
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,91 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class UnetModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"unet", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"unet",
dynamic,
device,
"torch",
)
class UnetModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = UnetModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,41 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
numpy
torch
torchvision
tqdm
#iree-compiler | iree-runtime should already be installed
# these don't work on macOS
#iree-tools-tflite
#iree-tools-xla
#iree-tools-tf
# TensorFlow and JAX.
gin-config
tensorflow-macos
tensorflow-metal
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
tensorflow-probability
#jax[cpu]
# tflitehub dependencies.
Pillow
# Testing and support.
#lit
#pyyaml
#ONNX and ORT for benchmarking
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly

requirements-importer.txt (new file, 40 lines)

@@ -0,0 +1,40 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
numpy==1.22.4
torch
torchvision
tqdm
#iree-compiler | iree-runtime should already be installed
iree-tools-tflite
iree-tools-xla
iree-tools-tf
# TensorFlow and JAX.
gin-config
tensorflow
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
#tensorflow-probability
#jax[cpu]
# tflitehub dependencies.
Pillow
# Testing and support.
lit
pyyaml
#ONNX and ORT for benchmarking
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly

requirements.txt (new file, 13 lines)

@@ -0,0 +1,13 @@
setuptools
wheel
# SHARK Runner
tqdm
# SHARK Downloader
gsutil
# Testing
pytest
pytest-xdist
Pillow

setup.py (new file, 38 lines)

@@ -0,0 +1,38 @@
from setuptools import find_packages
from setuptools import setup
import os
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
setup(
name="nodai-SHARK",
version=f"{PACKAGE_VERSION}",
description="SHARK provides a High Performance Machine Learning Framework",
author="nod.ai",
author_email="stdin@nod.ai",
url="https://nod.ai",
long_description=long_description,
long_description_content_type="text/markdown",
project_urls={
"Code": "https://github.com/nod-ai/SHARK",
"Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
packages=find_packages(exclude=("examples",)),
python_requires=">=3.7",
install_requires=[
"numpy",
"PyYAML",
"torch-mlir>=20220428.420",
"iree-compiler>=20220427.13",
"iree-runtime>=20220427.13",
],
)
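As a usage sketch (the version string here is arbitrary), the package version can be overridden at build time via the `SHARK_PACKAGE_VERSION` environment variable read above:
```
SHARK_PACKAGE_VERSION=0.0.5 python -m pip install -e .
```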

setup_venv.sh (new executable file, 135 lines)

@@ -0,0 +1,135 @@
#!/bin/bash
# Sets up a venv suitable for running samples.
# e.g:
# ./setup_venv.sh #setup a default $PYTHON3 shark.venv
# Environment variables used by the script:
# PYTHON=$PYTHON3.10 ./setup_venv.sh #pass a version of $PYTHON to use
# VENV_DIR=myshark.venv #create a venv called myshark.venv
# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
# IMPORTER=1 #Install importer deps
# if you run the script from a conda env it will install in your conda env
TD="$(cd $(dirname $0) && pwd)"
if [ -z "$PYTHON" ]; then
PYTHON="$(which python3)"
fi
function die() {
echo "Error executing command: $*"
exit 1
}
PYTHON_VERSION_X_Y=`${PYTHON} -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))'`
echo "Python: $PYTHON"
echo "Python version: $PYTHON_VERSION_X_Y"
if [[ -z "${CONDA_PREFIX}" ]]; then
# Not a conda env. So create a new VENV dir
VENV_DIR=${VENV_DIR:-shark.venv}
echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
$PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
source "$VENV_DIR/bin/activate" || die "Could not activate venv"
PYTHON="$(which python3)"
else
echo "Found conda env $CONDA_DEFAULT_ENV. Running pip install inside the conda env"
fi
Red=`tput setaf 1`
Green=`tput setaf 2`
Yellow=`tput setaf 3`
# Assume no binary torch-mlir.
# Currently available for macOS m1&intel (3.10) and Linux(3.7,3.8,3.9,3.10)
torch_mlir_bin=false
if [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}Apple macOS detected"
if [[ $(uname -m) == 'arm64' ]]; then
echo "${Yellow}Apple M1 Detected"
hash rustc 2>/dev/null
if [ $? -eq 0 ];then
echo "${Green}rustc found to compile HF tokenizers"
else
echo "${Red}Could not find rustc" >&2
echo "${Red}Please run:"
echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
exit 1
fi
fi
echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
if [ "$PYTHON_VERSION_X_Y" == "3.10" ]; then
torch_mlir_bin=true
fi
elif [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected"
if [ "$PYTHON_VERSION_X_Y" == "3.7" ] || [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.9" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] ; then
torch_mlir_bin=true
fi
else
echo "${Red}OS not detected. Pray and Play"
fi
# Upgrade pip and install requirements.
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
if [ "$torch_mlir_bin" = true ]; then
$PYTHON -m pip install --find-links https://github.com/llvm/torch-mlir/releases torch-mlir --extra-index-url https://download.pytorch.org/whl/nightly/cpu
if [ $? -eq 0 ];then
echo "Successfully Installed torch-mlir"
else
echo "Could not install torch-mlir" >&2
fi
else
echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
echo "${Yello}Python 3.10 supported on macOS and 3.7,3.8,3.9 and 3.10 on Linux"
echo "${Red}Please build torch-mlir from source in your environment"
exit 1
fi
if [[ -z "${USE_IREE}" ]]; then
RUNTIME="nod-ai/SHARK-Runtime"
else
RUNTIME="google/iree"
fi
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --find-links https://github.com/${RUNTIME}/releases iree-compiler iree-runtime
if [[ ! -z "${IMPORTER}" ]]; then
echo "${Yellow}Installing importer tools.."
if [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected.. installing Linux importer tools"
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cu116
elif [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}macOS detected.. installing macOS importer tools"
# Conda seems to have some problems installing these packages; we hope they get resolved upstream.
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
fi
fi
$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases
if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
$PYTHON -m pip uninstall -y torch torchvision
$PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
if [ $? -eq 0 ];then
echo "Successfully Installed torch + cu116."
else
echo "Could not install torch + cu116." >&2
fi
fi
if [[ ! -z "${ONNX}" ]]; then
echo "${Yellow}Installing ONNX and onnxruntime for benchmarks..."
$PYTHON -m pip install onnx onnxruntime psutil
if [ $? -eq 0 ];then
echo "Successfully installed ONNX and ONNX runtime."
else
echo "Could not install ONNX." >&2
fi
fi
if [[ -z "${CONDA_PREFIX}" ]]; then
echo "${Green}Before running examples activate venv with:"
echo " ${Green}source $VENV_DIR/bin/activate"
fi

shark/__init__.py (new file, empty)

shark/backward_makefx.py (new file, 78 lines)

@@ -0,0 +1,78 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch._decomp import get_decompositions
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import _stateless
from torch import fx
import tempfile
class MakeFxModule:
def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
self.model = model
self.inputs = inputs
self.custom_inference_fn = custom_inference_fn
self.training_graph = None
# Doesn't replace the None type.
def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
for node in fx_g.graph.nodes:
if node.op == "output":
# output nodes always have one argument
node_arg = node.args[0]
out_nodes = []
if isinstance(node_arg, list):
# Don't return NoneType elements.
for out_node in node_arg:
if not isinstance(out_node, type(None)):
out_nodes.append(out_node)
# If there is a single tensor/element to be returned, don't
# create a tuple for it.
if len(out_nodes) == 1:
node.args = out_nodes
else:
node.args = (tuple(out_nodes),)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
def generate_graph(self):
fx_g = make_fx(
self.custom_inference_fn,
decomposition_table=get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
]
),
)(
dict(self.model.named_parameters()),
dict(self.model.named_buffers()),
self.inputs,
)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile(
suffix="_shark_ts", prefix="temp_ts_"
)
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)
self.training_graph = new_ts
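A hypothetical usage sketch of this class (the model, inputs, and `inference_fn` below are illustrative, not from this repo; the only contract taken from the code above is that `custom_inference_fn` receives the named parameters, the named buffers, and the inputs):
```
import torch
from torch.nn.utils import _stateless
from shark.backward_makefx import MakeFxModule

model = torch.nn.Linear(4, 2)
inputs = torch.randn(1, 4)

def inference_fn(params, buffers, args):
    # run the model functionally against the given params/buffers
    return _stateless.functional_call(model, {**params, **buffers}, (args,))

maker = MakeFxModule(model, inputs, custom_inference_fn=inference_fn)
maker.generate_graph()
scripted = maker.training_graph  # a TorchScript module produced via make_fx
```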


@@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from shark.iree_utils import get_iree_compiled_module"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# torch dynamo related imports\n",
"try:\n",
" import torchdynamo\n",
" from torchdynamo.optimizations.backends import create_backend\n",
" from torchdynamo.optimizations.subgraph import SubGraph\n",
"except ModuleNotFoundError:\n",
" print(\"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\")\n",
" exit()\n",
"\n",
"# torch-mlir imports for compiling\n",
"from torch_mlir import compile, OutputType"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"def toy_example(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# compiler that lowers fx_graph to through MLIR\n",
"def __torch_mlir(fx_graph, *args, **kwargs):\n",
" assert isinstance(\n",
" fx_graph, torch.fx.GraphModule\n",
" ), \"Model must be an FX GraphModule.\"\n",
"\n",
" def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
" \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
"\n",
" for node in fx_g.graph.nodes:\n",
" if node.op == \"output\":\n",
" assert len(node.args) == 1, \"Output node must have a single argument\"\n",
" node_arg = node.args[0]\n",
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
" node.args = (node_arg[0],)\n",
" fx_g.graph.lint()\n",
" fx_g.recompile()\n",
" return fx_g\n",
"\n",
" fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
" ts_graph = torch.jit.script(fx_graph)\n",
"\n",
" # torchdynamo does munges the args differently depending on whether you use\n",
" # the @torchdynamo.optimize decorator or the context manager\n",
" if isinstance(args, tuple):\n",
" args = list(args)\n",
" assert isinstance(args, list)\n",
" if len(args) == 1 and isinstance(args[0], list):\n",
" args = args[0]\n",
"\n",
" linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)\n",
" callable, _ = get_iree_compiled_module(linalg_module, \"cuda\", func_name=\"forward\")\n",
"\n",
" def forward(*inputs):\n",
" return callable(*inputs)\n",
"\n",
" return forward"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.40066046 -0.4210303 0.03225489 -0.44849953 0.10370405 -0.04422468\n",
" 0.33262825 -0.20109026 0.02102537 -0.24882983]\n",
"[-0.07824923 -0.17004533 0.06439921 -0.06163602 0.26633525 -1.1560082\n",
" -0.06660341 0.24227881 0.1462235 -0.32055548]\n",
"[-0.01464001 0.442209 -0.0607936 -0.5477967 -0.25226554 -0.08588809\n",
" -0.30497575 0.00061084 -0.50069696 0.2317973 ]\n",
"[ 0.25726247 0.39388427 -0.24093066 0.12316308 -0.01981307 0.5661146\n",
" 0.26199922 0.8123446 -0.01576749 0.30846444]\n",
"[ 0.7878203 -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
" -1.6837492 -0.38442805 0.28220773 -1.5325156 ]\n",
"[ 0.07975311 0.67754704 -0.30927914 0.00347631 -0.07326564 0.01893554\n",
" -0.7518105 -0.03078967 -0.07623022 0.38865626]\n",
"[-0.7751679 -0.5841397 -0.6622711 0.18574935 -0.6049372 0.02844244\n",
" -0.20471913 0.3337415 -0.3619432 -0.35087156]\n",
"[-0.08569919 -0.10775139 -0.02338934 0.21933547 -0.46712473 0.00062137\n",
" -0.58207744 0.06457533 0.18276742 0.03866556]\n",
"[-0.2311981 -0.43036282 0.20561649 -0.10363232 -0.13248594 0.02885137\n",
" -0.31241602 -0.36907142 0.08861586 0.2331427 ]\n",
"[-0.07273526 -0.31246194 -0.24218291 -0.24145737 0.0364486 0.14382267\n",
" -0.00531162 0.15447603 -0.5220248 -0.09016377]\n"
]
}
],
"source": [
"with torchdynamo.optimize(__torch_mlir):\n",
" for _ in range(10):\n",
" print(toy_example(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"It can also be used through a decorator:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"@create_backend\n",
"def torch_mlir(subgraph, *args, **kwargs):\n",
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
"\n",
"@torchdynamo.optimize(\"torch_mlir\")\n",
"def toy_example2(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.35494277 0.03409214 -0.02271946 0.7335942 0.03122527 -0.41881397\n",
" -0.6609761 -0.6418614 0.29336175 -0.01973678]\n",
"[-2.7246824e-01 -3.5543957e-01 6.0087401e-01 -7.4570496e-03\n",
" -4.2481605e-02 -5.0296803e-04 7.2928613e-01 -1.4673788e-03\n",
" -2.7621329e-01 -6.0995776e-02]\n",
"[-0.03165906 0.3889693 0.24052973 0.27279532 -0.02773128 -0.12602475\n",
" -1.0124422 0.5720256 -0.35437614 -0.20992722]\n",
"[-0.41831446 0.5525326 -0.29749998 -0.17044766 0.11804754 -0.05210691\n",
" -0.46145165 -0.8776549 0.10090438 0.17463352]\n",
"[ 0.02194221 0.20959911 0.26973712 0.12551276 -0.0020404 0.1490246\n",
" -0.04456685 1.1100804 0.8105744 0.6676846 ]\n",
"[ 0.06528181 -0.13591261 0.5370964 -0.4398162 -0.03372452 0.9691372\n",
" -0.01120087 0.2947028 0.4804801 -0.3324341 ]\n",
"[ 0.33549032 -0.23001772 -0.08681437 0.16490957 -0.11223086 0.09168988\n",
" 0.02403045 0.17344482 0.46406478 -0.00129451]\n",
"[-0.27475086 0.42384806 1.9090122 -0.41147137 -0.6888369 0.08435658\n",
" -0.26628923 -0.17436793 -0.8058869 -0.02582378]\n",
"[-0.10109414 0.08681287 -0.10055986 0.6858881 0.29267687 -0.02797117\n",
" -0.01425194 0.4882803 0.3551982 -0.858935 ]\n",
"[-0.22086617 0.524994 0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
" 0.11938014 -0.01122053 0.39294165 -0.61770755]\n"
]
}
],
"source": [
"for _ in range(10):\n",
" print(toy_example2(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -0,0 +1,92 @@
import torch
from torch_mlir import compile, OutputType
from shark.iree_utils import get_iree_compiled_module
try:
import torchdynamo
from torchdynamo.optimizations.backends import create_backend
from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
print(
"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo"
)
exit()
NUM_ITERS = 10
def __torch_mlir(fx_graph, *args, **kwargs):
assert isinstance(
fx_graph, torch.fx.GraphModule
), "Model must be an FX GraphModule."
def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
"""Replace tuple with tuple element in functions that return one-element tuples."""
for node in fx_g.graph.nodes:
if node.op == "output":
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple) and len(node_arg) == 1:
node.args = (node_arg[0],)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
fx_graph = _unwrap_single_tuple_return(fx_graph)
ts_graph = torch.jit.script(fx_graph)
if isinstance(args, tuple):
args = list(args)
assert isinstance(args, list)
if len(args) == 1 and isinstance(args[0], list):
args = args[0]
linalg_module = compile(
ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS
)
callable, _ = get_iree_compiled_module(
linalg_module, "cuda", func_name="forward"
)
def forward(*inputs):
return callable(*inputs)
return forward
def toy_example(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
with torchdynamo.optimize(__torch_mlir):
for _ in range(NUM_ITERS):
print(toy_example(torch.randn(10), torch.randn(10)))
@create_backend
def torch_mlir(subgraph, *args, **kwargs):
assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))
@torchdynamo.optimize("torch_mlir")
def toy_example2(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
for _ in range(NUM_ITERS):
print(toy_example2(torch.randn(10), torch.randn(10)))


@@ -0,0 +1,805 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from torch_mlir.eager_mode import torch_mlir_tensor"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# eager mode imports\n",
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"and wrapping all your `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
]
}
],
"source": [
"NUM_ITERS = 10\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"for i in range(NUM_ITERS):\n",
" yy = tt + uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = tt * uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)\n",
"\n",
"yy = tt + uu\n",
"print(yy.elem.to_host())\n",
"yy = tt * uu\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"for i in range(NUM_ITERS):\n",
" yy = t + u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = t * u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"yy = t + u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())\n",
"yy = t * u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,148 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode
def test_cpu():
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(type(yy))
print(yy.elem.to_host())
yy = tt * uu
print(type(yy))
print(yy.elem.to_host())
def test_gpu():
source = """
#include <iostream>
#include "cuda.h"
#include "cuda_runtime_api.h"
using namespace std;
void print_free_mem() {
size_t free, total;
cudaSetDevice(0);
int id;
cudaGetDevice(&id);
cudaMemGetInfo(&free, &total);
cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
}
"""
gpu_stats = load_inline(
name="inline_extension",
cpp_sources=[source],
extra_include_paths=include_paths(cuda=True),
functions=["print_free_mem"],
)
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(yy.elem.to_host())
yy = tt * uu
print(yy.elem.to_host())
gpu_stats.print_free_mem()
def test_python_mode_ref_backend():
# Keep the instance alive; eager mode stays installed until it is garbage collected.
_ = SharkEagerMode("refbackend")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
print(i)
yy = t + u
print(yy.elem)
yy = t * u
print(yy.elem)
def test_python_mode_iree_cpu():
# Keep the instance alive; eager mode stays installed until it is garbage collected.
_ = SharkEagerMode("cpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
def test_python_mode_iree_gpu():
_ = SharkEagerMode("gpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
if __name__ == "__main__":
NUM_ITERS = 10
test_cpu()
if torch.cuda.is_available():
test_gpu()
test_python_mode_ref_backend()
test_python_mode_iree_cpu()
test_python_mode_iree_gpu()

View File

@@ -0,0 +1,65 @@
from PIL import Image
import requests
from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
clip_vit_inputs = [
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]
class CLIPModule(tf.Module):
def __init__(self):
super(CLIPModule, self).__init__()
self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
self.m.predict = lambda x, y, z: self.m(
input_ids=x, attention_mask=y, pixel_values=z
)
@tf.function(input_signature=clip_vit_inputs)
def forward(self, input_ids, attention_mask, pixel_values):
return self.m.predict(
input_ids, attention_mask, pixel_values
).logits_per_image
if __name__ == "__main__":
# Prepping Data
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(
text=["a photo of a cat", "a photo of a dog"],
images=image,
return_tensors="tf",
padding=True,
)
shark_module = SharkInference(
CLIPModule(),
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
)
)
)

View File

@@ -0,0 +1,88 @@
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
class AlbertModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
self.model.eval()
def forward(self, input_ids, attention_mask):
return self.model(
input_ids=input_ids, attention_mask=attention_mask
).logits
if __name__ == "__main__":
# Prepping Data
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
text = "This [MASK] is very tasty."
encoded_inputs = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="pt",
)
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter(
AlbertModule(),
inputs,
frontend="torch",
)
minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=True
)
shark_module = SharkInference(
minilm_mlir, func_name, mlir_dialect="linalg"
)
shark_module.compile()
token_logits = torch.tensor(shark_module.forward(inputs))
mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1]
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
print(
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
while True:
try:
new_text = input("Give me a sentence with [MASK] to fill: ")
encoded_inputs = tokenizer(
new_text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="pt",
)
inputs = (
encoded_inputs["input_ids"],
encoded_inputs["attention_mask"],
)
token_logits = torch.tensor(shark_module.forward(inputs))
mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1]
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = (
torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
)
for token in top_5_tokens:
print(
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
except KeyboardInterrupt:
print("Exiting program.")
break

View File

@@ -0,0 +1,100 @@
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np
import sys
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of inputs
t5_inputs = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class AlbertModule(tf.Module):
def __init__(self):
super(AlbertModule, self).__init__()
self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
@tf.function(input_signature=t5_inputs)
def forward(self, input_ids, attention_mask):
return self.m.predict(input_ids, attention_mask)
if __name__ == "__main__":
# Prepping Data
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
# text = "This is a great [MASK]."
text = "This [MASK] is very tasty."
encoded_inputs = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="tf",
)
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter(
AlbertModule(),
inputs,
frontend="tf",
)
minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=False
)
shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="mhlo")
shark_module.compile()
output_idx = 0
data_idx = 1
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
)
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
for token in top_5_tokens:
print(
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
while True:
try:
new_text = input("Give me a sentence with [MASK] to fill: ")
encoded_inputs = tokenizer(
new_text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="tf",
)
inputs = (
encoded_inputs["input_ids"],
encoded_inputs["attention_mask"],
)
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"])
== tokenizer.mask_token_id
)
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[
0:5
]
for token in top_5_tokens:
print(
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
except KeyboardInterrupt:
print("Exiting program.")
sys.exit()

View File

@@ -0,0 +1,40 @@
from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
gpt2_inputs = [
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
]
class GPT2Module(tf.Module):
def __init__(self):
super(GPT2Module, self).__init__()
self.m = TFGPT2Model.from_pretrained("distilgpt2")
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
@tf.function(input_signature=gpt2_inputs)
def forward(self, input_ids, attention_mask):
return self.m.predict(input_ids, attention_mask)
if __name__ == "__main__":
# Prepping Data
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf")
shark_module = SharkInference(
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
)

View File

@@ -0,0 +1,37 @@
from shark.shark_inference import SharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
print("Running shark on cpu backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="cpu", mlir_dialect="mhlo"
)
# Generate the random inputs and feed into the graph.
x = shark_module.generate_random_inputs()
shark_module.compile()
print(shark_module.forward(x))
print("Running shark on cuda backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="cuda", mlir_dialect="mhlo"
)
shark_module.compile()
print(shark_module.forward(x))
print("Running shark on vulkan backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="vulkan", mlir_dialect="mhlo"
)
shark_module.compile()
print(shark_module.forward(x))

View File

@@ -0,0 +1,35 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(
MiniLMSequenceClassification(),
(test_input,),
jit_trace=True,
benchmark_mode=True,
)
shark_module.compile()
shark_module.forward((test_input,))
shark_module.benchmark_all((test_input,))

View File

@@ -0,0 +1,61 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
shark_module = SharkInference(
BertModule(), test_input, benchmark_mode=True
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)

View File

@@ -0,0 +1,24 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model
mlir_model, func_name, inputs, golden_out = download_torch_model(
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
mlir_model, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)
# Let's generate random inputs, currently supported
# for static models.
rand_inputs = shark_module.generate_random_inputs()
rand_results = shark_module.forward(rand_inputs)
print("Running shark_module with random_inputs is: ", rand_results)

View File

@@ -0,0 +1,70 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
shark_module = SharkInference(
BertModule(),
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
)
)

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,39 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
class ResnestModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"zhanghang1989/ResNeSt", "resnest50", pretrained=True
)
self.model.eval()
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter(
ResnestModule(),
(input,),
frontend="torch",
)
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=True
)
print(golden_out)
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input,))
print("Obtained result", result)

View File

@@ -0,0 +1,81 @@
from PIL import Image
import requests
import torch
import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model
################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}
img = Image.open(
requests.get(url, headers=headers, stream=True).raw
).convert("RGB")
# preprocessing pipeline
preprocess = transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
)
img_preprocessed = preprocess(img)
return torch.unsqueeze(img_preprocessed, 0)
def load_labels():
classes_text = requests.get(
"https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
stream=True,
).text
labels = [line.strip() for line in classes_text.splitlines()]
return labels
def top3_possibilities(res):
_, indexes = torch.sort(res, descending=True)
percentage = torch.nn.functional.softmax(res, dim=1)[0] * 100
top3 = [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
return top3
class Resnet50Module(torch.nn.Module):
def __init__(self):
super().__init__()
self.resnet = models.resnet50(pretrained=True)
self.train(False)
def forward(self, img):
return self.resnet.forward(img)
image_url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
print("load image from " + image_url, file=sys.stderr)
img = load_and_preprocess_image(image_url)
labels = load_labels()
##############################################################################
## Can pass any img or input to the forward module.
mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((img.detach().numpy(),))
print("The top 3 results obtained via shark_runner is:")
print(top3_possibilities(torch.from_numpy(result)))
print()
print("The top 3 results obtained via torch is:")
print(top3_possibilities(Resnet50Module()(img)))

View File

@@ -0,0 +1,35 @@
from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
t5_inputs = [
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]
class T5Module(tf.Module):
def __init__(self):
super(T5Module, self).__init__()
self.m = TFT5Model.from_pretrained("t5-small")
self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)
@tf.function(input_signature=t5_inputs)
def forward(self, input_ids, decoder_input_ids):
return self.m.predict(input_ids, decoder_input_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = T5Tokenizer.from_pretrained("t5-small")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf").input_ids
shark_module = SharkInference(T5Module(), (inputs, inputs))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs, inputs)))

View File

@@ -0,0 +1,43 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
## The vision models present here: https://pytorch.org/vision/stable/models.html
vision_models_list = [
models.resnet18(pretrained=True),
models.alexnet(pretrained=True),
models.vgg16(pretrained=True),
models.squeezenet1_0(pretrained=True),
models.densenet161(pretrained=True),
models.inception_v3(pretrained=True),
models.shufflenet_v2_x1_0(pretrained=True),
models.mobilenet_v2(pretrained=True),
models.mobilenet_v3_small(pretrained=True),
models.resnext50_32x4d(pretrained=True),
models.wide_resnet50_2(pretrained=True),
models.mnasnet1_0(pretrained=True),
models.efficientnet_b0(pretrained=True),
models.regnet_y_400mf(pretrained=True),
models.regnet_x_400mf(pretrained=True),
]
for i, vision_model in enumerate(vision_models_list):
shark_module = SharkInference(
VisionModule(vision_model),
(input,),
)
shark_module.compile()
shark_module.forward((input,))

View File

@@ -0,0 +1,39 @@
import torch
import numpy as np
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
class UnetModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"mateuszbuda/brain-segmentation-pytorch",
"unet",
in_channels=3,
out_channels=1,
init_features=32,
pretrained=True,
)
self.model.eval()
def forward(self, input):
return self.model(input)
input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter(
UnetModule(),
(input,),
frontend="torch",
)
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=False
)
shark_module = SharkInference(vision_mlir, func_name, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input,))
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)

View File

@@ -0,0 +1,13 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model
mlir_model, func_name, inputs, golden_out = download_torch_model("v_diffusion")
shark_module = SharkInference(
mlir_model, func_name, device="vulkan", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)

View File

@@ -0,0 +1,47 @@
import torch
from torch.nn.utils import _stateless
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_runner import SharkTrainer
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
mod = MiniLMSequenceClassification()
def get_sorted_params(named_params):
return [i[1] for i in sorted(named_params.items())]
print(dict(mod.named_buffers()))
inp = (torch.randint(2, (1, 128)),)
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(
mod, params_and_buffers, args, {}
).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
# optim.load_state_dict(optim_state)
optim.step()
return params, buffers
shark_module = SharkTrainer(mod, inp, custom_inference_fn=forward)
print(shark_module.forward())

View File

@@ -0,0 +1,60 @@
import numpy as np
import os
import time
import tensorflow as tf
from shark.shark_trainer import SharkTrainer
from shark.parser import parser
from urllib import request
parser.add_argument(
"--download_mlir_path",
type=str,
default="bert_tf_training.mlir",
help="Specifies path to target mlir file that will be loaded.",
)
load_args, unknown = parser.parse_known_args()
tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Download BERT model from tank and train.
if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
]
file_link = "https://storage.googleapis.com/shark_tank/users/stanley/bert_tf_training.mlir"
response = request.urlretrieve(file_link, load_args.download_mlir_path)
sample_input_tensors = [
tf.convert_to_tensor(val, dtype=tf.int32)
for val in predict_sample_input
]
num_iter = 10
if not os.path.isfile(load_args.download_mlir_path):
raise ValueError(
f"Tried looking for target mlir in {load_args.download_mlir_path}, but cannot be found."
)
with open(load_args.download_mlir_path, "rb") as input_file:
bert_mlir = input_file.read()
shark_module = SharkTrainer(
bert_mlir,
(
sample_input_tensors,
tf.convert_to_tensor(
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
),
),
)
shark_module.set_frontend("mhlo")
shark_module.compile()
start = time.time()
print(shark_module.train(num_iter))
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,97 @@
from absl import app
import time
import numpy as np
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
from shark.shark_trainer import SharkTrainer
tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(
vocab_size=vocab_size, num_layers=2, dict_outputs=dict_outputs
)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES
)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(
self.m.predict
)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(
input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
]
)
def forward(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
]
sample_input_tensors = [
tf.convert_to_tensor(val, dtype=tf.int32)
for val in predict_sample_input
]
num_iter = 10
shark_module = SharkTrainer(
BertModule(),
(
sample_input_tensors,
tf.convert_to_tensor(
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
),
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
start = time.time()
print(shark_module.train(num_iter))
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,44 @@
import torch
from torch.nn.utils import _stateless
from shark.shark_trainer import SharkTrainer
class Foo(torch.nn.Module):
def __init__(self):
super(Foo, self).__init__()
self.l1 = torch.nn.Linear(10, 16)
self.relu = torch.nn.ReLU()
self.l2 = torch.nn.Linear(16, 2)
def forward(self, x):
out = self.l1(x)
out = self.relu(out)
out = self.l2(out)
return out
mod = Foo()
inp = (torch.randn(10, 10),)
def get_sorted_params(named_params):
return [i[1] for i in sorted(named_params.items())]
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(
mod, params_and_buffers, args, {}
).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
optim.step()
return params, buffers
# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)
shark_module = SharkTrainer(mod, inp)
# Pass the training function in case of torch
shark_module.compile(training_fn=forward)
shark_module.train(num_iters=10)

View File

@@ -0,0 +1,88 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Any
import iree
import iree.runtime as ireert
import numpy as np
import torch
from iree.runtime import DeviceArray
from torch_mlir._mlir_libs._mlir.ir import Module
from torch_mlir.compiler_utils import (
get_module_name_for_debug_dump,
run_pipeline_with_repro_report,
)
from torch_mlir.eager_mode.torch_mlir_eager_backend import (
TorchMLIREagerBackend,
TensorMetaData,
)
from torch_mlir_e2e_test.eager_backends.refbackend import (
NUMPY_TO_TORCH_DTYPE_DICT,
)
from shark.iree_utils.compile_utils import (
get_iree_compiled_module,
IREE_DEVICE_MAP,
)
class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
"""Main entry-point for the iree backend for torch-mlir eager mode.
EagerModeIREELinalgOnTensorsBackend uses iree.DeviceArray representations of tensors and
thus all of the wrapping and unwrapping and munging here is done to between torch.Tensor and iree.DeviceArray,
with np.ndarray as an intermediary.
"""
def __init__(self, device: str):
self.torch_device_str = device
self.config = ireert.Config(IREE_DEVICE_MAP[device])
self.raw_device_str = device
def get_torch_metadata(
self, tensor: DeviceArray, kwargs: Dict[str, Any]
) -> TensorMetaData:
return TensorMetaData(
size=tensor.shape,
dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
device=torch.device(self.torch_device_str),
requires_grad=tensor.dtype.type
in {np.float16, np.float32, np.float64}
and kwargs.get("requires_grad", False),
)
def compile(self, imported_module: Module):
fn_name = get_module_name_for_debug_dump(imported_module)
run_pipeline_with_repro_report(
imported_module,
"torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
"EagerMode",
)
callable, _ = get_iree_compiled_module(
imported_module, self.raw_device_str, func_name=fn_name
)
return callable
def copy_into(self, dst, src):
"""Copy output back to appropriate arg that it should alias."""
np.copyto(dst, src)
def transfer_from_device_to_torch(self, e):
return torch.from_numpy(e.to_host())
def transfer_from_torch_to_device(
self, tensor: torch.Tensor
) -> DeviceArray:
return iree.runtime.asdevicearray(self.config.device, tensor.numpy())
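# A minimal usage sketch (not part of this module): install this backend for
# torch-mlir eager mode, as the notebook earlier in this diff does.
#
#   import torch
#   from torch_mlir.eager_mode import torch_mlir_tensor
#   from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
#
#   torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
#   tt = TorchMLIRTensor(torch.ones(10, 10))
#   print((tt + tt).elem.to_host())  # 10x10 matrix of 2s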

View File

View File

@@ -0,0 +1,95 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Common utilities to be shared by iree utilities.
import os
import sys
import subprocess
def run_cmd(cmd):
"""
Inputs: cli command string.
"""
try:
result = subprocess.run(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True,
)
result_str = result.stdout.decode()
return result_str
except Exception:
sys.exit("Exiting program due to error running:", cmd)
IREE_DEVICE_MAP = {
"cpu": "local-task",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
"intel-gpu": "level_zero",
}
IREE_TARGET_MAP = {
"cpu": "llvm-cpu",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
"intel-gpu": "opencl-spirv",
}
# Reports whether the required drivers for the given device are missing.
def check_device_drivers(device):
"""Returns True if the necessary drivers for the device are missing, False if present."""
if device in ["gpu", "cuda"]:
try:
subprocess.check_output("nvidia-smi")
except Exception:
return True
elif device in ["metal", "vulkan"]:
try:
subprocess.check_output("vulkaninfo")
except Exception:
return True
elif device in ["intel-gpu"]:
try:
subprocess.check_output(["dpkg", "-L", "intel-level-zero-gpu"])
return False
except Exception:
return True
elif device == "cpu":
return False
# Unknown device.
else:
return True
return False
# Installation info for the missing device drivers.
def device_driver_info(device):
if device in ["gpu", "cuda"]:
return "nvidia-smi not found, please install the required drivers from https://www.nvidia.in/Download/index.aspx?lang=en-in"
elif device in ["metal", "vulkan"]:
return "vulkaninfo not found, Install from https://vulkan.lunarg.com/sdk/home or your distribution"
else:
return f"{device} is not supported."

View File

@@ -0,0 +1,97 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
from shark.iree_utils._common import run_cmd, IREE_DEVICE_MAP
import numpy as np
import os
import re
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
def tensor_to_type_str(input_tensors: tuple, mlir_dialect: str):
"""
Input: a tuple of input tensors, i.e. tuple(torch.Tensor)
Output: list of strings representing MLIR types (e.g. "1x24xf64")
# TODO: Support more than floats and ints
"""
list_of_type = []
for input_tensor in input_tensors:
type_string = "x".join([str(dim) for dim in input_tensor.shape])
if mlir_dialect in ["linalg", "tosa"]:
dtype_string = str(input_tensor.dtype).replace("torch.", "")
elif mlir_dialect in ["mhlo", "tflite"]:
dtype = input_tensor.dtype
try:
dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
"'", ""
)
except IndexError:
dtype_string = str(dtype)
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
match = regex_split.match(dtype_string)
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
type_string += f"x{mlir_type_string}"
list_of_type.append(type_string)
return list_of_type
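# A hedged example of the mapping above: with the linalg dialect, a (1, 24)
# float64 torch tensor becomes "1x24xf64" ("float64" is regex-split into "f" + "64").
#
#   import torch
#   tensor_to_type_str((torch.ones(1, 24, dtype=torch.float64),), "linalg")
#   # -> ["1x24xf64"]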
def build_benchmark_args(
input_file: str,
device: str,
input_tensors: tuple,
mlir_dialect: str,
training=False,
):
"""
Inputs: input_file pointing to the compiled .vmfb, the target device,
the function's input tensors, and whether the module is for training.
Output: list of command-line arguments that run iree-benchmark-module on the target model.
"""
path = benchmark_module.__path__[0]
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
# TODO: The function named can be passed as one of the args.
fn_name = "forward"
if training:
# TODO: Replace name of train with actual train fn name.
fn_name = "train"
benchmark_cl.append(f"--entry_function={fn_name}")
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
mlir_input_types = tensor_to_type_str(input_tensors, mlir_dialect)
for mlir_input in mlir_input_types:
benchmark_cl.append(f"--function_input={mlir_input}")
time_extractor = "| awk 'END{{print $2 $3}}'"
benchmark_cl.append(time_extractor)
return benchmark_cl
def run_benchmark_module(benchmark_cl):
"""
Run the benchmark command, extract the result, and return iterations per second.
# TODO: Add an example of the benchmark command.
Input: benchmark command.
"""
benchmark_path = benchmark_cl[0]
assert os.path.exists(
benchmark_path
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
bench_result = run_cmd(" ".join(benchmark_cl))
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
match = regex_split.match(bench_result)
time = float(match.group(1))
unit = match.group(2)
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
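# A hedged usage sketch (the .vmfb path is illustrative): build the benchmark
# command for a single 1x128 f32 input and report iterations per second.
#
#   import torch
#   cl = build_benchmark_args("forward.vmfb", "cpu", (torch.ones(1, 128),), "linalg")
#   print(run_benchmark_module(cl))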

View File

@@ -0,0 +1,173 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime as ireert
import iree.compiler as ireec
from shark.iree_utils._common import IREE_DEVICE_MAP, IREE_TARGET_MAP
import numpy as np
import os
# Get the iree-compile arguments given device.
def get_iree_device_args(device):
if device == "cpu":
from shark.iree_utils.cpu_utils import get_iree_cpu_args
return get_iree_cpu_args()
if device in ["gpu", "cuda"]:
from shark.iree_utils.gpu_utils import get_iree_gpu_args
return get_iree_gpu_args()
if device in ["metal", "vulkan"]:
from shark.iree_utils.vulkan_utils import get_iree_vulkan_args
return get_iree_vulkan_args()
return []
# Get the iree-compiler arguments given frontend.
def get_iree_frontend_args(frontend):
if frontend in ["torch", "pytorch", "linalg"]:
return ["--iree-llvm-target-cpu-features=host"]
elif frontend in ["tensorflow", "tf", "mhlo"]:
return [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-flow-demote-i64-to-i32",
]
else:
# Frontend not found.
return []
# Common args to be used given any frontend or device.
def get_iree_common_args():
return [
"--iree-stream-resource-index-bits=64",
"--iree-vm-target-index-bits=64",
]
def compile_module_to_flatbuffer(
module, device, frontend, func_name, model_config_path
):
    # Set up compile arguments for the given frontend and device.
input_type = ""
args = get_iree_frontend_args(frontend)
args += get_iree_device_args(device)
args += get_iree_common_args()
if frontend in ["tensorflow", "tf"]:
input_type = "mhlo"
elif frontend in ["mhlo", "tosa"]:
input_type = frontend
elif frontend in ["tflite", "tflite-tosa"]:
input_type = "tosa"
# TODO: make it simpler.
# Compile according to the input type, else just try compiling.
if input_type not in ["mhlo", "tosa"]:
module = str(module)
if input_type != "":
# Currently for MHLO/TOSA.
flatbuffer_blob = ireec.compile_str(
module,
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
input_type=input_type,
)
else:
# Currently for Torch.
flatbuffer_blob = ireec.compile_str(
str(module),
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
)
return flatbuffer_blob
def get_iree_module(flatbuffer_blob, device, func_name):
# Returns the compiled module and the configs.
config = ireert.Config(IREE_DEVICE_MAP[device])
vm_module = ireert.VmModule.from_flatbuffer(
config.vm_instance, flatbuffer_blob
)
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
ModuleCompiled = ctx.modules.module[func_name]
return ModuleCompiled, config
def get_iree_compiled_module(
module,
device: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None,
):
"""Given a module returns the compiled .vmfb and configs"""
flatbuffer_blob = compile_module_to_flatbuffer(
module, device, frontend, func_name, model_config_path
)
return get_iree_module(flatbuffer_blob, device, func_name)
def export_iree_module_to_vmfb(
module,
device: str,
directory: str,
mlir_dialect: str = "linalg",
func_name: str = "forward",
model_config_path: str = None,
):
# Compiles the module given specs and saves it as .vmfb file.
flatbuffer_blob = compile_module_to_flatbuffer(
module, device, mlir_dialect, func_name, model_config_path
)
module_name = f"{mlir_dialect}_{func_name}_{device}"
filename = os.path.join(directory, module_name + ".vmfb")
print(f"Saved vmfb in {filename}.")
with open(filename, "wb") as f:
f.write(flatbuffer_blob)
return filename
def export_module_to_mlir_file(module, frontend, directory: str):
# TODO: write proper documentation.
mlir_str = module
if frontend in ["tensorflow", "tf", "mhlo", "tflite"]:
mlir_str = module.decode("utf-8")
elif frontend in ["pytorch", "torch"]:
mlir_str = module.operation.get_asm()
filename = os.path.join(directory, "model.mlir")
with open(filename, "w") as f:
f.write(mlir_str)
print(f"Saved mlir in {filename}.")
return filename
def get_results(compiled_vm, input, config, frontend="torch"):
"""Runs a .vmfb file given inputs and config and returns output."""
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
result = compiled_vm(*device_inputs)
result_tensors = []
if isinstance(result, tuple):
for val in result:
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
return result_tensors
elif isinstance(result, dict):
data = list(result.items())
res = np.array(data, dtype=object)
return np.copy(res)
else:
return np.copy(np.asarray(result, dtype=result.dtype))
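# A minimal sketch of the compile-and-run flow these helpers support; the
# MLIR source and input shape below are placeholders, not a real model.
mlir_source = "..."  # an MLIR module string containing a @forward function
compiled_module, config = get_iree_compiled_module(
    mlir_source, device="cpu", frontend="torch", func_name="forward"
)
example_input = (np.zeros((1, 4), dtype=np.float32),)
outputs = get_results(compiled_module, example_input, config, frontend="torch")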

shark/iree_utils/cpu_utils.py

@@ -0,0 +1,44 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# All the iree_cpu related functionalities go here.
import subprocess
# Get the default cpu args.
def get_iree_cpu_args():
find_triple_cmd = "uname -s -m"
os_name, proc_name = (
subprocess.run(
find_triple_cmd, shell=True, stdout=subprocess.PIPE, check=True
)
.stdout.decode("utf-8")
.split()
)
if os_name == "Darwin":
find_kernel_version_cmd = "uname -r"
kernel_version = subprocess.run(
find_kernel_version_cmd,
shell=True,
stdout=subprocess.PIPE,
check=True,
).stdout.decode("utf-8")
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
elif os_name == "Linux":
target_triple = f"{proc_name}-linux-gnu"
else:
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
raise Exception(error_message)
print(f"Target triple found:{target_triple}")
return [f"-iree-llvm-target-triple={target_triple}"]

shark/iree_utils/gpu_utils.py

@@ -0,0 +1,111 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# All the iree_gpu related functionalities go here.
import iree.runtime as ireert
import ctypes
from shark.parser import shark_args
# Get the default gpu args given the architecture.
def get_iree_gpu_args():
ireert.flags.FUNCTION_INPUT_VALIDATION = False
ireert.flags.parse_flags("--cuda_allow_inline_execution")
# TODO: Give the user_interface to pass the sm_arch.
sm_arch = get_cuda_sm_cc()
    if (
        sm_arch in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86"]
    ) and shark_args.enable_tf32:
return [
"--iree-hal-cuda-disable-loop-nounroll-wa",
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
]
else:
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
# Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
def get_cuda_sm_cc():
libnames = ("libcuda.so", "libcuda.dylib", "cuda.dll")
for libname in libnames:
try:
cuda = ctypes.CDLL(libname)
except OSError:
continue
else:
break
else:
raise OSError("could not load any of: " + " ".join(libnames))
nGpus = ctypes.c_int()
name = b" " * 100
cc_major = ctypes.c_int()
cc_minor = ctypes.c_int()
result = ctypes.c_int()
device = ctypes.c_int()
context = ctypes.c_void_p()
error_str = ctypes.c_char_p()
result = cuda.cuInit(0)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print(
"cuInit failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print(
"cuDeviceGetCount failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
print("Found %d device(s)." % nGpus.value)
for i in range(nGpus.value):
result = cuda.cuDeviceGet(ctypes.byref(device), i)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print(
"cuDeviceGet failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
print("Device: %d" % i)
if (
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device)
== CUDA_SUCCESS
):
print(" Name: %s" % (name.split(b"\0", 1)[0].decode()))
if (
cuda.cuDeviceComputeCapability(
ctypes.byref(cc_major), ctypes.byref(cc_minor), device
)
== CUDA_SUCCESS
):
print(
" Compute Capability: %d.%d"
% (cc_major.value, cc_minor.value)
)
sm = f"sm_{cc_major.value}{cc_minor.value}"
return sm
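# Hedged example: on an Ampere-class card with --enable_tf32 set,
# get_iree_gpu_args() is expected to return
#   ["--iree-hal-cuda-disable-loop-nounroll-wa",
#    "--iree-hal-cuda-llvm-target-arch=sm_80"]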

shark/iree_utils/vulkan_utils.py

@@ -0,0 +1,60 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# All the iree_vulkan related functionalities go here.
from shark.iree_utils._common import run_cmd
def get_vulkan_triple_flag():
    vulkan_device_cmd = "vulkaninfo | grep deviceName | awk 'END{print $NF}'"
vulkan_device = run_cmd(vulkan_device_cmd).strip()
if vulkan_device == "Ultra":
print("Found MacStudio M1 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "M2":
print("Found Apple M2 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "Max":
print("Found Apple M1 Max Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "Pro":
print("Found Apple M1 Pro Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "M1":
print("Found Apple M1 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "A100-SXM4-40GB":
print("Found Nvidia Device. Using ampere-rtx3080-linux")
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
elif vulkan_device == "3090":
print("Found Nvidia Device. Using ampere-rtx3090-linux")
return "-iree-vulkan-target-triple=ampere-rtx3090-linux"
else:
        print(
            """Optimized kernel for your target device is not added yet.
        Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
        or file an issue."""
        )
print(f"Target : {vulkan_device}")
return None
def get_iree_vulkan_args():
# vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
vulkan_flag = []
vulkan_triple_flag = get_vulkan_triple_flag()
if vulkan_triple_flag is not None:
vulkan_flag.append(vulkan_triple_flag)
return vulkan_flag
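# Hedged example: on an RTX 3090 host, get_iree_vulkan_args() is expected to
# return ["-iree-vulkan-target-triple=ampere-rtx3090-linux"].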

shark/model_annotation.py

@@ -0,0 +1,164 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import json
import os
from typing import List, Dict
from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans
MATMUL_OP_NAMES = set(
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"]
)
idx = 0
def model_annotation(
ctx: ir.Context, *, input_contents: str, config_path: str
):
if os.path.isfile(input_contents):
with open(input_contents, "rb") as f:
input_contents = f.read()
module = ir.Module.parse(input_contents)
with open(config_path, "r") as f:
data = json.load(f)
configs = data["options"]
# The Python API does not expose a general walk() function, so we just
# do it ourselves.
walk_children(module.operation, configs)
if not module.operation.verify():
raise RuntimeError("Modified program does not verify!")
# More efficient than: print(module)
# - Disables verification (already done above)
# - Writes as binary, avoiding costly unicode conversions
sys.stdout.buffer.write(
module.operation.get_asm(assume_verified=True, binary=True)
)
return module
def walk_children(op: ir.Operation, configs: List[Dict]):
for region in op.regions:
for block in region.blocks:
for child_op in block.operations:
# TODO: This is dumb. Both Operation and OpView should expose
# 'operation' and 'name' attributes.
if isinstance(child_op, ir.OpView):
child_op = child_op.operation
if child_op.name in MATMUL_OP_NAMES:
global idx
(
tile_sizes,
pipeline,
workgroup_size,
split_k,
pipeline_depth,
) = parse_config(configs[idx])
add_compilation_info(
child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth,
)
if split_k:
add_split_k(child_op, split_k)
idx = idx + 1
print(f"Updated op {child_op}", file=sys.stderr)
walk_children(child_op, configs)
def parse_config(config: Dict):
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
pipeline = (
"LLVMGPUMatmulSimt"
if config["pipeline"] == "GPU"
else "LLVMGPUMatmulTensorCore"
)
tile_sizes = [config["work_group_tile_sizes"]]
workgroup_size = config["work_group_sizes"]
        # Optional fields; default to None when absent.
        pipeline_depth = config.get("pipeline_depth")
        split_k = config.get("split_k")
else:
pipeline = config["pipeline"]
tile_sizes = [
config["work_group_tile_sizes"],
config["l1_tile_sizes"],
config["vector_tile_sizes"],
]
workgroup_size = []
split_k = None
pipeline_depth = None
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
def add_compilation_info(
op: ir.Operation,
tile_sizes: List[List[int]],
pipeline: str,
workgroup_size: List[int],
pipeline_depth: int,
):
# We don't have a Python binding for CompilationInfo, so we just parse
# its string form.
if pipeline_depth:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
f"workgroup_size = {repr(workgroup_size)}>"
)
else:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline}>, "
f"workgroup_size = {repr(workgroup_size)}>"
)
op.attributes["compilation_info"] = attr
def add_split_k(op: ir.Operation, k: int):
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
op.attributes["iree_flow_split_k"] = attr
def create_context() -> ir.Context:
context = ir.Context()
ireec_trans.register_all_dialects(context)
context.allow_unregistered_dialects = True
return context
if __name__ == "__main__":
with create_context() as ctx:
model_annotation(
ctx, input_contents=sys.argv[1], config_path=sys.argv[2]
)
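# A hedged sketch of a tuning config this script consumes: "options" holds
# one entry per matmul op, in walk order. The field values and the
# "model.mlir" path below are made up for illustration.
import json

example_config = {
    "options": [
        {
            "pipeline": "GPU_TENSORCORE",  # or "GPU", or a CPU pipeline name
            "work_group_tile_sizes": [32, 32, 16],
            "work_group_sizes": [64, 2, 1],
            "pipeline_depth": 4,  # optional
            "split_k": 2,  # optional
        }
    ]
}
with open("example_config.json", "w") as f:
    json.dump(example_config, f)

with create_context() as ctx:
    model_annotation(
        ctx, input_contents="model.mlir", config_path="example_config.json"
    )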

shark/parser.py

@@ -0,0 +1,80 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
def dir_path(path):
    # Create the directory if it does not already exist.
    if not os.path.isdir(path):
        os.makedirs(path, exist_ok=True)
    return path
def dir_file(path):
if os.path.isfile(path):
return path
else:
raise argparse.ArgumentTypeError(
f"readable_file:{path} is not a valid file"
)
parser = argparse.ArgumentParser(description="SHARK runner.")
parser.add_argument(
"--device",
type=str,
default="cpu",
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan",
)
parser.add_argument(
"--repro_dir",
help="Directory to which module files will be saved for reproduction or debugging.",
type=dir_path,
default="./shark_tmp",
)
parser.add_argument(
    "--enable_tf32",
    default=False,
    action="store_true",
    help="Enables TF32 precision calculations on supported GPUs.",
)
parser.add_argument(
"--model_config_path",
help="Directory to where the tuned model config file is located.",
default=None,
)
parser.add_argument(
"--num_warmup_iterations",
type=int,
default=5,
help="Run the model for the specified number of warmup iterations.",
)
parser.add_argument(
"--num_iterations",
type=int,
default=100,
help="Run the model for the specified number of iterations.",
)
parser.add_argument(
"--onnx_bench",
default=False,
action="store_true",
help="When enabled, pytest bench results will include ONNX benchmark results.",
)
shark_args, unknown = parser.parse_known_args()
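# Flags are parsed via parse_known_args at import time, so any script that
# imports shark.parser inherits them from its own argv; e.g. (driver name is
# a placeholder):
#
#   python driver.py --device vulkan --num_iterations 10
#
#   from shark.parser import shark_args
#   print(shark_args.device)  # -> "vulkan"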

shark/shark_benchmark_runner.py

@@ -0,0 +1,301 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.shark_runner import SharkRunner
from shark.iree_utils.compile_utils import export_iree_module_to_vmfb
from shark.iree_utils.benchmark_utils import (
build_benchmark_args,
run_benchmark_module,
)
from shark.parser import shark_args
from datetime import datetime
import time
import csv
import os
class OnnxFusionOptions(object):
def __init__(self):
self.disable_gelu = False
self.disable_layer_norm = False
self.disable_attention = False
self.disable_skip_layer_norm = False
self.disable_embed_layer_norm = False
self.disable_bias_skip_layer_norm = False
self.disable_bias_gelu = False
self.enable_gelu_approximation = False
self.use_mask_index = False
self.no_attention_mask = False
class SharkBenchmarkRunner(SharkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
):
self.device = shark_args.device if device == "none" else device
self.frontend_model = None
self.vmfb_file = None
self.mlir_dialect = mlir_dialect
SharkRunner.__init__(
self,
mlir_module,
function_name,
device,
self.mlir_dialect,
)
        if self.vmfb_file is None:
self.vmfb_file = export_iree_module_to_vmfb(
mlir_module, device, shark_args.repro_dir, self.mlir_dialect
)
def setup_cl(self, input_tensors):
self.benchmark_cl = build_benchmark_args(
self.vmfb_file,
self.device,
input_tensors,
mlir_dialect=self.mlir_dialect,
)
# print(self.benchmark_cl)
def benchmark_frontend(self, modelname):
if self.mlir_dialect in ["linalg", "torch"]:
return self.benchmark_torch(modelname)
elif self.mlir_dialect in ["mhlo", "tf"]:
return self.benchmark_tf(modelname)
def benchmark_torch(self, modelname):
import torch
from tank.model_utils import get_torch_model
if self.device == "gpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
torch.set_default_tensor_type(torch.FloatTensor)
torch_device = torch.device(
"cuda:0" if self.device == "gpu" else "cpu"
)
HFmodel, input = get_torch_model(modelname)[:2]
frontend_model = HFmodel.model
frontend_model.to(torch_device)
        input = input.to(torch_device)  # Tensor.to is not in-place.
for i in range(shark_args.num_warmup_iterations):
frontend_model.forward(input)
begin = time.time()
for i in range(shark_args.num_iterations):
out = frontend_model.forward(input)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_tf(self, modelname):
import tensorflow as tf
from tank.model_utils_tf import get_tf_model
        model, input = get_tf_model(modelname)[:2]
frontend_model = model
for i in range(shark_args.num_warmup_iterations):
frontend_model.forward(*input)
begin = time.time()
for i in range(shark_args.num_iterations):
out = frontend_model.forward(*input)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_c(self):
print(self.benchmark_cl)
result = run_benchmark_module(self.benchmark_cl)
print(f"Shark-IREE-C benchmark:{result} iter/second")
return [f"{result}", f"{1000/result}"]
def benchmark_python(self, inputs):
input_list = [x for x in inputs]
for i in range(shark_args.num_warmup_iterations):
self.run(input_list)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.run(input_list)
if i == shark_args.num_iterations - 1:
end = time.time()
print(
f"Shark-IREE Python benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_onnx(self, modelname, inputs):
if self.device == "gpu":
print(
"Currently GPU benchmarking on ONNX is not supported in SHARK."
)
return ["N/A", "N/A"]
else:
from onnxruntime.transformers.benchmark import run_onnxruntime
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import (
ConfigModifier,
Precision,
)
import psutil
if modelname == "microsoft/MiniLM-L12-H384-uncased":
modelname = "bert-base-uncased"
if modelname not in MODELS:
print(
f"{modelname} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX."
)
return ["N/A", "N/A"]
use_gpu = self.device == "gpu"
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [1]
sequence_lengths = [128]
cache_dir = os.path.join(".", "cache_models")
onnx_dir = os.path.join(".", "onnx_models")
verbose = False
input_counts = [1]
optimize_onnx = True
validate_onnx = False
disable_ort_io_binding = False
use_raw_attention_mask = True
model_fusion_statistics = {}
overwrite = False
model_source = "pt" # Either "pt" or "tf"
provider = None
config_modifier = ConfigModifier(None)
onnx_args = OnnxFusionOptions()
result = run_onnxruntime(
use_gpu,
provider,
(modelname,),
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
input_counts,
optimize_onnx,
validate_onnx,
cache_dir,
onnx_dir,
verbose,
overwrite,
disable_ort_io_binding,
use_raw_attention_mask,
model_fusion_statistics,
model_source,
onnx_args,
)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
result[0]["QPS"],
result[0]["average_latency_ms"],
]
def benchmark_all_csv(
self, inputs: tuple, modelname, dynamic, device_str, frontend
):
self.setup_cl(inputs)
field_names = [
"model",
"engine",
"dynamic",
"dialect",
"device",
"iter/sec",
"ms/iter",
"iterations",
"datetime",
]
engines = ["frontend", "shark_python", "shark_iree_c"]
        if shark_args.onnx_bench:
engines.append("onnxruntime")
if not os.path.exists("bench_results.csv"):
with open("bench_results.csv", mode="w", newline="") as f:
writer = csv.writer(f)
writer.writerow(field_names)
with open("bench_results.csv", mode="a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=field_names)
bench_result = {}
bench_result["model"] = modelname
            bench_result["dynamic"] = str(dynamic)
bench_result["device"] = device_str
for e in engines:
if e == "frontend":
bench_result["engine"] = frontend
(
bench_result["iter/sec"],
bench_result["ms/iter"],
) = self.benchmark_frontend(modelname)
elif e == "shark_python":
bench_result["engine"] = "shark_python"
(
bench_result["iter/sec"],
bench_result["ms/iter"],
) = self.benchmark_python(inputs)
elif e == "shark_iree_c":
bench_result["engine"] = "shark_iree_c"
(
bench_result["iter/sec"],
bench_result["ms/iter"],
) = self.benchmark_c()
elif e == "onnxruntime":
bench_result["engine"] = "onnxruntime"
(
bench_result["iter/sec"],
bench_result["ms/iter"],
) = self.benchmark_onnx(modelname, inputs)
bench_result["dialect"] = self.mlir_dialect
bench_result["iterations"] = shark_args.num_iterations
bench_result["datetime"] = str(datetime.now())
writer.writerow(bench_result)
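# A hedged usage sketch: benchmark a tank model across the configured
# engines; the model name is illustrative and must exist in the tank.
from shark.shark_downloader import download_torch_model

mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
runner = SharkBenchmarkRunner(
    mlir_model, func_name, device="cpu", mlir_dialect="linalg"
)
runner.benchmark_all_csv(inputs, "resnet50", False, "cpu", "torch")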

shark/shark_downloader.py

@@ -0,0 +1,236 @@
# Lint as: python3
"""SHARK Downloader"""
# Requirements: put shark_tank in the SHARK directory:
#   /SHARK
#     /gen_shark_tank
#       /tflite
#         /albert_lite_base
#         /...model_name...
#       /tf
#       /pytorch
import numpy as np
import os
import urllib.request
import json
import hashlib
from pathlib import Path
input_type_to_np_dtype = {
"float32": np.float32,
"float64": np.float64,
"bool": np.bool_,
"int32": np.int32,
"int64": np.int64,
"uint8": np.uint8,
"int8": np.int8,
}
# The default hash is updated when the nightly populate_sharktank_ci run succeeds.
shark_default_sha = "latest"
# Cache models under the user's home directory so they needn't be fetched every time in CI.
home = str(Path.home())
WORKDIR = os.path.join(home, ".local/shark_tank/")
print(WORKDIR)
# Checks whether the directory and files exists.
def check_dir_exists(model_name, frontend="torch", dynamic=""):
model_dir = os.path.join(WORKDIR, model_name)
    # Strip the frontend suffix (_tf/_tflite/_torch) from the model name.
if frontend in ["tf", "tensorflow"]:
model_name = model_name[:-3]
elif frontend in ["tflite"]:
model_name = model_name[:-7]
elif frontend in ["torch", "pytorch"]:
model_name = model_name[:-6]
if os.path.isdir(model_dir):
if (
os.path.isfile(
os.path.join(
model_dir,
model_name + dynamic + "_" + str(frontend) + ".mlir",
)
)
and os.path.isfile(os.path.join(model_dir, "function_name.npy"))
and os.path.isfile(os.path.join(model_dir, "inputs.npz"))
and os.path.isfile(os.path.join(model_dir, "golden_out.npz"))
and os.path.isfile(os.path.join(model_dir, "hash.npy"))
):
print(
f"""The models are present in the {WORKDIR}. If you want a fresh
download, consider deleting the directory."""
)
return True
return False
# Downloads the torch model from gs://shark_tank dir.
def download_torch_model(model_name, dynamic=False):
model_name = model_name.replace("/", "_")
dyn_str = "_dynamic" if dynamic else ""
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_torch"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(model_dir_name, frontend="torch", dynamic=dyn_str):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(
os.path.join(model_dir, model_name + dyn_str + "_torch.mlir")
) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
# Downloads the tflite model from gs://shark_tank dir.
def download_tflite_model(model_name, dynamic=False):
dyn_str = "_dynamic" if dynamic else ""
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_tflite"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(
model_dir_name, frontend="tflite", dynamic=dyn_str
):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(
os.path.join(model_dir, model_name + dyn_str + "_tflite.mlir")
) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
def download_tf_model(model_name):
model_name = model_name.replace("/", "_")
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_tf"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(model_dir_name, frontend="tf"):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank/'
+ shark_default_sha
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(os.path.join(model_dir, model_name + "_tf.mlir")) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
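# Typical use pairs a download with SharkInference; this mirrors the MiniLM
# test further down in this changeset.
from shark.shark_inference import SharkInference

mlir_model, func_name, inputs, golden_out = download_tf_model(
    "microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
    mlir_model, func_name, device="cpu", mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)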

shark/shark_importer.py

@@ -0,0 +1,236 @@
# Lint as: python3
"""SHARK Importer"""
import sys
import tempfile
import os
# List of the supported frontends.
supported_frontends = {
"tensorflow",
"tf",
"pytorch",
"torch",
"tf-lite",
"tflite",
}
class SharkImporter:
"""
    SharkImporter converts frontend modules into an
    mlir_module. The supported frameworks are tensorflow,
    pytorch, and tf-lite.
...
Attributes
----------
module :
torch, tensorflow or tf-lite module.
inputs :
inputs to the module, may be required for the shape
information.
frontend: str
frontend to which the module belongs.
raw_model_file: str
temp tflite model path
Methods
-------
import_mlir(is_dynamic, tracing_required, func_name):
is_dynamic: input shapes to be totally dynamic (pytorch specific).
        tracing_required: whether tracing is required (pytorch specific).
func_name: The function to be traced out or imported to mlir.
import_debug(is_dynamic, tracing_required, func_name):
returns the converted (mlir_module,func_name) with inputs and golden
outputs.
The inputs and outputs are converted into np array.
"""
def __init__(
self,
module,
inputs: tuple = (),
frontend: str = "torch",
raw_model_file: str = "",
):
self.module = module
self.inputs = None if len(inputs) == 0 else inputs
self.frontend = frontend
        if self.frontend not in supported_frontends:
print(
f"The frontend is not in the supported_frontends: {supported_frontends}"
)
sys.exit(1)
self.raw_model_file = raw_model_file
# NOTE: The default function for torch is "forward" and tf-lite is "main".
def _torch_mlir(self, is_dynamic, tracing_required):
from shark.torch_mlir_utils import get_torch_mlir_module
return get_torch_mlir_module(
self.module, self.inputs, is_dynamic, tracing_required
)
def _tf_mlir(self, func_name):
from iree.compiler import tf as tfc
return tfc.compile_module(
self.module, exported_names=[func_name], import_only=True
)
def _tflite_mlir(self, func_name):
from iree.compiler import tflite as tflitec
from shark.iree_utils._common import IREE_TARGET_MAP
self.mlir_model = tflitec.compile_file(
self.raw_model_file, # in tflite, it is a path to .tflite file, not a tflite interpreter
input_type="tosa",
import_only=True,
)
return self.mlir_model
# Adds the conversion of the frontend with the private function.
def import_mlir(
self,
is_dynamic=False,
tracing_required=False,
func_name="forward",
):
if self.frontend in ["torch", "pytorch"]:
            if self.inputs is None:
print(
"Please pass in the inputs, the inputs are required to determine the shape of the mlir_module"
)
sys.exit(1)
return self._torch_mlir(is_dynamic, tracing_required), func_name
if self.frontend in ["tf", "tensorflow"]:
return self._tf_mlir(func_name), func_name
if self.frontend in ["tflite", "tf-lite"]:
func_name = "main"
return self._tflite_mlir(func_name), func_name
# Converts the frontend specific tensors into np array.
def convert_to_numpy(self, array_tuple: tuple):
if self.frontend in ["torch", "pytorch"]:
return [x.detach().numpy() for x in array_tuple]
if self.frontend in ["tf", "tensorflow"]:
return [x.numpy() for x in array_tuple]
# Saves `function_name.npy`, `inputs.npz`, `golden_out.npz` and `model_name.mlir` in the directory `dir`.
def save_data(
self, dir, model_name, mlir_data, func_name, inputs, outputs
):
import numpy as np
inputs_name = "inputs.npz"
outputs_name = "golden_out.npz"
func_file_name = "function_name"
model_name_mlir = model_name + "_" + self.frontend + ".mlir"
np.savez(os.path.join(dir, inputs_name), *inputs)
np.savez(os.path.join(dir, outputs_name), *outputs)
np.save(os.path.join(dir, func_file_name), np.array(func_name))
mlir_str = mlir_data
if self.frontend == "torch":
mlir_str = mlir_data.operation.get_asm()
elif self.frontend == "tf":
mlir_str = mlir_data.decode("utf-8")
elif self.frontend == "tflite":
mlir_str = mlir_data.decode("utf-8")
with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
mlir_file.write(mlir_str)
return
def import_debug(
self,
is_dynamic=False,
tracing_required=False,
func_name="forward",
dir=tempfile.gettempdir(),
model_name="model",
):
        if self.inputs is None:
print(
f"There is no input provided: {self.inputs}, please provide inputs or simply run import_mlir."
)
sys.exit(1)
imported_mlir = self.import_mlir(
is_dynamic, tracing_required, func_name
)
# TODO: Make sure that any generic function name is accepted. Currently takes in the default function names.
# TODO: Check for multiple outputs.
if self.frontend in ["torch", "pytorch"]:
import torch
golden_out = self.module(*self.inputs)
if torch.is_tensor(golden_out):
                # Wrap in a one-element tuple; tuple(ndarray) would iterate rows.
                golden_out = (golden_out.detach().numpy(),)
else:
golden_out = self.convert_to_numpy(golden_out)
# Save the artifacts in the directory dir.
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.convert_to_numpy(self.inputs),
golden_out,
)
if self.frontend in ["tf", "tensorflow"]:
import tensorflow as tf
golden_out = self.module.forward(*self.inputs)
if tf.is_tensor(golden_out):
                # Wrap in a one-element tuple; tuple(ndarray) would iterate rows.
                golden_out = (golden_out.numpy(),)
            elif isinstance(golden_out, tuple):
                golden_out = self.convert_to_numpy(golden_out)
elif hasattr(golden_out, "logits"):
# from transformers import TFSequenceClassifierOutput
golden_out = golden_out.logits
else:
golden_out = golden_out.last_hidden_state
# Save the artifacts in the directory dir.
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.convert_to_numpy(self.inputs),
golden_out,
)
if self.frontend in ["tflite", "tf-lite"]:
# TODO(Chi): Validate it for tflite models.
golden_out = self.module.invoke_tflite(self.inputs)
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.inputs,
golden_out,
)
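# A minimal sketch of the torch path; the module and input below are toy
# placeholders, with tracing enabled for illustration.
import torch

class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1

importer = SharkImporter(AddOne(), inputs=(torch.ones(4),), frontend="torch")
mlir_module, func_name = importer.import_mlir(tracing_required=True)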

shark/shark_inference.py

@@ -0,0 +1,137 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.shark_runner import SharkRunner
import numpy as np
dtype_to_np_dtype = {
"f32": np.float32,
"f64": np.float64,
"i32": np.int32,
"i64": np.int64,
"i1": np.bool_,
}
class SharkInference:
"""
Runs prediction or inference on mlir_module.
...
Attributes
----------
mlir_module : str
mlir_module represented in string.
function_name : str
function to execute in the given mlir_module.
device : str
device to execute the mlir_module on.
currently supports cpu, cuda, vulkan, and metal backends.
mlir_dialect: str
The dialect in which the given mlir_module is in.
Refer to {https://mlir.llvm.org/docs/Dialects/}
is_benchmark: bool
Whether this SharkInference module should be benchmark-enabled.
Methods
-------
    run(inputs=None):
        Runs the mlir_module with the given inputs; if the inputs are not
        given, it autogenerates them. The inputs should be numpy arrays.
    input_info():
        Gives information about the inputs required by the `function_name`.
        This can be expensive as it relies on string matching.
"""
def __init__(
self,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
is_benchmark: bool = False,
):
self.mlir_module = mlir_module
self.function_name = function_name
self.device = device
self.mlir_dialect = mlir_dialect
self.is_benchmark = is_benchmark
self.shark_runner = None
def compile(self):
        if self.is_benchmark:
from shark.shark_benchmark_runner import SharkBenchmarkRunner
self.shark_runner = SharkBenchmarkRunner(
self.mlir_module,
self.function_name,
self.device,
self.mlir_dialect,
)
else:
self.shark_runner = SharkRunner(
self.mlir_module,
self.function_name,
self.device,
self.mlir_dialect,
)
    # Inputs are expected to be a tuple of np.arrays.
def forward(self, inputs: tuple):
return self.shark_runner.run(inputs)
# Captures the static input information from the mlir_module.
# TODO(pashu123): Generate the input information for dynamic shapes.
def _input_info(self):
# func_key to get the line which contains the function.
func_key = "func.func @" + self.function_name
func_header = None
for line in str(self.mlir_module).splitlines():
if func_key in line:
func_header = line
break
        if func_header is None:
            raise ValueError(f"Function {self.function_name} not found")
        import re

        inputs = re.findall(r"\(.*?\)", func_header)[0].split(",")
shapes = []
dtype = []
for inp in inputs:
shape_dtype = re.findall(r"<[^>]*>", inp)[0].split("x")
shape_dtype[0], shape_dtype[-1] = (
shape_dtype[0][1:],
shape_dtype[-1][:-1],
)
shapes.append(tuple([int(x) for x in shape_dtype[:-1]]))
dtype.append(shape_dtype[-1])
return shapes, dtype
    # Generates random inputs to be fed into the graph.
def generate_random_inputs(self, low=0, high=1):
shapes, dtype = self._input_info()
inputs = []
for i, j in zip(shapes, dtype):
inputs.append(
np.random.uniform(low, high, size=i).astype(
dtype_to_np_dtype[j]
)
)
return tuple(inputs)
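# A quick smoke-test sketch using auto-generated inputs; mlir_source is a
# placeholder for a real module string.
mlir_source = "..."  # an MLIR module string containing a @forward function
shark_module = SharkInference(
    mlir_source, "forward", device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
rand_inputs = shark_module.generate_random_inputs()
outputs = shark_module.forward(rand_inputs)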

shark/shark_runner.py

@@ -0,0 +1,101 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.iree_utils.compile_utils import (
get_iree_compiled_module,
get_results,
export_iree_module_to_vmfb,
)
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.parser import shark_args
import os
import sys
# supported dialects by the shark-runtime.
supported_dialects = {"linalg", "mhlo", "tosa", "tf-lite"}
class SharkRunner:
"""
Base class for SharkInference and SharkTrainer
used to execute an mlir_module.
...
Attributes
----------
mlir_module : str
mlir_module represented in string.
function_name : str
function to execute in the given mlir_module.
device : str
device to execute the mlir_module on.
currently supports cpu, cuda, vulkan, and metal backends.
mlir_dialect: str
The dialect in which the given mlir_module is in.
Refer to {https://mlir.llvm.org/docs/Dialects/}
Methods
-------
    run(inputs=None):
        Runs the mlir_module with the given inputs; if the inputs are not
        given, it autogenerates them. The inputs should be numpy arrays.
    input_info():
        Gives information about the inputs required by the `function_name`.
        This can be expensive as it relies on string matching.
"""
def __init__(
self,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
):
self.mlir_module = mlir_module
self.function_name = function_name
self.device = shark_args.device if device == "none" else device
self.mlir_dialect = mlir_dialect
if check_device_drivers(self.device):
device_driver_info(self.device)
sys.exit(1)
# Compile the module to get the .vmfb.
(
self.iree_compilation_module,
self.iree_config,
) = get_iree_compiled_module(
self.mlir_module,
self.device,
self.mlir_dialect,
func_name=self.function_name,
)
def run(self, inputs: tuple):
return get_results(
self.iree_compilation_module,
inputs,
self.iree_config,
self.mlir_dialect,
)
    # TODO: Instead of passing a directory and having names decided by the
    # module, the user may want to save the module with manual names.
    def save_module(self, dir=os.getcwd()):
        return export_iree_module_to_vmfb(
            self.mlir_module, self.device, dir, self.mlir_dialect
        )

shark/shark_trainer.py

@@ -0,0 +1,152 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.parser import shark_args
from shark.shark_runner import SharkRunner
from shark.backward_makefx import MakeFxModule
import numpy as np
from tqdm import tqdm
import sys
# Prints to stderr.
def print_err(*a):
print(*a, file=sys.stderr)
class SharkTrainer:
"""Training pytorch, tensorflow module on shark runtime."""
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = True,
):
self.model = model
# Change tuple to list.
self.input = [x for x in input]
self.dynamic = dynamic
        self.from_aot = from_aot
        self.jit_trace = jit_trace
# By default it's the torch frontend.
self.frontend = "pytorch"
self.device = device if device is not None else shark_args.device
self.shark_runner = None
# Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str):
if frontend not in [
"pytorch",
"torch",
"tensorflow",
"tf",
"mhlo",
"linalg",
"tosa",
]:
print_err("frontend not supported.")
else:
self.frontend = frontend
    # A custom training function may be passed in the torch case.
def compile(self, training_fn=None):
if self.frontend in ["torch", "pytorch"]:
aot_module = MakeFxModule(
self.model, tuple(self.input), custom_inference_fn=training_fn
)
aot_module.generate_graph()
# Returns the backward graph.
training_graph = aot_module.training_graph
weights = self.get_torch_params()
self.shark_runner = SharkRunner(
training_graph,
weights + self.input,
self.dynamic,
self.device,
self.jit_trace,
self.from_aot,
self.frontend,
)
elif self.frontend in ["tensorflow", "tf", "mhlo"]:
self.shark_runner = SharkRunner(
self.model,
self.input,
self.dynamic,
self.device,
self.jit_trace,
self.from_aot,
self.frontend,
)
else:
print_err("Unknown frontend")
return
# The inputs to the mlir-graph are weights, buffers and inputs respectively.
def get_torch_params(self):
params = [i.detach() for i in self.model.parameters()]
buffers = [i.detach() for i in self.model.buffers()]
return params + buffers
# Function to train pytorch module.
def _train_torch(self, num_iters):
"""Returns the updated weights after num_iters"""
params = self.get_torch_params()
params = [x.numpy() for x in params]
print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)):
params = self.shark_runner.forward(
params + self.input, self.frontend
)
return params
# Function to train tensorflow module.
# Output final loss.
# TODO(raikonenfnu): Save updated weight/states in SHARK.
def _train_tf(self, num_iters):
input_list = []
for x in self.input:
if isinstance(x, list):
nested_list = []
for val in x:
if isinstance(val, np.ndarray):
nested_list.append(val)
else:
nested_list.append(val.numpy())
input_list.append(nested_list)
elif isinstance(x, np.ndarray):
input_list.append(x)
else:
input_list.append(x.numpy())
print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)):
outputs = self.shark_runner.forward(input_list, self.frontend)
return outputs
def train(self, num_iters=1):
if self.frontend in ["torch", "pytorch"]:
return self._train_torch(num_iters)
elif self.frontend in ["tf", "tensorflow", "mhlo"]:
return self._train_tf(num_iters)
else:
print_err("Unknown frontend")
return
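# A heavily hedged sketch of the intended torch training flow; model and
# input are placeholders, and the iteration count is arbitrary:
#
#   trainer = SharkTrainer(model, (example_input,))
#   trainer.compile()
#   updated_params = trainer.train(num_iters=10)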


@@ -0,0 +1,144 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest
from shark.parser import shark_args
from shark.shark_inference import SharkInference
from shark.tflite_utils import TFLitePreprocessor
import sys
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results, details):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(details)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
        assert mlir_result.shape == tflite_result.shape, "shape does not match"
        max_error = np.max(np.abs(mlir_result - tflite_result))
        print("Max error (%d): %f" % (i, max_error))
class AlbertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
inputs = tflite_preprocessor.get_inputs()
tflite_interpreter = tflite_preprocessor.get_interpreter()
my_shark_importer = SharkImporter(
module=tflite_interpreter,
inputs=inputs,
frontend="tflite",
raw_model_file=raw_model_file_path,
)
mlir_model, func_name = my_shark_importer.import_mlir()
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
        # Post-process results for comparison.
input_details, output_details = tflite_preprocessor.get_model_details()
mlir_results = list(mlir_results)
for i in range(len(output_details)):
dtype = output_details[i]["dtype"]
mlir_results[i] = mlir_results[i].astype(dtype)
tflite_results = tflite_preprocessor.get_golden_output()
compare_results(mlir_results, tflite_results, output_details)
# Case2: Use manually set inputs
input_details, output_details = tflite_preprocessor.get_model_details()
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
        # Post-process results for comparison.
tflite_results = tflite_preprocessor.get_golden_output()
compare_results(mlir_results, tflite_results, output_details)
# print(mlir_results)
# A specific case can be run by commenting out the other cases. Runs all the
# tests across cpu, gpu, and vulkan according to the available drivers.
pytest_param = pytest.mark.parametrize(
("dynamic", "device"),
[
pytest.param(False, "cpu"),
        # TODO: Language models are failing for the dynamic case.
pytest.param(True, "cpu", marks=pytest.mark.skip),
],
)
@pytest_param
@pytest.mark.xfail(
sys.platform == "darwin", reason="known macos tflite install issue"
)
def test_albert(dynamic, device):
module_tester = AlbertTfliteModuleTester(dynamic=dynamic, device=device)
module_tester.create_and_check_module()
if __name__ == "__main__":
test_albert(False, "cpu")

shark/tflite_utils.py

@@ -0,0 +1,208 @@
import tensorflow as tf
import numpy as np
import os
import csv
import urllib.request
class TFLiteModelUtil:
def __init__(self, raw_model_file):
self.raw_model_file = str(raw_model_file)
self.tflite_interpreter = None
self.input_details = None
self.output_details = None
self.inputs = []
def setup_tflite_interpreter(self):
self.tflite_interpreter = tf.lite.Interpreter(
model_path=self.raw_model_file
)
self.tflite_interpreter.allocate_tensors()
# default input initialization
return self.get_model_details()
def get_model_details(self):
print("Get tflite input output details")
self.input_details = self.tflite_interpreter.get_input_details()
self.output_details = self.tflite_interpreter.get_output_details()
return self.input_details, self.output_details
def invoke_tflite(self, inputs):
self.inputs = inputs
print("invoke_tflite")
for i, input in enumerate(self.inputs):
self.tflite_interpreter.set_tensor(
self.input_details[i]["index"], input
)
self.tflite_interpreter.invoke()
# post process tflite_result for compare with mlir_result,
# for tflite the output is a list of numpy.tensor
tflite_results = []
for output_detail in self.output_details:
tflite_results.append(
np.array(
self.tflite_interpreter.get_tensor(output_detail["index"])
)
)
for i in range(len(self.output_details)):
# print("output_details ", i, "shape", self.output_details[i]["shape"].__name__,
# ", dtype: ", self.output_details[i]["dtype"].__name__)
out_dtype = self.output_details[i]["dtype"]
tflite_results[i] = tflite_results[i].astype(out_dtype)
return tflite_results
class TFLitePreprocessor:
def __init__(
self,
model_name,
input_details=None,
output_details=None,
model_path=None,
):
self.model_name = model_name
self.input_details = (
input_details # used for tflite, optional for tf/pytorch
)
self.output_details = (
output_details # used for tflite, optional for tf/pytorch
)
self.inputs = []
self.model_path = model_path # url to download the model
self.raw_model_file = (
None # local address for raw tf/tflite/pytorch model
)
self.mlir_file = (
None # local address for .mlir file of tf/tflite/pytorch model
)
self.mlir_model = None # read of .mlir file
self.output_tensor = (
None # the raw tf/pytorch/tflite_output_tensor, not mlir_tensor
)
self.interpreter = (
None # could be tflite/tf/torch_interpreter in utils
)
self.input_file = None
self.output_file = None
# create tmp model file directory
if self.model_path is None and self.model_name is None:
            print(
                "Error: no model_path and no model_name; please provide either one."
            )
return
print("Setting up for TMP_WORK_DIR")
self.workdir = os.path.join(
os.path.dirname(__file__), "./../gen_shark_tank"
)
os.makedirs(self.workdir, exist_ok=True)
print(f"TMP_WORK_DIR = {self.workdir}")
# compile and run tfhub tflite
load_model_success = self.load_tflite_model()
if not load_model_success:
print("Error, load tflite model fail")
return
if (self.input_details is None) or (self.output_details is None):
# print("Setting up tflite interpreter to get model input details")
self.setup_interpreter()
inputs = self.generate_inputs(self.input_details) # device_inputs
self.setup_inputs(inputs)
def load_tflite_model(self):
# use model name get dir.
tflite_model_name_dir = os.path.join(
self.workdir, str(self.model_name)
)
os.makedirs(tflite_model_name_dir, exist_ok=True)
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
self.raw_model_file = "/".join(
[tflite_model_name_dir, str(self.model_name) + "_tflite.tflite"]
)
self.mlir_file = "/".join(
[tflite_model_name_dir, str(self.model_name) + "_tflite.mlir"]
)
self.input_file = "/".join([tflite_model_name_dir, "inputs"])
self.output_file = "/".join([tflite_model_name_dir, "golden_out"])
# np.save("/".join([tflite_model_name_dir, "function_name"]), np.array("main"))
if os.path.exists(self.raw_model_file):
print(
"Local address for .tflite model file Exists: ",
self.raw_model_file,
)
else:
print("No local tflite file, Download tflite model")
if self.model_path is None:
# get model file from tflite_model_list.csv or download from gs://bucket
print("No model_path, get from tflite_model_list.csv")
tflite_model_list_path = os.path.join(
os.path.dirname(__file__),
"../tank/tflite/tflite_model_list.csv",
)
tflite_model_list = csv.reader(open(tflite_model_list_path))
for row in tflite_model_list:
if str(row[0]) == str(self.model_name):
self.model_path = row[1]
print("tflite_model_name", str(row[0]))
print("tflite_model_link", self.model_path)
if self.model_path is None:
print("Error, No model path find in tflite_model_list.csv")
return False
urllib.request.urlretrieve(self.model_path, self.raw_model_file)
return True
def setup_interpreter(self):
self.interpreter = TFLiteModelUtil(self.raw_model_file)
(
self.input_details,
self.output_details,
) = self.interpreter.setup_tflite_interpreter()
def generate_inputs(self, input_details):
self.inputs = []
for tmp_input in input_details:
print(
"input_details shape:",
str(tmp_input["shape"]),
" type:",
tmp_input["dtype"].__name__,
)
self.inputs.append(
np.ones(shape=tmp_input["shape"], dtype=tmp_input["dtype"])
)
return self.inputs
def setup_inputs(self, inputs):
# print("Setting up inputs")
self.inputs = inputs
def get_mlir_model(self):
return self.mlir_model
def get_mlir_file(self):
return self.mlir_file
def get_inputs(self):
return self.inputs
def get_golden_output(self):
self.output_tensor = self.interpreter.invoke_tflite(self.inputs)
np.savez(self.output_file, *self.output_tensor)
return self.output_tensor
def get_model_details(self):
return self.input_details, self.output_details
def get_raw_model_file(self):
return self.raw_model_file
def get_interpreter(self):
return self.interpreter

shark/torch_mlir_utils.py

@@ -0,0 +1,72 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from torch_mlir.ir import StringAttr
import torch_mlir
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
def get_module_name_for_asm_dump(module):
"""Gets a name suitable for an assembly dump.
The name is not guaranteed to be unique.
"""
if not "torch.debug_module_name" in module.operation.attributes:
return "UnnammedModule"
return StringAttr(
module.operation.attributes["torch.debug_module_name"]
).value
def run_on_refbackend(torch_module, inputs):
backend = refbackend.RefBackendLinalgOnTensorsBackend()
compiled = backend.compile(torch_module)
jit_module = backend.load(compiled)
np_inputs = [x.numpy() for x in inputs]
return jit_module.forward(np_inputs[0])
# Creates dynamic dims for all dims.
# TODO: Pass user-specified dynamic dims.
def create_dynamic_placeholders(inputs):
placeholders = []
for inp in inputs:
placeholder = torch_mlir.TensorPlaceholder.like(
inp, dynamic_axes=[i for i in range(len(inp.shape))]
)
placeholders.append(placeholder)
return tuple(placeholders)
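# For example, a placeholder built from a (2, 3) tensor is given shape
# (-1, -1), so the compiled module accepts any extent along both axes
# (note added for clarity, based on torch_mlir.TensorPlaceholder semantics).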
def get_torch_mlir_module(
module,
input: tuple,
dynamic: bool,
jit_trace: bool,
from_torchscript: bool = False,
):
"""Get the MLIR's linalg-on-tensors module from torchscipt module."""
ignore_traced_shapes = False
if dynamic:
input = create_dynamic_placeholders(input)
if jit_trace:
ignore_traced_shapes = True
module = torch_mlir.compile(
module,
input,
output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=jit_trace,
ignore_traced_shapes=ignore_traced_shapes,
)
return module
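# Usage sketch (added for illustration, not part of the original file). The
# toy module below is hypothetical; any torch.nn.Module with tensor inputs
# should work the same way.
if __name__ == "__main__":
    import torch

    class SmallNet(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(8, 4)

        def forward(self, x):
            return torch.relu(self.fc(x))

    mlir_module = get_torch_mlir_module(
        SmallNet(), (torch.randn(1, 8),), dynamic=False, jit_trace=True
    )
    print("imported:", get_module_name_for_asm_dump(mlir_module))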


@@ -0,0 +1,101 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
from shark.parser import shark_args
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class MiniLMModuleTester:
def __init__(
self,
benchmark=False,
onnx_bench=False,
):
self.benchmark = benchmark
self.onnx_bench = onnx_bench
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
model,
func_name,
device=device,
mlir_dialect="mhlo",
is_benchmark=self.benchmark,
)
if self.benchmark:
shark_args.enable_tf32 = True
shark_module.compile()
shark_args.onnx_bench = self.onnx_bench
shark_module.shark_runner.benchmark_all_csv(
(inputs),
"microsoft/MiniLM-L12-H384-uncased",
dynamic,
device,
"tensorflow",
)
shark_args.enable_tf32 = False
rtol = 1e-01
atol = 1e-02
else:
shark_module.compile()
rtol = 1e-02
atol = 1e-03
# TODO: Remove the catch once the new MiniLM is stable.
try:
result = shark_module.forward(inputs)[0][1].to_host()
except Exception:
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=rtol, atol=atol)
class MiniLMModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MiniLMModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
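# How these tests are typically driven (a sketch; this file's path is not
# shown in the diff, and --benchmark / --onnx_bench are custom pytest options
# assumed to be registered in the repository's conftest.py):
#   pytest <path-to-this-test> -k static_cpu
#   pytest <path-to-this-test> --benchmark --onnx_bench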


@@ -0,0 +1,114 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
from shark.parser import shark_args
import unittest
import numpy as np
import pytest
class MiniLMModuleTester:
def __init__(
self,
benchmark=False,
onnx_bench=False,
):
self.benchmark = benchmark
self.onnx_bench = onnx_bench
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"microsoft/MiniLM-L12-H384-uncased", dynamic
)
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
if self.benchmark:
shark_args.enable_tf32 = True
shark_module.compile()
shark_args.onnx_bench = self.onnx_bench
shark_module.shark_runner.benchmark_all_csv(
(input),
"microsoft/MiniLM-L12-H384-uncased",
dynamic,
device,
"torch",
)
shark_args.enable_tf32 = False
rtol = 1e-01
atol = 1e-02
else:
shark_module.compile()
rtol = 1e-02
atol = 1e-03
results = shark_module.forward(input)
assert compare_tensors(act_out, results, rtol, atol)
class MiniLMModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MiniLMModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

tank/README.md Normal file

@@ -0,0 +1,13 @@
To run the fine tuning example, from the root SHARK directory, run:
```shell
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pip install jupyter tf-models-nightly tf-datasets
jupyter-notebook
```
If running from a Google VM, you can view Jupyter notebooks on your local system with:
```shell
gcloud compute ssh <YOUR_INSTANCE_DETAILS> --ssh-flag="-N -L localhost:8888:localhost:8888"
```
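With the tunnel open, the Jupyter server running on the VM is reachable locally at http://localhost:8888 (the default Jupyter port, which the `-L` flag above forwards).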

tank/__init__.py Normal file


@@ -0,0 +1,69 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class AlbertBaseModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"albert-base-v2"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class AlbertBaseModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlbertBaseModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,113 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class AlbertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"albert-base-v2", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"albert-base-v2",
dynamic,
device,
"torch",
)
class AlbertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlbertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,177 @@
# import numpy as np
# from shark.shark_importer import SharkImporter
# from shark.shark_inference import SharkInference
# import pytest
# import unittest
# from shark.parser import shark_args
# from shark.tflite_utils import TFLitePreprocessor
#
#
# # model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# # model_path = model_path
#
# # Inputs modified to be useful albert inputs.
# def generate_inputs(input_details):
# for input in input_details:
# print(str(input["shape"]), input["dtype"].__name__)
# # [ 1 384] int32
# # [ 1 384] int32
# # [ 1 384] int32
#
# args = []
# args.append(
# np.random.randint(
# low=0,
# high=256,
# size=input_details[0]["shape"],
# dtype=input_details[0]["dtype"],
# )
# )
# args.append(
# np.ones(
# shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
# )
# )
# args.append(
# np.zeros(
# shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
# )
# )
# return args
#
#
# def compare_results(mlir_results, tflite_results):
# print("Compare mlir_results VS tflite_results: ")
# assert len(mlir_results) == len(
# tflite_results
# ), "Number of results do not match"
# rtol = 1e-02
# atol = 1e-03
# print(
# "numpy.allclose: ",
# np.allclose(mlir_results, tflite_results, rtol, atol),
# )
# for i in range(len(mlir_results)):
# mlir_result = mlir_results[i]
# tflite_result = tflite_results[i]
# mlir_result = mlir_result.astype(np.single)
# tflite_result = tflite_result.astype(np.single)
# assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
# max_error = np.max(np.abs(mlir_result - tflite_result))
# print("Max error (%d): %f", i, max_error)
#
#
# class AlbertTfliteModuleTester:
# def __init__(
# self,
# dynamic=False,
# device="cpu",
# save_mlir=False,
# save_vmfb=False,
# ):
# self.dynamic = dynamic
# self.device = device
# self.save_mlir = save_mlir
# self.save_vmfb = save_vmfb
#
# def create_and_check_module(self):
# shark_args.save_mlir = self.save_mlir
# shark_args.save_vmfb = self.save_vmfb
#
# # Preprocess to get SharkImporter input args
# tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
# raw_model_file_path = tflite_preprocessor.get_raw_model_file()
# inputs = tflite_preprocessor.get_inputs()
# tflite_interpreter = tflite_preprocessor.get_interpreter()
#
# # Use SharkImporter to get SharkInference input args
# my_shark_importer = SharkImporter(
# module=tflite_interpreter,
# inputs=inputs,
# frontend="tflite",
# raw_model_file=raw_model_file_path,
# )
# mlir_model, func_name = my_shark_importer.import_mlir()
#
# # Use SharkInference to get inference result
# shark_module = SharkInference(
# mlir_module=mlir_model,
# function_name=func_name,
# device=self.device,
# mlir_dialect="tflite",
# )
#
# # Case1: Use shark_importer default generate inputs
# shark_module.compile()
# mlir_results = shark_module.forward(inputs)
# ## post process results for compare
# # input_details, output_details = tflite_preprocessor.get_model_details()
# # mlir_results = list(mlir_results)
# # for i in range(len(output_details)):
# # dtype = output_details[i]["dtype"]
# # mlir_results[i] = mlir_results[i].astype(dtype)
# tflite_results = tflite_preprocessor.get_golden_output()
# compare_results(mlir_results, tflite_results)
# # import pdb
# # pdb.set_trace()
#
# # Case2: Use manually set inputs
# # input_details, output_details = tflite_preprocessor.get_model_details()
# input_details = [
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# ]
# inputs = generate_inputs(input_details) # new inputs
#
# shark_module = SharkInference(
# mlir_module=mlir_model,
# function_name=func_name,
# device=self.device,
# mlir_dialect="tflite",
# )
# shark_module.compile()
# mlir_results = shark_module.forward(inputs)
# ## post process results for compare
# tflite_results = tflite_preprocessor.get_golden_output()
# compare_results(mlir_results, tflite_results)
# # print(mlir_results)
#
#
# class AlbertTfliteModuleTest(unittest.TestCase):
# @pytest.fixture(autouse=True)
# def configure(self, pytestconfig):
# self.save_mlir = pytestconfig.getoption("save_mlir")
# self.save_vmfb = pytestconfig.getoption("save_vmfb")
#
# def setUp(self):
# self.module_tester = AlbertTfliteModuleTester(self)
# self.module_tester.save_mlir = self.save_mlir
#
# import sys
#
# @pytest.mark.xfail(
# sys.platform == "darwin", reason="known macos tflite install issue"
# )
# def test_module_static_cpu(self):
# self.module_tester.dynamic = False
# self.module_tester.device = "cpu"
# self.module_tester.create_and_check_module()
# if __name__ == "__main__":
# module_tester = AlbertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
# unittest.main()


@@ -0,0 +1,118 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# model_path = model_path
# Inputs modified to be useful albert inputs.
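# (Note added for clarity: the three [1, 384] int32 tensors are presumably
# input_ids, input_mask and segment_ids; the random/ones/zeros pattern below
# matches that reading, though the model metadata is not inspected here.)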
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
# [ 1 384] int32
# [ 1 384] int32
# [ 1 384] int32
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
rtol = 1e-02
atol = 1e-03
print(
"numpy.allclose: ",
np.allclose(mlir_results, tflite_results, rtol, atol),
)
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class AlbertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="albert_lite_base")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
# print(shark_results)
compare_results(mlir_results, tflite_results)
class AlbertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = AlbertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
unittest.main()
# module_tester = AlbertTfliteModuleTester()
# module_tester.create_and_check_module()


@@ -0,0 +1,115 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class AlexnetModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"alexnet", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"alexnet",
dynamic,
device,
"torch",
)
class AlexnetModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlexnetModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
@pytest.mark.xfail(
reason="Issue known, WIP",
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,97 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/google/lite-model/magenta/arbitrary-image-stylization-v1-256/int8/prediction/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class ArbitraryImageStylizationV1TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(
model_name="arbitrary-image-stylization-v1-256"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
# print(shark_results)
compare_results(mlir_results, tflite_results)
class ArbitraryImageStylizationV1TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = ArbitraryImageStylizationV1TfliteModuleTester(
self
)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = ArbitraryImageStylizationV1TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()


@@ -0,0 +1,117 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import torch
import unittest
import numpy as np
import pytest
class BertBaseUncasedModuleTester:
def __init__(
self,
save_mlir=False,
save_vmfb=False,
benchmark=False,
):
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"bert-base-cased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_module.shark_runner.benchmark_all_csv(
(input),
"bert-base-cased",
dynamic,
device,
"torch",
)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,71 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
from shark.parser import shark_args
import unittest
import pytest
import numpy as np
class BertBaseUncasedModuleTester:
def __init__(
self,
benchmark=False,
onnx_bench=False,
):
self.benchmark = benchmark
self.onnx_bench = onnx_bench
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"bert-base-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,108 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import pytest
class BertBaseUncasedModuleTester:
def __init__(
self,
benchmark=False,
onnx_bench=False,
):
self.benchmark = benchmark
self.onnx_bench = onnx_bench
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"bert-base-uncased", dynamic
)
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert compare_tensors(act_out, results)
if self.benchmark:
shark_args.onnx_bench = self.onnx_bench
shark_module.shark_runner.benchmark_all_csv(
(input),
"bert-base-uncased",
dynamic,
device,
"torch",
)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,182 @@
import numpy as np
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import tensorflow as tf
try:
import tensorflow_datasets as tfds
import tensorflow_models as tfm
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
except ModuleNotFoundError:
print(
"tensorflow models or datasets not found please run the following command with your virtual env active:\npip install tf-models-nightly tf-datasets"
)
import json
import time
import os
gs_folder_bert = "gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12"
tf.io.gfile.listdir(gs_folder_bert)
vocab_size = 100
NUM_CLASSES = 2
SEQUENCE_LENGTH = 128
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
bert_config_file = os.path.join(gs_folder_bert, "bert_config.json")
config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())
encoder_config = tfm.nlp.encoders.EncoderConfig(
{"type": "bert", "bert": config_dict}
)
bert_encoder = tfm.nlp.encoders.build_encoder(encoder_config)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
bert_encoder, num_classes=NUM_CLASSES
)
bert_trainer_model.summary()
checkpoint = tf.train.Checkpoint(encoder=bert_encoder)
checkpoint.read(
os.path.join(gs_folder_bert, "bert_model.ckpt")
).assert_consumed()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(
self.m.predict
)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(
input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
]
)
def learn(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m.call(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
glue, info = tfds.load("glue/mrpc", with_info=True, batch_size=BATCH_SIZE)
tokenizer = tfm.nlp.layers.FastWordpieceBertTokenizer(
vocab_file=os.path.join(gs_folder_bert, "vocab.txt"), lower_case=True
)
max_seq_length = SEQUENCE_LENGTH
packer = tfm.nlp.layers.BertPackInputs(
seq_length=max_seq_length,
special_tokens_dict=tokenizer.get_special_tokens_dict(),
)
class BertInputProcessor(tf.keras.layers.Layer):
def __init__(self, tokenizer, packer):
super().__init__()
self.tokenizer = tokenizer
self.packer = packer
def call(self, inputs):
tok1 = self.tokenizer(inputs["sentence1"])
tok2 = self.tokenizer(inputs["sentence2"])
packed = self.packer([tok1, tok2])
if "label" in inputs:
return packed, inputs["label"]
else:
return packed
bert_inputs_processor = BertInputProcessor(tokenizer, packer)
glue_train = glue["train"].map(bert_inputs_processor).prefetch(1)
glue_validation = glue["validation"].map(bert_inputs_processor).prefetch(1)
glue_test = glue["test"].map(bert_inputs_processor).prefetch(1)
# base tensorflow model
bert_model = BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(
bert_model, exported_names=["learn"], import_only=True
)
# choose from dylib-llvm-aot or cuda
backend = "dylib-llvm-aot"
if backend == "dylib-llvm-aot":
args = [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-flow-demote-i64-to-i32",
]
backend_config = "dylib"
else:
backend_config = "cuda"
args = [
"--iree-cuda-llvm-target-arch=sm_80",
"--iree-hal-cuda-disable-loop-nounroll-wa",
"--iree-enable-fusion-with-reduction-ops",
]
flatbuffer_blob = compile_str(
compiler_module,
target_backends=[backend],
extra_args=args,
input_type="mhlo",
)
# Load the compiled flatbuffer into the IREE runtime
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
tracer = ireert.Tracer(os.getcwd())
config = ireert.Config("local-sync", tracer)
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
BertCompiled = ctx.modules.module
# compare output losses:
iterations = 10
for i in range(iterations):
example_inputs, example_labels = next(iter(glue_train))
example_labels = tf.cast(example_labels, tf.int32)
example_inputs = [value for key, value in example_inputs.items()]
# iree version
iree_loss = BertCompiled.learn(
example_inputs, example_labels
).to_host()
# base tensorflow
tf_loss = np.array(bert_model.learn(example_inputs, example_labels))
print(np.allclose(iree_loss, tf_loss))


@@ -0,0 +1,131 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
import urllib.request
from PIL import Image
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
def generate_inputs(input_details):
# input_details shape: [ 1 224 224 3] type: uint8
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
local_path = "/".join([workdir, "bird.bmp"])
urllib.request.urlretrieve(img_path, local_path)
shape = input_details[0]["shape"]
im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
args = [im.reshape(shape)]
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class BirdsV1TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="birds_V1")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.uint8,
}
]
inputs = generate_inputs(input_details) # device_inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class BirdsV1TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = BirdsV1TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = BirdsV1TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()


@@ -0,0 +1,68 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class CamemBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"camembert-base"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class CamemBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = CamemBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,88 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class CartoonganTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="cartoongan")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class CartoonganTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = CartoonganTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = CartoonganTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()


@@ -0,0 +1,71 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class ConvBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"dbmdz/convbert-base-turkish-cased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class ConvBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = ConvBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
@pytest.mark.xfail(
reason="Issue: https://github.com/iree-org/iree/issues/9971",
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,72 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
from shark.parser import shark_args
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
import tempfile
import os
class DebertaBaseModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"microsoft/deberta-base"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
# Skip the whole module at collection time; the model can't be imported yet.
pytest.skip("Model can't be imported.", allow_module_level=True)
class DebertaBaseModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DebertaBaseModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("intel-gpu"),
reason=device_driver_info("intel-gpu"),
)
def test_module_static_intel_gpu(self):
dynamic = False
device = "intel-gpu"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()


@@ -0,0 +1,90 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class DeepLabV3TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# preprocess to get SharkImporter input args
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="deeplabv3")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class DeepLabV3TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = DeepLabV3TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = DeepLabV3TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

Some files were not shown because too many files have changed in this diff.