Compare commits


1 Commit

Author SHA1 Message Date
stanley
14a56ca9b0 Mini LM Loader Example
- Add example to load miniLM from SharkHUB and benchmark.
- Modify TF benchmark to have growing GPU allocation.
- Add shark_load helper function.
2022-06-15 02:57:42 +00:00
149 changed files with 10744 additions and 205 deletions

View File

@@ -1,37 +0,0 @@
# See: https://github.com/llvm/torch-mlir/issues/1374
name: Publish releases page
on:
workflow_dispatch:
jobs:
scrape_and_publish_releases:
name: "Scrape and publish releases"
runs-on: ubuntu-latest
# Don't run this in everyone's forks.
if: github.repository == 'nod-ai/SHARK'
steps:
- name: Checking out repository
uses: actions/checkout@v2
with:
token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
- name: Run scrape releases script
run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
shell: bash
- run: git fetch --all
- run: git switch github-pages
- run: git config --global user.email "none@none.com"
- run: git config --global user.name "nod-team"
- run: mv /tmp/index.html package-index/index.html
- run: git add package-index/index.html
# Only try to make a commit if the file has changed.
- run: git diff --cached --exit-code || git commit -m "Update releases."
- name: GitHub Push
uses: ad-m/github-push-action@v0.6.0
with:
github_token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
branch: github-pages

View File

@@ -11,12 +11,11 @@ on:
jobs:
build:
runs-on: a100
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
backend: [IREE, SHARK]
steps:
- uses: actions/checkout@v3
@@ -39,10 +38,6 @@ jobs:
tag_name="${package_version}"
echo "package_version=${package_version}" >> $GITHUB_ENV
echo "tag_name=${tag_name}" >> $GITHUB_ENV
- name: Set Environment Variables
run: |
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Create Release
id: create_release
uses: actions/create-release@v1
@@ -54,76 +49,43 @@ jobs:
body: |
Automatic snapshot release of nod.ai SHARK.
draft: true
prerelease: false
- name: Find Torch-MLIR Release
run: |
TM_HTML_URL="$(python3 -c "import urllib.request, json, sys; u=json.loads(urllib.request.urlopen('https://api.github.com/repos/llvm/torch-mlir/releases/latest').read().decode()).get('html_url', False); print(u) if u else sys.exit(1);")"
TM_RELEASE_DIR=${TM_HTML_URL/"tag"/"expanded_assets"}
echo "TM_RELEASE_DIR=${TM_RELEASE_DIR}" >> $GITHUB_ENV
prerelease: false
- name: Install dependencies
run: |
echo "Torch-MLIR Release DIR is ${{ env.TM_RELEASE_DIR }}"
python -m pip install --upgrade pip
python -m pip install flake8 pytest toml
if [ -f requirements.txt ]; then pip install -r requirements.txt -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
python -m pip install flake8 pytest yapf toml
if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
- name: Build and validate the IREE package
if: ${{ matrix.backend == 'IREE' }}
run: |
cd $GITHUB_WORKSPACE
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
source iree.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/iree-org/iree/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
/bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
tail -n 1 |
tee -a pytest_results.txt
if !(grep -Fxq " failed" pytest_results.txt)
then
export SHA=$(git log -1 --format='%h')
gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
fi
rm -rf ./wheelhouse/nodai*
yapf -i --style .style.yapf shark/*.py
- name: Build and validate the SHARK Runtime package
if: ${{ matrix.backend == 'SHARK' }}
- name: Build and validate the package
run: |
cd $GITHUB_WORKSPACE
./setup_venv.sh
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases
pip wheel -v -w wheelhouse . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
tail -n 1 |
tee -a pytest_results.txt
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
- name: Upload Release Assets
if: ${{ matrix.backend == 'SHARK' }}
id: upload-release-assets
uses: dwenegar/upload-release-assets@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}
assets_path: ${GITHUB_WORKSPACE}/wheelhouse/nodai_*.whl
assets_path: ./wheelhouse/nodai_*.whl
- name: Publish Release
if: ${{ matrix.backend == 'SHARK' }}
id: publish_release
uses: eregon/publish-release@v1
env:

View File

@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Validate Models on Shark Runtime
name: Validate torch-models on Shark Runtime
on:
push:
@@ -11,103 +11,92 @@ on:
workflow_dispatch:
jobs:
build-validate:
strategy:
fail-fast: true
matrix:
os: [icelake, a100, MacStudio, ubuntu-latest]
suite: [cpu,cuda,vulkan]
python-version: ["3.10"]
include:
- os: ubuntu-latest
suite: lint
exclude:
- os: ubuntu-latest
suite: vulkan
- os: ubuntu-latest
suite: cuda
- os: ubuntu-latest
suite: cpu
- os: MacStudio
suite: cuda
- os: MacStudio
suite: cpu
- os: MacStudio
suite: vulkan
- os: icelake
suite: vulkan
- os: icelake
suite: cuda
- os: a100
suite: cpu
build-linux:
runs-on: ${{ matrix.os }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Set Environment Variables
run: |
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Set up Python Version File ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
run: |
# See https://github.com/actions/setup-python/issues/433
echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
- name: Set up Python ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
uses: actions/setup-python@v4
uses: actions/setup-python@v3
with:
python-version: '${{ matrix.python-version }}'
#cache: 'pip'
#cache-dependency-path: |
# **/requirements-importer.txt
# **/requirements.txt
python-version: ${{ matrix.python-version }}
- name: Setup pip cache
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install dependencies
if: matrix.suite == 'lint'
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest toml black
python -m pip install flake8 pytest yapf toml
- name: Lint with flake8
if: matrix.suite == 'lint'
run: |
# black format check
black --version
black --line-length 79 --check .
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
yapf -i --style .style.yapf shark/*.py
- name: Validate Models on CPU
if: matrix.suite == 'cpu'
- name: Validate Models
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
pytest -k 'not benchmark' --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
perf-macOS:
runs-on: MacStudio
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
- name: Validate Models on NVIDIA GPU
if: matrix.suite == 'cuda'
steps:
- uses: actions/checkout@v3
- name: Validate Models dependencies
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
PYTHON=python3.10 IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
perf-linux:
runs-on: a100
timeout-minutes: 45
continue-on-error: true
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
- name: Validate Vulkan Models
if: matrix.suite == 'vulkan'
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Setup pip cache
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Validate Models
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
pytest --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/

1
.gitignore vendored
View File

@@ -162,7 +162,6 @@ cython_debug/
# Shark related artefacts
*venv/
shark_tmp/
# ORT related artefacts
cache_models/

4
.gitmodules vendored Normal file
View File

@@ -0,0 +1,4 @@
[submodule "inference/thirdparty/shark-runtime"]
path = inference/thirdparty/shark-runtime
url = https://github.com/nod-ai/SHARK-Runtime.git
branch = shark-06032022

3
.style.yapf Normal file
View File

@@ -0,0 +1,3 @@
[style]
based_on_style = google
column_limit = 80

260
README.md Normal file
View File

@@ -0,0 +1,260 @@
# SHARK
High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerators and Heterogeneous Clusters
[![Nightly Release](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[![Validate torch-models on Shark Runtime](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)
## Communication Channels
* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs etc
## Installation
<details>
<summary>Installation (Linux and macOS)</summary>
### Setup a new pip Virtual Environment
This step sets up a new VirtualEnv for Python
```shell
python --version #Check you have 3.7->3.10 on Linux or 3.10 on macOS
python -m venv shark_venv
source shark_venv/bin/activate
# If you are using conda create and activate a new conda env
# Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip
```
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg
### Install SHARK
This step pip installs SHARK and related packages on Linux Python 3.7, 3.8, 3.9, 3.10 and macOS Python 3.10
```shell
pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/shark-runtime/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```
If you are on an Intel macOS machine you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.
### Download and run Resnet50 sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
#Install deps for test script
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
```
### Download and run BERT (MiniLM) sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
#Install deps for test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
```
</details>
<details>
<summary>Source Installation</summary>
## Check out the code
```shell
git clone https://github.com/nod-ai/SHARK.git
```
## Setup your Python VirtualEnvironment and Dependencies
```shell
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
# Please activate the venv after installation.
```
### Run a demo script
```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
```
### Run all model tests on CPU/GPU/VULKAN/Metal
```shell
pytest shark/tests/models
# If on Linux for quicker results:
pytest shark/tests/models -n auto
```
### Run all model benchmark tests on CPU/GPU/VULKAN/Metal
```shell
pytest shark/tests/benchmarks
```
</details>
<details>
<summary>API Reference</summary>
### Shark Inference API
```
from shark.shark_inference import SharkInference
shark_module = SharkInference(
    module,          # the model class (torch.nn.Module)
    (input,),        # inputs to the model (must be torch tensors)
    dynamic=False,   # pass the input shapes as static (False) or dynamic (True)
    device="cpu",    # `cpu`, `gpu` or `vulkan` is supported
    jit_trace=False, # jit-trace the module with the given input, useful where jit.script doesn't work
)
shark_module.set_frontend("pytorch") # Use tensorflow, mhlo, linalg, tosa
shark_module.compile()
result = shark_module.forward(inputs)
```
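### Example demonstrating running a PyTorch model
The sketch below applies the API described above to a torchvision model. It is illustrative only: the model choice, input shape, and flag values are assumptions, and the constructor arguments mirror those used by the test code added in this change.
```
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference

# Illustrative model and input; any torch.nn.Module that torch-mlir can lower works.
model = models.resnet18(pretrained=True).eval()
test_input = torch.randn(1, 3, 224, 224)

shark_module = SharkInference(model, (test_input,),
                              device="cpu",   # or "gpu" / "vulkan"
                              dynamic=False,
                              jit_trace=True)
shark_module.set_frontend("pytorch")
shark_module.compile()
result = shark_module.forward((test_input,))
print(result)
```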
### Example demonstrating running MHLO IR.
```
from shark.shark_inference import SharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
```
</details>
## Supported and Validated Models
<details>
<summary>PyTorch Models</summary>
### Huggingface PyTorch Models
| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| Albert | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| BigBird | :heavy_check_mark: (AOT) | | | |
| DistilBERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| GPT2 | :x: (AOT) | | | |
### Torchvision Models
| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|--------------------|----------------------|----------|----------|-------------|
| AlexNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| DenseNet121 | :heavy_check_mark: (Script) | | | |
| MNasNet1_0 | :heavy_check_mark: (Script) | | | |
| MobileNetV2 | :heavy_check_mark: (Script) | | | |
| MobileNetV3 | :heavy_check_mark: (Script) | | | |
| Unet | :x: (Script) | | | |
| Resnet18 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet50 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet101 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnext50_32x4d | :heavy_check_mark: (Script) | | | |
| ShuffleNet_v2 | :x: (Script) | | | |
| SqueezeNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| EfficientNet | :heavy_check_mark: (Script) | | | |
| Regnet | :heavy_check_mark: (Script) | | | |
| Resnest | :x: (Script) | | | |
| Vision Transformer | :heavy_check_mark: (Script) | | | |
| VGG 16 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Wide Resnet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| RAFT | :x: (JIT) | | | |
For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0)
### PyTorch Training Models
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
</details>
<details>
<summary>JAX Models</summary>
### JAX Models
| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| DALL-E | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
</details>
<details>
<summary>TFLite Models</summary>
### TFLite Models
| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
</details>
<details>
<summary>TF Models</summary>
### Tensorflow Models
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
</details>
## Related Projects
<details>
<summary>IREE Project Channels</summary>
* [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests,
bugs, and other work tracking
* [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development
discussions with the core team and collaborators
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
Announcements, general and low-priority discussion
</details>
<details>
<summary>MLIR and Torch-MLIR Project Channels</summary>
* `#torch-mlir` channel on the LLVM [Discord](https://discord.gg/xS7Z362) - this is the most active communication channel
* Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
* Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31)
SHARK and IREE are enabled by and heavily rely on [MLIR](https://mlir.llvm.org).
</details>
## License
nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
See [LICENSE](LICENSE) for more information.

0
benchmarks/__init__.py Normal file
View File

View File

@@ -0,0 +1,22 @@
import torch
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
parser.add_argument(
"--model_name",
type=str,
required=True,
help=
"Specifies name of HF model to benchmark. (For exmaple \"microsoft/MiniLM-L12-H384-uncased\""
)
load_args, unknown = parser.parse_known_args()
if __name__ == "__main__":
model_name = load_args.model_name
test_input = torch.randint(2, (1, 128))
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
jit_trace=True)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)

View File

@@ -0,0 +1,137 @@
import torch
from shark.shark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import run_pytorch, run_tensorflow, run_onnxruntime
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
import psutil
class OnnxFusionOptions(object):
def __init__(self):
self.disable_gelu = False
self.disable_layer_norm = False
self.disable_attention = False
self.disable_skip_layer_norm = False
self.disable_embed_layer_norm = False
self.disable_bias_skip_layer_norm = False
self.disable_bias_gelu = False
self.enable_gelu_approximation = False
self.use_mask_index = False
self.no_attention_mask = False
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
# SharkBenchmarkRunner-derived class with HF benchmarking capabilities.
def __init__(
self,
model_name: str,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
):
self.device = device if device is not None else shark_args.device
if self.device == "gpu":
raise ValueError(
"Currently GPU Benchmarking is not supported due to OOM from ORT."
)
self.model_name = model_name
model = HuggingFaceLanguage(model_name)
SharkBenchmarkRunner.__init__(self, model, input, dynamic, self.device,
jit_trace, from_aot, frontend)
def benchmark_torch(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_pytorch(use_gpu, [self.model_name], None, config_modifier,
Precision.FLOAT32, num_threads, batch_sizes,
sequence_lengths, shark_args.num_iterations, False,
cache_dir, verbose)
print(
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
# TODO: Currently non-functional due to TF runtime error. There might be some issue with initializing TF.
def benchmark_tf(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_tensorflow(use_gpu, [self.model_name], None,
config_modifier, Precision.FLOAT32, num_threads,
batch_sizes, sequence_lengths,
shark_args.num_iterations, cache_dir, verbose)
print(
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_onnx(self, inputs):
if self.model_name not in MODELS:
print(
f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX.")
return
use_gpu = self.device == "gpu"
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
onnx_dir = os.path.join(".", "onnx_models")
verbose = False
input_counts = [1]
optimize_onnx = True
validate_onnx = False
disable_ort_io_binding = False
use_raw_attention_mask = True
model_fusion_statistics = {}
overwrite = False
model_source = "pt" #Either "pt" or "tf"
provider = None
config_modifier = ConfigModifier(None)
onnx_args = OnnxFusionOptions()
result = run_onnxruntime(
use_gpu, provider, [self.model_name], None, config_modifier,
Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
shark_args.num_iterations, input_counts, optimize_onnx,
validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
disable_ort_io_binding, use_raw_attention_mask,
model_fusion_statistics, model_source, onnx_args)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)

View File

@@ -0,0 +1,210 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib
import pytest
import unittest
torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(name)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
actual_out = model.forward(*test_input)
return model, test_input, actual_out
##################### Hugging Face LM Models ###################################
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
def get_hf_model(name):
model = HuggingFaceLanguage(name)
# TODO: Currently the test input is set to (1,128)
test_input = torch.randint(2, (1, 128))
actual_out = model(test_input)
return model, test_input, actual_out
################################################################################
##################### Torch Vision Models ###################################
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
def get_vision_model(torch_model):
model = VisionModule(torch_model)
# TODO: Currently the test input is set to (1, 3, 224, 224)
test_input = torch.randn(1, 3, 224, 224)
actual_out = model(test_input)
return model, test_input, actual_out
############################# Benchmark Tests ####################################
pytest_benchmark_param = pytest.mark.parametrize(
('dynamic', 'device'),
[
pytest.param(False, 'cpu'),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, 'cpu', marks=pytest.mark.skip),
pytest.param(False,
'gpu',
marks=pytest.mark.skipif(check_device_drivers("gpu"),
reason="nvidia-smi not found")),
pytest.param(True,
'gpu',
marks=pytest.mark.skip),
pytest.param(
False,
'vulkan',
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
pytest.param(
True,
'vulkan',
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
])
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased")
shark_module = SharkInference(model, (test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
try:
# If benchmarking is successful, assert success/True.
shark_module.compile()
shark_module.benchmark_all((test_input,))
assert True
except Exception as e:
# If anything happens during benchmarking, assert False/failure.
assert False
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
try:
# If benchmarking is successful, assert success/True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything happens during benchmarking, assert False/failure.
assert False
@pytest.mark.skip(reason="XLM Roberta too large to test.")
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
try:
# If benchmarking is successful, assert success/True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything happens during benchmarking, assert False/failure.
assert False

View File

@@ -0,0 +1,39 @@
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest
torch.manual_seed(0)
############################# HF Benchmark Tests ####################################
# Test running benchmark module without failing.
pytest_benchmark_param = pytest.mark.parametrize(
('dynamic', 'device'),
[
pytest.param(False, 'cpu'),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, 'cpu', marks=pytest.mark.skip),
])
@pytest.mark.skipif(importlib.util.find_spec("onnxruntime") is None,
reason="Cannot find ONNXRUNTIME.")
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
model_name = "bert-base-uncased"
test_input = torch.randint(2, (1, 128))
try:
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
jit_trace=True,
dynamic=dynamic,
device=device)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)
# If benchmarking is successful, assert success/True.
assert True
except Exception as e:
# If anything happens during benchmarking, assert False/failure.
assert False

192
inference/CMakeLists.txt Normal file
View File

@@ -0,0 +1,192 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.17)
project(sharkbackend LANGUAGES C CXX)
#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
#
# Dependencies
#
# FetchContent requires us to include the transitive closure of all
# repos that we depend on so that we can override the tags.
#
include(FetchContent)
FetchContent_Declare(
repo-common
GIT_REPOSITORY https://github.com/triton-inference-server/common.git
GIT_TAG ${TRITON_COMMON_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
repo-core
GIT_REPOSITORY https://github.com/triton-inference-server/core.git
GIT_TAG ${TRITON_CORE_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_Declare(
repo-backend
GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
GIT_TAG ${TRITON_BACKEND_REPO_TAG}
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)
#
# The backend must be built into a shared library. Use an ldscript to
# hide all symbols except for the TRITONBACKEND API.
#
configure_file(src/libtriton_dshark.ldscript libtriton_dshark.ldscript COPYONLY)
add_library(
triton-dshark-backend SHARED
src/dshark.cc
#src/dshark_driver_module.c
)
add_library(
SharkBackend::triton-dshark-backend ALIAS triton-dshark-backend
)
target_include_directories(
triton-dshark-backend
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_BINARY_DIR}/lib/cmake/mlir")
add_subdirectory(thirdparty/shark-runtime EXCLUDE_FROM_ALL)
target_link_libraries(triton-dshark-backend PRIVATE iree_base_base
iree_hal_hal
iree_hal_cuda_cuda
iree_hal_cuda_registration_registration
iree_hal_vmvx_registration_registration
iree_hal_dylib_registration_registration
iree_modules_hal_hal
iree_vm_vm
iree_vm_bytecode_module
iree_hal_local_loaders_system_library_loader
iree_hal_local_loaders_vmvx_module_loader
)
target_compile_features(triton-dshark-backend PRIVATE cxx_std_11)
target_link_libraries(
triton-dshark-backend
PRIVATE
triton-core-serverapi # from repo-core
triton-core-backendapi # from repo-core
triton-core-serverstub # from repo-core
triton-backend-utils # from repo-backend
)
if(WIN32)
set_target_properties(
triton-dshark-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME triton_dshark
)
else()
set_target_properties(
triton-dshark-backend PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME triton_dshark
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dshark.ldscript
LINK_FLAGS "-Wl,--version-script libtriton_dshark.ldscript"
)
endif()
#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/SharkBackend)
install(
TARGETS
triton-dshark-backend
EXPORT
triton-dshark-backend-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
)
install(
EXPORT
triton-dshark-backend-targets
FILE
SharkBackendTargets.cmake
NAMESPACE
SharkBackend::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/SharkBackendConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
DESTINATION ${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT triton-dshark-backend-targets
FILE ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendTargets.cmake
NAMESPACE SharkBackend::
)
export(PACKAGE SharkBackend)

100
inference/README.md Normal file
View File

@@ -0,0 +1,100 @@
# SHARK Triton Backend
The Triton backend for SHARK.
# Build
Install SHARK
```
git clone https://github.com/nod-ai/SHARK.git
# skip above step if dshark is already installed
cd SHARK/inference
```
Install dependencies
```
apt-get install patchelf rapidjson-dev python3-dev
git submodule update --init
```
Update the submodules of IREE
```
cd thirdparty/shark-runtime
git submodule update --init
```
Next, make the backend and install it
```
cd ../..
mkdir build && cd build
cmake -DTRITON_ENABLE_GPU=ON \
-DIREE_HAL_DRIVER_CUDA=ON \
-DIREE_TARGET_BACKEND_CUDA=ON \
-DMLIR_ENABLE_CUDA_RUNNER=ON \
-DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
-DTRITON_BACKEND_REPO_TAG=r22.02 \
-DTRITON_CORE_REPO_TAG=r22.02 \
-DTRITON_COMMON_REPO_TAG=r22.02 ..
make install
```
# Incorporating into Triton
There are much more in-depth explanations of the following steps in Triton's documentation:
https://github.com/triton-inference-server/server/blob/main/docs/compose.md#triton-with-unsupported-and-custom-backends
There should be a file at /build/install/backends/dshark/libtriton_dshark.so. You will need to copy it into your Triton server image.
More documentation is in the link above, but to create the Docker image you need to run the `compose.py` command in the Triton server repo.
To build your image, first clone the tritonserver repo.
```
git clone https://github.com/triton-inference-server/server.git
```
Then run `compose.py` to generate a `Dockerfile.compose`
```
cd server
python3 compose.py --repoagent checksum --dry-run
```
Because dshark is a third-party backend, you will need to manually modify the generated `Dockerfile.compose` to include it. To do this, add the following `COPY` line to the `Dockerfile.compose` file produced.
The dshark backend will be located in the build folder from earlier, under `/build/install/backends`:
```
COPY /path/to/build/install/backends/dshark /opt/tritonserver/backends/dshark
```
Next run
```
docker build -t tritonserver_custom -f Dockerfile.compose .
docker run -it --gpus=1 --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```
where `/path/to/model_repos` is where you are storing the models you want to run.
If you're not using GPUs, omit `--gpus=1`:
```
docker run -it --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```
# Setting up a model
To include a model in your backend, add a directory with your model name to your model repository directory. Examples of models can be seen here: https://github.com/triton-inference-server/backend/tree/main/examples/model_repos/minimal_models
Make sure to adjust the inputs correctly in the `config.pbtxt` file, and save a VMFB file under `1/model.vmfb`; a minimal sketch of this layout follows.
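To make that layout concrete, here is a small illustrative helper (not part of this change); the model name, tensor names, data types, and dims are placeholders and must be adjusted to match the model you compiled to VMFB.
```
# Illustrative scaffold for a Triton model-repository entry using the dshark backend.
# All names, dtypes, and dims below are placeholders.
import os
import shutil

def scaffold_model_repo(repo_dir, model_name, vmfb_path):
    model_dir = os.path.join(repo_dir, model_name)
    version_dir = os.path.join(model_dir, "1")
    os.makedirs(version_dir, exist_ok=True)

    # Minimal config.pbtxt; adjust input/output names, types, and shapes.
    config = (
        f'name: "{model_name}"\n'
        'backend: "dshark"\n'
        'max_batch_size: 0\n'
        'input [ { name: "input0", data_type: TYPE_INT64, dims: [ 1, 128 ] } ]\n'
        'output [ { name: "output0", data_type: TYPE_FP32, dims: [ 1, 2 ] } ]\n'
    )
    with open(os.path.join(model_dir, "config.pbtxt"), "w") as f:
        f.write(config)

    # The compiled module goes under <model_name>/1/model.vmfb.
    shutil.copy(vmfb_path, os.path.join(version_dir, "model.vmfb"))

# Example: scaffold_model_repo("/path/to/model_repos", "minilm", "./minilm.vmfb")
```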
# CUDA
If you're having issues with CUDA, make sure the correct drivers are installed, that `nvidia-smi` works, and that the `nvcc` compiler is on the path; a quick check is sketched below.
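For convenience, an illustrative check (not part of this change) using only the Python standard library:
```
# Sanity-check the CUDA prerequisites mentioned above (illustrative helper).
import shutil
import subprocess

def check_cuda_setup():
    ok = True
    for tool, args in (("nvidia-smi", []), ("nvcc", ["--version"])):
        if shutil.which(tool) is None:
            print(f"{tool} not found on PATH")
            ok = False
            continue
        out = subprocess.run([tool] + args, capture_output=True, text=True)
        first_line = out.stdout.splitlines()[0] if out.stdout else "(no output)"
        print(f"{tool}: {first_line}")
    return ok

if __name__ == "__main__":
    check_cuda_setup()
```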

View File

@@ -0,0 +1,39 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
SHARKBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${SHARKBACKEND_CMAKE_DIR})
if(NOT TARGET SharkBackend::triton-dshark-backend)
include("${SHARKBACKEND_CMAKE_DIR}/SharkBackendTargets.cmake")
endif()
set(SHARKBACKEND_LIBRARIES SharkBackend::triton-dshark-backend)

1462
inference/src/dshark.cc Normal file

File diff suppressed because it is too large

View File

@@ -0,0 +1,30 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};

View File

@@ -1,45 +0,0 @@
<!DOCTYPE html>
<html>
<body>
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_20230130_481.exe'>shark_sd_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_cli_20230130_481.exe'>shark_sd_cli_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_20230129_479.exe'>shark_sd_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_cli_20230129_479.exe'>shark_sd_cli_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_20230129_480.exe'>shark_sd_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_cli_20230129_480.exe'>shark_sd_cli_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_20230129_478.exe'>shark_sd_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_cli_20230129_478.exe'>shark_sd_cli_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_20230128_477.exe'>shark_sd_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_cli_20230128_477.exe'>shark_sd_cli_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_20230127_476.exe'>shark_sd_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_cli_20230127_476.exe'>shark_sd_cli_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_20230126_475.exe'>shark_sd_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_cli_20230126_475.exe'>shark_sd_cli_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_20230125_474.exe'>shark_sd_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_cli_20230125_474.exe'>shark_sd_cli_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_20230125_473.exe'>shark_sd_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_cli_20230125_473.exe'>shark_sd_cli_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.472/shark_sd_20230125_472.exe'>shark_sd_20230125_472.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.471/shark_sd_20230125_471.exe'>shark_sd_20230125_471.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.468/shark_sd_20230125_468.exe'>shark_sd_20230125_468.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_20230124_470.exe'>shark_sd_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_cli_20230124_470.exe'>shark_sd_cli_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.469/shark_sd_20230124_469.exe'>shark_sd_20230124_469.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.467/shark_sd_20230124_467.exe'>shark_sd_20230124_467.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.466/shark_sd_20230124_466.exe'>shark_sd_20230124_466.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.462/shark_sd_20230124_462.exe'>shark_sd_20230124_462.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.461/shark_sd_20230123_461.exe'>shark_sd_20230123_461.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.460/shark_sd_20230123_460.exe'>shark_sd_20230123_460.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.459/shark_sd_20230122_459.exe'>shark_sd_20230122_459.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.458/shark_sd_20230122_458.exe'>shark_sd_20230122_458.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.457/shark_sd_20230122_457.exe'>shark_sd_20230122_457.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230121.456/shark_sd_20230121_456.exe'>shark_sd_20230121_456.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230120.455/shark_sd_20230120_455.exe'>shark_sd_20230120_455.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230119.454/shark_sd_20230119_454.exe'>shark_sd_20230119_454.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230118.453/shark_sd_20230118_453.exe'>shark_sd_20230118_453.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230117.452/shark_sd_20230117_452.exe'>shark_sd_20230117_452.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230116.451/shark_sd_20230116_451.exe'>shark_sd_20230116_451.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230115.450/shark_sd_20230115_450.exe'>shark_sd_20230115_450.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230114.449/shark_sd_20230114_449.exe'>shark_sd_20230114_449.exe</a><br />
</body>
</html>

12
pyproject.toml Normal file
View File

@@ -0,0 +1,12 @@
[build-system]
requires = [
"setuptools>=42",
"wheel",
"packaging",
"numpy==1.22.4",
"torch-mlir>=20220428.420",
"iree-compiler>=20220427.13",
"iree-runtime>=20220427.13",
]
build-backend = "setuptools.build_meta"

3
pytest.ini Normal file
View File

@@ -0,0 +1,3 @@
[pytest]
addopts = --verbose -p no:warnings
norecursedirs = inference tank/tflite

View File

@@ -0,0 +1,40 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
numpy
torch
torchvision
tqdm
#iree-compiler | iree-runtime should already be installed
#these don't work on macOS
#iree-tools-tflite
#iree-tools-xla
#iree-tools-tf
# TensorFlow and JAX.
gin-config
tensorflow-macos
tensorflow-metal
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]
# tflitehub dependencies.
Pillow
# Testing and support.
#lit
#pyyaml
#ONNX and ORT for benchmarking
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly

39
requirements-importer.txt Normal file
View File

@@ -0,0 +1,39 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
numpy==1.22.4
torch
torchvision
tqdm
#iree-compiler | iree-runtime should already be installed
iree-tools-tflite
iree-tools-xla
iree-tools-tf
# TensorFlow and JAX.
gin-config
tensorflow
tf-models-nightly
tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]
# tflitehub dependencies.
Pillow
# Testing and support.
lit
pyyaml
#ONNX and ORT for benchmarking
--extra-index-url https://test.pypi.org/simple/
protobuf
coloredlogs
flatbuffers
sympy
psutil
onnx-weekly
ort-nightly

9
requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
setuptools
wheel
#SHARK Runner
tqdm
#Testing
pytest
pytest-xdist

38
setup.py Normal file
View File

@@ -0,0 +1,38 @@
from setuptools import find_packages
from setuptools import setup
import os
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
setup(
name="nodai-SHARK",
version=f"{PACKAGE_VERSION}",
description="SHARK provides a High Performance Machine Learning Framework",
author="nod.ai",
author_email="stdin@nod.ai",
url="https://nod.ai",
long_description=long_description,
long_description_content_type="text/markdown",
project_urls={
"Code": "https://github.com/nod-ai/SHARK",
"Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
packages=find_packages(exclude=('examples',)),
python_requires=">=3.7",
install_requires=[
"numpy",
"PyYAML",
"torch-mlir>=20220428.420",
"iree-compiler>=20220427.13",
"iree-runtime>=20220427.13",
],
)

115
setup_venv.sh Executable file
View File

@@ -0,0 +1,115 @@
#!/bin/bash
# Sets up a venv suitable for running samples.
# e.g:
# ./setup_venv.sh #setup a default $PYTHON3 shark.venv
# Environment variables used by the script:
# PYTHON=$PYTHON3.10 ./setup_venv.sh #pass a version of $PYTHON to use
# VENV_DIR=myshark.venv #create a venv called myshark.venv
# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
# IMPORTER=1 #Install importer deps
# if you run the script from a conda env it will install in your conda env
TD="$(cd $(dirname $0) && pwd)"
if [ -z "$PYTHON" ]; then
PYTHON="$(which python3)"
fi
function die() {
echo "Error executing command: $*"
exit 1
}
PYTHON_VERSION_X_Y=`${PYTHON} -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))'`
echo "Python: $PYTHON"
echo "Python version: $PYTHON_VERSION_X_Y"
if [[ -z "${CONDA_PREFIX}" ]]; then
# Not a conda env. So create a new VENV dir
VENV_DIR=${VENV_DIR:-shark.venv}
echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
$PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
source "$VENV_DIR/bin/activate" || die "Could not activate venv"
PYTHON="$(which python3)"
else
echo "Found conda env $CONDA_DEFAULT_ENV. Running pip install inside the conda env"
fi
Red=`tput setaf 1`
Green=`tput setaf 2`
Yellow=`tput setaf 3`
# Assume no binary torch-mlir.
# Currently available for macOS M1 & Intel (3.10) and Linux (3.7, 3.8, 3.9, 3.10)
torch_mlir_bin=false
if [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}Apple macOS detected"
if [[ $(uname -m) == 'arm64' ]]; then
echo "${Yellow}Apple M1 Detected"
hash rustc 2>/dev/null
if [ $? -eq 0 ];then
echo "${Green}rustc found to compile HF tokenizers"
else
echo "${Red}Could not find rustc" >&2
echo "${Red}Please run:"
echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
exit 1
fi
fi
echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
if [ "$PYTHON_VERSION_X_Y" == "3.10" ]; then
torch_mlir_bin=true
fi
elif [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected"
if [ "$PYTHON_VERSION_X_Y" == "3.7" ] || [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.9" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] ; then
torch_mlir_bin=true
fi
else
echo "${Red}OS not detected. Pray and Play"
fi
# Upgrade pip and install requirements.
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
if [ "$torch_mlir_bin" = true ]; then
$PYTHON -m pip install --find-links https://github.com/llvm/torch-mlir/releases torch-mlir --extra-index-url https://download.pytorch.org/whl/nightly/cpu
if [ $? -eq 0 ];then
echo "Successfully Installed torch-mlir"
else
echo "Could not install torch-mlir" >&2
fi
else
echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
echo "${Yello}Python 3.10 supported on macOS and 3.7,3.8,3.9 and 3.10 on Linux"
echo "${Red}Please build torch-mlir from source in your environment"
exit 1
fi
if [[ -z "${USE_IREE}" ]]; then
RUNTIME="nod-ai/SHARK-Runtime"
else
RUNTIME="google/iree"
fi
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --find-links https://github.com/${RUNTIME}/releases iree-compiler iree-runtime
if [[ ! -z "${IMPORTER}" ]]; then
echo "${Yellow}Installing importer tools.."
if [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected.. installing Linux importer tools"
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
elif [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}macOS detected.. installing macOS importer tools"
# Conda seems to have some problems installing these packages; hopefully they get resolved upstream.
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
fi
fi
$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases
if [[ -z "${CONDA_PREFIX}" ]]; then
echo "${Green}Before running examples activate venv with:"
echo " ${Green}source $VENV_DIR/bin/activate"
fi

0
shark/__init__.py Normal file
View File

72
shark/backward_makefx.py Normal file
View File

@@ -0,0 +1,72 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch._decomp import get_decompositions
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import _stateless
from torch import fx
import copy
import tempfile
class MakeFxModule:
def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
self.model = model
self.inputs = inputs
self.custom_inference_fn = custom_inference_fn
self.training_graph = None
# Doesn't replace the None type.
def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
for node in fx_g.graph.nodes:
if node.op == "output":
# output nodes always have one argument
node_arg = node.args[0]
out_nodes = []
if isinstance(node_arg, list):
# Don't return NoneType elements.
for out_node in node_arg:
if not isinstance(out_node, type(None)):
out_nodes.append(out_node)
# If there is a single tensor/element to be returned, don't
# wrap it in a tuple.
if len(out_nodes) == 1:
node.args = out_nodes
else:
node.args = (tuple(out_nodes),)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
def generate_graph(self):
fx_g = make_fx(self.custom_inference_fn,
decomposition_table=get_decompositions([
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward
]))(dict(self.model.named_parameters()),
dict(self.model.named_buffers()), self.inputs)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile(suffix='_shark_ts',
prefix='temp_ts_')
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)
self.training_graph = new_ts
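This diff ships MakeFxModule without a caller, so the expected shape of custom_inference_fn is easy to miss: generate_graph invokes it through make_fx with the model's parameter dict, buffer dict and the stored inputs, in that order. Below is a minimal, hypothetical sketch of wiring a toy training step through it; the Linear model, the loss and the _stateless.functional_call usage are illustrative assumptions, not part of this change.

import torch
from torch.nn.utils import _stateless
from shark.backward_makefx import MakeFxModule

# Toy stand-ins (assumptions for this sketch only).
model = torch.nn.Linear(4, 2)
example_input = torch.randn(1, 4)

def train_step(params, buffers, args):
    # Functional forward pass followed by backward; make_fx traces both,
    # which is why generate_graph registers backward-op decompositions.
    loss = _stateless.functional_call(model, {**params, **buffers},
                                      (args,)).sum()
    loss.backward()
    return loss

builder = MakeFxModule(model, example_input, custom_inference_fn=train_step)
builder.generate_graph()             # populates builder.training_graph
scripted = builder.training_graph    # TorchScript module produced via make_fx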

78
shark/cuda_utils.py Normal file
View File

@@ -0,0 +1,78 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import ctypes
# Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
def get_cuda_sm_cc():
libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
for libname in libnames:
try:
cuda = ctypes.CDLL(libname)
except OSError:
continue
else:
break
else:
raise OSError("could not load any of: " + ' '.join(libnames))
nGpus = ctypes.c_int()
name = b' ' * 100
cc_major = ctypes.c_int()
cc_minor = ctypes.c_int()
result = ctypes.c_int()
device = ctypes.c_int()
context = ctypes.c_void_p()
error_str = ctypes.c_char_p()
result = cuda.cuInit(0)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuInit failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGetCount failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
print("Found %d device(s)." % nGpus.value)
for i in range(nGpus.value):
result = cuda.cuDeviceGet(ctypes.byref(device), i)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGet failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
print("Device: %d" % i)
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
device) == CUDA_SUCCESS:
print(" Name: %s" % (name.split(b'\0', 1)[0].decode()))
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
ctypes.byref(cc_minor),
device) == CUDA_SUCCESS:
print(" Compute Capability: %d.%d" %
(cc_major.value, cc_minor.value))
sm = f"sm_{cc_major.value}{cc_minor.value}"
return sm
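get_cuda_sm_cc returns a string such as "sm_86" on success and the integer 1 when a driver query fails, so callers should check the type. A hypothetical usage sketch (not part of this diff) that reports the local GPU's CUDA target:

from shark.cuda_utils import get_cuda_sm_cc

# Query the local GPU's compute capability before choosing a CUDA target.
sm = get_cuda_sm_cc()
if isinstance(sm, str):   # e.g. "sm_86" on an RTX 3080
    print(f"Targeting CUDA architecture {sm}")
else:                     # the helper returned its error code (1)
    print("CUDA query failed; falling back to a default architecture")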

View File

@@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from shark.iree_utils import get_iree_compiled_module"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# torch dynamo related imports\n",
"try:\n",
" import torchdynamo\n",
" from torchdynamo.optimizations.backends import create_backend\n",
" from torchdynamo.optimizations.subgraph import SubGraph\n",
"except ModuleNotFoundError:\n",
" print(\"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\")\n",
" exit()\n",
"\n",
"# torch-mlir imports for compiling\n",
"from torch_mlir import compile, OutputType"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"def toy_example(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# compiler that lowers fx_graph to through MLIR\n",
"def __torch_mlir(fx_graph, *args, **kwargs):\n",
" assert isinstance(\n",
" fx_graph, torch.fx.GraphModule\n",
" ), \"Model must be an FX GraphModule.\"\n",
"\n",
" def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
" \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
"\n",
" for node in fx_g.graph.nodes:\n",
" if node.op == \"output\":\n",
" assert len(node.args) == 1, \"Output node must have a single argument\"\n",
" node_arg = node.args[0]\n",
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
" node.args = (node_arg[0],)\n",
" fx_g.graph.lint()\n",
" fx_g.recompile()\n",
" return fx_g\n",
"\n",
" fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
" ts_graph = torch.jit.script(fx_graph)\n",
"\n",
" # torchdynamo does munges the args differently depending on whether you use\n",
" # the @torchdynamo.optimize decorator or the context manager\n",
" if isinstance(args, tuple):\n",
" args = list(args)\n",
" assert isinstance(args, list)\n",
" if len(args) == 1 and isinstance(args[0], list):\n",
" args = args[0]\n",
"\n",
" linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)\n",
" callable, _ = get_iree_compiled_module(linalg_module, \"cuda\", func_name=\"forward\")\n",
"\n",
" def forward(*inputs):\n",
" return callable(*inputs)\n",
"\n",
" return forward"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.40066046 -0.4210303 0.03225489 -0.44849953 0.10370405 -0.04422468\n",
" 0.33262825 -0.20109026 0.02102537 -0.24882983]\n",
"[-0.07824923 -0.17004533 0.06439921 -0.06163602 0.26633525 -1.1560082\n",
" -0.06660341 0.24227881 0.1462235 -0.32055548]\n",
"[-0.01464001 0.442209 -0.0607936 -0.5477967 -0.25226554 -0.08588809\n",
" -0.30497575 0.00061084 -0.50069696 0.2317973 ]\n",
"[ 0.25726247 0.39388427 -0.24093066 0.12316308 -0.01981307 0.5661146\n",
" 0.26199922 0.8123446 -0.01576749 0.30846444]\n",
"[ 0.7878203 -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
" -1.6837492 -0.38442805 0.28220773 -1.5325156 ]\n",
"[ 0.07975311 0.67754704 -0.30927914 0.00347631 -0.07326564 0.01893554\n",
" -0.7518105 -0.03078967 -0.07623022 0.38865626]\n",
"[-0.7751679 -0.5841397 -0.6622711 0.18574935 -0.6049372 0.02844244\n",
" -0.20471913 0.3337415 -0.3619432 -0.35087156]\n",
"[-0.08569919 -0.10775139 -0.02338934 0.21933547 -0.46712473 0.00062137\n",
" -0.58207744 0.06457533 0.18276742 0.03866556]\n",
"[-0.2311981 -0.43036282 0.20561649 -0.10363232 -0.13248594 0.02885137\n",
" -0.31241602 -0.36907142 0.08861586 0.2331427 ]\n",
"[-0.07273526 -0.31246194 -0.24218291 -0.24145737 0.0364486 0.14382267\n",
" -0.00531162 0.15447603 -0.5220248 -0.09016377]\n"
]
}
],
"source": [
"with torchdynamo.optimize(__torch_mlir):\n",
" for _ in range(10):\n",
" print(toy_example(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"It can also be used through a decorator:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"@create_backend\n",
"def torch_mlir(subgraph, *args, **kwargs):\n",
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
"\n",
"@torchdynamo.optimize(\"torch_mlir\")\n",
"def toy_example2(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.35494277 0.03409214 -0.02271946 0.7335942 0.03122527 -0.41881397\n",
" -0.6609761 -0.6418614 0.29336175 -0.01973678]\n",
"[-2.7246824e-01 -3.5543957e-01 6.0087401e-01 -7.4570496e-03\n",
" -4.2481605e-02 -5.0296803e-04 7.2928613e-01 -1.4673788e-03\n",
" -2.7621329e-01 -6.0995776e-02]\n",
"[-0.03165906 0.3889693 0.24052973 0.27279532 -0.02773128 -0.12602475\n",
" -1.0124422 0.5720256 -0.35437614 -0.20992722]\n",
"[-0.41831446 0.5525326 -0.29749998 -0.17044766 0.11804754 -0.05210691\n",
" -0.46145165 -0.8776549 0.10090438 0.17463352]\n",
"[ 0.02194221 0.20959911 0.26973712 0.12551276 -0.0020404 0.1490246\n",
" -0.04456685 1.1100804 0.8105744 0.6676846 ]\n",
"[ 0.06528181 -0.13591261 0.5370964 -0.4398162 -0.03372452 0.9691372\n",
" -0.01120087 0.2947028 0.4804801 -0.3324341 ]\n",
"[ 0.33549032 -0.23001772 -0.08681437 0.16490957 -0.11223086 0.09168988\n",
" 0.02403045 0.17344482 0.46406478 -0.00129451]\n",
"[-0.27475086 0.42384806 1.9090122 -0.41147137 -0.6888369 0.08435658\n",
" -0.26628923 -0.17436793 -0.8058869 -0.02582378]\n",
"[-0.10109414 0.08681287 -0.10055986 0.6858881 0.29267687 -0.02797117\n",
" -0.01425194 0.4882803 0.3551982 -0.858935 ]\n",
"[-0.22086617 0.524994 0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
" 0.11938014 -0.01122053 0.39294165 -0.61770755]\n"
]
}
],
"source": [
"for _ in range(10):\n",
" print(toy_example2(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,84 @@
import torch
from torch_mlir import compile, OutputType
from shark.iree_utils import get_iree_compiled_module
try:
import torchdynamo
from torchdynamo.optimizations.backends import create_backend
from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
print("Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo")
exit()
NUM_ITERS = 10
def __torch_mlir(fx_graph, *args, **kwargs):
assert isinstance(
fx_graph, torch.fx.GraphModule
), "Model must be an FX GraphModule."
def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
"""Replace tuple with tuple element in functions that return one-element tuples."""
for node in fx_g.graph.nodes:
if node.op == "output":
assert len(node.args) == 1, "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple) and len(node_arg) == 1:
node.args = (node_arg[0],)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
fx_graph = _unwrap_single_tuple_return(fx_graph)
ts_graph = torch.jit.script(fx_graph)
if isinstance(args, tuple):
args = list(args)
assert isinstance(args, list)
if len(args) == 1 and isinstance(args[0], list):
args = args[0]
linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)
callable, _ = get_iree_compiled_module(linalg_module, "cuda", func_name="forward")
def forward(*inputs):
return callable(*inputs)
return forward
def toy_example(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
with torchdynamo.optimize(__torch_mlir):
for _ in range(10):
print(toy_example(torch.randn(10), torch.randn(10)))
@create_backend
def torch_mlir(subgraph, *args, **kwargs):
assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))
@torchdynamo.optimize("torch_mlir")
def toy_example2(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
for _ in range(10):
print(toy_example2(torch.randn(10), torch.randn(10)))

View File

@@ -0,0 +1,805 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from torch_mlir.eager_mode import torch_mlir_tensor"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# eager mode imports\n",
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"and wrapping all your `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
]
}
],
"source": [
"NUM_ITERS = 10\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"for i in range(NUM_ITERS):\n",
" yy = tt + uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = tt * uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)\n",
"\n",
"yy = tt + uu\n",
"print(yy.elem.to_host())\n",
"yy = tt * uu\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"for i in range(NUM_ITERS):\n",
" yy = t + u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = t * u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"yy = t + u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())\n",
"yy = t * u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -0,0 +1,148 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode
def test_cpu():
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(type(yy))
print(yy.elem.to_host())
yy = tt * uu
print(type(yy))
print(yy.elem.to_host())
def test_gpu():
source = """
#include <iostream>
#include "cuda.h"
#include "cuda_runtime_api.h"
using namespace std;
void print_free_mem() {
int num_gpus;
size_t free, total;
cudaSetDevice(0);
int id;
cudaGetDevice(&id);
cudaMemGetInfo(&free, &total);
cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
}
"""
gpu_stats = load_inline(
name="inline_extension",
cpp_sources=[source],
extra_include_paths=include_paths(cuda=True),
functions=["print_free_mem"],
)
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(yy.elem.to_host())
yy = tt * uu
print(yy.elem.to_host())
gpu_stats.print_free_mem()
def test_python_mode_ref_backend():
# hide this wherever you want?
_ = SharkEagerMode("refbackend")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
print(i)
yy = t + u
print(yy.elem)
yy = t * u
print(yy.elem)
def test_python_mode_iree_cpu():
# hide this wherever you want?
_ = SharkEagerMode("cpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
def test_python_mode_iree_gpu():
_ = SharkEagerMode("gpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
NUM_ITERS = 10
if __name__ == "__main__":
test_cpu()
if torch.cuda.is_available():
test_gpu()
test_python_mode_ref_backend()
test_python_mode_iree_cpu()
test_python_mode_iree_gpu()

View File

@@ -0,0 +1,51 @@
from PIL import Image
import requests
from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
clip_vit_inputs = [
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32)
]
class CLIPModule(tf.Module):
def __init__(self):
super(CLIPModule, self).__init__()
self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
self.m.predict = lambda x, y, z: self.m(
input_ids=x, attention_mask=y, pixel_values=z)
@tf.function(input_signature=clip_vit_inputs)
def forward(self, input_ids, attention_mask, pixel_values):
return self.m.predict(input_ids, attention_mask,
pixel_values).logits_per_image
if __name__ == "__main__":
# Prepping Data
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat", "a photo of a dog"],
images=image,
return_tensors="tf",
padding=True)
shark_module = SharkInference(
CLIPModule(),
(inputs["input_ids"], inputs["attention_mask"], inputs["pixel_values"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"],
inputs["pixel_values"])))

View File

@@ -0,0 +1,38 @@
from PIL import Image
import requests
from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
gpt2_inputs = [
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
]
class GPT2Module(tf.Module):
def __init__(self):
super(GPT2Module, self).__init__()
self.m = TFGPT2Model.from_pretrained("distilgpt2")
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
@tf.function(input_signature=gpt2_inputs)
def forward(self, input_ids, attention_mask):
return self.m.predict(input_ids, attention_mask)
if __name__ == "__main__":
# Prepping Data
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors='tf')
shark_module = SharkInference(
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))

View File

@@ -0,0 +1,18 @@
from shark.shark_inference import SharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))

View File

@@ -0,0 +1,36 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=2,  # The number of output labels; 2 for binary classification.
output_attentions=False,  # Whether the model returns attention weights.
output_hidden_states=False,  # Whether the model returns all hidden states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
jit_trace=True,
benchmark_mode=True)
shark_module.compile()
shark_module.forward((test_input,))
shark_module.benchmark_all((test_input,))

View File

@@ -0,0 +1,58 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Load the pretrained MiniLM (BERT-architecture) model.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
# Wrap the call in a predict lambda with a fixed, non-training signature.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
shark_module = SharkInference(
BertModule(),
test_input,
benchmark_mode=True)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)

View File

@@ -0,0 +1,35 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=2,  # The number of output labels; 2 for binary classification.
output_attentions=False,  # Whether the model returns attention weights.
output_hidden_states=False,  # Whether the model returns all hidden states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
jit_trace=True)
shark_module.compile()
result = shark_module.forward((test_input,))
print("Obtained result", result)

View File

@@ -0,0 +1,41 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
from shark.shark_importer import shark_load
from shark.parser import parser
import os
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
parser.add_argument(
"--download_mlir_path",
type=str,
default="minilm_tf_inference.mlir",
help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()
MAX_SEQUENCE_LENGTH = 512
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
model_name = "minilm_tf_inference"
minilm_mlir = shark_load(model_name, load_args.download_mlir_path)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
shark_module = SharkInference(
minilm_mlir, test_input, benchmark_mode=True)
shark_module.set_frontend("mhlo")
shark_module.compile()
shark_module.benchmark_all(test_input)

View File

@@ -0,0 +1,56 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Load the pretrained MiniLM (BERT-architecture) model.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
# Wrap the call in a predict lambda with a fixed, non-training signature.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
shark_module = SharkInference(
BertModule(),
(encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])))

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,80 @@
from PIL import Image
import requests
import torch
import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference
################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
headers = {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}
img = Image.open(requests.get(url, headers=headers,
stream=True).raw).convert("RGB")
# preprocessing pipeline
preprocess = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
])
img_preprocessed = preprocess(img)
return torch.unsqueeze(img_preprocessed, 0)
def load_labels():
classes_text = requests.get(
"https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
stream=True,
).text
labels = [line.strip() for line in classes_text.splitlines()]
return labels
def top3_possibilities(res):
_, indexes = torch.sort(res, descending=True)
percentage = torch.nn.functional.softmax(res, dim=1)[0] * 100
top3 = [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
return top3
class Resnet50Module(torch.nn.Module):
def __init__(self):
super().__init__()
self.resnet = models.resnet50(pretrained=True)
self.train(False)
def forward(self, img):
return self.resnet.forward(img)
image_url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
print("load image from " + image_url, file=sys.stderr)
img = load_and_preprocess_image(image_url)
labels = load_labels()
##############################################################################
input = torch.randn(1, 3, 224, 224)
print(input.shape)
## The img is passed to determine the input shape.
shark_module = SharkInference(Resnet50Module(), (img,))
shark_module.compile()
## Can pass any img or input to the forward module.
results = shark_module.forward((img,))
print("The top 3 results obtained via shark_runner is:")
print(top3_possibilities(torch.from_numpy(results)))
print()
print("The top 3 results obtained via torch is:")
print(top3_possibilities(Resnet50Module()(img)))

View File

@@ -0,0 +1,38 @@
from PIL import Image
import requests
from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
t5_inputs = [
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]
class T5Module(tf.Module):
def __init__(self):
super(T5Module, self).__init__()
self.m = TFT5Model.from_pretrained("t5-small")
self.m.predict = lambda x,y: self.m(input_ids=x, decoder_input_ids=y)
@tf.function(input_signature=t5_inputs)
def forward(self, input_ids, decoder_input_ids):
return self.m.predict(input_ids, decoder_input_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = T5Tokenizer.from_pretrained("t5-small")
text = "I love the distilled version of models."
inputs = tokenizer(
text, return_tensors="tf"
).input_ids
shark_module = SharkInference(
T5Module(), (inputs, inputs))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs,inputs)))

View File

@@ -0,0 +1,44 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
## The vision models present here: https://pytorch.org/vision/stable/models.html
vision_models_list = [
models.resnet18(pretrained=True),
models.alexnet(pretrained=True),
models.vgg16(pretrained=True),
models.squeezenet1_0(pretrained=True),
models.densenet161(pretrained=True),
models.inception_v3(pretrained=True),
models.shufflenet_v2_x1_0(pretrained=True),
models.mobilenet_v2(pretrained=True),
models.mobilenet_v3_small(pretrained=True),
models.resnext50_32x4d(pretrained=True),
models.wide_resnet50_2(pretrained=True),
models.mnasnet1_0(pretrained=True),
models.efficientnet_b0(pretrained=True),
models.regnet_y_400mf(pretrained=True),
models.regnet_x_400mf(pretrained=True),
]
for i, vision_model in enumerate(vision_models_list):
shark_module = SharkInference(
VisionModule(vision_model),
(input,),
)
shark_module.compile()
shark_module.forward((input,))
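The loop above runs a single forward pass per model. As a sketch (an adaptation, not code from this diff), the same loop can be switched to the benchmarking path that shark/shark_inference.py in this diff exposes via benchmark_mode and benchmark_all:

# Sketch: benchmark each vision model instead of a single forward pass.
for i, vision_model in enumerate(vision_models_list):
    shark_module = SharkInference(
        VisionModule(vision_model),
        (input,),
        benchmark_mode=True,
    )
    shark_module.compile()
    shark_module.benchmark_all((input,))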

View File

@@ -0,0 +1,32 @@
import torch
from shark.shark_inference import SharkInference
# Currently not supported: aten.transpose_conv2d is missing.
class UnetModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"mateuszbuda/brain-segmentation-pytorch",
"unet",
in_channels=3,
out_channels=1,
init_features=32,
pretrained=True,
)
self.train(False)
def forward(self, input):
return self.model(input)
input = torch.randn(1, 3, 224, 224)
print(input)
shark_module = SharkInference(
    UnetModule(),
    (input,),
    benchmark_mode=True,
)
shark_module.compile()
shark_module.benchmark_all((input,))
print(input)

View File

@@ -0,0 +1,50 @@
import torch
from torch.nn.utils import _stateless
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_runner import SharkTrainer
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
mod = MiniLMSequenceClassification()
def get_sorted_params(named_params):
return [i[1] for i in sorted(named_params.items())]
print(dict(mod.named_buffers()))
inp = (torch.randint(2, (1, 128)),)
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(mod, params_and_buffers, args,
{}).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
# optim.load_state_dict(optim_state)
optim.step()
return params, buffers
shark_module = SharkTrainer(mod, inp, custom_inference_fn=forward)
print(shark_module.forward())

View File

@@ -0,0 +1,45 @@
import numpy as np
import os
import time
import tensorflow as tf
from shark.shark_trainer import SharkTrainer
from shark.parser import parser
from shark.shark_importer import shark_load
parser.add_argument(
"--download_mlir_path",
type=str,
default="bert_tf_training.mlir",
help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()
tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Download BERT model from tank and train.
if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
model_name = "bert_tf_training"
bert_mlir = shark_load(model_name, load_args.download_mlir_path)
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
num_iter = 10
shark_module = SharkTrainer(
bert_mlir,
(sample_input_tensors,
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
shark_module.set_frontend("mhlo")
shark_module.compile()
start = time.time()
print(shark_module.train(num_iter))
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,88 @@
import sys
from absl import app
import time
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
from shark.shark_trainer import SharkTrainer
tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=2,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
def forward(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
num_iter = 10
shark_module = SharkTrainer(
BertModule(),
(sample_input_tensors,
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
shark_module.set_frontend("tensorflow")
shark_module.compile()
start = time.time()
print(shark_module.train(num_iter))
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,44 @@
import torch
from torch.nn.utils import _stateless
from shark.shark_trainer import SharkTrainer
class Foo(torch.nn.Module):
def __init__(self):
super(Foo, self).__init__()
self.l1 = torch.nn.Linear(10, 16)
self.relu = torch.nn.ReLU()
self.l2 = torch.nn.Linear(16, 2)
def forward(self, x):
out = self.l1(x)
out = self.relu(out)
out = self.l2(out)
return out
mod = Foo()
inp = (torch.randn(10, 10),)
def get_sorted_params(named_params):
return [i[1] for i in sorted(named_params.items())]
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(mod, params_and_buffers, args,
{}).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
optim.step()
return params, buffers
# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)
shark_module = SharkTrainer(mod, inp)
# Pass the training function in case of torch
shark_module.compile(training_fn=forward)
shark_module.train(num_iters=10)

View File

@@ -0,0 +1,81 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Any
import iree
import iree.runtime as ireert
import numpy as np
import torch
from iree.runtime import DeviceArray
from torch_mlir._mlir_libs._mlir.ir import Module
from torch_mlir.compiler_utils import (
get_module_name_for_debug_dump,
run_pipeline_with_repro_report,
)
from torch_mlir.eager_mode.torch_mlir_eager_backend import (
TorchMLIREagerBackend,
TensorMetaData,
)
from torch_mlir_e2e_test.eager_backends.refbackend import NUMPY_TO_TORCH_DTYPE_DICT
from shark.iree_utils import get_iree_compiled_module, IREE_DEVICE_MAP
class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
"""Main entry-point for the iree backend for torch-mlir eager mode.
EagerModeIREELinalgOnTensorsBackend uses iree.DeviceArray representations of tensors and
thus all of the wrapping and unwrapping and munging here is done to between torch.Tensor and iree.DeviceArray,
with np.ndarray as an intermediary.
"""
def __init__(self, device: str):
self.torch_device_str = device
self.iree_device_str = IREE_DEVICE_MAP[device]
self.config = ireert.Config(self.iree_device_str)
def get_torch_metadata(self, tensor: DeviceArray,
kwargs: Dict[str, Any]) -> TensorMetaData:
return TensorMetaData(
size=tensor.shape,
dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
device=torch.device(self.torch_device_str),
requires_grad=tensor.dtype.type
in {np.float, np.float32, np.float64} and
kwargs.get("requires_grad", False),
)
def compile(self, imported_module: Module):
fn_name = get_module_name_for_debug_dump(imported_module)
run_pipeline_with_repro_report(
imported_module,
"torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
"EagerMode",
)
callable, _ = get_iree_compiled_module(imported_module,
self.iree_device_str,
func_name=fn_name)
return callable
def copy_into(self, dst, src):
"""Copy output back to appropriate arg that it should alias."""
np.copyto(dst, src)
def transfer_from_device_to_torch(self, e):
return torch.from_numpy(e.to_host())
def transfer_from_torch_to_device(self,
tensor: torch.Tensor) -> DeviceArray:
return iree.runtime.asdevicearray(self.config.device, tensor.numpy())

359
shark/iree_utils.py Normal file
View File

@@ -0,0 +1,359 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime as ireert
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
import iree.compiler as ireec
from shark.torch_mlir_utils import get_module_name_for_asm_dump
from shark.cuda_utils import get_cuda_sm_cc
from shark.model_annotation import *
import subprocess
import numpy as np
import os
import re
import sys
IREE_DEVICE_MAP = {
"cpu": "local-task",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm"
}
IREE_TARGET_MAP = {
"cpu": "dylib",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm"
}
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
def check_device_drivers(device):
"""Checks necessary drivers present for gpu and vulkan devices"""
if (device in ["gpu", "cuda"]):
try:
subprocess.check_output('nvidia-smi')
except Exception:
return True
elif (device in ["metal", "vulkan"]):
try:
subprocess.check_output('vulkaninfo')
except Exception:
return True
elif (device == "cpu"):
return False
# Unknown device.
else:
return True
return False
def get_iree_cpu_args():
find_triple_cmd = "uname -s -m"
os_name, proc_name = subprocess.run(
find_triple_cmd, shell=True, stdout=subprocess.PIPE,
check=True).stdout.decode('utf-8').split()
if os_name == "Darwin":
find_kernel_version_cmd = "uname -r"
kernel_version = subprocess.run(find_kernel_version_cmd,
shell=True,
stdout=subprocess.PIPE,
check=True).stdout.decode('utf-8')
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
elif os_name == "Linux":
target_triple = f"{proc_name}-linux-gnu"
else:
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
raise Exception(error_message)
print(f"Target triple found:{target_triple}")
return [f"-iree-llvm-target-triple={target_triple}"]
def get_iree_gpu_args():
ireert.flags.FUNCTION_INPUT_VALIDATION = False
ireert.flags.parse_flags("--cuda_allow_inline_execution")
sm_arch = get_cuda_sm_cc()
if sm_arch in ['sm_70', 'sm_72', 'sm_75', 'sm_80', 'sm_84', 'sm_86']:
return [
"--iree-hal-cuda-disable-loop-nounroll-wa",
f"--iree-hal-cuda-llvm-target-arch={sm_arch}"
]
else:
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
def get_vulkan_triple_flag():
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk \'END{{print $NF}}\'"
vulkan_device = run_cmd(vulkan_device_cmd).strip()
if vulkan_device == "M1":
print("Found Apple Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "A100-SXM4-40GB":
print("Found Nvidia Device. Using ampere-rtx3080-linux")
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
else:
print(
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] or pull up an issue."
)
return None
def get_iree_vulkan_args():
#vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
vulkan_flag = []
vulkan_triple_flag = get_vulkan_triple_flag()
if vulkan_triple_flag is not None:
vulkan_flag.append(vulkan_triple_flag)
return vulkan_flag
def get_iree_device_args(device):
if device == "cpu":
return get_iree_cpu_args()
if device in ["gpu", "cuda"]:
return get_iree_gpu_args()
if device in ["metal", "vulkan"]:
return get_iree_vulkan_args()
return []
def get_iree_frontend_args(frontend):
if frontend in ["torch", "pytorch", "linalg"]:
return ["--iree-llvm-target-cpu-features=host"]
elif frontend in ["tensorflow", "tf", "mhlo"]:
return [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-flow-demote-i64-to-i32"
]
else:
# Frontend not found.
return []
def compile_module_to_flatbuffer(module, device, frontend, func_name,
model_config_path):
# Setup Compile arguments wrt to frontends.
input_type = ""
args = get_iree_frontend_args(frontend)
args += get_iree_device_args(device)
if frontend in ["tensorflow", "tf"]:
input_type = "mhlo"
elif frontend in ["mhlo", "tosa"]:
input_type = frontend
elif frontend in ["tflite"]:
input_type = "tosa"
# Annotate the input module with the configs
if model_config_path != None:
# Currently tuned model only works on tf frontend
if frontend in ["tensorflow", "tf"]:
input_module = module.decode('utf-8')
elif frontend in ["pytorch", "torch"]:
input_module = module.operation.get_asm()
with create_context() as ctx:
module = model_annotation(ctx,
input_contents=input_module,
config_path=model_config_path)
module = str(module)
# Compile according to the input type, else just try compiling.
if input_type not in ["mhlo", "tosa"]:
module = str(module)
if input_type != "":
# Currently for MHLO/TOSA.
flatbuffer_blob = ireec.compile_str(
module,
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
input_type=input_type)
else:
# Currently for Torch.
flatbuffer_blob = ireec.compile_str(
str(module),
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args)
return flatbuffer_blob
def get_iree_module(flatbuffer_blob, device, func_name):
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
config = ireert.Config(IREE_DEVICE_MAP[device])
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
ModuleCompiled = ctx.modules.module[func_name]
return ModuleCompiled, config
def get_iree_compiled_module(module,
device: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
"""Given a module returns the compiled .vmfb and configs"""
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
return get_iree_module(flatbuffer_blob, device, func_name)
def export_iree_module_to_vmfb(module,
device: str,
directory: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
module_name = f"{frontend}_{func_name}_{device}"
filename = os.path.join(directory, module_name + ".vmfb")
print(f"Saved vmfb in {filename}.")
with open(filename, 'wb') as f:
f.write(flatbuffer_blob)
return filename
def export_module_to_mlir_file(module, frontend, directory: str):
mlir_str = module
if frontend in ["tensorflow", "tf", "mhlo"]:
mlir_str = module.decode('utf-8')
elif frontend in ["pytorch", "torch"]:
mlir_str = module.operation.get_asm()
filename = os.path.join(directory, "model.mlir")
with open(filename, 'w') as f:
f.write(mlir_str)
print(f"Saved mlir in {filename}.")
return filename
def get_results(compiled_vm, input, config, frontend="torch"):
"""Runs a .vmfb file given inputs and config and returns output."""
device_inputs = input
if frontend in ["torch", "pytorch"]:
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
if frontend in ["tensorflow", "tf", "tflite"]:
device_inputs = []
for a in input:
if (isinstance(a, list)):
device_inputs.append([
ireert.asdevicearray(config.device, val, dtype=np.int32)
for val in a
])
else:
device_inputs.append(ireert.asdevicearray(config.device, a))
result = compiled_vm(*device_inputs)
result_tensors = []
if (isinstance(result, tuple)):
for val in result:
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
return result_tensors
elif (isinstance(result, dict)):
data = list(result.items())
res = np.array(data, dtype=object)
return np.copy(res)
else:
return np.copy(np.asarray(result, dtype=result.dtype))
######### Benchmark Related Tools ###########
def tensor_to_type_str(input_tensors: tuple, frontend: str):
"""
Input: A tuple of input tensors i.e tuple(torch.tensor)
Output: list of string that represent mlir types (i.e 1x24xf64)
# TODO: Support more than floats, and ints
"""
list_of_type = []
for input_tensor in input_tensors:
type_string = "x".join([str(dim) for dim in input_tensor.shape])
if frontend in ["torch", "pytorch"]:
dtype_string = str(input_tensor.dtype).replace("torch.", "")
elif frontend in ["tensorflow", "tf", "mhlo"]:
dtype = input_tensor.dtype
dtype_string = re.findall('\'[^"]*\'',
str(dtype))[0].replace("\'", "")
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
match = regex_split.match(dtype_string)
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
type_string += f"x{mlir_type_string}"
list_of_type.append(type_string)
return list_of_type
def build_benchmark_args(input_file: str,
device: str,
input_tensors: tuple,
frontend: str,
training=False):
"""
Inputs: input_file leading to vmfb, input_tensor to function, target device, and whether it is training or not.
Outputs: string that execute benchmark-module on target model.
"""
path = benchmark_module.__path__[0]
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
fn_name = "forward"
if training == True:
# TODO: Replace name of train with actual train fn name.
fn_name = "train"
benchmark_cl.append(f"--entry_function={fn_name}")
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
mlir_input_types = tensor_to_type_str(input_tensors, frontend)
for mlir_input in mlir_input_types:
benchmark_cl.append(f"--function_input={mlir_input}")
time_extractor = "| awk \'END{{print $2 $3}}\'"
benchmark_cl.append(time_extractor)
return benchmark_cl
def run_cmd(cmd):
"""
Inputs: cli command string.
"""
try:
result = subprocess.run(cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True)
result_str = result.stdout.decode()
return result_str
except Exception:
sys.exit("Exiting program due to error running:", cmd)
def run_benchmark_module(benchmark_cl):
"""
Run benchmark command, extract result and return iteration/seconds.
Input: benchmark command.
"""
benchmark_path = benchmark_cl[0]
assert os.path.exists(
benchmark_path
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
bench_result = run_cmd(' '.join(benchmark_cl))
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
match = regex_split.match(bench_result)
time = float(match.group(1))
unit = match.group(2)
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
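As a usage sketch (mirroring how shark/shark_runner.py below wires these helpers together), compilation and execution are typically combined as follows; linalg_module stands for an already-imported torch-mlir module and the input shape is a placeholder:

import numpy as np

# linalg_module: assumed to be a torch-mlir module lowered to linalg-on-tensors.
compiled_module, config = get_iree_compiled_module(linalg_module,
                                                   device="cpu",
                                                   frontend="torch",
                                                   func_name="forward")
outputs = get_results(compiled_module,
                      [np.ones((1, 3, 224, 224), dtype=np.float32)],
                      config,
                      frontend="torch")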

143
shark/model_annotation.py Normal file
View File

@@ -0,0 +1,143 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import json
import os
from typing import List, Dict
from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans
MATMUL_OP_NAMES = set(
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
idx = 0
def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
if os.path.isfile(input_contents):
with open(input_contents, "rb") as f:
input_contents = f.read()
module = ir.Module.parse(input_contents)
with open(config_path, "r") as f:
data = json.load(f)
configs = data["options"]
# The Python API does not expose a general walk() function, so we just
# do it ourselves.
walk_children(module.operation, configs)
if not module.operation.verify():
raise RuntimeError("Modified program does not verify!")
# More efficient than: print(module)
# - Disables verification (already done above)
# - Writes as binary, avoiding costly unicode conversions
sys.stdout.buffer.write(
module.operation.get_asm(assume_verified=True, binary=True))
return module
def walk_children(op: ir.Operation, configs: List[Dict]):
for region in op.regions:
for block in region.blocks:
for child_op in block.operations:
# TODO: This is dumb. Both Operation and OpView should expose
# 'operation' and 'name' attributes.
if isinstance(child_op, ir.OpView):
child_op = child_op.operation
if child_op.name in MATMUL_OP_NAMES:
global idx
tile_sizes, pipeline, workgroup_size, \
split_k, pipeline_depth = parse_config(configs[idx])
add_compilation_info(child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth)
if split_k:
add_split_k(child_op, split_k)
idx = idx + 1
print(f"Updated op {child_op}", file=sys.stderr)
walk_children(child_op, configs)
def parse_config(config: Dict):
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
pipeline = "LLVMGPUMatmulSimt" if config[
"pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
tile_sizes = [config["work_group_tile_sizes"]]
workgroup_size = config["work_group_sizes"]
try:
pipeline_depth = config["pipeline_depth"]
except:
pipeline_depth = None
try:
split_k = config["split_k"]
except:
split_k = None
else:
pipeline = config["pipeline"]
tile_sizes = [
config["work_group_tile_sizes"], config["l1_tile_sizes"],
config["vector_tile_sizes"]
]
workgroup_size = []
split_k = None
pipeline_depth = None
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
pipeline: str, workgroup_size: List[int],
pipeline_depth: int):
# We don't have a Python binding for CompilationInfo, so we just parse
# its string form.
if pipeline_depth:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
f"workgroup_size = {repr(workgroup_size)}>")
else:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline}>, "
f"workgroup_size = {repr(workgroup_size)}>")
op.attributes["compilation_info"] = attr
def add_split_k(op: ir.Operation, k: int):
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
op.attributes["iree_flow_split_k"] = attr
def create_context() -> ir.Context:
context = ir.Context()
ireec_trans.register_all_dialects(context)
context.allow_unregistered_dialects = True
return context
if __name__ == "__main__":
with create_context() as ctx:
model_annotation(ctx,
input_contents=sys.argv[1],
config_path=sys.argv[2])
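For context, a sketch of the JSON config file that parse_config above expects; the key names follow from the code, while the concrete values are placeholders rather than tuned numbers:

import json

# Hypothetical tuning config: one entry in "options" per matmul-like op,
# consumed in the order walk_children visits them.
example_config = {
    "options": [{
        "pipeline": "GPU_TENSORCORE",
        "work_group_tile_sizes": [32, 32, 16],
        "work_group_sizes": [64, 2, 1],
        "pipeline_depth": 4,
        "split_k": 2,
    }]
}
with open("model_config.json", "w") as f:
    json.dump(example_config, f, indent=2)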

71
shark/parser.py Normal file
View File

@@ -0,0 +1,71 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
def dir_path(path):
if os.path.isdir(path):
return path
else:
raise argparse.ArgumentTypeError(
f"readable_dir:{path} is not a valid path")
def dir_file(path):
if os.path.isfile(path):
return path
else:
raise argparse.ArgumentTypeError(
f"readable_file:{path} is not a valid file")
parser = argparse.ArgumentParser(description='SHARK runner.')
parser.add_argument(
"--device",
type=str,
default="cpu",
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan")
parser.add_argument(
"--repro_dir",
help=
"Directory to which module files will be saved for reproduction or debugging.",
type=dir_path,
default="/tmp/")
parser.add_argument("--save_mlir",
default=False,
action="store_true",
help="Saves input MLIR module to /tmp/ directory.")
parser.add_argument("--save_vmfb",
default=False,
action="store_true",
help="Saves iree .vmfb module to /tmp/ directory.")
parser.add_argument(
"--model_config_path",
help="Directory to where the tuned model config file is located.",
default=None)
parser.add_argument(
"--num_warmup_iterations",
type=int,
default=2,
help="Run the model for the specified number of warmup iterations.")
parser.add_argument(
"--num_iterations",
type=int,
default=1,
help="Run the model for the specified number of iterations.")
shark_args, unknown = parser.parse_known_args()
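Because every example script above parses this shared parser with parse_known_args, these flags can be combined with any script-specific ones; a hypothetical invocation (the script name is illustrative) looks like:

    python resnet50_script.py --device=vulkan --save_vmfb --num_warmup_iterations=5 --num_iterations=20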

136
shark/shark_importer.py Normal file
View File

@@ -0,0 +1,136 @@
# Lint as: python3
"""SHARK Importer"""
import iree.compiler.tflite as iree_tflite_compile
import iree.runtime as iree_rt
import numpy as np
import os
import sys
import tensorflow.compat.v2 as tf
import urllib.request
from shark.shark_inference import SharkInference
class SharkImporter:
def __init__(self,
model_path,
model_type: str = "tflite",
model_source_hub: str = "tfhub",
device: str = None,
dynamic: bool = False,
jit_trace: bool = False,
benchmark_mode: bool = False):
self.model_path = model_path
self.model_type = model_type
self.model_source_hub = model_source_hub
self.device = device
self.dynamic = dynamic
self.jit_trace = jit_trace
self.benchmark_mode = benchmark_mode
self.inputs = None
self.input_details = None
self.output_details = None
# create tmp model file directory
if self.model_path is None:
print("Error. No model_path, Please input model path.")
return
if self.model_source_hub == "tfhub":
# compile and run tfhub tflite
if self.model_type == "tflite":
print("Setting up for TMP_DIR")
exe_basename = os.path.basename(sys.argv[0])
self.workdir = os.path.join(os.path.dirname(__file__), "tmp",
exe_basename)
print(f"TMP_DIR = {self.workdir}")
os.makedirs(self.workdir, exist_ok=True)
self.tflite_file = '/'.join([self.workdir, 'model.tflite'])
print("Setting up local address for tflite model file: ",
self.tflite_file)
if os.path.exists(self.model_path):
self.tflite_file = self.model_path
else:
print("Download tflite model")
urllib.request.urlretrieve(self.model_path,
self.tflite_file)
print("Setting up tflite interpreter")
self.tflite_interpreter = tf.lite.Interpreter(
model_path=self.tflite_file)
self.tflite_interpreter.allocate_tensors()
# default input initialization
self.input_details, self.output_details = self.get_model_details(
)
inputs = self.generate_inputs(
self.input_details) # device_inputs
self.setup_inputs(inputs)
def generate_inputs(self, input_details):
args = []
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args.append(np.zeros(shape=input["shape"], dtype=input["dtype"]))
return args
def get_model_details(self):
if self.model_type == "tflite":
print("Get tflite input output details")
self.input_details = self.tflite_interpreter.get_input_details()
self.output_details = self.tflite_interpreter.get_output_details()
return self.input_details, self.output_details
def setup_inputs(self, inputs):
print("Setting up inputs")
self.inputs = inputs
def compile(self, inputs=None):
if inputs is not None:
self.setup_inputs(inputs)
# preprocess model_path to get model_type and Model Source Hub
print("Shark Importer Intialize SharkInference and Do Compile")
if self.model_source_hub == "tfhub":
# compile and run tfhub tflite
print("Inference tfhub model")
self.shark_module = SharkInference(self.tflite_file,
self.inputs,
device=self.device,
dynamic=self.dynamic,
jit_trace=self.jit_trace)
self.shark_module.set_frontend("tflite")
self.shark_module.compile()
elif self.model_source_hub == "huggingface":
print("Inference", self.model_source_hub, " not implemented yet")
elif self.model_source_hub == "jaxhub":
print("Inference", self.model_source_hub, " not implemented yet")
def forward(self, inputs=None):
if inputs is not None:
self.setup_inputs(inputs)
# preprocess model_path to get model_type and Model Source Hub
print("Shark Importer forward Model")
if self.model_source_hub == "tfhub":
shark_results = self.shark_module.forward(self.inputs)
# Fix type information for unsigned cases.
# for test compare result
shark_results = list(shark_results)
for i in range(len(self.output_details)):
dtype = self.output_details[i]["dtype"]
shark_results[i] = shark_results[i].astype(dtype)
return shark_results
elif self.model_source_hub == "huggingface":
print("Inference", self.model_source_hub, " not implemented yet")
elif self.model_source_hub == "jaxhub":
print("Inference", self.model_source_hub, " not implemented yet")
def shark_load(model_name, file_path):
file_link = f"https://storage.googleapis.com/shark_tank/users/stanley/{model_name}.mlir"
response = urllib.request.urlretrieve(file_link, file_path)
if not os.path.isfile(file_path):
raise ValueError(
    f"Tried looking for the target mlir at {file_path}, but it could not be found."
)
with open(file_path, "rb") as input_file:
model_mlir = input_file.read()
return model_mlir
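A short usage sketch of shark_load (mirroring the MiniLM loader example earlier in this diff); the model name, local path and inputs are placeholders:

# Download a pre-imported mhlo module from the SHARK tank and run it.
minilm_mlir = shark_load("minilm_tf_inference", "/tmp/minilm_tf_inference.mlir")
shark_module = SharkInference(minilm_mlir, test_inputs)  # test_inputs: tuple of tf tensors
shark_module.set_frontend("mhlo")
shark_module.compile()
result = shark_module.forward(test_inputs)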

115
shark/shark_inference.py Normal file
View File

@@ -0,0 +1,115 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
import time
import sys
# Prints to stderr.
def print_err(*a):
print(*a, file=sys.stderr)
class SharkInference:
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
def __init__(self,
model,
input: tuple,
device: str = None,
dynamic: bool = False,
jit_trace: bool = False,
benchmark_mode: bool = False):
self.model = model
self.input = input
self.dynamic = dynamic
self.jit_trace = jit_trace
self.benchmark_mode = benchmark_mode
# By default it's torch frontend.
self.frontend = "pytorch"
# Sets the device.
self.device = device if device is not None else shark_args.device
self.model_config_path = shark_args.model_config_path
self.shark_runner = None
# Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str):
if frontend not in [
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg",
"tosa", "tflite"
]:
print_err("frontend not supported.")
else:
self.frontend = frontend
def compile(self):
# Inference does not use AOT.
from_aot = False
if (self.benchmark_mode == True):
self.shark_runner = SharkBenchmarkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, from_aot,
self.frontend)
else:
self.shark_runner = SharkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, from_aot,
self.frontend,
self.model_config_path)
# inputs are considered to be np.array.
def forward(self, inputs):
input_list = inputs
# converts the inputs to numpy.
if self.frontend in ["pytorch", "torch"]:
input_list = [x.detach().numpy() for x in inputs]
elif self.frontend in ["tensorflow", "tf"]:
input_list = [x.numpy() for x in inputs]
return self.shark_runner.forward(input_list, self.frontend)
# Saves the .vmfb module.
def save_module(self, dir=None):
if dir is None:
return self.shark_runner.save_module()
return self.shark_runner.save_module(dir)
######### Benchmark Related Functions #########
def benchmark_mode(func):
def inner(self, *args, **kwargs):
assert self.benchmark_mode, "SharkRunner needs to be in benchmark mode to run benchmark methods."
return func(self, *args, **kwargs)
return inner
@benchmark_mode
def benchmark_all(self, inputs):
self.shark_runner.benchmark_all(inputs)
@benchmark_mode
def benchmark_frontend(self, inputs):
self.shark_runner.benchmark_frontend(inputs)
@benchmark_mode
def benchmark_python(self, inputs):
self.shark_runner.benchmark_python(inputs)
@benchmark_mode
def benchmark_c(self):
self.shark_runner.benchmark_c()

205
shark/shark_runner.py Normal file
View File

@@ -0,0 +1,205 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from iree.compiler import tf as tfc
import iree.compiler.tflite as ireec_tflite
from torch.utils._python_dispatch import enable_torch_dispatch_mode
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from torch_mlir_e2e_test.eager_backends.refbackend import EagerModeRefBackend
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb, export_module_to_mlir_file, build_benchmark_args, run_benchmark_module
import os
from shark.parser import shark_args
from tqdm import tqdm
import time
class SharkRunner:
"""Base class for Shark Inference and Shark Runner."""
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
model_config_path: str = None,
):
self.model = model
self.frontend_model = model
self.from_aot = from_aot
self.input = input
self.frontend = frontend
self.vmfb_file = None
func_name = "forward"
self.device = device if device is not None else shark_args.device
if self.frontend in ["pytorch", "torch"]:
# get torch-mlir dialect
# self.model = torch.Module
# TODO assert
self.model = get_torch_mlir_module(self.model, input, dynamic,
jit_trace, from_aot)
elif self.frontend in ["tensorflow", "tf"]:
# get mhlo dialect
# self.model = tf.Module
# TODO assert
self.model = tfc.compile_module(self.model,
exported_names=[func_name],
import_only=True)
elif self.frontend in ["tflite"]:
print("Setting up for IREE compiler tflite")
# get tosa dialect
# self.model = model.tflite
# TODO assert
self.model = ireec_tflite.compile_file(self.model,
input_type="tosa",
import_only=True)
func_name = "main"
# TODO: We can capture the .vmfb module here and later use it for saving
# rather than recompiling it again, if used for saving.
(
self.iree_compilation_module,
self.iree_config,
) = get_iree_compiled_module(self.model,
self.device,
self.frontend,
func_name=func_name,
model_config_path=model_config_path)
# Debugging Options:
if shark_args.save_mlir:
export_module_to_mlir_file(self.model, self.frontend,
shark_args.repro_dir)
if shark_args.save_vmfb:
self.vmfb_file = self.save_module(shark_args.repro_dir)
# All the timings and benchmarking can be done here.
def forward(self, input, frontend):
return get_results(self.iree_compilation_module, input,
self.iree_config, frontend)
# TODO: Instead of passing a directory and having names decided by the module,
# the user may want to save the module under a manual name.
def save_module(self, dir=os.getcwd()):
return export_iree_module_to_vmfb(self.model, self.device, dir,
self.frontend)
# TODO: Load a module and directly use it, we will need to set the frontend
# in this case.
def load_module(self, name):
pass
class SharkEagerMode:
def __init__(self, device="cpu"):
if device == "refbackend":
torch_mlir_tensor.backend = EagerModeRefBackend()
else:
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(
device)
self.guard = enable_torch_dispatch_mode(TorchMLIRTensor)
self.guard.__enter__()
def __del__(self):
self.guard.__exit__(None, None, None)
class SharkBenchmarkRunner(SharkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
):
SharkRunner.__init__(self, model, input, dynamic, device, jit_trace,
from_aot, frontend)
if (self.vmfb_file == None):
self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
shark_args.repro_dir,
frontend)
self.benchmark_cl = build_benchmark_args(self.vmfb_file, device, input,
frontend, from_aot)
def benchmark_frontend(self, inputs):
if self.frontend in ["pytorch", "torch"]:
self.benchmark_torch(inputs)
elif self.frontend in ["tensorflow", "tf"]:
self.benchmark_tf(inputs)
def benchmark_torch(self, inputs):
inputs = self.input if self.from_aot else inputs
inputs = inputs[0]
for i in range(shark_args.num_warmup_iterations):
self.frontend_model.forward(inputs)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.frontend_model.forward(inputs)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_tf(self, inputs):
for i in range(shark_args.num_warmup_iterations):
self.frontend_model.forward(*inputs)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.frontend_model.forward(*inputs)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return
def benchmark_c(self):
result = run_benchmark_module(self.benchmark_cl)
print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")
def benchmark_python(self, inputs):
inputs = self.input if self.from_aot else inputs
input_list = [x for x in inputs]
for i in range(shark_args.num_warmup_iterations):
self.forward(input_list, self.frontend)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.forward(input_list, self.frontend)
if i == shark_args.num_iterations - 1:
end = time.time()
print(
f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_all(self, inputs):
self.benchmark_frontend(inputs)
self.benchmark_python(inputs)
self.benchmark_c()

139
shark/shark_trainer.py Normal file
View File

@@ -0,0 +1,139 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner
from shark.backward_makefx import MakeFxModule
import numpy as np
from tqdm import tqdm
import time
import sys
# Prints to stderr.
def print_err(*a):
print(*a, file=sys.stderr)
class SharkTrainer:
"""Training pytorch, tensorflow module on shark runtime."""
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = True,
):
self.model = model
# Change tuple to list.
self.input = [x for x in input]
self.dynamic = dynamic
self.from_aot = from_aot
self.jit_trace = jit_trace
self.from_aot = from_aot
# By default it's the torch frontend.
self.frontend = "pytorch"
self.device = device if device is not None else shark_args.device
self.shark_runner = None
# Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str):
if frontend not in [
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg", "tosa"
]:
print_err("frontend not supported.")
else:
self.frontend = frontend
# Training function is needed in the case of torch_fn.
def compile(self, training_fn=None):
if self.frontend in ["torch", "pytorch"]:
aot_module = MakeFxModule(self.model,
tuple(self.input),
custom_inference_fn=training_fn)
aot_module.generate_graph()
# Returns the backward graph.
training_graph = aot_module.training_graph
weights = self.get_torch_params()
self.shark_runner = SharkRunner(training_graph,
weights + self.input, self.dynamic,
self.device, self.jit_trace,
self.from_aot, self.frontend)
elif self.frontend in ["tensorflow", "tf", "mhlo"]:
self.shark_runner = SharkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, self.from_aot,
self.frontend)
else:
print_err("Unknown frontend")
return
# The inputs to the mlir-graph are weights, buffers and inputs respectively.
def get_torch_params(self):
params = [i.detach() for i in self.model.parameters()]
buffers = [i.detach() for i in self.model.buffers()]
return params + buffers
# Function to train pytorch module.
def _train_torch(self, num_iters):
"""Returns the updated weights after num_iters"""
params = self.get_torch_params()
params = [x.numpy() for x in params]
print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)):
params = self.shark_runner.forward(params + self.input,
self.frontend)
return params
# Function to train tensorflow module.
# Output final loss.
# TODO(raikonenfnu): Save updated weight/states in SHARK.
def _train_tf(self, num_iters):
input_list = []
for x in self.input:
if (isinstance(x, list)):
nested_list = []
for val in x:
if (isinstance(val, np.ndarray)):
nested_list.append(val)
else:
nested_list.append(val.numpy())
input_list.append(nested_list)
elif (isinstance(x, np.ndarray)):
input_list.append(x)
else:
input_list.append(x.numpy())
print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)):
outputs = self.shark_runner.forward(input_list, self.frontend)
return outputs
def train(self, num_iters=1):
if self.frontend in ["torch", "pytorch"]:
return self._train_torch(num_iters)
elif self.frontend in ["tf", "tensorflow", "mhlo"]:
return self._train_tf(num_iters)
else:
print_err("Unknown frontend")
return

View File

@@ -0,0 +1,52 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest
model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
for input in input_details:
print("\t%s, %s", str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"]))
args.append(
np.ones(shape=input_details[1]["shape"],
dtype=input_details[1]["dtype"]))
args.append(
np.zeros(shape=input_details[2]["shape"],
dtype=input_details[2]["dtype"]))
return args
# Runs all the tests across cpu, gpu and vulkan according to the available
# drivers; a specific case can be run by commenting out the others.
pytest_param = pytest.mark.parametrize(
('dynamic', 'device'),
[
pytest.param(False, 'cpu'),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, 'cpu', marks=pytest.mark.skip),
])
@pytest_param
def test_albert(dynamic, device):
my_shark_importer = SharkImporter(model_path=model_path,
model_type="tflite",
model_source_hub="tfhub",
device=device,
dynamic=dynamic,
jit_trace=True)
input_details, output_details = my_shark_importer.get_model_details()
inputs = generate_inputs(input_details) # device_inputs
my_shark_importer.compile(inputs)
shark_results = my_shark_importer.forward(inputs)
# print(shark_results)

133
shark/torch_mlir_utils.py Normal file
View File

@@ -0,0 +1,133 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import io
import pickle
import sys
import os
from io import StringIO
from torch_mlir.dialects.torch.importer.jit_ir import (
ClassAnnotator,
ModuleBuilder,
)
from torch_mlir_e2e_test.torchscript.serialization import (
extract_serializable_annotations, apply_serializable_annotations,
SerializableTest)
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
from torch_mlir.passmanager import PassManager
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
from torch_mlir.ir import StringAttr
def get_module_name_for_asm_dump(module):
"""Gets a name suitable for an assembly dump.
The name is not guaranteed to be unique.
"""
if not "torch.debug_module_name" in module.operation.attributes:
return "UnnammedModule"
return StringAttr(
module.operation.attributes["torch.debug_module_name"]).value
def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
"""TODO: Include necessary documentation"""
annotations_list = [None]
for i in inputs:
temp_list = []
if dynamic:
temp_list.append([-1 for i in range(len(i.shape))])
else:
temp_list.append(list(i.shape))
temp_list.append(i.dtype)
temp_list.append(True)
annotations_list.append(tuple(temp_list))
return annotations_list
def run_on_refbackend(torch_module, inputs):
backend = refbackend.RefBackendLinalgOnTensorsBackend()
compiled = backend.compile(torch_module)
jit_module = backend.load(compiled)
np_inputs = [x.numpy() for x in inputs]
return jit_module.forward(np_inputs[0])
def shark_jit_trace(module, input: tuple, dynamic: bool,
tracing_required: bool):
"""TODO: Include necessary documentation."""
if not tracing_required:
return torch.jit.script(module)
traced_module = torch.jit.trace_module(module, {"forward": input})
actual_script = traced_module._actual_script_module
export(actual_script.forward)
annotate_args_decorator = annotate_args(
get_input_annotations(input, dynamic))
annotate_args_decorator(actual_script.forward)
module = torch.jit.script(actual_script)
# TODO: remove saved annotations.pickle
torchscript_module_bytes = module.save_to_buffer({
"annotations.pkl":
pickle.dumps(extract_serializable_annotations(module))
})
serializable_test = SerializableTest(unique_name="",
program=torchscript_module_bytes,
trace=None)
_extra_files = {"annotations.pkl": ""}
module = torch.jit.load(io.BytesIO(serializable_test.program),
_extra_files=_extra_files)
# Load the pickled annotations.
annotations = pickle.loads(_extra_files["annotations.pkl"])
apply_serializable_annotations(module, annotations)
return module
def get_torch_mlir_module(
module,
input: tuple,
dynamic: bool,
tracing_required: bool,
from_aot: bool = False,
):
"""TODO: Include necessary documentation."""
# Tracing is not required from the aot_module.
if not from_aot:
module = shark_jit_trace(module, input, dynamic, tracing_required)
mb = ModuleBuilder()
class_annotator = ClassAnnotator()
class_annotator.exportNone(module._c._type())
class_annotator.exportPath(module._c._type(), ["forward"])
class_annotator.annotateArgs(
module._c._type(),
["forward"],
get_input_annotations(input, dynamic),
)
mb.import_module(module._c, class_annotator)
with mb.module.context:
pm = PassManager.parse(
"torchscript-module-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline"
)
pm.run(mb.module)
return mb.module
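As a minimal sketch (the toy module and input shape are illustrative, not taken from this diff), the two entry points above can be exercised end to end like this:

import torch

class AddOne(torch.nn.Module):
    def forward(self, x):
        return x + 1.0

example_input = (torch.randn(4),)
# Trace, annotate and lower the module to linalg-on-tensors...
linalg_module = get_torch_mlir_module(AddOne(), example_input,
                                      dynamic=False,
                                      tracing_required=True)
# ...and run it on torch-mlir's reference backend as a sanity check.
print(run_on_refbackend(linalg_module, example_input))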

0
tank/__init__.py Normal file
View File

74
tank/model_utils.py Normal file
View File

@@ -0,0 +1,74 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
import torch
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib
torch.manual_seed(0)
##################### Hugging Face LM Models ###################################
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
def get_hf_model(name):
model = HuggingFaceLanguage(name)
# TODO: Currently the test input is set to (1,128)
test_input = torch.randint(2, (1, 128))
actual_out = model(test_input)
return model, test_input, actual_out
################################################################################
##################### Torch Vision Models ###################################
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
def get_vision_model(torch_model):
model = VisionModule(torch_model)
# TODO: Currently the test input is set to (1, 3, 224, 224)
test_input = torch.randn(1, 3, 224, 224)
actual_out = model(test_input)
return model, test_input, actual_out
################################################################################
# Utility function for comparing two tensors (torch).
def compare_tensors(torch_tensor, numpy_tensor):
# setting the absolute and relative tolerance
rtol = 1e-02
atol = 1e-03
torch_to_numpy = torch_tensor.detach().numpy()
return np.allclose(torch_to_numpy, numpy_tensor, rtol, atol)

63
tank/model_utils_tf.py Normal file
View File

@@ -0,0 +1,63 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
import tensorflow as tf
import numpy as np
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
        # Load the pretrained TF model from the PyTorch checkpoint.
        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
        # Wrap the model's call method for inference (training disabled).
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
actual_out = model.forward(*test_input)
return model, test_input, actual_out
# Utility function for comparing two tensors (tensorflow).
def compare_tensors_tf(tf_tensor, numpy_tensor):
# setting the absolute and relative tolerance
rtol = 1e-02
atol = 1e-03
tf_to_numpy = tf_tensor.pooler_output.numpy()
return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)

View File

@@ -0,0 +1,92 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import pytest
#torch.manual_seed(0)
class AlbertModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_hf_model("albert-base-v2")
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(model, (input,),
device=self.device,
dynamic=self.dynamic,
jit_trace=True)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class AlbertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = AlbertModuleTester()
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Albert model on GPU currently fails to produce torch numbers")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Static albert model on vulkan currently fails to validate.")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,90 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class AlexnetModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.alexnet(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class AlexnetModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = AlexnetModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

91
tank/pytorch/bert_test.py Normal file
View File

@@ -0,0 +1,91 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import pytest
#torch.manual_seed(0)
class BertModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_hf_model("bert-base-uncased")
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(model, (input,),
device=self.device,
dynamic=self.dynamic,
jit_trace=True)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class BertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = BertModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="BERT model on GPU currently fails to produce torch numbers")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

3
tank/pytorch/conftest.py Normal file
View File

@@ -0,0 +1,3 @@
def pytest_addoption(parser):
# Attaches SHARK command-line arguments to the pytest machinery.
parser.addoption("--save_mlir", action="store_true", default="False", help="Pass option to save input MLIR module to /tmp/ directory.")

View File

@@ -0,0 +1,91 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import pytest
torch.manual_seed(0)
class MiniLMModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(model, (input,),
device=self.device,
dynamic=self.dynamic,
jit_trace=True)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class MiniLMModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = MiniLMModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="language models failing for dynamic case")
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="minilm inference on gpu currently returns invalid results")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="language models failing for dynamic case")
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.xfail(reason="language models failing for dynamic case")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,89 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class Resnet101ModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.resnet101(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class Resnet101ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = Resnet101ModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,90 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class Resnet18ModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.resnet18(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class Resnet18ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = Resnet18ModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,90 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class Resnet50ModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.resnet50(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class Resnet50ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = Resnet50ModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,90 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class SqueezenetModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.squeezenet1_0(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class SqueezenetModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = SqueezenetModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,111 @@
import argparse
import os
from functools import partial
import clip
import torch
from torchvision import transforms
from tqdm import trange
try:
from diffusion import get_model, sampling, utils
except ModuleNotFoundError:
print(
"You need to download v-diffusion source from https://github.com/crowsonkb/v-diffusion-pytorch"
)
raise
torch.manual_seed(0)
def parse_prompt(prompt, default_weight=3.0):
if prompt.startswith("http://") or prompt.startswith("https://"):
vals = prompt.rsplit(":", 2)
vals = [vals[0] + ":" + vals[1], *vals[2:]]
else:
vals = prompt.rsplit(":", 1)
vals = vals + ["", default_weight][len(vals) :]
return vals[0], float(vals[1])
args = argparse.Namespace(
prompts=["New York City, oil on canvas"],
batch_size=1,
device="cuda",
model="cc12m_1_cfg",
n=1,
steps=10,
)
device = torch.device(args.device)
print("Using device:", device)
model = get_model(args.model)()
_, side_y, side_x = model.shape
checkpoint = f"{args.model}.pth"
if os.path.exists(checkpoint):
model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
model = model.to(device).eval().requires_grad_(False)
clip_model_name = model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
clip_model.eval().requires_grad_(False)
normalize = transforms.Normalize(
mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]
)
zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
target_embeds, weights = [zero_embed], []
txt, weight = parse_prompt(args.prompts[0])
target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
weights.append(weight)
weights = torch.tensor([1 - sum(weights), *weights], device=device)
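# cfg_model_fn below implements classifier-free guidance: the batch is repeated
# once per conditioning (the zero embedding and the text embedding), the model
# is evaluated on the stacked batch, and the per-conditioning predictions are
# blended using the weights tensor built above.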
def cfg_model_fn(model, x, t):
n = x.shape[0]
n_conds = len(target_embeds)
x_in = x.repeat([n_conds, 1, 1, 1])
t_in = t.repeat([n_conds])
clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
v = vs.mul(weights[:, None, None, None, None]).sum(0)
return v
x = torch.randn([args.n, 3, side_y, side_x], device=device)
t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]
def repro(model):
if device.type == "cuda":
model = model.half()
steps = utils.get_spliced_ddpm_cosine_schedule(t)
for i in trange(0, args.n, args.batch_size):
cur_batch_size = min(args.n - i, args.batch_size)
outs = sampling.plms_sample(
partial(cfg_model_fn, model), x[i : i + cur_batch_size], steps, {}
)
for j, out in enumerate(outs):
utils.to_pil_image(out).save(f"out_{i + j:05}.png")
def trace(model, x, t):
n = x.shape[0]
n_conds = len(target_embeds)
x_in = x.repeat([n_conds, 1, 1, 1])
t_in = t.repeat([n_conds])
clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
ts_mod = torch.jit.trace(model, (x_in, t_in, clip_embed_in))
print(ts_mod.graph)
clip_model = clip.load(clip_model_name, jit=True, device=device)[0]
print(clip_model.graph)
# You can't run both of these because repro will `.half()` the model
# repro(model)
trace(model, x, t[0])

(Binary image file added in this change; 145 KiB, preview not shown.)

View File

@@ -0,0 +1,90 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args
import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest
torch.manual_seed(0)
class WideResnet50ModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
def create_and_check_module(self):
model, input, act_out = get_vision_model(models.wide_resnet50_2(pretrained=True))
shark_args.save_mlir = self.save_mlir
shark_module = SharkInference(
model,
(input,),
device=self.device,
dynamic=self.dynamic,
)
shark_module.compile()
results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)
class WideResnet50ModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
def setUp(self):
        self.module_tester = WideResnet50ModuleTester(save_mlir=self.save_mlir)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
def test_module_dynamic_cpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_static_gpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
self.module_tester.dynamic = True
self.module_tester.device = "gpu"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
self.module_tester.dynamic = False
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
self.module_tester.dynamic = True
self.module_tester.device = "vulkan"
self.module_tester.create_and_check_module()
if __name__ == '__main__':
unittest.main()

15
tank/tf/README.md Normal file
View File

@@ -0,0 +1,15 @@
## Running SharkInference on CPUs, GPUs, and macOS
### Run the `seq_classification.py` script
#### Supported models: [Hugging Face sequence classification](https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification)
```shell
./seq_classification.py --hf_model_name="hf_model" --device="cpu" # Use gpu | vulkan
```
Once the model is compiled for the selected device, you can pass in text and
get back the classification logits.
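
If you prefer to drive the same flow from Python, a rough sketch is below; it assumes the repository root is on `PYTHONPATH` and reuses the helpers from `seq_classification.py`, so treat the import path as an assumption:

```python
from tank.tf.seq_classification import SeqClassification, preprocess_input
from shark.shark_inference import SharkInference

# Tokenize a sample sentence and compile the classifier once.
inputs = preprocess_input("SHARK makes TensorFlow inference portable.")
shark_module = SharkInference(
    SeqClassification("bert-base-uncased"),
    (inputs["input_ids"], inputs["attention_mask"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()

# Softmaxed logits for the two labels.
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
```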

View File

@@ -0,0 +1,47 @@
from transformers import TFAutoModelForMaskedLM
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of input signature.
inputs_signature = [
tf.TensorSpec(shape=[1, 512], dtype=tf.int32),
]
class AutoModelMaskedLM(tf.Module):
def __init__(self, model_name):
super(AutoModelMaskedLM, self).__init__()
self.m = TFAutoModelForMaskedLM.from_pretrained(model_name,
output_attentions=False)
self.m.predict = lambda x: self.m(input_ids=x)
@tf.function(input_signature=inputs_signature)
def forward(self, input_ids):
return self.m.predict(input_ids)
fail_models = ["microsoft/deberta-base", "google/rembert", "google/tapas-base"]
supported_models = [
"albert-base-v2", "bert-base-uncased", "camembert-base",
"dbmdz/convbert-base-turkish-cased", "distilbert-base-uncased",
"google/electra-small-discriminator",
"hf-internal-testing/tiny-random-flaubert", "funnel-transformer/small",
"microsoft/layoutlm-base-uncased", "allenai/longformer-base-4096",
"google/mobilebert-uncased", "microsoft/mpnet-base", "roberta-base",
"xlm-roberta-base"
]
if __name__ == "__main__":
inputs = tf.random.uniform(shape=[1, 512],
maxval=3,
dtype=tf.int32,
seed=10)
for model_name in supported_models:
print(f"Running model: {model_name}")
shark_module = SharkInference(AutoModelMaskedLM(model_name), (inputs,))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs,)))

90
tank/tf/bert_large_gen.py Normal file
View File

@@ -0,0 +1,90 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=24,
hidden_size=1024,
num_attention_heads=16,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input0: input_word_ids
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input1: input_mask
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input2: segment_ids
tf.TensorSpec([BATCH_SIZE], tf.int32) # input3: labels
])
def learn(self, input_word_ids, input_mask, segment_ids, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
inputs = [input_word_ids, input_mask, segment_ids]
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
@tf.function(input_signature=bert_input)
def predict(self, input_word_ids, input_mask, segment_ids):
inputs = [input_word_ids, input_mask, segment_ids]
return self.m.predict(inputs)
if __name__ == "__main__":
# BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["learn"],
import_only=True)
# Save module as MLIR file in a directory
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
with open(mlir_path, "wt") as output_file:
output_file.write(compiler_module.decode('utf-8'))
print(f"Wrote MLIR to path '{mlir_path}'")

123
tank/tf/bert_large_run.py Normal file
View File

@@ -0,0 +1,123 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=24,
hidden_size=1024,
num_attention_heads=16,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
def learn(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
# BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["learn"],
import_only=True)
# Compile the model using IREE
backend = "dylib-llvm-aot"
args = [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-stream-resource-index-bits=64", "--iree-vm-target-index-bits=64"
]
backend_config = "dylib"
#backend = "cuda"
#backend_config = "cuda"
#args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
flatbuffer_blob = compile_str(compiler_module,
target_backends=[backend],
extra_args=args,
input_type="mhlo")
#flatbuffer_blob = compile_str(compiler_module, target_backends=["dylib-llvm-aot"])
# Save module as MLIR file in a directory
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
tracer = ireert.Tracer(os.getcwd())
config = ireert.Config("dylib", tracer)
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
BertCompiled = ctx.modules.module
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
learn_sample_input = [
predict_sample_input,
np.random.randint(5, size=(BATCH_SIZE))
]
warmup = 5
total_iter = 10
num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the timer only after the warmup iterations have completed,
        # so exactly num_iter iterations are measured.
        if i == warmup - 1:
            start = time.time()
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

85
tank/tf/bert_large_tf.py Normal file
View File

@@ -0,0 +1,85 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=24,
hidden_size=1024,
num_attention_heads=16,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
def learn(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
# BertModule()
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
bert_model = BertModule()
warmup = 1
total_iter = 10
num_iter = total_iter - warmup
for i in range(total_iter):
print(
bert_model.learn(predict_sample_input,
np.random.randint(5, size=(BATCH_SIZE))))
if (i == warmup - 1):
start = time.time()
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

89
tank/tf/bert_small_gen.py Normal file
View File

@@ -0,0 +1,89 @@
from iree import runtime as ireert
#from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=2,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input0: input_word_ids
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input1: input_mask
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
dtype=tf.int32), #input2: segment_ids
tf.TensorSpec([BATCH_SIZE], tf.int32) # input3: labels
])
def learn(self, input_word_ids, input_mask, segment_ids, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
inputs = [input_word_ids, input_mask, segment_ids]
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
@tf.function(input_signature=bert_input)
def predict(self, input_word_ids, input_mask, segment_ids):
inputs = [input_word_ids, input_mask, segment_ids]
return self.m.predict(inputs)
if __name__ == "__main__":
# BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["learn"],
import_only=True)
print(type(compiler_module))
# Save module as MLIR file in a directory
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
with open(mlir_path, "wt") as output_file:
output_file.write(compiler_module.decode('utf-8'))
print(f"Wrote MLIR to path '{mlir_path}'")

120
tank/tf/bert_small_run.py Normal file
View File

@@ -0,0 +1,120 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=2,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
def learn(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
# BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["learn"],
import_only=True)
# Compile the model using IREE
backend = "dylib-llvm-aot"
args = [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
]
backend_config = "dylib"
#backend = "cuda"
#backend_config = "cuda"
#args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
flatbuffer_blob = compile_str(compiler_module,
target_backends=[backend],
extra_args=args,
input_type="mhlo")
#flatbuffer_blob = compile_str(compiler_module, target_backends=["dylib-llvm-aot"])
# Save module as MLIR file in a directory
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
tracer = ireert.Tracer(os.getcwd())
config = ireert.Config("dylib", tracer)
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
BertCompiled = ctx.modules.module
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
learn_sample_input = [
predict_sample_input,
np.random.randint(5, size=(BATCH_SIZE))
]
warmup = 5
total_iter = 10
num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the timer only after the warmup iterations have completed,
        # so exactly num_iter iterations are measured.
        if i == warmup - 1:
            start = time.time()
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,83 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time
from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=2,
dict_outputs=dict_outputs)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
def learn(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
probs = self.m(inputs, training=True)
loss = self.loss(labels, probs)
# ...and use them to update the model's weights.
variables = self.m.trainable_variables
gradients = tape.gradient(loss, variables)
self.optimizer.apply_gradients(zip(gradients, variables))
return loss
if __name__ == "__main__":
# BertModule()
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
]
bert_model = BertModule()
warmup = 1
total_iter = 10
num_iter = total_iter - warmup
for i in range(total_iter):
print(
bert_model.learn(predict_sample_input,
np.random.randint(5, size=(BATCH_SIZE))))
if (i == warmup - 1):
start = time.time()
end = time.time()
total_time = end - start
print("time: " + str(total_time))
print("time/iter: " + str(total_time / num_iter))

View File

@@ -0,0 +1,52 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
import sys
from absl import app
import numpy as np
import os
import tempfile
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
        # Load the pretrained MiniLM model from the PyTorch checkpoint.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)
        # Wrap the model's call method for inference (training disabled).
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=bert_input)
def predict(self, input_word_ids, input_mask, segment_ids):
return self.m.predict(input_word_ids, input_mask, segment_ids)
if __name__ == "__main__":
# BertModule()
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["predict"],
import_only=True)
# Save module as MLIR file in a directory
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
with open(mlir_path, "wt") as output_file:
output_file.write(compiler_module.decode('utf-8'))
print(f"Wrote MLIR to path '{mlir_path}'")

View File

@@ -0,0 +1,87 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import numpy as np
import os
import tempfile
import tensorflow as tf
import time
from transformers import BertModel, BertTokenizer, TFBertModel
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
        # Load the pretrained MiniLM model from the PyTorch checkpoint.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)
        # Wrap the model's call method for inference (training disabled).
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)
@tf.function(input_signature=bert_input)
def predict(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
# Compile the model using IREE
compiler_module = tfc.compile_module(BertModule(),
exported_names=["predict"],
import_only=True)
# Compile the model using IREE
backend = "dylib-llvm-aot"
args = [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
]
backend_config = "dylib"
#backend = "cuda"
#backend_config = "cuda"
#args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
flatbuffer_blob = compile_str(compiler_module,
target_backends=[backend],
extra_args=args,
input_type="mhlo")
#flatbuffer_blob = compile_str(compiler_module, target_backends=["dylib-llvm-aot"])
# Save module as MLIR file in a directory
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
tracer = ireert.Tracer(os.getcwd())
config = ireert.Config("dylib", tracer)
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
BertCompiled = ctx.modules.module
result = BertCompiled.predict(encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"])
print(result)

View File

@@ -0,0 +1,18 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
tf_model = TFBertModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased",
from_pt=True)
tokenizer = BertTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=512)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
output = tf_model(encoded_input)
print(output)
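# For reference, the structured output exposes both the token-level states and
# the pooled representation used by the comparison utilities in
# tank/model_utils_tf.py. Expected shapes for MiniLM-L12-H384 with a padded
# sequence length of 512: (1, 512, 384) and (1, 384).
print(output.last_hidden_state.shape)
print(output.pooler_output.shape)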

99
tank/tf/minilm_tf_test.py Normal file
View File

@@ -0,0 +1,99 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils_tf import get_TFhf_model, compare_tensors_tf
import tensorflow as tf
import unittest
import numpy as np
import pytest
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
#Create a set of 2-dimensional inputs
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]
class MiniLMTFModuleTester:
def create_and_check_module(self, dynamic, device):
model, input, act_out = get_TFhf_model(
"microsoft/MiniLM-L12-H384-uncased")
shark_module = SharkInference(model, (input,),
device=device,
dynamic=dynamic,
jit_trace=True)
shark_module.set_frontend("tensorflow")
shark_module.compile()
        results = shark_module.forward(input)
        assert compare_tensors_tf(act_out, results)
class MiniLMTFModuleTest(unittest.TestCase):
def setUp(self):
self.module_tester = MiniLMTFModuleTester()
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
@pytest.mark.xfail(
reason="Language models currently failing for dynamic case")
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
@pytest.mark.skipif(check_device_drivers("gpu"),
reason="nvidia-smi not found")
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
@pytest.mark.xfail(
reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(check_device_drivers("gpu"),
reason="nvidia-smi not found")
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason=
"vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="TF testing temporarily unavailable.")
@pytest.mark.xfail(
reason="Language models currently failing for dynamic case")
@pytest.mark.skipif(
check_device_drivers("vulkan"),
reason=
"vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == '__main__':
unittest.main()

70
tank/tf/seq_classification.py Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env python
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.parser import shark_args
import argparse
import os
seq_parser = argparse.ArgumentParser(description='Shark Sequence Classification.')
seq_parser.add_argument(
"--hf_model_name",
type=str,
default="bert-base-uncased",
help="Hugging face model to run sequence classification.")
seq_args, unknown = seq_parser.parse_known_args()
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16
# Create the set of input signatures.
inputs_signature = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification
def preprocess_input(text="This is just used to compile the model"):
tokenizer = AutoTokenizer.from_pretrained(seq_args.hf_model_name)
inputs = tokenizer(text,
padding="max_length",
return_tensors="tf",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
return inputs
class SeqClassification(tf.Module):
def __init__(self, model_name):
super(SeqClassification, self).__init__()
self.m = TFAutoModelForSequenceClassification.from_pretrained(
model_name, output_attentions=False, num_labels=2)
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]
@tf.function(input_signature=inputs_signature)
def forward(self, input_ids, attention_mask):
return tf.math.softmax(self.m.predict(input_ids, attention_mask),
axis=-1)
if __name__ == "__main__":
inputs = preprocess_input()
shark_module = SharkInference(
SeqClassification(seq_args.hf_model_name),
(inputs["input_ids"], inputs["attention_mask"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(f"Model has been successfully compiled on {shark_args.device}")
while True:
input_text = input("Enter the text to classify (press q or nothing to exit): ")
if not input_text or input_text == "q":
break
inputs = preprocess_input(input_text)
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
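
As a quick reference for what `SeqClassification.forward` computes, here is a minimal sketch using plain transformers/TensorFlow without SHARK; it mirrors the wrapper above (default `bert-base-uncased`, two labels, max length 16). The input sentence is only an illustration, and the classification head is untrained here, so the printed probabilities are arbitrary — this just shows the tensor plumbing:

```python
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Same defaults as the script above: bert-base-uncased, two labels, length 16.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = TFAutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", output_attentions=False, num_labels=2)

inputs = tokenizer("an illustrative sentence to classify",
                   padding="max_length",
                   truncation=True,
                   max_length=16,
                   return_tensors="tf")

# Equivalent to SeqClassification.forward: logits -> softmax over two labels.
logits = model(input_ids=inputs["input_ids"],
               attention_mask=inputs["attention_mask"])[0]
print(tf.math.softmax(logits, axis=-1))  # shape [1, 2] class probabilities
```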

2
tank/tflite/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
tmp/
.lit_test_times.txt

15
tank/tflite/README.md Normal file
View File

@@ -0,0 +1,15 @@
# Sample compilation and execution of TFLite models

This directory contains test scripts that compile, run, and compare various
TFLite models from TFHub. The scripts aim for simplicity and hackability.

Follow the instructions at the repository root to set up a working Python
venv; you can then run the individual Python files directly.

Alternatively, use something like the following to collect all artifacts and
traces, which can then be fed to other tools:
```
export IREE_SAVE_TEMPS="/tmp/iree/models/{main}/{id}"
for i in *.py; do export IREE_SAVE_CALLS=/tmp/iree/traces/$i; python $i; done
```
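
For reference, the per-model scripts in this directory all follow the same pattern: point `model_path` at a `?lite-format=tflite` URL, subclass `test_util.TFLiteModelTest` (the helper module in this directory), optionally override `generate_inputs` and/or `compare_results`, and call `compile_and_execute()`. A minimal sketch of that pattern — the URL, class name, and tolerance below are placeholders, not one of the files added in this change:

```python
# RUN: %PYTHON %s
import absl.testing
import numpy
import test_util

# Placeholder URL -- substitute any TFLite model hosted on TFHub.
model_path = "https://tfhub.dev/<publisher>/lite-model/<model>/1?lite-format=tflite"


class ExampleModelTest(test_util.TFLiteModelTest):
    def __init__(self, *args, **kwargs):
        super(ExampleModelTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        # Run the base comparison, then add a per-model tolerance check.
        super(ExampleModelTest, self).compare_results(iree_results,
                                                      tflite_results, details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()
```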

44
tank/tflite/albert.py Executable file
View File

@@ -0,0 +1,44 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest
model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# Inputs modified to be meaningful ALBERT inputs.
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"]))
args.append(
np.ones(shape=input_details[1]["shape"],
dtype=input_details[1]["dtype"]))
args.append(
np.zeros(shape=input_details[2]["shape"],
dtype=input_details[2]["dtype"]))
return args
if __name__ == '__main__':
my_shark_importer = SharkImporter(model_path=model_path,
model_type="tflite",
model_source_hub="tfhub",
device="cpu",
dynamic=False,
jit_trace=True)
# Case1: Use default inputs
my_shark_importer.compile()
shark_results = my_shark_importer.forward()
# Case2: Use manually set inputs
input_details, output_details = my_shark_importer.get_model_details()
inputs = generate_inputs(input_details) # device_inputs
my_shark_importer.compile(inputs)
shark_results = my_shark_importer.forward(inputs)
# print(shark_results)

View File

@@ -0,0 +1,22 @@
# RUN: %PYTHON %s
# XFAIL: *
import absl.testing
import test_util
model_path = "https://tfhub.dev/neso613/lite-model/ASR_TFLite/pre_trained_models/English/1?lite-format=tflite"
# Failure is due to dynamic shapes:
# - Some improvements to the tfl.strided_slice lowering are the next steps.
class AsrConformerTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(AsrConformerTest, self).__init__(model_path, *args, **kwargs)
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,39 @@
# RUN: %PYTHON %s
import absl.testing
import numpy
import test_util
import urllib.request
from PIL import Image
model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
class BirdClassifierTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(BirdClassifierTest, self).__init__(model_path, *args, **kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(BirdClassifierTest, self).compare_results(iree_results,
tflite_results, details)
self.assertTrue(
numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())
def generate_inputs(self, input_details):
img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
local_path = "/".join([self.workdir, "bird.bmp"])
urllib.request.urlretrieve(img_path, local_path)
shape = input_details[0]["shape"]
im = numpy.array(Image.open(local_path).resize((shape[1], shape[2])))
args = [im.reshape(shape)]
return args
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,20 @@
# RUN: %PYTHON %s
# REQUIRES: hugetest
import absl.testing
import test_util
model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"
class CartoonGanTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(CartoonGanTest, self).__init__(model_path, *args, **kwargs)
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,16 @@
import numpy as np
import urllib.request
from PIL import Image
# Returns a sample image from the COCO 2017 dataset as uint8.
def generate_input(workdir, input_details):
# We use an image of a bear since this is an easy example.
img_path = "https://storage.googleapis.com/iree-model-artifacts/coco_2017_000000000285.jpg"
local_path = "/".join([workdir, "coco_2017_000000000285.jpg"])
urllib.request.urlretrieve(img_path, local_path)
shape = input_details[0]["shape"]
im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
return im.reshape(shape)

View File

@@ -0,0 +1,26 @@
# RUN: %PYTHON %s
# XFAIL: *
import absl.testing
import test_util
model_path = "https://tfhub.dev/tulasiram58827/lite-model/craft-text-detector/dr/1?lite-format=tflite"
# Failure: Resize lowering does not handle inferred dynamic shapes. Furthermore, the entire model
# requires dynamic shape support.
class CraftTextTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(CraftTextTest, self).__init__(model_path, *args, **kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(CraftTextTest, self).compare_results(iree_results, tflite_results,
details)
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,26 @@
# RUN: %PYTHON %s
import absl.testing
import numpy
import test_util
model_path = "https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2?lite-format=tflite"
class DeepLabV3Test(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(DeepLabV3Test, self).__init__(model_path, *args, **kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(DeepLabV3Test, self).compare_results(iree_results, tflite_results,
details)
self.assertTrue(
numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,26 @@
# RUN: %PYTHON %s
import absl.testing
import numpy
import test_util
model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"
class DenseNetTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(DenseNetTest, self).__init__(model_path, *args, **kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(DenseNetTest, self).compare_results(iree_results, tflite_results,
details)
self.assertTrue(
numpy.isclose(iree_results[0], tflite_results[0], atol=1e-5).all())
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,35 @@
# RUN: %PYTHON %s
import absl.testing
import numpy
import test_util
model_path = "https://tfhub.dev/sayakpaul/lite-model/east-text-detector/dr/1?lite-format=tflite"
class EastTextDetectorTest(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(EastTextDetectorTest, self).__init__(model_path, *args, **kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(EastTextDetectorTest,
self).compare_results(iree_results, tflite_results, details)
self.assertTrue(
numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())
# The second output is extremely noisy since it is not a binary classification. To handle
# this, we check the normalized correlation with an expectation of "close enough".
iree_norm = numpy.sqrt(iree_results[1] * iree_results[1])
tflite_norm = numpy.sqrt(tflite_results[1] * tflite_results[1])
correlation = numpy.average(iree_results[1] * tflite_results[1] /
iree_norm / tflite_norm)
self.assertTrue(numpy.isclose(correlation, 1.0, atol=1e-2).all())
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()

View File

@@ -0,0 +1,39 @@
# RUN: %PYTHON %s
import absl.testing
import imagenet_test_data
import numpy
import test_util
# Source https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/int8/2
model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite"
class EfficientnetLite0Int8Test(test_util.TFLiteModelTest):
def __init__(self, *args, **kwargs):
super(EfficientnetLite0Int8Test, self).__init__(model_path, *args,
**kwargs)
def compare_results(self, iree_results, tflite_results, details):
super(EfficientnetLite0Int8Test,
self).compare_results(iree_results, tflite_results, details)
# Dequantize outputs.
zero_point = details[0]['quantization_parameters']['zero_points'][0]
scale = details[0]['quantization_parameters']['scales'][0]
dequantized_iree_results = (iree_results - zero_point) * scale
dequantized_tflite_results = (tflite_results - zero_point) * scale
self.assertTrue(
numpy.isclose(dequantized_iree_results,
dequantized_tflite_results,
atol=5e-3).all())
def generate_inputs(self, input_details):
return [imagenet_test_data.generate_input(self.workdir, input_details)]
def test_compile_tflite(self):
self.compile_and_execute()
if __name__ == '__main__':
absl.testing.absltest.main()
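
The `compare_results` override above dequantizes both outputs with the usual affine mapping, `real_value = (quantized_value - zero_point) * scale`, using the zero point and scale reported in the output tensor details. A quick numeric sketch with made-up int8 parameters:

```python
import numpy as np

scale, zero_point = 0.00390625, 128              # illustrative int8 parameters
quantized = np.array([128, 192, 64], dtype=np.int32)

# Affine dequantization, as in compare_results above.
dequantized = (quantized - zero_point) * scale
print(dequantized)                               # [ 0.    0.25 -0.25]
```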

Some files were not shown because too many files have changed in this diff.