Mirror of https://github.com/nod-ai/SHARK-Studio.git (synced 2026-01-11 23:08:19 -05:00)

Compare commits: `minilmLoad...github-pages` (259 commits)
`.github/workflows/gh-pages-releases.yml` (vendored, new file, +37)

```yaml
# See: https://github.com/llvm/torch-mlir/issues/1374
name: Publish releases page

on:
  workflow_dispatch:

jobs:
  scrape_and_publish_releases:
    name: "Scrape and publish releases"
    runs-on: ubuntu-latest

    # Don't run this in everyone's forks.
    if: github.repository == 'nod-ai/SHARK'

    steps:
      - name: Checking out repository
        uses: actions/checkout@v2
        with:
          token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
      - name: Run scrape releases script
        run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
        shell: bash
      - run: git fetch --all
      - run: git switch github-pages
      - run: git config --global user.email "none@none.com"
      - run: git config --global user.name "nod-team"
      - run: mv /tmp/index.html package-index/index.html
      - run: git add package-index/index.html

      # Only try to make a commit if the file has changed.
      - run: git diff --cached --exit-code || git commit -m "Update releases."

      - name: GitHub Push
        uses: ad-m/github-push-action@v0.6.0
        with:
          github_token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
          branch: github-pages
```
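The workflow pipes the output of `build_tools/scrape_releases.py` into a static `package-index/index.html` that pip can later consume via `-f`. The script itself is not part of this diff, so the following is only a hedged sketch of what such a scraper could look like, assuming it needs nothing beyond the public GitHub releases API and emits one anchor tag per wheel asset:

```python
# Hypothetical sketch of a release scraper in the spirit of
# build_tools/scrape_releases.py (the real script is not shown in this diff).
# Produces a pip "find-links" page: one <a> per wheel asset.
import json
import sys
import urllib.request


def scrape(owner: str, repo: str) -> str:
    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    with urllib.request.urlopen(url) as resp:
        releases = json.loads(resp.read().decode())
    links = []
    for release in releases:
        for asset in release.get("assets", []):
            name = asset["name"]
            if name.endswith(".whl"):
                links.append(
                    f'<a href="{asset["browser_download_url"]}">{name}</a><br>'
                )
    return "<!DOCTYPE html>\n<html><body>\n" + "\n".join(links) + "\n</body></html>"


if __name__ == "__main__":
    owner, repo = sys.argv[1], sys.argv[2]
    # Mirrors the workflow's usage: scrape_releases.py nod-ai SHARK > index.html
    print(scrape(owner, repo))
```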
`.github/workflows/nightly.yml` (vendored, 62 lines changed)

```diff
@@ -11,11 +11,12 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: a100
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10"]
+        backend: [IREE, SHARK]
 
     steps:
     - uses: actions/checkout@v3
@@ -38,6 +39,10 @@ jobs:
         tag_name="${package_version}"
         echo "package_version=${package_version}" >> $GITHUB_ENV
         echo "tag_name=${tag_name}" >> $GITHUB_ENV
+    - name: Set Environment Variables
+      run: |
+        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
+        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
     - name: Create Release
       id: create_release
       uses: actions/create-release@v1
@@ -49,43 +54,76 @@ jobs:
         body: |
           Automatic snapshot release of nod.ai SHARK.
         draft: true
-        prerelease: false
+        prerelease: false
+    - name: Find Torch-MLIR Release
+      run: |
+        TM_HTML_URL="$(python3 -c "import urllib.request, json, sys; u=json.loads(urllib.request.urlopen('https://api.github.com/repos/llvm/torch-mlir/releases/latest').read().decode()).get('html_url', False); print(u) if u else sys.exit(1);")"
+        TM_RELEASE_DIR=${TM_HTML_URL/"tag"/"expanded_assets"}
+        echo "TM_RELEASE_DIR=${TM_RELEASE_DIR}" >> $GITHUB_ENV
     - name: Install dependencies
       run: |
+        echo "Torch-MLIR Release DIR is ${{ env.TM_RELEASE_DIR }}"
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest yapf toml
-        if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
+        python -m pip install flake8 pytest toml
+        if [ -f requirements.txt ]; then pip install -r requirements.txt -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
-        yapf -i --style .style.yapf shark/*.py
 
-    - name: Build and validate the package
+    - name: Build and validate the IREE package
+      if: ${{ matrix.backend == 'IREE' }}
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
-        source shark.venv/bin/activate
+        USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
+        source iree.venv/bin/activate
         package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
         SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/iree-org/iree/releases
         # Install the built wheel
         pip install ./wheelhouse/nodai*
         # Validate the Models
-        pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
+        /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
+        tail -n 1 |
+        tee -a pytest_results.txt
+        if !(grep -Fxq " failed" pytest_results.txt)
+        then
+          export SHA=$(git log -1 --format='%h')
+          gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
+          gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
+        fi
+        rm -rf ./wheelhouse/nodai*
+
+    - name: Build and validate the SHARK Runtime package
+      if: ${{ matrix.backend == 'SHARK' }}
+      run: |
+        cd $GITHUB_WORKSPACE
+        ./setup_venv.sh
+        source shark.venv/bin/activate
+        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
+        SHARK_PACKAGE_VERSION=${package_version} \
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases
+        # Install the built wheel
+        pip install ./wheelhouse/nodai*
+        # Validate the Models
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
+        tail -n 1 |
+        tee -a pytest_results.txt
 
     - name: Upload Release Assets
+      if: ${{ matrix.backend == 'SHARK' }}
       id: upload-release-assets
       uses: dwenegar/upload-release-assets@v1
       env:
         GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
       with:
         release_id: ${{ steps.create_release.outputs.id }}
-        assets_path: ./wheelhouse/nodai_*.whl
+        assets_path: ${GITHUB_WORKSPACE}/wheelhouse/nodai_*.whl
 
     - name: Publish Release
+      if: ${{ matrix.backend == 'SHARK' }}
       id: publish_release
       uses: eregon/publish-release@v1
       env:
```
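The new `Find Torch-MLIR Release` step packs its logic into a single `python3 -c` invocation plus a bash substitution. Unrolled into ordinary Python (same API call, same first-occurrence `tag` to `expanded_assets` rewrite; the function name is ours), the step does the equivalent of:

```python
# Readable equivalent of the workflow's "Find Torch-MLIR Release" one-liner.
# Only the API call and the URL rewrite come from the workflow itself;
# the helper name is illustrative.
import json
import sys
import urllib.request


def find_torch_mlir_release_dir() -> str:
    api = "https://api.github.com/repos/llvm/torch-mlir/releases/latest"
    with urllib.request.urlopen(api) as resp:
        release = json.loads(resp.read().decode())
    html_url = release.get("html_url")
    if not html_url:
        sys.exit(1)  # matches the one-liner's failure path
    # .../releases/tag/<version>  ->  .../releases/expanded_assets/<version>
    # (bash ${var/"tag"/"expanded_assets"} also replaces the first match only)
    return html_url.replace("tag", "expanded_assets", 1)


if __name__ == "__main__":
    print(find_torch_mlir_release_dir())  # exported to $GITHUB_ENV as TM_RELEASE_DIR
```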
`.github/workflows/test-models.yml` (vendored, 133 lines changed)

```diff
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: Validate torch-models on Shark Runtime
+name: Validate Models on Shark Runtime
 
 on:
   push:
@@ -11,92 +11,103 @@ on:
   workflow_dispatch:
 
 jobs:
-  build-linux:
-
-    runs-on: ubuntu-latest
+  build-validate:
     strategy:
-      fail-fast: false
+      fail-fast: true
       matrix:
+        os: [icelake, a100, MacStudio, ubuntu-latest]
+        suite: [cpu,cuda,vulkan]
         python-version: ["3.10"]
+        include:
+          - os: ubuntu-latest
+            suite: lint
+        exclude:
+          - os: ubuntu-latest
+            suite: vulkan
+          - os: ubuntu-latest
+            suite: cuda
+          - os: ubuntu-latest
+            suite: cpu
+          - os: MacStudio
+            suite: cuda
+          - os: MacStudio
+            suite: cpu
+          - os: MacStudio
+            suite: vulkan
+          - os: icelake
+            suite: vulkan
+          - os: icelake
+            suite: cuda
+          - os: a100
+            suite: cpu
+
+    runs-on: ${{ matrix.os }}
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Setup pip cache
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+    - name: Set Environment Variables
+      run: |
+        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
+        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+
+    - name: Set up Python Version File ${{ matrix.python-version }}
+      if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
+      run: |
+        # See https://github.com/actions/setup-python/issues/433
+        echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
+
+    - name: Set up Python ${{ matrix.python-version }}
+      if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
+      uses: actions/setup-python@v4
+      with:
+        python-version: '${{ matrix.python-version }}'
+        #cache: 'pip'
+        #cache-dependency-path: |
+        #  **/requirements-importer.txt
+        #  **/requirements.txt
 
     - name: Install dependencies
+      if: matrix.suite == 'lint'
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest yapf toml
+        python -m pip install flake8 pytest toml black
 
     - name: Lint with flake8
+      if: matrix.suite == 'lint'
       run: |
+        # black format check
+        black --version
+        black --line-length 79 --check .
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
-        yapf -i --style .style.yapf shark/*.py
 
-    - name: Validate Models
+    - name: Validate Models on CPU
+      if: matrix.suite == 'cpu'
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
         source shark.venv/bin/activate
-        pytest -k 'not benchmark' --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
-
-  perf-macOS:
-    runs-on: MacStudio
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10"]
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
+        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
+        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
 
-    steps:
-    - uses: actions/checkout@v3
-    - name: Validate Models dependencies
+    - name: Validate Models on NVIDIA GPU
+      if: matrix.suite == 'cuda'
       run: |
         cd $GITHUB_WORKSPACE
-        PYTHON=python3.10 IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
         source shark.venv/bin/activate
-        pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
-
-  perf-linux:
-    runs-on: a100
-    timeout-minutes: 45
-    continue-on-error: true
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10"]
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
+        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
+        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
 
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Setup pip cache
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-
-    - name: Validate Models
+    - name: Validate Vulkan Models
+      if: matrix.suite == 'vulkan'
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
```
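The new `include`/`exclude` rules prune the 4x3 os/suite grid down to just four effective jobs; note that `MacStudio` is excluded from every suite, so it runs nothing from this matrix. A quick sketch (ours, not part of the workflow) that enumerates the result:

```python
# Enumerate the effective jobs implied by the matrix above:
# os x suite, minus the excludes, plus the lint include.
from itertools import product

oses = ["icelake", "a100", "MacStudio", "ubuntu-latest"]
suites = ["cpu", "cuda", "vulkan"]
excluded = {
    ("ubuntu-latest", "vulkan"), ("ubuntu-latest", "cuda"), ("ubuntu-latest", "cpu"),
    ("MacStudio", "cuda"), ("MacStudio", "cpu"), ("MacStudio", "vulkan"),
    ("icelake", "vulkan"), ("icelake", "cuda"), ("a100", "cpu"),
}

jobs = [(o, s) for o, s in product(oses, suites) if (o, s) not in excluded]
jobs.append(("ubuntu-latest", "lint"))  # the `include` entry
print(jobs)
# [('icelake', 'cpu'), ('a100', 'cuda'), ('a100', 'vulkan'), ('ubuntu-latest', 'lint')]
```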
`.gitignore` (vendored, +1)

```diff
@@ -162,6 +162,7 @@ cython_debug/
 
 # Shark related artefacts
 *venv/
+shark_tmp/
 
 # ORT related artefacts
 cache_models/
```
`.gitmodules` (vendored, -4)

```diff
@@ -1,4 +0,0 @@
-[submodule "inference/thirdparty/shark-runtime"]
-	path = inference/thirdparty/shark-runtime
-	url = https://github.com/nod-ai/SHARK-Runtime.git
-	branch = shark-06032022
```

Deleted yapf style file (presumably `.style.yapf`, given the `--style .style.yapf` invocations removed from the workflows above; its filename header was not captured in this mirror):

```diff
@@ -1,3 +0,0 @@
-[style]
-based_on_style = google
-column_limit = 80
```
`README.md` (deleted, -260). The removed file read:

# SHARK

High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerators and Heterogeneous Clusters

[](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)

## Communication Channels

* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real-time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs, etc.

## Installation

<details>
  <summary>Installation (Linux and macOS)</summary>

### Set up a new pip virtual environment

This step sets up a new virtual environment for Python.

```shell
python --version  # Check that you have 3.7-3.10 on Linux, or 3.10 on macOS
python -m venv shark_venv
source shark_venv/bin/activate

# If you are using conda, create and activate a new conda env instead.

# Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip
```

*macOS Metal* users, please install the Vulkan SDK from https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg

### Install SHARK

This step pip-installs SHARK and related packages on Linux (Python 3.7, 3.8, 3.9, 3.10) and macOS (Python 3.10).

```shell
pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/shark-runtime/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```

If you are on an Intel macOS machine, you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.

### Download and run the Resnet50 sample

```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
# Install deps for the test script
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu"  # Use cuda, vulkan, or metal
```

### Download and run the BERT (MiniLM) sample

```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
# Install deps for the test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu"  # Use cuda, vulkan, or metal
```
</details>

<details>
  <summary>Source Installation</summary>

## Check out the code

```shell
git clone https://github.com/nod-ai/SHARK.git
```

## Set up your Python virtual environment and dependencies

```shell
# Set up the venv and install the necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
# Please activate the venv after installation.
```

### Run a demo script

```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu"  # Use gpu | vulkan
```

### Run all model tests on CPU/GPU/VULKAN/Metal

```shell
pytest shark/tests/models

# If on Linux, for quicker results:
pytest shark/tests/models -n auto
```

### Run all model benchmark tests on CPU/GPU/VULKAN/Metal

```shell
pytest shark/tests/benchmarks
```
</details>

<details>
  <summary>API Reference</summary>

### Shark Inference API

```
from shark_runner import SharkInference

shark_module = SharkInference(
    module,            # the model class
    (input,),          # inputs to the model (must be torch tensors)
    dynamic,           # (boolean) whether the input shapes are dynamic or static
    device,            # `cpu`, `gpu`, or `vulkan`
    tracing_required,  # (boolean) jit-trace the module with the given input;
                       # useful where jit.script doesn't work
)
shark_module.set_frontend("pytorch")  # or tensorflow, mhlo, linalg, tosa
shark_module.compile()

result = shark_module.forward(inputs)
```
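A concrete, runnable version of the call sketched above. It mirrors the keyword spellings (`dynamic=`, `device=`, `jit_trace=`) that the test files later in this diff actually pass to `SharkInference`; the Resnet18 choice and the import path follow this README's own examples, so treat it as an illustrative sketch rather than the canonical API:

```python
# Minimal PyTorch example of the SharkInference flow described above.
# Assumes the nodai-shark package from the install step; argument spellings
# follow the test files in this diff (jit_trace rather than tracing_required).
import torch
import torchvision.models as models

from shark.shark_inference import SharkInference

model = models.resnet18(pretrained=True).eval()
test_input = torch.randn(1, 3, 224, 224)  # a static (1, 3, 224, 224) image batch

shark_module = SharkInference(
    model,
    (test_input,),
    dynamic=False,   # treat input shapes as static
    device="cpu",    # or "gpu" / "vulkan"
    jit_trace=True,  # trace instead of scripting
)
shark_module.set_frontend("pytorch")
shark_module.compile()

result = shark_module.forward((test_input,))
print(result)
```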
### Example demonstrating running MHLO IR

```
from shark.shark_inference import SharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)

shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
```
</details>

## Supported and Validated Models

<details>
  <summary>PyTorch Models</summary>

### Huggingface PyTorch Models

| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| Albert | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| BigBird | :heavy_check_mark: (AOT) | | | |
| DistilBERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| GPT2 | :x: (AOT) | | | |

### Torchvision Models

| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|--------------------|----------------------|-----------|------------|-------------|
| AlexNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| DenseNet121 | :heavy_check_mark: (Script) | | | |
| MNasNet1_0 | :heavy_check_mark: (Script) | | | |
| MobileNetV2 | :heavy_check_mark: (Script) | | | |
| MobileNetV3 | :heavy_check_mark: (Script) | | | |
| Unet | :x: (Script) | | | |
| Resnet18 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet50 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet101 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnext50_32x4d | :heavy_check_mark: (Script) | | | |
| ShuffleNet_v2 | :x: (Script) | | | |
| SqueezeNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| EfficientNet | :heavy_check_mark: (Script) | | | |
| Regnet | :heavy_check_mark: (Script) | | | |
| Resnest | :x: (Script) | | | |
| Vision Transformer | :heavy_check_mark: (Script) | | | |
| VGG 16 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Wide Resnet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| RAFT | :x: (JIT) | | | |

For more information, refer to the [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0).

### PyTorch Training Models

| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>JAX Models</summary>

### JAX Models

| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|--------------------|-----------|------------|-------------|
| DALL-E | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>TFLite Models</summary>

### TFLite Models

| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|-------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>TF Models</summary>

### Tensorflow Models

| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

## Related Projects

<details>
  <summary>IREE Project Channels</summary>

* [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests, bugs, and other work tracking
* [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development discussions with the core team and collaborators
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss): Announcements, general and low-priority discussion
</details>

<details>
  <summary>MLIR and Torch-MLIR Project Channels</summary>

* `#torch-mlir` channel on the LLVM [Discord](https://discord.gg/xS7Z362), the most active communication channel
* Torch-MLIR GitHub issues [here](https://github.com/llvm/torch-mlir/issues)
* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
* Weekly meetings on Mondays, 9AM PST; see [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31); SHARK and IREE are enabled by, and rely heavily on, [MLIR](https://mlir.llvm.org)
</details>

## License

nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
See [LICENSE](LICENSE) for more information.
Deleted file (-22 lines; its filename was not captured in this mirror, but it is a small HF benchmark driver built on `SharkHFBenchmarkRunner`):

```python
import torch
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner

parser.add_argument(
    "--model_name",
    type=str,
    required=True,
    help='Specifies the name of the HF model to benchmark (for example "microsoft/MiniLM-L12-H384-uncased").',
)
load_args, unknown = parser.parse_known_args()

if __name__ == "__main__":
    model_name = load_args.model_name
    test_input = torch.randint(2, (1, 128))
    shark_module = SharkHFBenchmarkRunner(model_name, (test_input,), jit_trace=True)
    shark_module.benchmark_c()
    shark_module.benchmark_python((test_input,))
    shark_module.benchmark_torch(test_input)
    shark_module.benchmark_onnx(test_input)
```
Deleted file (-137 lines). Given that the driver above imports `SharkHFBenchmarkRunner` from `benchmarks.hf_transformer`, this is presumably `benchmarks/hf_transformer.py`:

```python
import torch
from shark.shark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import (
    run_pytorch,
    run_tensorflow,
    run_onnxruntime,
)
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
import psutil


class OnnxFusionOptions(object):
    def __init__(self):
        self.disable_gelu = False
        self.disable_layer_norm = False
        self.disable_attention = False
        self.disable_skip_layer_norm = False
        self.disable_embed_layer_norm = False
        self.disable_bias_skip_layer_norm = False
        self.disable_bias_gelu = False
        self.enable_gelu_approximation = False
        self.use_mask_index = False
        self.no_attention_mask = False


class HuggingFaceLanguage(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels; 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
    # SharkRunner-derived class with benchmarking capabilities.
    def __init__(
        self,
        model_name: str,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
    ):
        self.device = device if device is not None else shark_args.device
        if self.device == "gpu":
            raise ValueError(
                "Currently GPU benchmarking is not supported due to OOM from ORT."
            )
        self.model_name = model_name
        model = HuggingFaceLanguage(model_name)
        SharkBenchmarkRunner.__init__(
            self, model, input, dynamic, self.device, jit_trace, from_aot, frontend
        )

    def benchmark_torch(self, inputs):
        use_gpu = self.device == "gpu"
        # Set the model's layer number to automatic.
        config_modifier = ConfigModifier(None)
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        verbose = False
        result = run_pytorch(
            use_gpu, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes,
            sequence_lengths, shark_args.num_iterations, False,
            cache_dir, verbose,
        )
        print(
            f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    # TODO: Currently non-functional due to a TF runtime error. There might be
    # some issue with initializing TF.
    def benchmark_tf(self, inputs):
        use_gpu = self.device == "gpu"
        # Set the model's layer number to automatic.
        config_modifier = ConfigModifier(None)
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        verbose = False
        result = run_tensorflow(
            use_gpu, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
            shark_args.num_iterations, cache_dir, verbose,
        )
        print(
            f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_onnx(self, inputs):
        if self.model_name not in MODELS:
            print(
                f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX."
            )
            return
        use_gpu = self.device == "gpu"
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        onnx_dir = os.path.join(".", "onnx_models")
        verbose = False
        input_counts = [1]
        optimize_onnx = True
        validate_onnx = False
        disable_ort_io_binding = False
        use_raw_attention_mask = True
        model_fusion_statistics = {}
        overwrite = False
        model_source = "pt"  # Either "pt" or "tf".
        provider = None
        config_modifier = ConfigModifier(None)
        onnx_args = OnnxFusionOptions()
        result = run_onnxruntime(
            use_gpu, provider, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
            shark_args.num_iterations, input_counts, optimize_onnx,
            validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
            disable_ort_io_binding, use_raw_attention_mask,
            model_fusion_statistics, model_source, onnx_args,
        )
        print(
            f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
```
Deleted file (-210 lines). Its contents match the model-benchmark test suite that the workflows above skip via `--ignore=benchmarks/tests/test_benchmark.py`, so that is presumably its path:

```python
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib
import pytest
import unittest

torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class TFHuggingFaceLanguage(tf.Module):
    def __init__(self, hf_model_name):
        super(TFHuggingFaceLanguage, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False
        )

    @tf.function(input_signature=tf_bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


def get_TFhf_model(name):
    model = TFHuggingFaceLanguage(name)
    tokenizer = BertTokenizer.from_pretrained(name)
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(
        text, padding="max_length", truncation=True, max_length=MAX_SEQUENCE_LENGTH
    )
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0
        )
    test_input = (
        encoded_input["input_ids"],
        encoded_input["attention_mask"],
        encoded_input["token_type_ids"],
    )
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out


##################### Hugging Face LM Models ###################################


class HuggingFaceLanguage(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels; 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_model(name):
    model = HuggingFaceLanguage(name)
    # TODO: Currently the test input is set to (1, 128).
    test_input = torch.randint(2, (1, 128))
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################

##################### Torch Vision Models ###################################


class VisionModule(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


def get_vision_model(torch_model):
    model = VisionModule(torch_model)
    # The test input is a (1, 3, 224, 224) image tensor.
    test_input = torch.randn(1, 3, 224, 224)
    actual_out = model(test_input)
    return model, test_input, actual_out


############################# Benchmark Tests ####################################

pytest_benchmark_param = pytest.mark.parametrize(
    ("dynamic", "device"),
    [
        pytest.param(False, "cpu"),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, "cpu", marks=pytest.mark.skip),
        pytest.param(
            False,
            "gpu",
            marks=pytest.mark.skipif(
                check_device_drivers("gpu"), reason="nvidia-smi not found"
            ),
        ),
        pytest.param(True, "gpu", marks=pytest.mark.skip),
        pytest.param(
            False,
            "vulkan",
            marks=pytest.mark.skipif(
                check_device_drivers("vulkan"),
                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
            ),
        ),
        pytest.param(
            True,
            "vulkan",
            marks=pytest.mark.skipif(
                check_device_drivers("vulkan"),
                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
            ),
        ),
    ],
)


@pytest.mark.skipif(
    importlib.util.find_spec("iree.tools") is None,
    reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
    model, test_input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
    shark_module = SharkInference(
        model,
        (test_input,),
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.compile()
        shark_module.benchmark_all((test_input,))
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False


@pytest.mark.skipif(
    importlib.util.find_spec("iree.tools") is None,
    reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
    model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
    shark_module = SharkInference(
        model,
        test_input,
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        shark_module.benchmark_all(test_input)
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False


@pytest.mark.skip(reason="XLM Roberta too large to test.")
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
    model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
    shark_module = SharkInference(
        model,
        test_input,
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        shark_module.benchmark_all(test_input)
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False
```
Deleted file (-39 lines; presumably `benchmarks/tests/test_hf_benchmark.py`, the other benchmark test the workflows pass to `--ignore`):

```python
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest

torch.manual_seed(0)

############################# HF Benchmark Tests ####################################

# Test running the benchmark module without failing.
pytest_benchmark_param = pytest.mark.parametrize(
    ("dynamic", "device"),
    [
        pytest.param(False, "cpu"),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, "cpu", marks=pytest.mark.skip),
    ],
)


@pytest.mark.skipif(
    importlib.util.find_spec("onnxruntime") is None,
    reason="Cannot find ONNXRUNTIME.",
)
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
    model_name = "bert-base-uncased"
    test_input = torch.randint(2, (1, 128))
    try:
        shark_module = SharkHFBenchmarkRunner(
            model_name,
            (test_input,),
            jit_trace=True,
            dynamic=dynamic,
            device=device,
        )
        shark_module.benchmark_c()
        shark_module.benchmark_python((test_input,))
        shark_module.benchmark_torch(test_input)
        shark_module.benchmark_onnx(test_input)
        # If benchmarking is successful, assert success/True.
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False
```
Deleted file (-192 lines): the Triton backend's `CMakeLists.txt` (the build instructions below run it from `SHARK/inference`):

```cmake
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.17)

project(sharkbackend LANGUAGES C CXX)

#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)

set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

#
# Dependencies
#
# FetchContent requires us to include the transitive closure of all
# repos that we depend on so that we can override the tags.
#
include(FetchContent)

FetchContent_Declare(
  repo-common
  GIT_REPOSITORY https://github.com/triton-inference-server/common.git
  GIT_TAG ${TRITON_COMMON_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY https://github.com/triton-inference-server/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-backend
  GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# The backend must be built into a shared library. Use an ldscript to
# hide all symbols except for the TRITONBACKEND API.
#
configure_file(src/libtriton_dshark.ldscript libtriton_dshark.ldscript COPYONLY)

add_library(
  triton-dshark-backend SHARED
  src/dshark.cc
  #src/dshark_driver_module.c
)

add_library(
  SharkBackend::triton-dshark-backend ALIAS triton-dshark-backend
)

target_include_directories(
  triton-dshark-backend
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_BINARY_DIR}/lib/cmake/mlir")

add_subdirectory(thirdparty/shark-runtime EXCLUDE_FROM_ALL)

target_link_libraries(triton-dshark-backend PRIVATE
  iree_base_base
  iree_hal_hal
  iree_hal_cuda_cuda
  iree_hal_cuda_registration_registration
  iree_hal_vmvx_registration_registration
  iree_hal_dylib_registration_registration
  iree_modules_hal_hal
  iree_vm_vm
  iree_vm_bytecode_module
  iree_hal_local_loaders_system_library_loader
  iree_hal_local_loaders_vmvx_module_loader
)

target_compile_features(triton-dshark-backend PRIVATE cxx_std_11)

target_link_libraries(
  triton-dshark-backend
  PRIVATE
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
    triton-backend-utils    # from repo-backend
)

if(WIN32)
  set_target_properties(
    triton-dshark-backend PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME triton_dshark
  )
else()
  set_target_properties(
    triton-dshark-backend PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME triton_dshark
    LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dshark.ldscript
    LINK_FLAGS "-Wl,--version-script libtriton_dshark.ldscript"
  )
endif()

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/SharkBackend)

install(
  TARGETS triton-dshark-backend
  EXPORT triton-dshark-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
)

install(
  EXPORT triton-dshark-backend-targets
  FILE SharkBackendTargets.cmake
  NAMESPACE SharkBackend::
  DESTINATION ${INSTALL_CONFIGDIR}
)

include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/SharkBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-dshark-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendTargets.cmake
  NAMESPACE SharkBackend::
)

export(PACKAGE SharkBackend)
```
Deleted file (-100 lines): the Triton backend README. The removed file read:

# SHARK Triton Backend

The Triton backend for SHARK.

# Build

Install SHARK:

```
git clone https://github.com/nod-ai/SHARK.git
# Skip the step above if dshark is already installed.
cd SHARK/inference
```

Install dependencies:

```
apt-get install patchelf rapidjson-dev python3-dev
git submodule update --init
```

Update the submodules of IREE:

```
cd thirdparty/shark-runtime
git submodule update --init
```

Next, make the backend and install it:

```
cd ../..
mkdir build && cd build
cmake -DTRITON_ENABLE_GPU=ON \
      -DIREE_HAL_DRIVER_CUDA=ON \
      -DIREE_TARGET_BACKEND_CUDA=ON \
      -DMLIR_ENABLE_CUDA_RUNNER=ON \
      -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
      -DTRITON_BACKEND_REPO_TAG=r22.02 \
      -DTRITON_CORE_REPO_TAG=r22.02 \
      -DTRITON_COMMON_REPO_TAG=r22.02 ..
make install
```

# Incorporating into Triton

There are much more in-depth explanations of the following steps in Triton's documentation:
https://github.com/triton-inference-server/server/blob/main/docs/compose.md#triton-with-unsupported-and-custom-backends

There should be a file at /build/install/backends/dshark/libtriton_dshark.so. You will need to copy it into your Triton server image. More documentation is in the link above, but to create the Docker image you need to run the compose.py command in the triton-backend server repo.

To first build your image, clone the tritonserver repo:

```
git clone https://github.com/triton-inference-server/server.git
```

Then run `compose.py` to build a Docker compose file:

```
cd server
python3 compose.py --repoagent checksum --dry-run
```

Because dshark is a third-party backend, you will need to manually modify the `Dockerfile.compose` produced to include the dshark backend by copying in this line (the dshark backend is located in the build folder from earlier, under `/build/install/backends`):

```
COPY /path/to/build/install/backends/dshark /opt/tritonserver/backends/dshark
```

Next run:

```
docker build -t tritonserver_custom -f Dockerfile.compose .
docker run -it --gpus=1 --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```

where `path/to/model_repos` is where you are storing the models you want to run.

If you're not using GPUs, omit `--gpus=1`:

```
docker run -it --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```

# Setting up a model

To include a model in your backend, add a directory with your model name to your model repository directory. Examples of models can be seen here: https://github.com/triton-inference-server/backend/tree/main/examples/model_repos/minimal_models

Make sure to adjust the input correctly in the config.pbtxt file, and save a vmfb file under 1/model.vmfb, as in the sketch below.
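The README does not spell out the repository layout, so here is a hedged sketch of setting one up, following the minimal_models examples linked above. The tensor names, dtypes, and shapes are placeholders to adapt to your model, and `backend: "dshark"` assumes the backend name matches the install directory used earlier:

```python
# Hypothetical helper that lays out a Triton model repository entry for the
# dshark backend: <repo>/<model>/config.pbtxt and <repo>/<model>/1/model.vmfb.
# The input/output names and dims below are placeholders, not real model metadata.
import os
import shutil
import textwrap


def make_model_repo(repo_dir: str, model_name: str, vmfb_path: str) -> None:
    model_dir = os.path.join(repo_dir, model_name)
    os.makedirs(os.path.join(model_dir, "1"), exist_ok=True)

    config = textwrap.dedent(f"""\
        name: "{model_name}"
        backend: "dshark"
        input [
          {{ name: "input0", data_type: TYPE_FP32, dims: [ 1, 3, 224, 224 ] }}
        ]
        output [
          {{ name: "output0", data_type: TYPE_FP32, dims: [ 1, 1000 ] }}
        ]
        """)
    with open(os.path.join(model_dir, "config.pbtxt"), "w") as f:
        f.write(config)

    # The compiled module goes in version directory "1", as the text above says.
    shutil.copy(vmfb_path, os.path.join(model_dir, "1", "model.vmfb"))


# Example: make_model_repo("/path/to/model_repos", "resnet50", "./resnet50.vmfb")
```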
# CUDA

If you're having issues with CUDA, make sure the correct drivers are installed, that `nvidia-smi` works, and that the nvcc compiler is on the path.
Deleted file (-39 lines; presumably `cmake/SharkBackendConfig.cmake.in`, which the CMakeLists above feeds to `configure_package_config_file`):

```cmake
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# (Same BSD-style license header as in the CMakeLists.txt above.)

include(CMakeFindDependencyMacro)

get_filename_component(
  SHARKBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${SHARKBACKEND_CMAKE_DIR})

if(NOT TARGET SharkBackend::triton-dshark-backend)
  include("${SHARKBACKEND_CMAKE_DIR}/SharkBackendTargets.cmake")
endif()

set(SHARKBACKEND_LIBRARIES SharkBackend::triton-dshark-backend)
```
File diff suppressed because it is too large
@@ -1,30 +0,0 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
  TRITONBACKEND_*;
local: *;
};
Submodule inference/thirdparty/shark-runtime deleted from 7b82d90c72
package-index/index.html
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
<body>
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_20230130_481.exe'>shark_sd_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_cli_20230130_481.exe'>shark_sd_cli_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_20230129_479.exe'>shark_sd_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_cli_20230129_479.exe'>shark_sd_cli_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_20230129_480.exe'>shark_sd_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_cli_20230129_480.exe'>shark_sd_cli_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_20230129_478.exe'>shark_sd_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_cli_20230129_478.exe'>shark_sd_cli_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_20230128_477.exe'>shark_sd_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_cli_20230128_477.exe'>shark_sd_cli_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_20230127_476.exe'>shark_sd_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_cli_20230127_476.exe'>shark_sd_cli_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_20230126_475.exe'>shark_sd_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_cli_20230126_475.exe'>shark_sd_cli_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_20230125_474.exe'>shark_sd_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_cli_20230125_474.exe'>shark_sd_cli_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_20230125_473.exe'>shark_sd_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_cli_20230125_473.exe'>shark_sd_cli_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.472/shark_sd_20230125_472.exe'>shark_sd_20230125_472.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.471/shark_sd_20230125_471.exe'>shark_sd_20230125_471.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.468/shark_sd_20230125_468.exe'>shark_sd_20230125_468.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_20230124_470.exe'>shark_sd_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_cli_20230124_470.exe'>shark_sd_cli_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.469/shark_sd_20230124_469.exe'>shark_sd_20230124_469.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.467/shark_sd_20230124_467.exe'>shark_sd_20230124_467.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.466/shark_sd_20230124_466.exe'>shark_sd_20230124_466.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.462/shark_sd_20230124_462.exe'>shark_sd_20230124_462.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.461/shark_sd_20230123_461.exe'>shark_sd_20230123_461.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.460/shark_sd_20230123_460.exe'>shark_sd_20230123_460.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.459/shark_sd_20230122_459.exe'>shark_sd_20230122_459.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.458/shark_sd_20230122_458.exe'>shark_sd_20230122_458.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.457/shark_sd_20230122_457.exe'>shark_sd_20230122_457.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230121.456/shark_sd_20230121_456.exe'>shark_sd_20230121_456.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230120.455/shark_sd_20230120_455.exe'>shark_sd_20230120_455.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230119.454/shark_sd_20230119_454.exe'>shark_sd_20230119_454.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230118.453/shark_sd_20230118_453.exe'>shark_sd_20230118_453.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230117.452/shark_sd_20230117_452.exe'>shark_sd_20230117_452.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230116.451/shark_sd_20230116_451.exe'>shark_sd_20230116_451.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230115.450/shark_sd_20230115_450.exe'>shark_sd_20230115_450.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230114.449/shark_sd_20230114_449.exe'>shark_sd_20230114_449.exe</a><br />
</body>
</html>
@@ -1,12 +0,0 @@
[build-system]
requires = [
    "setuptools>=42",
    "wheel",
    "packaging",

    "numpy==1.22.4",
    "torch-mlir>=20220428.420",
    "iree-compiler>=20220427.13",
    "iree-runtime>=20220427.13",
]
build-backend = "setuptools.build_meta"
@@ -1,3 +0,0 @@
[pytest]
addopts = --verbose -p no:warnings
norecursedirs = inference tank/tflite
@@ -1,40 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre

numpy
torch
torchvision

tqdm

#iree-compiler | iree-runtime should already be installed
#these don't work on osx
#iree-tools-tflite
#iree-tools-xla
#iree-tools-tf

# TensorFlow and JAX.
gin-config
tensorflow-macos
tensorflow-metal
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]

# tflitehub dependencies.
Pillow

# Testing and support.
#lit
#pyyaml

#ONNX and ORT for benchmarking
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly
@@ -1,39 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre

numpy==1.22.4
torch
torchvision

tqdm

#iree-compiler | iree-runtime should already be installed
iree-tools-tflite
iree-tools-xla
iree-tools-tf

# TensorFlow and JAX.
gin-config
tensorflow
tf-models-nightly
tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]


# tflitehub dependencies.
Pillow

# Testing and support.
lit
pyyaml

#ONNX and ORT for benchmarking
--extra-index-url https://test.pypi.org/simple/
protobuf
coloredlogs
flatbuffers
sympy
psutil
onnx-weekly
ort-nightly
@@ -1,9 +0,0 @@
setuptools
wheel

#SHARK Runner
tqdm

#Testing
pytest
pytest-xdist
setup.py
@@ -1,38 +0,0 @@
from setuptools import find_packages
from setuptools import setup

import os

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"

setup(
    name="nodai-SHARK",
    version=f"{PACKAGE_VERSION}",
    description="SHARK provides a High Performance Machine Learning Framework",
    author="nod.ai",
    author_email="stdin@nod.ai",
    url="https://nod.ai",
    long_description=long_description,
    long_description_content_type="text/markdown",
    project_urls={
        "Code": "https://github.com/nod-ai/SHARK",
        "Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    packages=find_packages(exclude=('examples')),
    python_requires=">=3.7",
    install_requires=[
        "numpy",
        "PyYAML",
        "torch-mlir>=20220428.420",
        "iree-compiler>=20220427.13",
        "iree-runtime>=20220427.13",
    ],
)
setup_venv.sh
@@ -1,115 +0,0 @@
#!/bin/bash
# Sets up a venv suitable for running samples.
# e.g:
# ./setup_venv.sh #setup a default $PYTHON3 shark.venv
# Environment variables used by the script:
# PYTHON=$PYTHON3.10 ./setup_venv.sh #pass a version of $PYTHON to use
# VENV_DIR=myshark.venv #create a venv called myshark.venv
# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
# IMPORTER=1 #Install importer deps
# if you run the script from a conda env it will install in your conda env

TD="$(cd $(dirname $0) && pwd)"
if [ -z "$PYTHON" ]; then
  PYTHON="$(which python3)"
fi

function die() {
  echo "Error executing command: $*"
  exit 1
}

PYTHON_VERSION_X_Y=`${PYTHON} -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))'`

echo "Python: $PYTHON"
echo "Python version: $PYTHON_VERSION_X_Y"

if [[ -z "${CONDA_PREFIX}" ]]; then
  # Not a conda env. So create a new VENV dir
  VENV_DIR=${VENV_DIR:-shark.venv}
  echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
  $PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
  source "$VENV_DIR/bin/activate" || die "Could not activate venv"
  PYTHON="$(which python3)"
else
  echo "Found conda env $CONDA_DEFAULT_ENV. Running pip install inside the conda env"
fi

Red=`tput setaf 1`
Green=`tput setaf 2`
Yellow=`tput setaf 3`

# Assume no binary torch-mlir.
# Currently available for macOS m1&intel (3.10) and Linux(3.7,3.8,3.9,3.10)
torch_mlir_bin=false
if [[ $(uname -s) = 'Darwin' ]]; then
  echo "${Yellow}Apple macOS detected"
  if [[ $(uname -m) == 'arm64' ]]; then
    echo "${Yellow}Apple M1 Detected"
    hash rustc 2>/dev/null
    if [ $? -eq 0 ];then
      echo "${Green}rustc found to compile HF tokenizers"
    else
      echo "${Red}Could not find rustc" >&2
      echo "${Red}Please run:"
      echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
      exit 1
    fi
  fi
  echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
  echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
  if [ "$PYTHON_VERSION_X_Y" == "3.10" ]; then
    torch_mlir_bin=true
  fi
elif [[ $(uname -s) = 'Linux' ]]; then
  echo "${Yellow}Linux detected"
  if [ "$PYTHON_VERSION_X_Y" == "3.7" ] || [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.9" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] ; then
    torch_mlir_bin=true
  fi
else
  echo "${Red}OS not detected. Pray and Play"
fi

# Upgrade pip and install requirements.
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
if [ "$torch_mlir_bin" = true ]; then
  $PYTHON -m pip install --find-links https://github.com/llvm/torch-mlir/releases torch-mlir --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  if [ $? -eq 0 ];then
    echo "Successfully Installed torch-mlir"
  else
    echo "Could not install torch-mlir" >&2
  fi
else
  echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
  echo "${Yellow}Python 3.10 supported on macOS and 3.7,3.8,3.9 and 3.10 on Linux"
  echo "${Red}Please build torch-mlir from source in your environment"
  exit 1
fi
if [[ -z "${USE_IREE}" ]]; then
  RUNTIME="nod-ai/SHARK-Runtime"
else
  RUNTIME="google/iree"
fi
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --find-links https://github.com/${RUNTIME}/releases iree-compiler iree-runtime

if [[ ! -z "${IMPORTER}" ]]; then
  echo "${Yellow}Installing importer tools.."
  if [[ $(uname -s) = 'Linux' ]]; then
    echo "${Yellow}Linux detected.. installing Linux importer tools"
    $PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  elif [[ $(uname -s) = 'Darwin' ]]; then
    echo "${Yellow}macOS detected.. installing macOS importer tools"
    #Conda seems to have some problems installing these packages and hope they get resolved upstream.
    $PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  fi
fi

$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases

if [[ -z "${CONDA_PREFIX}" ]]; then
  echo "${Green}Before running examples activate venv with:"
  echo "  ${Green}source $VENV_DIR/bin/activate"
fi
@@ -1,72 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from torch._decomp import get_decompositions
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import _stateless

from torch import fx
import copy
import tempfile


class MakeFxModule:

    def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
        self.model = model
        self.inputs = inputs
        self.custom_inference_fn = custom_inference_fn
        self.training_graph = None

    # Doesn't replace the None type.
    def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
        for node in fx_g.graph.nodes:
            if node.op == "output":
                # output nodes always have one argument
                node_arg = node.args[0]
                out_nodes = []
                if isinstance(node_arg, list):
                    # Don't return NoneType elements.
                    for out_node in node_arg:
                        if not isinstance(out_node, type(None)):
                            out_nodes.append(out_node)
                    # If there is a single tensor/element to be returned don't
                    # create a tuple for it.
                    if len(out_nodes) == 1:
                        node.args = out_nodes
                    else:
                        node.args = (tuple(out_nodes),)
        fx_g.graph.lint()
        fx_g.recompile()
        return fx_g

    def generate_graph(self):
        fx_g = make_fx(self.custom_inference_fn,
                       decomposition_table=get_decompositions([
                           torch.ops.aten.embedding_dense_backward,
                           torch.ops.aten.native_layer_norm_backward,
                           torch.ops.aten.slice_backward,
                           torch.ops.aten.select_backward
                       ]))(dict(self.model.named_parameters()),
                           dict(self.model.named_buffers()), self.inputs)
        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
        fx_g.recompile()
        fx_g = self.change_fx_graph_return_to_tuple(fx_g)
        ts_g = torch.jit.script(fx_g)
        temp = tempfile.NamedTemporaryFile(suffix='_shark_ts',
                                           prefix='temp_ts_')
        ts_g.save(temp.name)
        new_ts = torch.jit.load(temp.name)
        self.training_graph = new_ts
@@ -1,78 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import ctypes

#Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36


def get_cuda_sm_cc():
    libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError("could not load any of: " + ' '.join(libnames))

    nGpus = ctypes.c_int()
    name = b' ' * 100
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()

    result = ctypes.c_int()
    device = ctypes.c_int()
    context = ctypes.c_void_p()
    error_str = ctypes.c_char_p()

    result = cuda.cuInit(0)
    if result != CUDA_SUCCESS:
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        print("cuInit failed with error code %d: %s" %
              (result, error_str.value.decode()))
        return 1
    result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
    if result != CUDA_SUCCESS:
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        print("cuDeviceGetCount failed with error code %d: %s" %
              (result, error_str.value.decode()))
        return 1
    print("Found %d device(s)." % nGpus.value)
    for i in range(nGpus.value):
        result = cuda.cuDeviceGet(ctypes.byref(device), i)
        if result != CUDA_SUCCESS:
            cuda.cuGetErrorString(result, ctypes.byref(error_str))
            print("cuDeviceGet failed with error code %d: %s" %
                  (result, error_str.value.decode()))
            return 1
        print("Device: %d" % i)
        if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
                                device) == CUDA_SUCCESS:
            print("  Name: %s" % (name.split(b'\0', 1)[0].decode()))
        if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
                                          ctypes.byref(cc_minor),
                                          device) == CUDA_SUCCESS:
            print("  Compute Capability: %d.%d" %
                  (cc_major.value, cc_minor.value))
            sm = f"sm_{cc_major.value}{cc_minor.value}"
    return sm
@@ -1,300 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# standard imports\n",
|
||||
"import torch\n",
|
||||
"from shark.iree_utils import get_iree_compiled_module"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# torch dynamo related imports\n",
|
||||
"try:\n",
|
||||
" import torchdynamo\n",
|
||||
" from torchdynamo.optimizations.backends import create_backend\n",
|
||||
" from torchdynamo.optimizations.subgraph import SubGraph\n",
|
||||
"except ModuleNotFoundError:\n",
|
||||
" print(\"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\")\n",
|
||||
" exit()\n",
|
||||
"\n",
|
||||
"# torch-mlir imports for compiling\n",
|
||||
"from torch_mlir import compile, OutputType"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def toy_example(*args):\n",
|
||||
" a, b = args\n",
|
||||
"\n",
|
||||
" x = a / (torch.abs(a) + 1)\n",
|
||||
" if b.sum() < 0:\n",
|
||||
" b = b * -1\n",
|
||||
" return x * b"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# compiler that lowers fx_graph to through MLIR\n",
|
||||
"def __torch_mlir(fx_graph, *args, **kwargs):\n",
|
||||
" assert isinstance(\n",
|
||||
" fx_graph, torch.fx.GraphModule\n",
|
||||
" ), \"Model must be an FX GraphModule.\"\n",
|
||||
"\n",
|
||||
" def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
|
||||
" \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
|
||||
"\n",
|
||||
" for node in fx_g.graph.nodes:\n",
|
||||
" if node.op == \"output\":\n",
|
||||
" assert len(node.args) == 1, \"Output node must have a single argument\"\n",
|
||||
" node_arg = node.args[0]\n",
|
||||
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
|
||||
" node.args = (node_arg[0],)\n",
|
||||
" fx_g.graph.lint()\n",
|
||||
" fx_g.recompile()\n",
|
||||
" return fx_g\n",
|
||||
"\n",
|
||||
" fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
|
||||
" ts_graph = torch.jit.script(fx_graph)\n",
|
||||
"\n",
|
||||
" # torchdynamo does munges the args differently depending on whether you use\n",
|
||||
" # the @torchdynamo.optimize decorator or the context manager\n",
|
||||
" if isinstance(args, tuple):\n",
|
||||
" args = list(args)\n",
|
||||
" assert isinstance(args, list)\n",
|
||||
" if len(args) == 1 and isinstance(args[0], list):\n",
|
||||
" args = args[0]\n",
|
||||
"\n",
|
||||
" linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)\n",
|
||||
" callable, _ = get_iree_compiled_module(linalg_module, \"cuda\", func_name=\"forward\")\n",
|
||||
"\n",
|
||||
" def forward(*inputs):\n",
|
||||
" return callable(*inputs)\n",
|
||||
"\n",
|
||||
" return forward"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 device(s).\n",
|
||||
"Device: 0\n",
|
||||
" Name: NVIDIA GeForce RTX 3080\n",
|
||||
" Compute Capability: 8.6\n",
|
||||
"[-0.40066046 -0.4210303 0.03225489 -0.44849953 0.10370405 -0.04422468\n",
|
||||
" 0.33262825 -0.20109026 0.02102537 -0.24882983]\n",
|
||||
"[-0.07824923 -0.17004533 0.06439921 -0.06163602 0.26633525 -1.1560082\n",
|
||||
" -0.06660341 0.24227881 0.1462235 -0.32055548]\n",
|
||||
"[-0.01464001 0.442209 -0.0607936 -0.5477967 -0.25226554 -0.08588809\n",
|
||||
" -0.30497575 0.00061084 -0.50069696 0.2317973 ]\n",
|
||||
"[ 0.25726247 0.39388427 -0.24093066 0.12316308 -0.01981307 0.5661146\n",
|
||||
" 0.26199922 0.8123446 -0.01576749 0.30846444]\n",
|
||||
"[ 0.7878203 -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
|
||||
" -1.6837492 -0.38442805 0.28220773 -1.5325156 ]\n",
|
||||
"[ 0.07975311 0.67754704 -0.30927914 0.00347631 -0.07326564 0.01893554\n",
|
||||
" -0.7518105 -0.03078967 -0.07623022 0.38865626]\n",
|
||||
"[-0.7751679 -0.5841397 -0.6622711 0.18574935 -0.6049372 0.02844244\n",
|
||||
" -0.20471913 0.3337415 -0.3619432 -0.35087156]\n",
|
||||
"[-0.08569919 -0.10775139 -0.02338934 0.21933547 -0.46712473 0.00062137\n",
|
||||
" -0.58207744 0.06457533 0.18276742 0.03866556]\n",
|
||||
"[-0.2311981 -0.43036282 0.20561649 -0.10363232 -0.13248594 0.02885137\n",
|
||||
" -0.31241602 -0.36907142 0.08861586 0.2331427 ]\n",
|
||||
"[-0.07273526 -0.31246194 -0.24218291 -0.24145737 0.0364486 0.14382267\n",
|
||||
" -0.00531162 0.15447603 -0.5220248 -0.09016377]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with torchdynamo.optimize(__torch_mlir):\n",
|
||||
" for _ in range(10):\n",
|
||||
" print(toy_example(torch.randn(10), torch.randn(10)))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"It can also be used through a decorator:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@create_backend\n",
|
||||
"def torch_mlir(subgraph, *args, **kwargs):\n",
|
||||
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
|
||||
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
|
||||
"\n",
|
||||
"@torchdynamo.optimize(\"torch_mlir\")\n",
|
||||
"def toy_example2(*args):\n",
|
||||
" a, b = args\n",
|
||||
"\n",
|
||||
" x = a / (torch.abs(a) + 1)\n",
|
||||
" if b.sum() < 0:\n",
|
||||
" b = b * -1\n",
|
||||
" return x * b"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 device(s).\n",
|
||||
"Device: 0\n",
|
||||
" Name: NVIDIA GeForce RTX 3080\n",
|
||||
" Compute Capability: 8.6\n",
|
||||
"[-0.35494277 0.03409214 -0.02271946 0.7335942 0.03122527 -0.41881397\n",
|
||||
" -0.6609761 -0.6418614 0.29336175 -0.01973678]\n",
|
||||
"[-2.7246824e-01 -3.5543957e-01 6.0087401e-01 -7.4570496e-03\n",
|
||||
" -4.2481605e-02 -5.0296803e-04 7.2928613e-01 -1.4673788e-03\n",
|
||||
" -2.7621329e-01 -6.0995776e-02]\n",
|
||||
"[-0.03165906 0.3889693 0.24052973 0.27279532 -0.02773128 -0.12602475\n",
|
||||
" -1.0124422 0.5720256 -0.35437614 -0.20992722]\n",
|
||||
"[-0.41831446 0.5525326 -0.29749998 -0.17044766 0.11804754 -0.05210691\n",
|
||||
" -0.46145165 -0.8776549 0.10090438 0.17463352]\n",
|
||||
"[ 0.02194221 0.20959911 0.26973712 0.12551276 -0.0020404 0.1490246\n",
|
||||
" -0.04456685 1.1100804 0.8105744 0.6676846 ]\n",
|
||||
"[ 0.06528181 -0.13591261 0.5370964 -0.4398162 -0.03372452 0.9691372\n",
|
||||
" -0.01120087 0.2947028 0.4804801 -0.3324341 ]\n",
|
||||
"[ 0.33549032 -0.23001772 -0.08681437 0.16490957 -0.11223086 0.09168988\n",
|
||||
" 0.02403045 0.17344482 0.46406478 -0.00129451]\n",
|
||||
"[-0.27475086 0.42384806 1.9090122 -0.41147137 -0.6888369 0.08435658\n",
|
||||
" -0.26628923 -0.17436793 -0.8058869 -0.02582378]\n",
|
||||
"[-0.10109414 0.08681287 -0.10055986 0.6858881 0.29267687 -0.02797117\n",
|
||||
" -0.01425194 0.4882803 0.3551982 -0.858935 ]\n",
|
||||
"[-0.22086617 0.524994 0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
|
||||
" 0.11938014 -0.01122053 0.39294165 -0.61770755]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for _ in range(10):\n",
|
||||
" print(toy_example2(torch.randn(10), torch.randn(10)))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -1,84 +0,0 @@
import torch
from torch_mlir import compile, OutputType

from shark.iree_utils import get_iree_compiled_module

try:
    import torchdynamo
    from torchdynamo.optimizations.backends import create_backend
    from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
    print("Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo")
    exit()

NUM_ITERS = 10


def __torch_mlir(fx_graph, *args, **kwargs):
    assert isinstance(
        fx_graph, torch.fx.GraphModule
    ), "Model must be an FX GraphModule."

    def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
        """Replace tuple with tuple element in functions that return one-element tuples."""

        for node in fx_g.graph.nodes:
            if node.op == "output":
                assert len(node.args) == 1, "Output node must have a single argument"
                node_arg = node.args[0]
                if isinstance(node_arg, tuple) and len(node_arg) == 1:
                    node.args = (node_arg[0],)
        fx_g.graph.lint()
        fx_g.recompile()
        return fx_g

    fx_graph = _unwrap_single_tuple_return(fx_graph)
    ts_graph = torch.jit.script(fx_graph)

    if isinstance(args, tuple):
        args = list(args)
    assert isinstance(args, list)
    if len(args) == 1 and isinstance(args[0], list):
        args = args[0]

    linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)
    callable, _ = get_iree_compiled_module(linalg_module, "cuda", func_name="forward")

    def forward(*inputs):
        return callable(*inputs)

    return forward


def toy_example(*args):
    a, b = args

    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b


with torchdynamo.optimize(__torch_mlir):
    for _ in range(10):
        print(toy_example(torch.randn(10), torch.randn(10)))


@create_backend
def torch_mlir(subgraph, *args, **kwargs):
    assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
    return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))


@torchdynamo.optimize("torch_mlir")
def toy_example2(*args):
    a, b = args

    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b


for _ in range(10):
    print(toy_example2(torch.randn(10), torch.randn(10)))
@@ -1,805 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# standard imports\n",
|
||||
"import torch\n",
|
||||
"from torch_mlir.eager_mode import torch_mlir_tensor"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# eager mode imports\n",
|
||||
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
|
||||
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"and wrapping all your `torch.Tensor`s:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"NUM_ITERS = 10\n",
|
||||
"\n",
|
||||
"t = torch.ones((10, 10))\n",
|
||||
"u = 2 * torch.ones((10, 10))\n",
|
||||
"\n",
|
||||
"tt = TorchMLIRTensor(t)\n",
|
||||
"print(tt)\n",
|
||||
"uu = TorchMLIRTensor(u)\n",
|
||||
"print(uu)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(NUM_ITERS):\n",
|
||||
" yy = tt + uu\n",
|
||||
" print(type(yy))\n",
|
||||
" print(yy.elem.to_host())\n",
|
||||
" yy = tt * uu\n",
|
||||
" print(type(yy))\n",
|
||||
" print(yy.elem.to_host())"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
|
||||
"\n",
|
||||
"t = torch.ones((10, 10))\n",
|
||||
"u = 2 * torch.ones((10, 10))\n",
|
||||
"\n",
|
||||
"tt = TorchMLIRTensor(t)\n",
|
||||
"print(tt)\n",
|
||||
"uu = TorchMLIRTensor(u)\n",
|
||||
"print(uu)\n",
|
||||
"\n",
|
||||
"yy = tt + uu\n",
|
||||
"print(yy.elem.to_host())\n",
|
||||
"yy = tt * uu\n",
|
||||
"print(yy.elem.to_host())"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"for i in range(NUM_ITERS):\n",
"    yy = t + u\n",
"    print(type(yy))\n",
"    print(yy.elem.to_host())\n",
"    yy = t * u\n",
"    print(type(yy))\n",
"    print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization): it defines a \"deleter\" that runs when an instance of `SharkEagerMode` is garbage collected. The takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:\n",
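"\n",
"For intuition, the pattern looks roughly like the sketch below. This is a minimal illustration, not SHARK's actual implementation; `install_backend` and `uninstall_backend` are hypothetical stand-ins:\n",
"\n",
"```python\n",
"import weakref\n",
"\n",
"def install_backend(name):  # hypothetical stand-in\n",
"    print(f\"installed {name} backend\")\n",
"\n",
"def uninstall_backend():  # hypothetical stand-in\n",
"    print(\"uninstalled backend\")\n",
"\n",
"class EagerModeGuard:\n",
"    def __init__(self, backend_name):\n",
"        install_backend(backend_name)\n",
"        # weakref.finalize registers a callback that runs when this instance\n",
"        # is garbage collected -- the \"deleter\" in the RAII analogy.\n",
"        self._finalizer = weakref.finalize(self, uninstall_backend)\n",
"\n",
"guard = EagerModeGuard(\"cpu\")\n",
"del guard  # triggers the finalizer, just like `del shark_eager_mode` below\n",
"```"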
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"yy = t + u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())\n",
"yy = t * u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@@ -1,148 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor

from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode


def test_cpu():
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")

    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")

    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)

    for i in range(NUM_ITERS):
        yy = tt + uu
        print(type(yy))
        print(yy.elem.to_host())
        yy = tt * uu
        print(type(yy))
        print(yy.elem.to_host())


def test_gpu():
    source = """
    #include <iostream>
    #include "cuda.h"
    #include "cuda_runtime_api.h"

    using namespace std;

    void print_free_mem() {
        size_t free, total;
        cudaSetDevice(0);
        int id;
        cudaGetDevice(&id);
        cudaMemGetInfo(&free, &total);
        cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
    }
    """
    gpu_stats = load_inline(
        name="inline_extension",
        cpp_sources=[source],
        extra_include_paths=include_paths(cuda=True),
        functions=["print_free_mem"],
    )
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")

    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")

    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)

    for i in range(NUM_ITERS):
        yy = tt + uu
        print(yy.elem.to_host())
        yy = tt * uu
        print(yy.elem.to_host())
        gpu_stats.print_free_mem()


def test_python_mode_ref_backend():
    # hide this wherever you want?
    _ = SharkEagerMode("refbackend")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        print(i)
        yy = t + u
        print(yy.elem)
        yy = t * u
        print(yy.elem)


def test_python_mode_iree_cpu():
    # hide this wherever you want?
    _ = SharkEagerMode("cpu")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


def test_python_mode_iree_gpu():
    _ = SharkEagerMode("gpu")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


if __name__ == "__main__":
    NUM_ITERS = 10
    test_cpu()
    if torch.cuda.is_available():
        test_gpu()
    test_python_mode_ref_backend()
    test_python_mode_iree_cpu()
    test_python_mode_iree_gpu()
@@ -1,51 +0,0 @@
from PIL import Image
import requests

from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
clip_vit_inputs = [
    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]


class CLIPModule(tf.Module):

    def __init__(self):
        super(CLIPModule, self).__init__()
        self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")

        self.m.predict = lambda x, y, z: self.m(
            input_ids=x, attention_mask=y, pixel_values=z)

    @tf.function(input_signature=clip_vit_inputs)
    def forward(self, input_ids, attention_mask, pixel_values):
        return self.m.predict(input_ids, attention_mask,
                              pixel_values).logits_per_image


if __name__ == "__main__":
    # Prepping Data
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image = Image.open(requests.get(url, stream=True).raw)

    inputs = processor(text=["a photo of a cat", "a photo of a dog"],
                       images=image,
                       return_tensors="tf",
                       padding=True)

    shark_module = SharkInference(
        CLIPModule(),
        (inputs["input_ids"], inputs["attention_mask"], inputs["pixel_values"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()

    print(
        shark_module.forward((inputs["input_ids"], inputs["attention_mask"],
                              inputs["pixel_values"])))
@@ -1,38 +0,0 @@
from PIL import Image
import requests

from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
gpt2_inputs = [
    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
]


class GPT2Module(tf.Module):

    def __init__(self):
        super(GPT2Module, self).__init__()
        self.m = TFGPT2Model.from_pretrained("distilgpt2")

        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)

    @tf.function(input_signature=gpt2_inputs)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
    text = "I love the distilled version of models."

    inputs = tokenizer(text, return_tensors='tf')
    shark_module = SharkInference(
        GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
@@ -1,18 +0,0 @@
from shark.shark_inference import SharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)

shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
@@ -1,36 +0,0 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference

torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


test_input = torch.randint(2, (1, 128))

shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
                              jit_trace=True,
                              benchmark_mode=True)

shark_module.compile()
shark_module.forward((test_input,))
shark_module.benchmark_all((test_input,))
@@ -1,58 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    shark_module = SharkInference(
        BertModule(),
        test_input,
        benchmark_mode=True)
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    shark_module.benchmark_all(test_input)
@@ -1,35 +0,0 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference

torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


test_input = torch.randint(2, (1, 128))

shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
                              jit_trace=True)

shark_module.compile()
result = shark_module.forward((test_input,))
print("Obtained result", result)
@@ -1,41 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
from shark.shark_importer import shark_load
from shark.parser import parser
import os

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

parser.add_argument(
    "--download_mlir_path",
    type=str,
    default="minilm_tf_inference.mlir",
    help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()

MAX_SEQUENCE_LENGTH = 512

if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)
    model_name = "minilm_tf_inference"
    minilm_mlir = shark_load(model_name, load_args.download_mlir_path)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    shark_module = SharkInference(
        minilm_mlir, test_input, benchmark_mode=True)
    shark_module.set_frontend("mhlo")
    shark_module.compile()
    shark_module.benchmark_all(test_input)
@@ -1,56 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    shark_module = SharkInference(
        BertModule(),
        (encoded_input["input_ids"], encoded_input["attention_mask"],
         encoded_input["token_type_ids"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()

    print(
        shark_module.forward(
            (encoded_input["input_ids"], encoded_input["attention_mask"],
             encoded_input["token_type_ids"])))
@@ -1,80 +0,0 @@
from PIL import Image
import requests
import torch
import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference


################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
    headers = {
        "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
    }
    img = Image.open(requests.get(url, headers=headers,
                                  stream=True).raw).convert("RGB")
    # preprocessing pipeline
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    img_preprocessed = preprocess(img)
    return torch.unsqueeze(img_preprocessed, 0)


def load_labels():
    classes_text = requests.get(
        "https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
        stream=True,
    ).text
    labels = [line.strip() for line in classes_text.splitlines()]
    return labels


def top3_possibilities(res):
    _, indexes = torch.sort(res, descending=True)
    percentage = torch.nn.functional.softmax(res, dim=1)[0] * 100
    top3 = [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
    return top3


class Resnet50Module(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.train(False)

    def forward(self, img):
        return self.resnet.forward(img)


image_url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
print("load image from " + image_url, file=sys.stderr)
img = load_and_preprocess_image(image_url)
labels = load_labels()

##############################################################################

input = torch.randn(1, 3, 224, 224)
print(input.shape)

## The img is passed to determine the input shape.
shark_module = SharkInference(Resnet50Module(), (img,))
shark_module.compile()

## Can pass any img or input to the forward module.
results = shark_module.forward((img,))

print("The top 3 results obtained via shark_runner are:")
print(top3_possibilities(torch.from_numpy(results)))

print()

print("The top 3 results obtained via torch are:")
print(top3_possibilities(Resnet50Module()(img)))
@@ -1,38 +0,0 @@
from PIL import Image
import requests

from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
t5_inputs = [
    tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]


class T5Module(tf.Module):

    def __init__(self):
        super(T5Module, self).__init__()
        self.m = TFT5Model.from_pretrained("t5-small")
        self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)

    @tf.function(input_signature=t5_inputs)
    def forward(self, input_ids, decoder_input_ids):
        return self.m.predict(input_ids, decoder_input_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    text = "I love the distilled version of models."
    inputs = tokenizer(text, return_tensors="tf").input_ids

    shark_module = SharkInference(T5Module(), (inputs, inputs))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(shark_module.forward((inputs, inputs)))
@@ -1,44 +0,0 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference


class VisionModule(torch.nn.Module):

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


input = torch.randn(1, 3, 224, 224)

## The vision models present here: https://pytorch.org/vision/stable/models.html
vision_models_list = [
    models.resnet18(pretrained=True),
    models.alexnet(pretrained=True),
    models.vgg16(pretrained=True),
    models.squeezenet1_0(pretrained=True),
    models.densenet161(pretrained=True),
    models.inception_v3(pretrained=True),
    models.shufflenet_v2_x1_0(pretrained=True),
    models.mobilenet_v2(pretrained=True),
    models.mobilenet_v3_small(pretrained=True),
    models.resnext50_32x4d(pretrained=True),
    models.wide_resnet50_2(pretrained=True),
    models.mnasnet1_0(pretrained=True),
    models.efficientnet_b0(pretrained=True),
    models.regnet_y_400mf(pretrained=True),
    models.regnet_x_400mf(pretrained=True),
]

for i, vision_model in enumerate(vision_models_list):
    shark_module = SharkInference(
        VisionModule(vision_model),
        (input,),
    )
    shark_module.compile()
    shark_module.forward((input,))
@@ -1,32 +0,0 @@
import torch
from shark.shark_inference import SharkInference


# Currently not supported: aten.transpose_conv2d is missing.
class UnetModule(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = torch.hub.load(
            "mateuszbuda/brain-segmentation-pytorch",
            "unet",
            in_channels=3,
            out_channels=1,
            init_features=32,
            pretrained=True,
        )
        self.train(False)

    def forward(self, input):
        return self.model(input)


input = torch.randn(1, 3, 224, 224)

print(input)
shark_module = SharkInference(
    UnetModule(),
    (input,),
)
shark_module.benchmark_forward((input,))
print(input)
@@ -1,50 +0,0 @@
import torch
from torch.nn.utils import _stateless
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_runner import SharkTrainer


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


mod = MiniLMSequenceClassification()


def get_sorted_params(named_params):
    return [i[1] for i in sorted(named_params.items())]


print(dict(mod.named_buffers()))

inp = (torch.randint(2, (1, 128)),)


def forward(params, buffers, args):
    params_and_buffers = {**params, **buffers}
    _stateless.functional_call(mod, params_and_buffers, args,
                               {}).sum().backward()
    optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
    # optim.load_state_dict(optim_state)
    optim.step()
    return params, buffers


shark_module = SharkTrainer(mod, inp, custom_inference_fn=forward)

print(shark_module.forward())
@@ -1,45 +0,0 @@
import numpy as np
import os
import time
import tensorflow as tf

from shark.shark_trainer import SharkTrainer
from shark.parser import parser
from shark.shark_importer import shark_load

parser.add_argument(
    "--download_mlir_path",
    type=str,
    default="bert_tf_training.mlir",
    help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()

tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Download BERT model from tank and train.
if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
    ]
    model_name = "bert_tf_training"
    bert_mlir = shark_load(model_name, load_args.download_mlir_path)
    sample_input_tensors = [
        tf.convert_to_tensor(val, dtype=tf.int32)
        for val in predict_sample_input
    ]
    num_iter = 10
    shark_module = SharkTrainer(
        bert_mlir,
        (sample_input_tensors,
         tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)),
                              dtype=tf.int32)))
    shark_module.set_frontend("mhlo")
    shark_module.compile()
    start = time.time()
    print(shark_module.train(num_iter))
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,88 +0,0 @@
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

from shark.shark_trainer import SharkTrainer


tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def forward(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
    ]
    sample_input_tensors = [
        tf.convert_to_tensor(val, dtype=tf.int32)
        for val in predict_sample_input
    ]
    num_iter = 10
    shark_module = SharkTrainer(
        BertModule(),
        (sample_input_tensors,
         tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)),
                              dtype=tf.int32)))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    start = time.time()
    print(shark_module.train(num_iter))
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,44 +0,0 @@
import torch
from torch.nn.utils import _stateless
from shark.shark_trainer import SharkTrainer


class Foo(torch.nn.Module):

    def __init__(self):
        super(Foo, self).__init__()
        self.l1 = torch.nn.Linear(10, 16)
        self.relu = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(16, 2)

    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out


mod = Foo()
inp = (torch.randn(10, 10),)


def get_sorted_params(named_params):
    return [i[1] for i in sorted(named_params.items())]


def forward(params, buffers, args):
    params_and_buffers = {**params, **buffers}
    _stateless.functional_call(mod, params_and_buffers, args,
                               {}).sum().backward()
    optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
    optim.step()
    return params, buffers


# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)

shark_module = SharkTrainer(mod, inp)
# Pass the training function in case of torch
shark_module.compile(training_fn=forward)

shark_module.train(num_iters=10)
@@ -1,81 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, Any

import iree
import iree.runtime as ireert
import numpy as np
import torch
from iree.runtime import DeviceArray
from torch_mlir._mlir_libs._mlir.ir import Module
from torch_mlir.compiler_utils import (
    get_module_name_for_debug_dump,
    run_pipeline_with_repro_report,
)
from torch_mlir.eager_mode.torch_mlir_eager_backend import (
    TorchMLIREagerBackend,
    TensorMetaData,
)
from torch_mlir_e2e_test.eager_backends.refbackend import NUMPY_TO_TORCH_DTYPE_DICT

from shark.iree_utils import get_iree_compiled_module, IREE_DEVICE_MAP


class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
    """Main entry-point for the iree backend for torch-mlir eager mode.

    EagerModeIREELinalgOnTensorsBackend uses iree.DeviceArray representations of tensors and
    thus all of the wrapping, unwrapping, and munging here is done between torch.Tensor and
    iree.DeviceArray, with np.ndarray as an intermediary.
    """

    def __init__(self, device: str):
        self.torch_device_str = device
        self.iree_device_str = IREE_DEVICE_MAP[device]
        self.config = ireert.Config(self.iree_device_str)

    def get_torch_metadata(self, tensor: DeviceArray,
                           kwargs: Dict[str, Any]) -> TensorMetaData:
        return TensorMetaData(
            size=tensor.shape,
            dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
            device=torch.device(self.torch_device_str),
            requires_grad=tensor.dtype.type in {np.float32, np.float64} and
            kwargs.get("requires_grad", False),
        )

    def compile(self, imported_module: Module):
        fn_name = get_module_name_for_debug_dump(imported_module)
        run_pipeline_with_repro_report(
            imported_module,
            "torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
            "EagerMode",
        )
        callable, _ = get_iree_compiled_module(imported_module,
                                               self.iree_device_str,
                                               func_name=fn_name)
        return callable

    def copy_into(self, dst, src):
        """Copy output back to appropriate arg that it should alias."""
        np.copyto(dst, src)

    def transfer_from_device_to_torch(self, e):
        return torch.from_numpy(e.to_host())

    def transfer_from_torch_to_device(self,
                                      tensor: torch.Tensor) -> DeviceArray:
        return iree.runtime.asdevicearray(self.config.device, tensor.numpy())
@@ -1,359 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import iree.runtime as ireert
|
||||
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
|
||||
import iree.compiler as ireec
|
||||
from shark.torch_mlir_utils import get_module_name_for_asm_dump
|
||||
from shark.cuda_utils import get_cuda_sm_cc
|
||||
from shark.model_annotation import *
|
||||
import subprocess
|
||||
import numpy as np
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
IREE_DEVICE_MAP = {
|
||||
"cpu": "local-task",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm"
|
||||
}
|
||||
|
||||
IREE_TARGET_MAP = {
|
||||
"cpu": "dylib",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm"
|
||||
}
|
||||
|
||||
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
|
||||
|
||||
|
||||
def check_device_drivers(device):
|
||||
"""Checks necessary drivers present for gpu and vulkan devices"""
|
||||
if (device in ["gpu", "cuda"]):
|
||||
try:
|
||||
subprocess.check_output('nvidia-smi')
|
||||
except Exception:
|
||||
return True
|
||||
elif (device in ["metal", "vulkan"]):
|
||||
try:
|
||||
subprocess.check_output('vulkaninfo')
|
||||
except Exception:
|
||||
return True
|
||||
elif (device == "cpu"):
|
||||
return False
|
||||
# Unknown device.
|
||||
else:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_iree_cpu_args():
|
||||
find_triple_cmd = "uname -s -m"
|
||||
os_name, proc_name = subprocess.run(
|
||||
find_triple_cmd, shell=True, stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8').split()
|
||||
if os_name == "Darwin":
|
||||
find_kernel_version_cmd = "uname -r"
|
||||
kernel_version = subprocess.run(find_kernel_version_cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8')
|
||||
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
|
||||
elif os_name == "Linux":
|
||||
target_triple = f"{proc_name}-linux-gnu"
|
||||
else:
|
||||
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
|
||||
raise Exception(error_message)
|
||||
print(f"Target triple found:{target_triple}")
|
||||
return [f"-iree-llvm-target-triple={target_triple}"]
|
||||
|
||||
|
||||
def get_iree_gpu_args():
|
||||
ireert.flags.FUNCTION_INPUT_VALIDATION = False
|
||||
ireert.flags.parse_flags("--cuda_allow_inline_execution")
|
||||
sm_arch = get_cuda_sm_cc()
|
||||
if sm_arch in ['sm_70', 'sm_72', 'sm_75', 'sm_80', 'sm_84', 'sm_86']:
|
||||
return [
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}"
|
||||
]
|
||||
else:
|
||||
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
|
||||
|
||||
|
||||
def get_vulkan_triple_flag():
|
||||
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk \'END{{print $NF}}\'"
|
||||
vulkan_device = run_cmd(vulkan_device_cmd).strip()
|
||||
if vulkan_device == "M1":
|
||||
print("Found Apple Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "A100-SXM4-40GB":
|
||||
print("Found Nvidia Device. Using ampere-rtx3080-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
|
||||
else:
|
||||
print(
|
||||
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] or pull up an issue."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def get_iree_vulkan_args():
|
||||
#vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
|
||||
vulkan_flag = []
|
||||
vulkan_triple_flag = get_vulkan_triple_flag()
|
||||
if vulkan_triple_flag is not None:
|
||||
vulkan_flag.append(vulkan_triple_flag)
|
||||
return vulkan_flag
|
||||
|
||||
|
||||
def get_iree_device_args(device):
|
||||
if device == "cpu":
|
||||
return get_iree_cpu_args()
|
||||
if device in ["gpu", "cuda"]:
|
||||
return get_iree_gpu_args()
|
||||
if device in ["metal", "vulkan"]:
|
||||
return get_iree_vulkan_args()
|
||||
return []
|
||||
|
||||
|
||||
def get_iree_frontend_args(frontend):
|
||||
if frontend in ["torch", "pytorch", "linalg"]:
|
||||
return ["--iree-llvm-target-cpu-features=host"]
|
||||
elif frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
return [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32"
|
||||
]
|
||||
else:
|
||||
# Frontend not found.
|
||||
return []
|
||||
|
||||
|
||||
def compile_module_to_flatbuffer(module, device, frontend, func_name,
|
||||
model_config_path):
|
||||
# Setup Compile arguments wrt to frontends.
|
||||
input_type = ""
|
||||
args = get_iree_frontend_args(frontend)
|
||||
args += get_iree_device_args(device)
|
||||
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_type = "mhlo"
|
||||
elif frontend in ["mhlo", "tosa"]:
|
||||
input_type = frontend
|
||||
elif frontend in ["tflite"]:
|
||||
input_type = "tosa"
|
||||
|
||||
# Annotate the input module with the configs
|
||||
if model_config_path != None:
|
||||
# Currently tuned model only works on tf frontend
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_module = module.decode('utf-8')
|
||||
elif frontend in ["pytorch", "torch"]:
|
||||
input_module = module.operation.get_asm()
|
||||
with create_context() as ctx:
|
||||
module = model_annotation(ctx,
|
||||
input_contents=input_module,
|
||||
config_path=model_config_path)
|
||||
module = str(module)
|
||||
|
||||
# Compile according to the input type, else just try compiling.
|
||||
if input_type not in ["mhlo", "tosa"]:
|
||||
module = str(module)
|
||||
if input_type != "":
|
||||
# Currently for MHLO/TOSA.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
module,
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args,
|
||||
input_type=input_type)
|
||||
else:
|
||||
# Currently for Torch.
|
||||
    flatbuffer_blob = ireec.compile_str(
        str(module),
        target_backends=[IREE_TARGET_MAP[device]],
        extra_args=args)
    return flatbuffer_blob


def get_iree_module(flatbuffer_blob, device, func_name):
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    config = ireert.Config(IREE_DEVICE_MAP[device])
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    ModuleCompiled = ctx.modules.module[func_name]
    return ModuleCompiled, config


def get_iree_compiled_module(module,
                             device: str,
                             frontend: str = "torch",
                             func_name: str = "forward",
                             model_config_path: str = None):
    """Given a module returns the compiled .vmfb and configs"""
    flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
                                                   func_name, model_config_path)
    return get_iree_module(flatbuffer_blob, device, func_name)


def export_iree_module_to_vmfb(module,
                               device: str,
                               directory: str,
                               frontend: str = "torch",
                               func_name: str = "forward",
                               model_config_path: str = None):
    flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
                                                   func_name, model_config_path)
    module_name = f"{frontend}_{func_name}_{device}"
    filename = os.path.join(directory, module_name + ".vmfb")
    with open(filename, 'wb') as f:
        f.write(flatbuffer_blob)
    print(f"Saved vmfb in {filename}.")
    return filename


def export_module_to_mlir_file(module, frontend, directory: str):
    mlir_str = module
    if frontend in ["tensorflow", "tf", "mhlo"]:
        mlir_str = module.decode('utf-8')
    elif frontend in ["pytorch", "torch"]:
        mlir_str = module.operation.get_asm()
    filename = os.path.join(directory, "model.mlir")
    with open(filename, 'w') as f:
        f.write(mlir_str)
    print(f"Saved mlir in {filename}.")
    return filename


def get_results(compiled_vm, input, config, frontend="torch"):
    """Runs a .vmfb file given inputs and config and returns output."""
    device_inputs = input
    if frontend in ["torch", "pytorch"]:
        device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
    if frontend in ["tensorflow", "tf", "tflite"]:
        device_inputs = []
        for a in input:
            if isinstance(a, list):
                device_inputs.append([
                    ireert.asdevicearray(config.device, val, dtype=np.int32)
                    for val in a
                ])
            else:
                device_inputs.append(ireert.asdevicearray(config.device, a))
    result = compiled_vm(*device_inputs)
    result_tensors = []
    if isinstance(result, tuple):
        for val in result:
            result_tensors.append(np.copy(np.asarray(val, val.dtype)))
        return result_tensors
    elif isinstance(result, dict):
        data = list(result.items())
        res = np.array(data, dtype=object)
        return np.copy(res)
    else:
        return np.copy(np.asarray(result, dtype=result.dtype))


######### Benchmark Related Tools ###########


def tensor_to_type_str(input_tensors: tuple, frontend: str):
    """
    Input: A tuple of input tensors, i.e. tuple(torch.tensor)
    Output: a list of strings that represent MLIR types (e.g. 1x24xf64)
    # TODO: Support more than floats and ints
    """
    list_of_type = []
    for input_tensor in input_tensors:
        type_string = "x".join([str(dim) for dim in input_tensor.shape])
        if frontend in ["torch", "pytorch"]:
            dtype_string = str(input_tensor.dtype).replace("torch.", "")
        elif frontend in ["tensorflow", "tf", "mhlo"]:
            dtype = input_tensor.dtype
            dtype_string = re.findall("'[^\"]*'",
                                      str(dtype))[0].replace("'", "")
        regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
        match = regex_split.match(dtype_string)
        mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
        type_string += f"x{mlir_type_string}"
        list_of_type.append(type_string)
    return list_of_type


def build_benchmark_args(input_file: str,
                         device: str,
                         input_tensors: tuple,
                         frontend: str,
                         training=False):
    """
    Inputs: input_file leading to the vmfb, the input_tensors to the
        function, the target device, and whether it is training or not.
    Outputs: a list of strings that, when joined, executes
        iree-benchmark-module on the target model.
    """
    path = benchmark_module.__path__[0]
    benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
    benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
    fn_name = "forward"
    if training:
        # TODO: Replace name of train with actual train fn name.
        fn_name = "train"
    benchmark_cl.append(f"--entry_function={fn_name}")
    benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
    mlir_input_types = tensor_to_type_str(input_tensors, frontend)
    for mlir_input in mlir_input_types:
        benchmark_cl.append(f"--function_input={mlir_input}")
    # Extracts the "<time> <unit>" columns of the benchmark's last output line.
    time_extractor = "| awk 'END{print $2 $3}'"
    benchmark_cl.append(time_extractor)
    return benchmark_cl


def run_cmd(cmd):
    """
    Inputs: cli command string.
    """
    try:
        result = subprocess.run(cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                check=True)
        result_str = result.stdout.decode()
        return result_str
    except Exception:
        sys.exit(f"Exiting program due to error running: {cmd}")


def run_benchmark_module(benchmark_cl):
    """
    Run the benchmark command, extract the result and return iterations/second.

    Input: benchmark command.
    """
    benchmark_path = benchmark_cl[0]
    assert os.path.exists(
        benchmark_path
    ), "Cannot find benchmark_module. Please contact a SHARK maintainer on Discord."
    bench_result = run_cmd(' '.join(benchmark_cl))
    regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
    match = regex_split.match(bench_result)
    time = float(match.group(1))
    unit = match.group(2)
    return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
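
# A minimal end-to-end sketch of the helpers above (illustrative only; it
# assumes `module` is an already-imported MLIR module, "cpu" is a valid key
# in IREE_DEVICE_MAP, and `np_inputs` is a tuple of numpy arrays matching the
# module's forward signature).
def _example_compile_and_run(module, np_inputs):
    # Compile down to a flatbuffer and load it into an IREE runtime context.
    compiled_module, config = get_iree_compiled_module(module, "cpu")
    # Push the inputs to the device and invoke the entry function.
    return get_results(compiled_module, np_inputs, config, frontend="torch")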
@@ -1,143 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import json
import os
from typing import List, Dict

from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans

MATMUL_OP_NAMES = set(
    ["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
idx = 0


def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
    if os.path.isfile(input_contents):
        with open(input_contents, "rb") as f:
            input_contents = f.read()

    module = ir.Module.parse(input_contents)

    with open(config_path, "r") as f:
        data = json.load(f)
        configs = data["options"]

    # The Python API does not expose a general walk() function, so we just
    # do it ourselves.
    walk_children(module.operation, configs)

    if not module.operation.verify():
        raise RuntimeError("Modified program does not verify!")

    # More efficient than: print(module)
    # - Disables verification (already done above)
    # - Writes as binary, avoiding costly unicode conversions
    sys.stdout.buffer.write(
        module.operation.get_asm(assume_verified=True, binary=True))
    return module


def walk_children(op: ir.Operation, configs: List[Dict]):
    for region in op.regions:
        for block in region.blocks:
            for child_op in block.operations:
                # TODO: This is dumb. Both Operation and OpView should expose
                # 'operation' and 'name' attributes.
                if isinstance(child_op, ir.OpView):
                    child_op = child_op.operation
                if child_op.name in MATMUL_OP_NAMES:
                    global idx
                    tile_sizes, pipeline, workgroup_size, \
                        split_k, pipeline_depth = parse_config(configs[idx])

                    add_compilation_info(child_op,
                                         tile_sizes=tile_sizes,
                                         pipeline=pipeline,
                                         workgroup_size=workgroup_size,
                                         pipeline_depth=pipeline_depth)

                    if split_k:
                        add_split_k(child_op, split_k)

                    idx = idx + 1
                    print(f"Updated op {child_op}", file=sys.stderr)
                walk_children(child_op, configs)


def parse_config(config: Dict):
    if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
        pipeline = ("LLVMGPUMatmulSimt" if config["pipeline"] == "GPU" else
                    "LLVMGPUMatmulTensorCore")
        tile_sizes = [config["work_group_tile_sizes"]]
        workgroup_size = config["work_group_sizes"]
        # These two keys are optional in the config.
        pipeline_depth = config.get("pipeline_depth")
        split_k = config.get("split_k")
    else:
        pipeline = config["pipeline"]
        tile_sizes = [
            config["work_group_tile_sizes"], config["l1_tile_sizes"],
            config["vector_tile_sizes"]
        ]
        workgroup_size = []
        split_k = None
        pipeline_depth = None
    return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth


def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
                         pipeline: str, workgroup_size: List[int],
                         pipeline_depth: int):
    # We don't have a Python binding for CompilationInfo, so we just parse
    # its string form.
    if pipeline_depth:
        attr = ir.Attribute.parse(
            f"#iree_codegen.compilation_info<"
            f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
            f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
            f"workgroup_size = {repr(workgroup_size)}>")
    else:
        attr = ir.Attribute.parse(
            f"#iree_codegen.compilation_info<"
            f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
            f"translation_info = <{pipeline}>, "
            f"workgroup_size = {repr(workgroup_size)}>")
    op.attributes["compilation_info"] = attr


def add_split_k(op: ir.Operation, k: int):
    attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
    op.attributes["iree_flow_split_k"] = attr


def create_context() -> ir.Context:
    context = ir.Context()
    ireec_trans.register_all_dialects(context)
    context.allow_unregistered_dialects = True
    return context


if __name__ == "__main__":
    with create_context() as ctx:
        model_annotation(ctx,
                         input_contents=sys.argv[1],
                         config_path=sys.argv[2])
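
# The tuning config consumed by model_annotation() above is a JSON file whose
# "options" list pairs up, in walk order, with the matmul ops in the module.
# A hypothetical GPU entry, using the field names read by parse_config():
#
# {
#   "options": [
#     {
#       "pipeline": "GPU_TENSORCORE",
#       "work_group_tile_sizes": [32, 32, 16],
#       "work_group_sizes": [64, 2, 1],
#       "pipeline_depth": 4,
#       "split_k": 2
#     }
#   ]
# }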
@@ -1,71 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os


def dir_path(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(
            f"readable_dir:{path} is not a valid path")


def dir_file(path):
    if os.path.isfile(path):
        return path
    else:
        raise argparse.ArgumentTypeError(
            f"readable_file:{path} is not a valid file")


parser = argparse.ArgumentParser(description='SHARK runner.')
parser.add_argument(
    "--device",
    type=str,
    default="cpu",
    help="Device on which shark_runner runs. Options are cpu, gpu, and vulkan.")
parser.add_argument(
    "--repro_dir",
    help=
    "Directory to which module files will be saved for reproduction or debugging.",
    type=dir_path,
    default="/tmp/")
parser.add_argument("--save_mlir",
                    default=False,
                    action="store_true",
                    help="Saves input MLIR module to /tmp/ directory.")
parser.add_argument("--save_vmfb",
                    default=False,
                    action="store_true",
                    help="Saves iree .vmfb module to /tmp/ directory.")
parser.add_argument(
    "--model_config_path",
    help="Directory where the tuned model config file is located.",
    default=None)

parser.add_argument(
    "--num_warmup_iterations",
    type=int,
    default=2,
    help="Run the model for the specified number of warmup iterations.")
parser.add_argument(
    "--num_iterations",
    type=int,
    default=1,
    help="Run the model for the specified number of iterations.")

shark_args, unknown = parser.parse_known_args()
@@ -1,136 +0,0 @@
# Lint as: python3
"""SHARK Importer"""

import iree.compiler.tflite as iree_tflite_compile
import iree.runtime as iree_rt
import numpy as np
import os
import sys
import tensorflow.compat.v2 as tf
import urllib.request
from shark.shark_inference import SharkInference


class SharkImporter:

    def __init__(self,
                 model_path,
                 model_type: str = "tflite",
                 model_source_hub: str = "tfhub",
                 device: str = None,
                 dynamic: bool = False,
                 jit_trace: bool = False,
                 benchmark_mode: bool = False):
        self.model_path = model_path
        self.model_type = model_type
        self.model_source_hub = model_source_hub
        self.device = device
        self.dynamic = dynamic
        self.jit_trace = jit_trace
        self.benchmark_mode = benchmark_mode
        self.inputs = None
        self.input_details = None
        self.output_details = None

        # Create the tmp model file directory.
        if self.model_path is None:
            print("Error: no model_path. Please provide a model path.")
            return

        if self.model_source_hub == "tfhub":
            # Compile and run a tfhub tflite model.
            if self.model_type == "tflite":
                print("Setting up for TMP_DIR")
                exe_basename = os.path.basename(sys.argv[0])
                self.workdir = os.path.join(os.path.dirname(__file__), "tmp",
                                            exe_basename)
                print(f"TMP_DIR = {self.workdir}")
                os.makedirs(self.workdir, exist_ok=True)
                self.tflite_file = '/'.join([self.workdir, 'model.tflite'])
                print("Setting up local address for tflite model file: ",
                      self.tflite_file)
                if os.path.exists(self.model_path):
                    self.tflite_file = self.model_path
                else:
                    print("Download tflite model")
                    urllib.request.urlretrieve(self.model_path,
                                               self.tflite_file)
                print("Setting up tflite interpreter")
                self.tflite_interpreter = tf.lite.Interpreter(
                    model_path=self.tflite_file)
                self.tflite_interpreter.allocate_tensors()
                # Default input initialization.
                self.input_details, self.output_details = self.get_model_details(
                )
                inputs = self.generate_inputs(
                    self.input_details)  # device inputs
                self.setup_inputs(inputs)

    def generate_inputs(self, input_details):
        args = []
        for input in input_details:
            print(str(input["shape"]), input["dtype"].__name__)
            args.append(np.zeros(shape=input["shape"], dtype=input["dtype"]))
        return args

    def get_model_details(self):
        if self.model_type == "tflite":
            print("Get tflite input output details")
            self.input_details = self.tflite_interpreter.get_input_details()
            self.output_details = self.tflite_interpreter.get_output_details()
        return self.input_details, self.output_details

    def setup_inputs(self, inputs):
        print("Setting up inputs")
        self.inputs = inputs

    def compile(self, inputs=None):
        if inputs is not None:
            self.setup_inputs(inputs)
        # Preprocess model_path to get the model type and model source hub.
        print("Shark Importer: initialize SharkInference and compile")
        if self.model_source_hub == "tfhub":
            # Compile and run a tfhub tflite model.
            print("Inference tfhub model")
            self.shark_module = SharkInference(self.tflite_file,
                                               self.inputs,
                                               device=self.device,
                                               dynamic=self.dynamic,
                                               jit_trace=self.jit_trace)
            self.shark_module.set_frontend("tflite")
            self.shark_module.compile()
        elif self.model_source_hub == "huggingface":
            print("Inference for", self.model_source_hub, "not implemented yet")
        elif self.model_source_hub == "jaxhub":
            print("Inference for", self.model_source_hub, "not implemented yet")

    def forward(self, inputs=None):
        if inputs is not None:
            self.setup_inputs(inputs)
        # Preprocess model_path to get the model type and model source hub.
        print("Shark Importer: forward model")
        if self.model_source_hub == "tfhub":
            shark_results = self.shark_module.forward(self.inputs)
            # Fix type information for unsigned cases,
            # for test result comparison.
            shark_results = list(shark_results)
            for i in range(len(self.output_details)):
                dtype = self.output_details[i]["dtype"]
                shark_results[i] = shark_results[i].astype(dtype)
            return shark_results
        elif self.model_source_hub == "huggingface":
            print("Inference for", self.model_source_hub, "not implemented yet")
        elif self.model_source_hub == "jaxhub":
            print("Inference for", self.model_source_hub, "not implemented yet")


def shark_load(model_name, file_path):
    file_link = f"https://storage.googleapis.com/shark_tank/users/stanley/{model_name}.mlir"
    urllib.request.urlretrieve(file_link, file_path)
    if not os.path.isfile(file_path):
        raise ValueError(
            f"Tried looking for target mlir in {file_path}, but it cannot be found."
        )
    with open(file_path, "rb") as input_file:
        model_mlir = input_file.read()
    return model_mlir
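
# Minimal usage sketch for shark_load() (the model name here is hypothetical
# and only works if a matching .mlir exists in the shark_tank bucket above):
#
#   model_mlir = shark_load("minilm", "/tmp/minilm.mlir")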
@@ -1,115 +0,0 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
|
||||
import os
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
# Prints to stderr.
|
||||
def print_err(*a):
|
||||
print(*a, file=sys.stderr)
|
||||
|
||||
|
||||
class SharkInference:
|
||||
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
|
||||
|
||||
def __init__(self,
|
||||
model,
|
||||
input: tuple,
|
||||
device: str = None,
|
||||
dynamic: bool = False,
|
||||
jit_trace: bool = False,
|
||||
benchmark_mode: bool = False):
|
||||
self.model = model
|
||||
self.input = input
|
||||
self.dynamic = dynamic
|
||||
self.jit_trace = jit_trace
|
||||
self.benchmark_mode = benchmark_mode
|
||||
|
||||
# By default it's torch frontend.
|
||||
self.frontend = "pytorch"
|
||||
|
||||
# Sets the device.
|
||||
self.device = device if device is not None else shark_args.device
|
||||
|
||||
self.model_config_path = shark_args.model_config_path
|
||||
|
||||
self.shark_runner = None
|
||||
|
||||
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
||||
def set_frontend(self, frontend: str):
|
||||
if frontend not in [
|
||||
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg",
|
||||
"tosa", "tflite"
|
||||
]:
|
||||
print_err("frontend not supported.")
|
||||
else:
|
||||
self.frontend = frontend
|
||||
|
||||
def compile(self):
|
||||
# Inference do not use AOT.
|
||||
from_aot = False
|
||||
if (self.benchmark_mode == True):
|
||||
self.shark_runner = SharkBenchmarkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend)
|
||||
else:
|
||||
self.shark_runner = SharkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend,
|
||||
self.model_config_path)
|
||||
|
||||
# inputs are considered to be np.array.
|
||||
def forward(self, inputs):
|
||||
input_list = inputs
|
||||
# converts the inputs to numpy.
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
input_list = [x.detach().numpy() for x in inputs]
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
input_list = [x.numpy() for x in inputs]
|
||||
return self.shark_runner.forward(input_list, self.frontend)
|
||||
|
||||
# Saves the .vmfb module.
|
||||
def save_module(self, dir=None):
|
||||
if dir is None:
|
||||
return self.shark_runner.save_module()
|
||||
return self.shark_runner.save_module(dir)
|
||||
|
||||
######### Benchmark Related Functions #########
|
||||
def benchmark_mode(func):
|
||||
|
||||
def inner(self, *args, **kwargs):
|
||||
assert self.benchmark_mode, "SharkRunner needs to be in benchmark mode to run benchmark methods."
|
||||
return func(self, *args, **kwargs)
|
||||
|
||||
return inner
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_all(self, inputs):
|
||||
self.shark_runner.benchmark_all(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_frontend(self, inputs):
|
||||
self.shark_runner.benchmark_frontend(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_python(self, inputs):
|
||||
self.shark_runner.benchmark_python(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_c(self):
|
||||
self.shark_runner.benchmark_c()
|
||||
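
# Minimal usage sketch, mirroring how the test suite drives this class;
# assumes `model` is a torch.nn.Module and `inp` a torch tensor accepted by
# its forward():
#
#   shark_module = SharkInference(model, (inp,), device="cpu", jit_trace=True)
#   shark_module.compile()
#   results = shark_module.forward((inp,))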
@@ -1,205 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from iree.compiler import tf as tfc
import iree.compiler.tflite as ireec_tflite
from torch.utils._python_dispatch import enable_torch_dispatch_mode
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from torch_mlir_e2e_test.eager_backends.refbackend import EagerModeRefBackend

from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import (get_results, get_iree_compiled_module,
                              export_iree_module_to_vmfb,
                              export_module_to_mlir_file,
                              build_benchmark_args, run_benchmark_module)
import os
from shark.parser import shark_args
from tqdm import tqdm
import time


class SharkRunner:
    """Base class for Shark Inference and Shark Runner."""

    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
        model_config_path: str = None,
    ):
        self.model = model
        self.frontend_model = model
        self.from_aot = from_aot
        self.input = input
        self.frontend = frontend
        self.vmfb_file = None
        func_name = "forward"
        self.device = device if device is not None else shark_args.device
        if self.frontend in ["pytorch", "torch"]:
            # Get the torch-mlir dialect; self.model is a torch module.
            # TODO: assert on the model type.
            self.model = get_torch_mlir_module(self.model, input, dynamic,
                                               jit_trace, from_aot)
        elif self.frontend in ["tensorflow", "tf"]:
            # Get the mhlo dialect; self.model is a tf.Module.
            # TODO: assert on the model type.
            self.model = tfc.compile_module(self.model,
                                            exported_names=[func_name],
                                            import_only=True)
        elif self.frontend in ["tflite"]:
            print("Setting up for IREE compiler tflite")
            # Get the tosa dialect; self.model is a .tflite file path.
            # TODO: assert on the model type.
            self.model = ireec_tflite.compile_file(self.model,
                                                   input_type="tosa",
                                                   import_only=True)
            func_name = "main"

        # TODO: We can capture the .vmfb module here and later use it for
        # saving rather than recompiling it again, if used for saving.
        (
            self.iree_compilation_module,
            self.iree_config,
        ) = get_iree_compiled_module(self.model,
                                     self.device,
                                     self.frontend,
                                     func_name=func_name,
                                     model_config_path=model_config_path)

        # Debugging options:
        if shark_args.save_mlir:
            export_module_to_mlir_file(self.model, self.frontend,
                                       shark_args.repro_dir)
        if shark_args.save_vmfb:
            self.vmfb_file = self.save_module(shark_args.repro_dir)

    # All the timings and benchmarking can be done here.
    def forward(self, input, frontend):
        return get_results(self.iree_compilation_module, input,
                           self.iree_config, frontend)

    # TODO: Instead of passing a directory and having names decided by the
    # module, the user may want to save the module with manual names.
    def save_module(self, dir=os.getcwd()):
        return export_iree_module_to_vmfb(self.model, self.device, dir,
                                          self.frontend)

    # TODO: Load a module and directly use it; we will need to set the
    # frontend in this case.
    def load_module(self, name):
        pass


class SharkEagerMode:

    def __init__(self, device="cpu"):
        if device == "refbackend":
            torch_mlir_tensor.backend = EagerModeRefBackend()
        else:
            torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(
                device)
        self.guard = enable_torch_dispatch_mode(TorchMLIRTensor)
        self.guard.__enter__()

    def __del__(self):
        self.guard.__exit__(None, None, None)


class SharkBenchmarkRunner(SharkRunner):
    # SharkRunner derived class with benchmarking capabilities.
    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
    ):
        SharkRunner.__init__(self, model, input, dynamic, device, jit_trace,
                             from_aot, frontend)
        if self.vmfb_file is None:
            self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
                                                        shark_args.repro_dir,
                                                        frontend)
        self.benchmark_cl = build_benchmark_args(self.vmfb_file, device, input,
                                                 frontend, from_aot)

    def benchmark_frontend(self, inputs):
        if self.frontend in ["pytorch", "torch"]:
            self.benchmark_torch(inputs)
        elif self.frontend in ["tensorflow", "tf"]:
            self.benchmark_tf(inputs)

    def benchmark_torch(self, inputs):
        inputs = self.input if self.from_aot else inputs
        inputs = inputs[0]
        for i in range(shark_args.num_warmup_iterations):
            self.frontend_model.forward(inputs)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.frontend_model.forward(inputs)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_tf(self, inputs):
        for i in range(shark_args.num_warmup_iterations):
            self.frontend_model.forward(*inputs)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.frontend_model.forward(*inputs)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
        return

    def benchmark_c(self):
        result = run_benchmark_module(self.benchmark_cl)
        print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")

    def benchmark_python(self, inputs):
        inputs = self.input if self.from_aot else inputs
        input_list = [x for x in inputs]
        for i in range(shark_args.num_warmup_iterations):
            self.forward(input_list, self.frontend)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.forward(input_list, self.frontend)
            if i == shark_args.num_iterations - 1:
                end = time.time()
        print(
            f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_all(self, inputs):
        self.benchmark_frontend(inputs)
        self.benchmark_python(inputs)
        self.benchmark_c()
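
# Sketch of how SharkEagerMode is meant to be held (an assumption from the
# class above, not an official recipe): the guard routes torch dispatch
# through TorchMLIRTensor for as long as the object stays alive.
#
#   eager = SharkEagerMode("cpu")
#   # ... run torch ops here; they dispatch through the eager backend ...
#   del eager  # exits the dispatch mode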
@@ -1,139 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner
from shark.backward_makefx import MakeFxModule
import numpy as np
from tqdm import tqdm
import time
import sys


# Prints to stderr.
def print_err(*a):
    print(*a, file=sys.stderr)


class SharkTrainer:
    """Trains a pytorch or tensorflow module on the shark runtime."""

    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = True,
    ):
        self.model = model
        # Change tuple to list.
        self.input = [x for x in input]
        self.dynamic = dynamic
        self.jit_trace = jit_trace
        self.from_aot = from_aot

        # By default it's the torch frontend.
        self.frontend = "pytorch"
        self.device = device if device is not None else shark_args.device

        self.shark_runner = None

    # Sets the frontend, i.e. `pytorch` or `tensorflow`.
    def set_frontend(self, frontend: str):
        if frontend not in [
                "pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg", "tosa"
        ]:
            print_err("frontend not supported.")
        else:
            self.frontend = frontend

    # A training function is needed in the case of torch_fn.
    def compile(self, training_fn=None):
        if self.frontend in ["torch", "pytorch"]:
            aot_module = MakeFxModule(self.model,
                                      tuple(self.input),
                                      custom_inference_fn=training_fn)
            aot_module.generate_graph()
            # Returns the backward graph.
            training_graph = aot_module.training_graph
            weights = self.get_torch_params()
            self.shark_runner = SharkRunner(training_graph,
                                            weights + self.input, self.dynamic,
                                            self.device, self.jit_trace,
                                            self.from_aot, self.frontend)
        elif self.frontend in ["tensorflow", "tf", "mhlo"]:
            self.shark_runner = SharkRunner(self.model, self.input,
                                            self.dynamic, self.device,
                                            self.jit_trace, self.from_aot,
                                            self.frontend)
        else:
            print_err("Unknown frontend")
            return

    # The inputs to the mlir-graph are weights, buffers and inputs respectively.
    def get_torch_params(self):
        params = [i.detach() for i in self.model.parameters()]
        buffers = [i.detach() for i in self.model.buffers()]
        return params + buffers

    # Function to train a pytorch module.
    def _train_torch(self, num_iters):
        """Returns the updated weights after num_iters."""
        params = self.get_torch_params()
        params = [x.numpy() for x in params]
        print(f"Training started for {num_iters} iterations:")
        for i in tqdm(range(num_iters)):
            params = self.shark_runner.forward(params + self.input,
                                               self.frontend)

        return params

    # Function to train a tensorflow module.
    # Outputs the final loss.
    # TODO(raikonenfnu): Save updated weights/states in SHARK.
    def _train_tf(self, num_iters):
        input_list = []
        for x in self.input:
            if isinstance(x, list):
                nested_list = []
                for val in x:
                    if isinstance(val, np.ndarray):
                        nested_list.append(val)
                    else:
                        nested_list.append(val.numpy())
                input_list.append(nested_list)
            elif isinstance(x, np.ndarray):
                input_list.append(x)
            else:
                input_list.append(x.numpy())

        print(f"Training started for {num_iters} iterations:")
        for i in tqdm(range(num_iters)):
            outputs = self.shark_runner.forward(input_list, self.frontend)
        return outputs

    def train(self, num_iters=1):
        if self.frontend in ["torch", "pytorch"]:
            return self._train_torch(num_iters)
        elif self.frontend in ["tf", "tensorflow", "mhlo"]:
            return self._train_tf(num_iters)
        else:
            print_err("Unknown frontend")
            return
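
# Minimal usage sketch, assuming `model` is a torch.nn.Module whose forward
# computes a loss and `inp` a matching torch tensor; updated parameters come
# back as numpy arrays:
#
#   trainer = SharkTrainer(model, (inp,))
#   trainer.compile()
#   updated_params = trainer.train(num_iters=10)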
@@ -1,52 +0,0 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest

model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"


# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
    for input in input_details:
        print("\t%s, %s" % (str(input["shape"]), input["dtype"].__name__))

    args = []
    args.append(
        np.random.randint(low=0,
                          high=256,
                          size=input_details[0]["shape"],
                          dtype=input_details[0]["dtype"]))
    args.append(
        np.ones(shape=input_details[1]["shape"],
                dtype=input_details[1]["dtype"]))
    args.append(
        np.zeros(shape=input_details[2]["shape"],
                 dtype=input_details[2]["dtype"]))
    return args


# A specific case can be run by commenting out the other cases. Runs all the
# tests across cpu, gpu and vulkan according to available drivers.
pytest_param = pytest.mark.parametrize(
    ('dynamic', 'device'),
    [
        pytest.param(False, 'cpu'),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, 'cpu', marks=pytest.mark.skip),
    ])


@pytest_param
def test_albert(dynamic, device):
    my_shark_importer = SharkImporter(model_path=model_path,
                                      model_type="tflite",
                                      model_source_hub="tfhub",
                                      device=device,
                                      dynamic=dynamic,
                                      jit_trace=True)
    input_details, output_details = my_shark_importer.get_model_details()
    inputs = generate_inputs(input_details)  # device inputs
    my_shark_importer.compile(inputs)
    shark_results = my_shark_importer.forward(inputs)
    # print(shark_results)
@@ -1,133 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import io
import pickle
import sys
import os

from io import StringIO
from torch_mlir.dialects.torch.importer.jit_ir import (
    ClassAnnotator,
    ModuleBuilder,
)
from torch_mlir_e2e_test.torchscript.serialization import (
    extract_serializable_annotations, apply_serializable_annotations,
    SerializableTest)

from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend

from torch_mlir.passmanager import PassManager
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
from torch_mlir.ir import StringAttr


def get_module_name_for_asm_dump(module):
    """Gets a name suitable for an assembly dump.

    The name is not guaranteed to be unique.
    """
    if "torch.debug_module_name" not in module.operation.attributes:
        return "UnnamedModule"
    return StringAttr(
        module.operation.attributes["torch.debug_module_name"]).value


def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
    """Generates the annotation list for the given inputs.

    The leading None stands for the `self` argument; each input contributes
    a (shape, dtype, has_value_semantics) tuple. Dynamic dims are -1.
    """

    annotations_list = [None]
    for i in inputs:
        temp_list = []
        if dynamic:
            temp_list.append([-1 for i in range(len(i.shape))])
        else:
            temp_list.append(list(i.shape))
        temp_list.append(i.dtype)
        temp_list.append(True)
        annotations_list.append(tuple(temp_list))
    return annotations_list


def run_on_refbackend(torch_module, inputs):
    backend = refbackend.RefBackendLinalgOnTensorsBackend()
    compiled = backend.compile(torch_module)
    jit_module = backend.load(compiled)
    np_inputs = [x.numpy() for x in inputs]
    return jit_module.forward(np_inputs[0])


def shark_jit_trace(module, input: tuple, dynamic: bool,
                    tracing_required: bool):
    """Scripts the module (tracing it first when required) and attaches the
    shape/dtype annotations that the importer needs."""

    if not tracing_required:
        return torch.jit.script(module)

    traced_module = torch.jit.trace_module(module, {"forward": input})
    actual_script = traced_module._actual_script_module
    export(actual_script.forward)
    annotate_args_decorator = annotate_args(
        get_input_annotations(input, dynamic))
    annotate_args_decorator(actual_script.forward)
    module = torch.jit.script(actual_script)

    # TODO: remove saved annotations.pickle
    torchscript_module_bytes = module.save_to_buffer({
        "annotations.pkl":
            pickle.dumps(extract_serializable_annotations(module))
    })
    serializable_test = SerializableTest(unique_name="",
                                         program=torchscript_module_bytes,
                                         trace=None)
    _extra_files = {"annotations.pkl": ""}
    module = torch.jit.load(io.BytesIO(serializable_test.program),
                            _extra_files=_extra_files)
    # Load the pickled annotations.
    annotations = pickle.loads(_extra_files["annotations.pkl"])
    apply_serializable_annotations(module, annotations)
    return module


def get_torch_mlir_module(
    module,
    input: tuple,
    dynamic: bool,
    tracing_required: bool,
    from_aot: bool = False,
):
    """Imports a torch module and lowers it through the torch backend to the
    linalg-on-tensors contract, returning the resulting MLIR module."""

    # Tracing is not required from the aot_module.
    if not from_aot:
        module = shark_jit_trace(module, input, dynamic, tracing_required)

    mb = ModuleBuilder()
    class_annotator = ClassAnnotator()
    class_annotator.exportNone(module._c._type())
    class_annotator.exportPath(module._c._type(), ["forward"])
    class_annotator.annotateArgs(
        module._c._type(),
        ["forward"],
        get_input_annotations(input, dynamic),
    )
    mb.import_module(module._c, class_annotator)

    with mb.module.context:
        pm = PassManager.parse(
            "torchscript-module-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline"
        )
        pm.run(mb.module)

    return mb.module
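
# For a single static input of shape (1, 128) and dtype torch.int64,
# get_input_annotations() above produces (the leading None stands for `self`):
#
#   [None, ([1, 128], torch.int64, True)]
#
# With dynamic=True the shape entry becomes [-1, -1] instead.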
@@ -1,74 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import torch
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib

torch.manual_seed(0)

##################### Hugging Face LM Models ###################################


class HuggingFaceLanguage(torch.nn.Module):

    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels, 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_model(name):
    model = HuggingFaceLanguage(name)
    # TODO: Currently the test input is set to (1,128)
    test_input = torch.randint(2, (1, 128))
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################

##################### Torch Vision Models ###################################


class VisionModule(torch.nn.Module):

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


def get_vision_model(torch_model):
    model = VisionModule(torch_model)
    # TODO: Currently the test input is set to (1, 3, 224, 224)
    test_input = torch.randn(1, 3, 224, 224)
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################


# Utility function for comparing two tensors (torch).
def compare_tensors(torch_tensor, numpy_tensor):
    # Setting the absolute and relative tolerance.
    rtol = 1e-02
    atol = 1e-03
    torch_to_numpy = torch_tensor.detach().numpy()
    return np.allclose(torch_to_numpy, numpy_tensor, rtol, atol)
@@ -1,63 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import tensorflow as tf
import numpy as np
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib

##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class TFHuggingFaceLanguage(tf.Module):

    def __init__(self, hf_model_name):
        super(TFHuggingFaceLanguage, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=tf_bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


def get_TFhf_model(name):
    model = TFHuggingFaceLanguage(name)
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out


# Utility function for comparing two tensors (tensorflow).
def compare_tensors_tf(tf_tensor, numpy_tensor):
    # Setting the absolute and relative tolerance.
    rtol = 1e-02
    atol = 1e-03
    tf_to_numpy = tf_tensor.pooler_output.numpy()
    return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)
@@ -1,92 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

# torch.manual_seed(0)


class AlbertModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("albert-base-v2")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class AlbertModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = AlbertModuleTester()
        self.module_tester.save_mlir = self.save_mlir

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Albert model on GPU currently fails to produce torch numbers")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Static albert model on vulkan currently fails to validate.")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class AlexnetModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.alexnet(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class AlexnetModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = AlexnetModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,91 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

# torch.manual_seed(0)


class BertModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("bert-base-uncased")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class BertModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = BertModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="BERT model on GPU currently fails to produce torch numbers")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,3 +0,0 @@
def pytest_addoption(parser):
    # Attaches SHARK command-line arguments to the pytest machinery.
    parser.addoption("--save_mlir",
                     action="store_true",
                     default=False,
                     help="Pass option to save input MLIR module to /tmp/ directory.")
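
# With the hook above in place, the flag can be passed straight to pytest
# (illustrative invocation; any test file in the suite works):
#
#   pytest <some_test_file>.py --save_mlir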
@@ -1,91 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

torch.manual_seed(0)


class MiniLMModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class MiniLMModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = MiniLMModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="minilm inference on gpu currently returns invalid results")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,89 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet101ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet101(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet101ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet101ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet18ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet18(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet18ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet18ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet50ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet50(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet50ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet50ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class SqueezenetModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.squeezenet1_0(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class SqueezenetModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = SqueezenetModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,111 +0,0 @@
import argparse
import os
from functools import partial

import clip
import torch
from torchvision import transforms
from tqdm import trange

try:
    from diffusion import get_model, sampling, utils
except ModuleNotFoundError:
    print(
        "You need to download v-diffusion source from https://github.com/crowsonkb/v-diffusion-pytorch"
    )
    raise

torch.manual_seed(0)


def parse_prompt(prompt, default_weight=3.0):
    # Split a "text:weight" prompt, keeping "http(s)://" colons intact and
    # falling back to default_weight when no weight is given.
    if prompt.startswith("http://") or prompt.startswith("https://"):
        vals = prompt.rsplit(":", 2)
        vals = [vals[0] + ":" + vals[1], *vals[2:]]
    else:
        vals = prompt.rsplit(":", 1)
    vals = vals + ["", default_weight][len(vals):]
    return vals[0], float(vals[1])


args = argparse.Namespace(
    prompts=["New York City, oil on canvas"],
    batch_size=1,
    device="cuda",
    model="cc12m_1_cfg",
    n=1,
    steps=10,
)

device = torch.device(args.device)
print("Using device:", device)

model = get_model(args.model)()
_, side_y, side_x = model.shape
checkpoint = f"{args.model}.pth"
if os.path.exists(checkpoint):
    model.load_state_dict(torch.load(checkpoint, map_location="cpu"))

model = model.to(device).eval().requires_grad_(False)
clip_model_name = model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
clip_model.eval().requires_grad_(False)
normalize = transforms.Normalize(
    mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]
)

zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
target_embeds, weights = [zero_embed], []

txt, weight = parse_prompt(args.prompts[0])
target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
weights.append(weight)

weights = torch.tensor([1 - sum(weights), *weights], device=device)


def cfg_model_fn(model, x, t):
    n = x.shape[0]
    n_conds = len(target_embeds)
    x_in = x.repeat([n_conds, 1, 1, 1])
    t_in = t.repeat([n_conds])
    clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
    vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
    v = vs.mul(weights[:, None, None, None, None]).sum(0)
    return v


x = torch.randn([args.n, 3, side_y, side_x], device=device)
t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]


def repro(model):
    if device.type == "cuda":
        model = model.half()

    steps = utils.get_spliced_ddpm_cosine_schedule(t)
    for i in trange(0, args.n, args.batch_size):
        cur_batch_size = min(args.n - i, args.batch_size)
        outs = sampling.plms_sample(
            partial(cfg_model_fn, model), x[i : i + cur_batch_size], steps, {}
        )
        for j, out in enumerate(outs):
            utils.to_pil_image(out).save(f"out_{i + j:05}.png")


def trace(model, x, t):
    n = x.shape[0]
    n_conds = len(target_embeds)
    x_in = x.repeat([n_conds, 1, 1, 1])
    t_in = t.repeat([n_conds])
    clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
    ts_mod = torch.jit.trace(model, (x_in, t_in, clip_embed_in))
    print(ts_mod.graph)

    clip_model = clip.load(clip_model_name, jit=True, device=device)[0]
    print(clip_model.graph)


# You can't run both of these because repro will `.half()` the model.
# repro(model)
trace(model, x, t[0])
Binary file not shown (deleted image, 145 KiB).
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class WideResnet50ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.wide_resnet50_2(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class WideResnet50ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = WideResnet50ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,15 +0,0 @@
## Running SharkInference on CPUs, GPUs, and Macs

### Run the sequence_classification binary.
#### Supported models: [Hugging Face sequence classification](https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification)
```shell
./seq_classification.py --hf_model_name="hf_model" --device="cpu" # Use gpu | vulkan
```

Once the model is compiled to run on the chosen device, we can pass in text and
get the logits, as sketched below.
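A minimal sketch of that flow, assuming the `SeqClassification` wrapper and `preprocess_input` helper defined in `seq_classification.py` (shown later in this diff); the model name is just an example:

```python
# Sketch only: SeqClassification and preprocess_input come from
# seq_classification.py in this directory.
from shark.shark_inference import SharkInference

compile_inputs = preprocess_input()  # default text, used only to trace/compile
shark_module = SharkInference(
    SeqClassification("bert-base-uncased"),
    (compile_inputs["input_ids"], compile_inputs["attention_mask"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()

# Any text can now be classified; forward() returns the softmaxed logits.
inputs = preprocess_input("This movie was great!")
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
```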
@@ -1,47 +0,0 @@
from transformers import TFAutoModelForMaskedLM
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of input signatures.
inputs_signature = [
    tf.TensorSpec(shape=[1, 512], dtype=tf.int32),
]


class AutoModelMaskedLM(tf.Module):

    def __init__(self, model_name):
        super(AutoModelMaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(model_name,
                                                        output_attentions=False)
        self.m.predict = lambda x: self.m(input_ids=x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids):
        return self.m.predict(input_ids)


fail_models = ["microsoft/deberta-base", "google/rembert", "google/tapas-base"]

supported_models = [
    "albert-base-v2", "bert-base-uncased", "camembert-base",
    "dbmdz/convbert-base-turkish-cased", "distilbert-base-uncased",
    "google/electra-small-discriminator",
    "hf-internal-testing/tiny-random-flaubert", "funnel-transformer/small",
    "microsoft/layoutlm-base-uncased", "allenai/longformer-base-4096",
    "google/mobilebert-uncased", "microsoft/mpnet-base", "roberta-base",
    "xlm-roberta-base"
]

if __name__ == "__main__":
    inputs = tf.random.uniform(shape=[1, 512],
                               maxval=3,
                               dtype=tf.int32,
                               seed=10)

    for model_name in supported_models:
        print(f"Running model: {model_name}")
        shark_module = SharkInference(AutoModelMaskedLM(model_name), (inputs,))
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        print(shark_module.forward((inputs,)))
@@ -1,90 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input0: input_word_ids
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input1: input_mask
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input2: segment_ids
        tf.TensorSpec([BATCH_SIZE], tf.int32)  # input3: labels
    ])
    def learn(self, input_word_ids, input_mask, segment_ids, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            inputs = [input_word_ids, input_mask, segment_ids]
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        inputs = [input_word_ids, input_mask, segment_ids]
        return self.m.predict(inputs)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer (import_only=True
    # returns the textual module rather than a compiled binary).
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,123 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false",
        "--iree-stream-resource-index-bits=64", "--iree-vm-target-index-bits=64"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    learn_sample_input = [
        predict_sample_input,
        np.random.randint(5, size=(BATCH_SIZE))
    ]
    # Benchmark: ignore the first `warmup` iterations.
    warmup = 5
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the clock only after the last warmup iteration completes.
        if i == warmup - 1:
            start = time.time()
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,85 +0,0 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    bert_model = BertModule()
    # Benchmark: the clock starts after the last warmup iteration completes.
    warmup = 1
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            bert_model.learn(predict_sample_input,
                             np.random.randint(5, size=(BATCH_SIZE))))
        if i == warmup - 1:
            start = time.time()

    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,89 +0,0 @@
from iree import runtime as ireert
# from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input0: input_word_ids
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input1: input_mask
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input2: segment_ids
        tf.TensorSpec([BATCH_SIZE], tf.int32)  # input3: labels
    ])
    def learn(self, input_word_ids, input_mask, segment_ids, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            inputs = [input_word_ids, input_mask, segment_ids]
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        inputs = [input_word_ids, input_mask, segment_ids]
        return self.m.predict(inputs)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)
    print(type(compiler_module))
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,120 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    learn_sample_input = [
        predict_sample_input,
        np.random.randint(5, size=(BATCH_SIZE))
    ]
    # Benchmark: ignore the first `warmup` iterations.
    warmup = 5
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the clock only after the last warmup iteration completes.
        if i == warmup - 1:
            start = time.time()
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,83 +0,0 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    bert_model = BertModule()
    # Benchmark: the clock starts after the last warmup iteration completes.
    warmup = 1
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            bert_model.learn(predict_sample_input,
                             np.random.randint(5, size=(BATCH_SIZE))))
        if i == warmup - 1:
            start = time.time()

    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,52 +0,0 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from transformers import BertModel, BertTokenizer, TFBertModel

SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        return self.m.predict(input_word_ids, input_mask, segment_ids)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["predict"],
                                         import_only=True)
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,87 +0,0 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

import time
from transformers import BertModel, BertTokenizer, TFBertModel

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def predict(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prep the data.
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["predict"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    result = BertCompiled.predict(encoded_input["input_ids"],
                                  encoded_input["attention_mask"],
                                  encoded_input["token_type_ids"])
    print(result)
@@ -1,18 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel

tf_model = TFBertModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased",
                                       from_pt=True)
tokenizer = BertTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")

text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
                          padding='max_length',
                          truncation=True,
                          max_length=512)
for key in encoded_input:
    encoded_input[key] = tf.expand_dims(
        tf.convert_to_tensor(encoded_input[key]), 0)
output = tf_model(encoded_input)

print(output)
@@ -1,99 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils_tf import get_TFhf_model, compare_tensors_tf

import tensorflow as tf
import unittest
import numpy as np
import pytest

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class MiniLMTFModuleTester:

    def create_and_check_module(self, dynamic, device):
        model, input, act_out = get_TFhf_model(
            "microsoft/MiniLM-L12-H384-uncased")
        shark_module = SharkInference(model, (input,),
                                      device=device,
                                      dynamic=dynamic,
                                      jit_trace=True)
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors_tf(act_out, results)


class MiniLMTFModuleTest(unittest.TestCase):

    def setUp(self):
        self.module_tester = MiniLMTFModuleTester()

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        dynamic = True
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.skipif(check_device_drivers("gpu"),
                        reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"),
                        reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        dynamic = True
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == '__main__':
    unittest.main()
@@ -1,70 +0,0 @@
#!/usr/bin/env python
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.parser import shark_args
import argparse
import os


seq_parser = argparse.ArgumentParser(description='Shark Sequence Classification.')
seq_parser.add_argument(
    "--hf_model_name",
    type=str,
    default="bert-base-uncased",
    help="Hugging Face model to run sequence classification.")

seq_args, unknown = seq_parser.parse_known_args()


BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signatures.
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification


def preprocess_input(text="This is just used to compile the model"):
    tokenizer = AutoTokenizer.from_pretrained(seq_args.hf_model_name)
    inputs = tokenizer(text,
                       padding="max_length",
                       return_tensors="tf",
                       truncation=True,
                       max_length=MAX_SEQUENCE_LENGTH)
    return inputs


class SeqClassification(tf.Module):

    def __init__(self, model_name):
        super(SeqClassification, self).__init__()
        self.m = TFAutoModelForSequenceClassification.from_pretrained(
            model_name, output_attentions=False, num_labels=2)
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return tf.math.softmax(self.m.predict(input_ids, attention_mask),
                               axis=-1)


if __name__ == "__main__":
    inputs = preprocess_input()
    shark_module = SharkInference(
        SeqClassification(seq_args.hf_model_name),
        (inputs["input_ids"], inputs["attention_mask"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(f"Model has been successfully compiled on {shark_args.device}")

    while True:
        input_text = input("Enter the text to classify (press q or nothing to exit): ")
        if not input_text or input_text == "q":
            break
        inputs = preprocess_input(input_text)
        print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
tank/tflite/.gitignore (vendored)
@@ -1,2 +0,0 @@
tmp/
.lit_test_times.txt

@@ -1,15 +0,0 @@
# Sample compile and execution of TFLite models

This directory contains test scripts to compile/run/compare various TFLite
models from TFHub. It aims for simplicity and hackability.

Follow the instructions at the repository root to install a functioning
python venv. Then you can just run individual python files.

Or, use something like the following to collect all artifacts and traces,
which can be fed to other tools:

```
export IREE_SAVE_TEMPS="/tmp/iree/models/{main}/{id}"
for i in *.py; do export IREE_SAVE_CALLS=/tmp/iree/traces/$i; python $i; done
```
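Each per-model script in this directory follows the same basic shape: a subclass of the shared `test_util.TFLiteModelTest` harness pointing at a `.tflite` URL, with `compile_and_execute()` driving compilation and the IREE-vs-TFLite comparison. A minimal sketch of that pattern is below; the class name `SampleModelTest` is hypothetical, and the model URL is borrowed from the DenseNet test further down.

```
# RUN: %PYTHON %s
# Minimal, hypothetical example of the per-model test pattern used in this
# directory; real tests may also override generate_inputs/compare_results.

import absl.testing
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"


class SampleModelTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(SampleModelTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        # Fetches the flatbuffer, compiles it, runs it, and compares the
        # compiled outputs against the TFLite interpreter's outputs.
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()
```
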
@@ -1,44 +0,0 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest

model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"


# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
    for input_detail in input_details:
        print(str(input_detail["shape"]), input_detail["dtype"].__name__)

    args = []
    args.append(
        np.random.randint(low=0,
                          high=256,
                          size=input_details[0]["shape"],
                          dtype=input_details[0]["dtype"]))
    args.append(
        np.ones(shape=input_details[1]["shape"],
                dtype=input_details[1]["dtype"]))
    args.append(
        np.zeros(shape=input_details[2]["shape"],
                 dtype=input_details[2]["dtype"]))
    return args


if __name__ == '__main__':
    my_shark_importer = SharkImporter(model_path=model_path,
                                      model_type="tflite",
                                      model_source_hub="tfhub",
                                      device="cpu",
                                      dynamic=False,
                                      jit_trace=True)
    # Case 1: Use default inputs.
    my_shark_importer.compile()
    shark_results = my_shark_importer.forward()
    # Case 2: Use manually set inputs.
    input_details, output_details = my_shark_importer.get_model_details()
    inputs = generate_inputs(input_details)  # device_inputs
    my_shark_importer.compile(inputs)
    shark_results = my_shark_importer.forward(inputs)
    # print(shark_results)

@@ -1,22 +0,0 @@
# RUN: %PYTHON %s
# XFAIL: *

import absl.testing
import test_util

model_path = "https://tfhub.dev/neso613/lite-model/ASR_TFLite/pre_trained_models/English/1?lite-format=tflite"


# Failure is due to dynamic shapes:
# - Some improvements to tfl.strided_slice lowering are next steps
class AsrConformerTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(AsrConformerTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,39 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util
import urllib.request

from PIL import Image

model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"


class BirdClassifierTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(BirdClassifierTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(BirdClassifierTest, self).compare_results(iree_results,
                                                        tflite_results, details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

    def generate_inputs(self, input_details):
        img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
        local_path = "/".join([self.workdir, "bird.bmp"])
        urllib.request.urlretrieve(img_path, local_path)

        shape = input_details[0]["shape"]
        im = numpy.array(Image.open(local_path).resize((shape[1], shape[2])))
        args = [im.reshape(shape)]
        return args

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,20 +0,0 @@
# RUN: %PYTHON %s
# REQUIRES: hugetest

import absl.testing
import test_util

model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"


class CartoonGanTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(CartoonGanTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,16 +0,0 @@
import numpy as np
import urllib.request

from PIL import Image


# Returns a sample image in the COCO 2017 dataset in uint8.
def generate_input(workdir, input_details):
    # We use an image of a bear since this is an easy example.
    img_path = "https://storage.googleapis.com/iree-model-artifacts/coco_2017_000000000285.jpg"
    local_path = "/".join([workdir, "coco_2017_000000000285.jpg"])
    urllib.request.urlretrieve(img_path, local_path)

    shape = input_details[0]["shape"]
    im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
    return im.reshape(shape)

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s
# XFAIL: *

import absl.testing
import test_util

model_path = "https://tfhub.dev/tulasiram58827/lite-model/craft-text-detector/dr/1?lite-format=tflite"


# Failure: Resize lowering does not handle inferred dynamic shapes. Furthermore,
# the entire model requires dynamic shape support.
class CraftTextTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(CraftTextTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(CraftTextTest, self).compare_results(iree_results, tflite_results,
                                                   details)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2?lite-format=tflite"


class DeepLabV3Test(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(DeepLabV3Test, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(DeepLabV3Test, self).compare_results(iree_results, tflite_results,
                                                   details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"


class DenseNetTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(DenseNetTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(DenseNetTest, self).compare_results(iree_results, tflite_results,
                                                  details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-5).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,35 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/sayakpaul/lite-model/east-text-detector/dr/1?lite-format=tflite"


class EastTextDetectorTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(EastTextDetectorTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(EastTextDetectorTest,
              self).compare_results(iree_results, tflite_results, details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

        # The second output is extremely noisy as it is not a binary
        # classification. To handle this we check the normalized correlation
        # with an expectation of "close enough".
        iree_norm = numpy.sqrt(iree_results[1] * iree_results[1])
        tflite_norm = numpy.sqrt(tflite_results[1] * tflite_results[1])

        correlation = numpy.average(iree_results[1] * tflite_results[1] /
                                    iree_norm / tflite_norm)
        self.assertTrue(numpy.isclose(correlation, 1.0, atol=1e-2).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,39 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import imagenet_test_data
import numpy
import test_util

# Source: https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/int8/2
model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite"


class EfficientnetLite0Int8Test(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(EfficientnetLite0Int8Test, self).__init__(model_path, *args,
                                                        **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(EfficientnetLite0Int8Test,
              self).compare_results(iree_results, tflite_results, details)
        # Dequantize outputs.
        zero_point = details[0]['quantization_parameters']['zero_points'][0]
        scale = details[0]['quantization_parameters']['scales'][0]
        dequantized_iree_results = (iree_results - zero_point) * scale
        dequantized_tflite_results = (tflite_results - zero_point) * scale
        self.assertTrue(
            numpy.isclose(dequantized_iree_results,
                          dequantized_tflite_results,
                          atol=5e-3).all())

    def generate_inputs(self, input_details):
        return [imagenet_test_data.generate_input(self.workdir, input_details)]

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

Some files were not shown because too many files have changed in this diff.