Mirror of https://github.com/nod-ai/SHARK-Studio.git, synced 2026-01-09 22:07:55 -05:00

commit fe03539901 (parent dba2c8a567), committed by GitHub

Migration to AMDShark (#2182)

Signed-off-by: pdhirajkumarprasad <dhirajp@amd.com>
.github/workflows/gh-pages-releases.yml (vendored, 4 changes)

@@ -10,7 +10,7 @@ jobs:
 runs-on: ubuntu-latest

 # Don't run this in everyone's forks.
-if: github.repository == 'nod-ai/SHARK'
+if: github.repository == 'nod-ai/AMDSHARK'

 steps:
 - name: Checking out repository
@@ -18,7 +18,7 @@ jobs:
 with:
 token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
 - name: Run scrape releases script
-run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
+run: python ./build_tools/scrape_releases.py nod-ai AMDSHARK > /tmp/index.html
 shell: bash
 - run: git fetch --all
 - run: git switch github-pages
.github/workflows/nightly.yml (vendored, 12 changes)

@@ -40,9 +40,9 @@ jobs:
 GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
 with:
 tag: ${{ env.tag_name }}
-name: nod.ai SHARK ${{ env.tag_name }}
+name: nod.ai AMDSHARK ${{ env.tag_name }}
 body: |
-Automatic snapshot release of nod.ai SHARK.
+Automatic snapshot release of nod.ai AMDSHARK.
 draft: true
 prerelease: true

@@ -51,12 +51,12 @@ jobs:
 run: |
 ./setup_venv.ps1
 python process_skipfiles.py
-$env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
+$env:AMDSHARK_PACKAGE_VERSION=${{ env.package_version }}
 pip install -e .
 pip freeze -l
-pyinstaller .\apps\shark_studio\shark_studio.spec
+pyinstaller .\apps\amdshark_studio\amdshark_studio.spec
-mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
+mv ./dist/nodai_amdshark_studio.exe ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe
-signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
+signtool sign /f c:\g\amdshark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe

 - name: Upload Release Assets
 id: upload-release-assets
.github/workflows/test-studio.yml (vendored, 14 changes)

@@ -1,19 +1,19 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

-name: Validate Shark Studio
+name: Validate AMDShark Studio

 on:
 push:
 branches: [ main ]
 paths-ignore:
 - '**.md'
-- 'shark/examples/**'
+- 'amdshark/examples/**'
 pull_request:
 branches: [ main ]
 paths-ignore:
 - '**.md'
-- 'shark/examples/**'
+- 'amdshark/examples/**'
 workflow_dispatch:

 # Ensure that only a single job or workflow using the same
@@ -66,7 +66,7 @@ jobs:
 run: |
 # black format check
 black --version
-black --check apps/shark_studio
+black --check apps/amdshark_studio
 # stop the build if there are Python syntax errors or undefined names
 flake8 . --statistics
 # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
@@ -77,9 +77,9 @@ jobs:
 if: matrix.suite == 'cpu'
 run: |
 cd $GITHUB_WORKSPACE
-python${{ matrix.python-version }} -m venv shark.venv
+python${{ matrix.python-version }} -m venv amdshark.venv
-source shark.venv/bin/activate
+source amdshark.venv/bin/activate
 pip install -r requirements.txt --no-cache-dir
 pip install -e .
 # Disabled due to hang when exporting test llama2
-# python apps/shark_studio/tests/api_test.py
+# python apps/amdshark_studio/tests/api_test.py
.gitignore (vendored, 8 changes)

@@ -164,15 +164,15 @@ cython_debug/
 # vscode related
 .vscode

-# Shark related artifacts
+# AMDShark related artifacts
 *venv/
-shark_tmp/
+amdshark_tmp/
 *.vmfb
 .use-iree
 tank/dict_configs.py
 *.csv
 reproducers/
-apps/shark_studio/web/configs
+apps/amdshark_studio/web/configs

 # ORT related artefacts
 cache_models/
@@ -189,7 +189,7 @@ variants.json
 # models folder
 apps/stable_diffusion/web/models/

-# model artifacts (SHARK)
+# model artifacts (AMDSHARK)
 *.tempfile
 *.mlir
 *.vmfb
.gitmodules (vendored, 6 changes)

@@ -1,4 +1,4 @@
-[submodule "inference/thirdparty/shark-runtime"]
+[submodule "inference/thirdparty/amdshark-runtime"]
-path = inference/thirdparty/shark-runtime
+path = inference/thirdparty/amdshark-runtime
 url =https://github.com/nod-ai/SRT.git
-branch = shark-06032022
+branch = amdshark-06032022
README.md (118 changes)

@@ -1,12 +1,12 @@
-# SHARK
+# AMDSHARK

 High Performance Machine Learning Distribution

 <h2>NOTE: This project is not currently maintained.</h2>

-*The latest versions of this project are developments towards a refactor on top of IREE-Turbine. Until further notice, make sure you use an .exe release or a checkout of the `SHARK-1.0` branch, for a working SHARK-Studio*
+*The latest versions of this project are developments towards a refactor on top of IREE-Turbine. Until further notice, make sure you use an .exe release or a checkout of the `AMDSHARK-1.0` branch, for a working AMDSHARK-Studio*

-[](https://github.com/nod-ai/SHARK-Studio/actions/workflows/nightly.yml)
+[](https://github.com/nod-ai/AMDSHARK-Studio/actions/workflows/nightly.yml)

 <details>
 <summary>Prerequisites - Drivers </summary>
@@ -25,11 +25,11 @@ Other users please ensure you have your latest vendor drivers and Vulkan SDK fro

-### Quick Start for SHARK Stable Diffusion for Windows 10/11 Users
+### Quick Start for AMDSHARK Stable Diffusion for Windows 10/11 Users

-Install the Driver from [Prerequisites](https://github.com/nod-ai/SHARK-Studio#install-your-hardware-drivers) above
+Install the Driver from [Prerequisites](https://github.com/nod-ai/AMDSHARK-Studio#install-your-hardware-drivers) above

-Download the [stable release](https://github.com/nod-ai/SHARK-Studio/releases/latest) or the most recent [SHARK 1.0 pre-release](https://github.com/nod-ai/SHARK-Studio/releases).
+Download the [stable release](https://github.com/nod-ai/AMDSHARK-Studio/releases/latest) or the most recent [AMDSHARK 1.0 pre-release](https://github.com/nod-ai/AMDSHARK-Studio/releases).

 Double click the .exe, or [run from the command line](#running) (recommended), and you should have the [UI](http://localhost:8080/) in the browser.

@@ -67,16 +67,16 @@ Enjoy.
 ## Check out the code

 ```shell
-git clone https://github.com/nod-ai/SHARK.git
+git clone https://github.com/nod-ai/AMDSHARK.git
-cd SHARK
+cd AMDSHARK
 ```

 ## Switch to the Correct Branch (IMPORTANT!)

-Currently SHARK is being rebuilt for [Turbine](https://github.com/iree-org/iree-turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation, So for now you'll need switch over to the `SHARK-1.0` branch and use the stable code.
+Currently AMDSHARK is being rebuilt for [Turbine](https://github.com/iree-org/iree-turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation, So for now you'll need switch over to the `AMDSHARK-1.0` branch and use the stable code.

 ```shell
-git checkout SHARK-1.0
+git checkout AMDSHARK-1.0
 ```

 The following setup instructions assume you are on this branch.
@@ -92,7 +92,7 @@ The following setup instructions assume you are on this branch.
 set-executionpolicy remotesigned
 ```

-#### Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...)
+#### Setup venv and install necessary packages (torch-mlir, nodLabs/AMDShark, ...)
 ```powershell
 ./setup_venv.ps1 #You can re-run this script to get the latest version
 ```
@@ -101,20 +101,20 @@ set-executionpolicy remotesigned

 ```shell
 ./setup_venv.sh
-source shark1.venv/bin/activate
+source amdshark1.venv/bin/activate
 ```

 ### Run Stable Diffusion on your device - WebUI

 #### Windows 10/11 Users
 ```powershell
-(shark1.venv) PS C:\g\shark> cd .\apps\stable_diffusion\web\
+(amdshark1.venv) PS C:\g\amdshark> cd .\apps\stable_diffusion\web\
-(shark1.venv) PS C:\g\shark\apps\stable_diffusion\web> python .\index.py
+(amdshark1.venv) PS C:\g\amdshark\apps\stable_diffusion\web> python .\index.py
 ```
 #### Linux / macOS Users
 ```shell
-(shark1.venv) > cd apps/stable_diffusion/web
+(amdshark1.venv) > cd apps/stable_diffusion/web
-(shark1.venv) > python index.py
+(amdshark1.venv) > python index.py
 ```

 #### Access Stable Diffusion on http://localhost:8080/?__theme=dark
@@ -128,7 +128,7 @@ source shark1.venv/bin/activate

 #### Windows 10/11 Users
 ```powershell
-(shark1.venv) PS C:\g\shark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
+(amdshark1.venv) PS C:\g\amdshark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
 ```

 #### Linux / macOS Users
@@ -156,7 +156,7 @@ Here are some samples generated:
 


-Find us on [SHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
+Find us on [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.


 <details>
@@ -168,8 +168,8 @@ This step sets up a new VirtualEnv for Python

 ```shell
 python --version #Check you have 3.11 on Linux, macOS or Windows Powershell
-python -m venv shark_venv
+python -m venv amdshark_venv
-source shark_venv/bin/activate # Use shark_venv/Scripts/activate on Windows
+source amdshark_venv/bin/activate # Use amdshark_venv/Scripts/activate on Windows

 # If you are using conda create and activate a new conda env

@@ -179,15 +179,15 @@ python -m pip install --upgrade pip

 *macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"

-### Install SHARK
+### Install AMDSHARK

-This step pip installs SHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11
+This step pip installs AMDSHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11

 ```shell
-pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+pip install nodai-amdshark -f https://nod-ai.github.io/AMDSHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 ```

-### Run shark tank model tests.
+### Run amdshark tank model tests.
 ```shell
 pytest tank/test_models.py
 ```
@@ -196,7 +196,7 @@ See tank/README.md for a more detailed walkthrough of our pytest suite and CLI.
 ### Download and run Resnet50 sample

 ```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
+curl -O https://raw.githubusercontent.com/nod-ai/AMDSHARK/main/amdshark/examples/amdshark_inference/resnet50_script.py
 #Install deps for test script
 pip install --pre torch torchvision torchaudio tqdm pillow gsutil --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
@@ -204,7 +204,7 @@ python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal

 ### Download and run BERT (MiniLM) sample
 ```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
+curl -O https://raw.githubusercontent.com/nod-ai/AMDSHARK/main/amdshark/examples/amdshark_inference/minilm_jit.py
 #Install deps for test script
 pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
@@ -222,34 +222,34 @@ Set `USE_IREE=1` to use upstream IREE
 # PYTHON=python3.11 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh
 ```

-### Run any of the hundreds of SHARK tank models via the test framework
+### Run any of the hundreds of AMDSHARK tank models via the test framework
 ```shell
-python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
+python -m amdshark.examples.amdshark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
 # Or a pytest
 pytest tank/test_models.py -k "MiniLM"
 ```

-### How to use your locally built IREE / Torch-MLIR with SHARK
+### How to use your locally built IREE / Torch-MLIR with AMDSHARK
 If you are a *Torch-mlir developer or an IREE developer* and want to test local changes you can uninstall
 the provided packages with `pip uninstall torch-mlir` and / or `pip uninstall iree-compiler iree-runtime` and build locally
 with Python bindings and set your PYTHONPATH as mentioned [here](https://github.com/iree-org/iree/tree/main/docs/api_docs/python#install-iree-binaries)
 for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
 for Torch-MLIR.

-How to use your locally built Torch-MLIR with SHARK:
+How to use your locally built Torch-MLIR with AMDSHARK:
 ```shell
-1.) Run `./setup_venv.sh in SHARK` and activate `shark.venv` virtual env.
+1.) Run `./setup_venv.sh in AMDSHARK` and activate `amdshark.venv` virtual env.
 2.) Run `pip uninstall torch-mlir`.
 3.) Go to your local Torch-MLIR directory.
 4.) Activate mlir_venv virtual envirnoment.
 5.) Run `pip uninstall -r requirements.txt`.
 6.) Run `pip install -r requirements.txt`.
 7.) Build Torch-MLIR.
-8.) Activate shark.venv virtual environment from the Torch-MLIR directory.
+8.) Activate amdshark.venv virtual environment from the Torch-MLIR directory.
 8.) Run `export PYTHONPATH=`pwd`/build/tools/torch-mlir/python_packages/torch_mlir:`pwd`/examples` in the Torch-MLIR directory.
-9.) Go to the SHARK directory.
+9.) Go to the AMDSHARK directory.
 ```
-Now the SHARK will use your locally build Torch-MLIR repo.
+Now the AMDSHARK will use your locally build Torch-MLIR repo.


 ## Benchmarking Dispatches
@@ -263,10 +263,10 @@ pytest -k "MiniLM and torch and static and cuda" --benchmark_dispatches=All -s -
 ```
 The given command will populate `<dispatch_benchmarks_dir>/<model_name>/` with an `ordered_dispatches.txt` that lists and orders the dispatches and their latencies, as well as folders for each dispatch that contain .mlir, .vmfb, and results of the benchmark for that dispatch.

-if you want to instead incorporate this into a python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` commands when initializing `SharkInference`, and the benchmarks will be generated when compiled. E.G:
+if you want to instead incorporate this into a python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` commands when initializing `AMDSharkInference`, and the benchmarks will be generated when compiled. E.G:

 ```
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_model,
 device=args.device,
 mlir_dialect="tm_tensor",
@@ -285,34 +285,34 @@ Output will include:
 - A .txt file containing benchmark output


-See tank/README.md for further instructions on how to run model tests and benchmarks from the SHARK tank.
+See tank/README.md for further instructions on how to run model tests and benchmarks from the AMDSHARK tank.

 </details>

 <details>
 <summary>API Reference</summary>

-### Shark Inference API
+### AMDShark Inference API

 ```

-from shark.shark_importer import SharkImporter
+from amdshark.amdshark_importer import AMDSharkImporter

-# SharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
+# AMDSharkImporter imports mlir file from the torch, tensorflow or tf-lite module.

-mlir_importer = SharkImporter(
+mlir_importer = AMDSharkImporter(
 torch_module,
 (input),
 frontend="torch", #tf, #tf-lite
 )
 torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)

-# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
+# AMDSharkInference accepts mlir in linalg, mhlo, and tosa dialect.

-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
-shark_module = SharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
+amdshark_module = AMDSharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
-shark_module.compile()
+amdshark_module.compile()
-result = shark_module.forward((input))
+result = amdshark_module.forward((input))

 ```

@@ -320,7 +320,7 @@ result = shark_module.forward((input))
 ### Example demonstrating running MHLO IR.

 ```
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
 import numpy as np

 mhlo_ir = r"""builtin.module {
@@ -333,22 +333,22 @@ mhlo_ir = r"""builtin.module {

 arg0 = np.ones((1, 4)).astype(np.float32)
 arg1 = np.ones((4, 1)).astype(np.float32)
-shark_module = SharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
+amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
-shark_module.compile()
+amdshark_module.compile()
-result = shark_module.forward((arg0, arg1))
+result = amdshark_module.forward((arg0, arg1))
 ```
 </details>

 ## Examples Using the REST API

-* [Setting up SHARK for use with Blender](./docs/shark_sd_blender.md)
+* [Setting up AMDSHARK for use with Blender](./docs/amdshark_sd_blender.md)
-* [Setting up SHARK for use with Koboldcpp](./docs/shark_sd_koboldcpp.md)
+* [Setting up AMDSHARK for use with Koboldcpp](./docs/amdshark_sd_koboldcpp.md)

 ## Supported and Validated Models

-SHARK is maintained to support the latest innovations in ML Models:
+AMDSHARK is maintained to support the latest innovations in ML Models:

-| TF HuggingFace Models | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
+| TF HuggingFace Models | AMDSHARK-CPU | AMDSHARK-CUDA | AMDSHARK-METAL |
 |---------------------|----------|----------|-------------|
 | BERT | :green_heart: | :green_heart: | :green_heart: |
 | DistilBERT | :green_heart: | :green_heart: | :green_heart: |
@@ -358,12 +358,12 @@ SHARK is maintained to support the latest innovations in ML Models:
 | Vision Transformer | :green_heart: | :green_heart: | :green_heart: |
 | ResNet50 | :green_heart: | :green_heart: | :green_heart: |

-For a complete list of the models supported in SHARK, please refer to [tank/README.md](https://github.com/nod-ai/SHARK-Studio/blob/main/tank/README.md).
+For a complete list of the models supported in AMDSHARK, please refer to [tank/README.md](https://github.com/nod-ai/AMDSHARK-Studio/blob/main/tank/README.md).

 ## Communication Channels

-* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
+* [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the AMDSHARK team and other users
-* [GitHub issues](https://github.com/nod-ai/SHARK-Studio/issues): Feature requests, bugs etc
+* [GitHub issues](https://github.com/nod-ai/AMDSHARK-Studio/issues): Feature requests, bugs etc

 ## Related Projects

@@ -385,10 +385,10 @@ For a complete list of the models supported in SHARK, please refer to [tank/READ
 * Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
 * [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
 * Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
-* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) SHARK and IREE is enabled by and heavily relies on [MLIR](https://mlir.llvm.org).
+* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) AMDSHARK and IREE is enabled by and heavily relies on [MLIR](https://mlir.llvm.org).
 </details>

 ## License

-nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
+nod.ai AMDSHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
 See [LICENSE](LICENSE) for more information.
amdshark/__init__.py (new file, 28 lines)

@@ -0,0 +1,28 @@
+import importlib
+import logging
+
+from torch._dynamo import register_backend
+
+log = logging.getLogger(__name__)
+
+
+@register_backend
+def amdshark(model, inputs, *, options):
+    try:
+        from amdshark.dynamo_backend.utils import AMDSharkBackend
+    except ImportError:
+        log.exception(
+            "Unable to import AMDSHARK - High Performance Machine Learning Distribution"
+            "Please install the right version of AMDSHARK that matches the PyTorch version being used. "
+            "Refer to https://github.com/nod-ai/AMDSHARK-Studio/ for details."
+        )
+        raise
+    return AMDSharkBackend(model, inputs, options)
+
+
+def has_amdshark():
+    try:
+        importlib.import_module("amdshark")
+        return True
+    except ImportError:
+        return False
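The new `amdshark/__init__.py` above registers `amdshark` as a TorchDynamo backend at import time. A minimal usage sketch, not part of this commit: the toy model and the option keys are illustrative placeholders, and the accepted options depend on `AMDSharkBackend`, which is not shown in this diff.

```python
import torch
import amdshark  # importing the package runs the @register_backend decorator above

# Any torch.nn.Module works here; this one is just a placeholder.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

if amdshark.has_amdshark():
    # torch.compile looks up the backend by its registered name and forwards
    # `options` to the backend's keyword-only `options` argument.
    compiled = torch.compile(model, backend="amdshark", options={"device": "cpu"})
    print(compiled(torch.randn(2, 8)))
```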
@@ -12,17 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from shark.shark_runner import SharkRunner
+from amdshark.amdshark_runner import AMDSharkRunner
-from shark.iree_utils.compile_utils import (
+from amdshark.iree_utils.compile_utils import (
 export_iree_module_to_vmfb,
 load_flatbuffer,
 get_iree_runtime_config,
 )
-from shark.iree_utils.benchmark_utils import (
+from amdshark.iree_utils.benchmark_utils import (
 build_benchmark_args,
 run_benchmark_module,
 )
-from shark.parser import shark_args
+from amdshark.parser import amdshark_args
 from datetime import datetime
 import time
 from typing import Optional
@@ -67,8 +67,8 @@ def check_requirements(frontend):
 return has_pkgs


-class SharkBenchmarkRunner(SharkRunner):
+class AMDSharkBenchmarkRunner(AMDSharkRunner):
-# SharkRunner derived class with Benchmarking capabilities.
+# AMDSharkRunner derived class with Benchmarking capabilities.
 def __init__(
 self,
 mlir_module: bytes,
@@ -76,8 +76,8 @@ class SharkBenchmarkRunner(SharkRunner):
 mlir_dialect: str = "linalg",
 extra_args: list = [],
 ):
-self.device = shark_args.device if device == "none" else device
+self.device = amdshark_args.device if device == "none" else device
-self.enable_tf32 = shark_args.enable_tf32
+self.enable_tf32 = amdshark_args.enable_tf32
 self.frontend_model = None
 self.vmfb_file = None
 self.mlir_dialect = mlir_dialect
@@ -86,12 +86,12 @@ class SharkBenchmarkRunner(SharkRunner):
 self.temp_file_to_unlink = None
 if not os.path.isfile(mlir_module):
 print(
-"Warning: Initializing SharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead."
+"Warning: Initializing AMDSharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
 )
 self.compile_str = True
 else:
 self.compile_str = False
-SharkRunner.__init__(
+AMDSharkRunner.__init__(
 self,
 mlir_module,
 device,
@@ -157,13 +157,13 @@ class SharkBenchmarkRunner(SharkRunner):
 frontend_model.cpu()
 input.cpu()

-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 frontend_model.forward(input)

 if device == "cuda":
 torch.cuda.reset_peak_memory_stats()
 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = frontend_model.forward(input)
 end = time.time()
 if device == "cuda":
@@ -176,14 +176,14 @@ class SharkBenchmarkRunner(SharkRunner):
 device_peak_b = None

 print(
-f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"Torch benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 if device == "cuda":
 # Set device to CPU so we don't run into segfaults exiting pytest subprocesses.
 torch_device = torch.device("cpu")
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 "", # host_peak_b (CPU usage) is not reported by PyTorch.
 _bytes_to_mb_str(device_peak_b),
 ]
@@ -217,13 +217,13 @@ class SharkBenchmarkRunner(SharkRunner):
 )[:2]
 frontend_model = model

-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 frontend_model.forward(*input)

 if tf_device == TF_GPU_DEVICE:
 tf.config.experimental.reset_memory_stats(tf_device)
 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = frontend_model.forward(*input)
 end = time.time()
 if tf_device == TF_GPU_DEVICE:
@@ -235,11 +235,11 @@ class SharkBenchmarkRunner(SharkRunner):
 device_peak_b = None

 print(
-f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"TF benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 "", # host_peak_b (CPU usage) is not reported by TensorFlow.
 _bytes_to_mb_str(device_peak_b),
 ]
@@ -248,7 +248,7 @@ class SharkBenchmarkRunner(SharkRunner):
 iter_per_second, host_peak_b, device_peak_b = run_benchmark_module(
 self.benchmark_cl
 )
-print(f"Shark-IREE-C benchmark:{iter_per_second} iter/second")
+print(f"AMDShark-IREE-C benchmark:{iter_per_second} iter/second")
 return [
 f"{iter_per_second}",
 f"{1000/iter_per_second}",
@@ -258,25 +258,25 @@ class SharkBenchmarkRunner(SharkRunner):

 def benchmark_python(self, inputs):
 input_list = [x for x in inputs]
-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 self.run("forward", input_list)

 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = self.run("forward", input_list)
 end = time.time()
 print(
-f"Shark-IREE Python benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"AMDShark-IREE Python benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 ]

 def benchmark_onnx(self, modelname, inputs):
 if self.device == "cuda":
 print(
-"Currently GPU benchmarking on ONNX is not supported in SHARK."
+"Currently GPU benchmarking on ONNX is not supported in AMDSHARK."
 )
 return ["N/A", "N/A"]
 else:
@@ -325,7 +325,7 @@ for currently supported models. Exiting benchmark ONNX."
 num_threads,
 batch_sizes,
 sequence_lengths,
-shark_args.num_iterations,
+amdshark_args.num_iterations,
 input_counts,
 optimize_onnx,
 validate_onnx,
@@ -340,7 +340,7 @@ for currently supported models. Exiting benchmark ONNX."
 onnx_args,
 )
 print(
-f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
 result[0]["QPS"],
@@ -408,13 +408,13 @@ for currently supported models. Exiting benchmark ONNX."
 ]
 # "frontend" must be the first element.
 if self.mode == "native":
-engines = ["shark_python", "shark_iree_c"]
+engines = ["amdshark_python", "amdshark_iree_c"]
 if self.mode == "baseline":
 engines = ["frontend"]
 if self.mode == "all":
-engines = ["frontend", "shark_python", "shark_iree_c"]
+engines = ["frontend", "amdshark_python", "amdshark_iree_c"]

-if shark_args.onnx_bench == True:
+if amdshark_args.onnx_bench == True:
 engines.append("onnxruntime")

 if not os.path.exists("bench_results.csv"):
@@ -428,7 +428,7 @@ for currently supported models. Exiting benchmark ONNX."
 bench_info["model"] = modelname
 bench_info["batch_size"] = str(import_args["batch_size"])
 bench_info["dialect"] = self.mlir_dialect
-bench_info["iterations"] = shark_args.num_iterations
+bench_info["iterations"] = amdshark_args.num_iterations
 if dynamic == True:
 bench_info["shape_type"] = "dynamic"
 else:
@@ -462,8 +462,8 @@ for currently supported models. Exiting benchmark ONNX."
 self.frontend_result = None
 continue

-elif e == "shark_python":
+elif e == "amdshark_python":
-engine_result["engine"] = "shark_python"
+engine_result["engine"] = "amdshark_python"
 (
 engine_result["iter/sec"],
 engine_result["ms/iter"],
@@ -475,8 +475,8 @@ for currently supported models. Exiting benchmark ONNX."
 self.frontend_result, engine_result["ms/iter"]
 )

-elif e == "shark_iree_c":
+elif e == "amdshark_iree_c":
-engine_result["engine"] = "shark_iree_c"
+engine_result["engine"] = "amdshark_iree_c"
 (
 engine_result["iter/sec"],
 engine_result["ms/iter"],
@@ -1,7 +1,7 @@
 import os
 import tempfile
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
-from shark.shark_importer import import_with_fx, save_mlir
+from amdshark.amdshark_importer import import_with_fx, save_mlir
 import torch
 import torch_mlir
 from torch_mlir.compiler_utils import run_pipeline_with_repro_report
@@ -40,37 +40,37 @@ brevitas_matmul_rhs_group_quant_library = [

 def load_vmfb(extended_model_name, device, mlir_dialect, extra_args=[]):
 vmfb_path = os.path.join(os.getcwd(), extended_model_name + ".vmfb")
-shark_module = None
+amdshark_module = None
 if os.path.isfile(vmfb_path):
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 None,
 device=device,
 mlir_dialect=mlir_dialect,
 )
 print(f"loading existing vmfb from: {vmfb_path}")
-shark_module.load_module(vmfb_path, extra_args=extra_args)
+amdshark_module.load_module(vmfb_path, extra_args=extra_args)
-return shark_module
+return amdshark_module


 def compile_module(
-shark_module, extended_model_name, generate_vmfb, extra_args=[]
+amdshark_module, extended_model_name, generate_vmfb, extra_args=[]
 ):
 if generate_vmfb:
 vmfb_path = os.path.join(os.getcwd(), extended_model_name + ".vmfb")
 if os.path.isfile(vmfb_path):
 print(f"loading existing vmfb from: {vmfb_path}")
-shark_module.load_module(vmfb_path, extra_args=extra_args)
+amdshark_module.load_module(vmfb_path, extra_args=extra_args)
 else:
 print(
 "No vmfb found. Compiling and saving to {}".format(vmfb_path)
 )
-path = shark_module.save_module(
+path = amdshark_module.save_module(
 os.getcwd(), extended_model_name, extra_args
 )
-shark_module.load_module(path, extra_args=extra_args)
+amdshark_module.load_module(path, extra_args=extra_args)
 else:
-shark_module.compile(extra_args)
+amdshark_module.compile(extra_args)
-return shark_module
+return amdshark_module


 def compile_int_precision(
@@ -139,7 +139,7 @@ def compile_int_precision(
 del mlir_module
 print(f"Elided IR written for {extended_model_name}")
 return bytecode_path
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_module=bytecode_path, device=device, mlir_dialect="tm_tensor"
 )
 extra_args = [
@@ -150,7 +150,7 @@ def compile_int_precision(
 ]
 return (
 compile_module(
-shark_module,
+amdshark_module,
 extended_model_name=extended_model_name,
 generate_vmfb=generate_vmfb,
 extra_args=extra_args,
@@ -159,7 +159,7 @@ def compile_int_precision(
 )


-def shark_compile_through_fx(
+def amdshark_compile_through_fx(
 model,
 inputs,
 extended_model_name,
@@ -174,22 +174,22 @@ def shark_compile_through_fx(
 ):
 is_f16 = precision == "fp16"
 if generate_or_load_vmfb:
-shark_module = load_vmfb(
+amdshark_module = load_vmfb(
 extended_model_name=extended_model_name,
 device=device,
 mlir_dialect=mlir_dialect,
 extra_args=extra_args,
 )
-if shark_module:
+if amdshark_module:
 return (
-shark_module,
+amdshark_module,
 None,
 )

-from shark.parser import shark_args
+from amdshark.parser import amdshark_args

 if "cuda" in device:
-shark_args.enable_tf32 = True
+amdshark_args.enable_tf32 = True

 if precision in ["int4", "int8"]:
 mlir_module = compile_int_precision(
@@ -225,14 +225,14 @@ def shark_compile_through_fx(
 mlir_dialect=mlir_dialect,
 )

-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_module,
 device=device,
 mlir_dialect=mlir_dialect,
 )
 return (
 compile_module(
-shark_module,
+amdshark_module,
 extended_model_name,
 generate_vmfb=generate_or_load_vmfb,
 extra_args=extra_args,
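The `load_vmfb` / `compile_module` helpers diffed above implement a simple compile-or-reuse cache keyed on `<extended_model_name>.vmfb` in the current directory. A minimal sketch of how a caller could use them; the import path is an assumption here, since the file header for this diff was not captured on this page.

```python
# Assumed module path for the helpers shown above (hypothetical; adjust to the real file).
from amdshark.amdshark_compile import load_vmfb, compile_module
from amdshark.amdshark_inference import AMDSharkInference


def get_compiled_module(mlir_path, extended_model_name, device="cpu"):
    # Reuse a previously saved <extended_model_name>.vmfb from the current directory, if any.
    module = load_vmfb(extended_model_name, device, mlir_dialect="tm_tensor")
    if module is not None:
        return module
    # Otherwise compile the MLIR and let compile_module save and load the resulting .vmfb.
    module = AMDSharkInference(mlir_path, device=device, mlir_dialect="tm_tensor")
    return compile_module(module, extended_model_name, generate_vmfb=True)
```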
@@ -1,8 +1,8 @@
|
|||||||
# Lint as: python3
|
# Lint as: python3
|
||||||
"""SHARK Downloader"""
|
"""AMDSHARK Downloader"""
|
||||||
# Requirements : Put shark_tank in SHARK directory
|
# Requirements : Put amdshark_tank in AMDSHARK directory
|
||||||
# /SHARK
|
# /AMDSHARK
|
||||||
# /gen_shark_tank
|
# /gen_amdshark_tank
|
||||||
# /tflite
|
# /tflite
|
||||||
# /albert_lite_base
|
# /albert_lite_base
|
||||||
# /...model_name...
|
# /...model_name...
|
||||||
@@ -17,7 +17,7 @@ import os
|
|||||||
from tqdm.std import tqdm
|
from tqdm.std import tqdm
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
from google.cloud import storage
|
from google.cloud import storage
|
||||||
|
|
||||||
|
|
||||||
@@ -83,8 +83,8 @@ input_type_to_np_dtype = {
|
|||||||
|
|
||||||
# Save the model in the home local so it needn't be fetched everytime in the CI.
|
# Save the model in the home local so it needn't be fetched everytime in the CI.
|
||||||
home = str(Path.home())
|
home = str(Path.home())
|
||||||
alt_path = os.path.join(os.path.dirname(__file__), "../gen_shark_tank/")
|
alt_path = os.path.join(os.path.dirname(__file__), "../gen_amdshark_tank/")
|
||||||
custom_path = shark_args.local_tank_cache
|
custom_path = amdshark_args.local_tank_cache
|
||||||
|
|
||||||
if custom_path is not None:
|
if custom_path is not None:
|
||||||
if not os.path.exists(custom_path):
|
if not os.path.exists(custom_path):
|
||||||
@@ -92,17 +92,17 @@ if custom_path is not None:
|
|||||||
|
|
||||||
WORKDIR = custom_path
|
WORKDIR = custom_path
|
||||||
|
|
||||||
print(f"Using {WORKDIR} as local shark_tank cache directory.")
|
print(f"Using {WORKDIR} as local amdshark_tank cache directory.")
|
||||||
|
|
||||||
elif os.path.exists(alt_path):
|
elif os.path.exists(alt_path):
|
||||||
WORKDIR = alt_path
|
WORKDIR = alt_path
|
||||||
print(
|
print(
|
||||||
f"Using {WORKDIR} as shark_tank directory. Delete this directory if you aren't working from locally generated shark_tank."
|
f"Using {WORKDIR} as amdshark_tank directory. Delete this directory if you aren't working from locally generated amdshark_tank."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
WORKDIR = os.path.join(home, ".local/shark_tank/")
|
WORKDIR = os.path.join(home, ".local/amdshark_tank/")
|
||||||
print(
|
print(
|
||||||
f"shark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
|
f"amdshark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
|
||||||
)
|
)
|
||||||
os.makedirs(WORKDIR, exist_ok=True)
|
os.makedirs(WORKDIR, exist_ok=True)
|
||||||
|
|
||||||
@@ -150,8 +150,8 @@ def _internet_connected():
|
|||||||
def get_git_revision_short_hash() -> str:
|
def get_git_revision_short_hash() -> str:
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
if shark_args.shark_prefix is not None:
|
if amdshark_args.amdshark_prefix is not None:
|
||||||
prefix_kw = shark_args.shark_prefix
|
prefix_kw = amdshark_args.amdshark_prefix
|
||||||
else:
|
else:
|
||||||
import json
|
import json
|
||||||
|
|
||||||
@@ -160,11 +160,11 @@ def get_git_revision_short_hash() -> str:
|
|||||||
with open(src, "r") as f:
|
with open(src, "r") as f:
|
||||||
data = json.loads(f.read())
|
data = json.loads(f.read())
|
||||||
prefix_kw = data["version"]
|
prefix_kw = data["version"]
|
||||||
print(f"Checking for updates from gs://shark_tank/{prefix_kw}")
|
print(f"Checking for updates from gs://amdshark_tank/{prefix_kw}")
|
||||||
return prefix_kw
|
return prefix_kw
|
||||||
|
|
||||||
|
|
||||||
def get_sharktank_prefix():
|
def get_amdsharktank_prefix():
|
||||||
tank_prefix = ""
|
tank_prefix = ""
|
||||||
if not _internet_connected():
|
if not _internet_connected():
|
||||||
print(
|
print(
|
||||||
@@ -174,7 +174,7 @@ def get_sharktank_prefix():
|
|||||||
else:
|
else:
|
||||||
desired_prefix = get_git_revision_short_hash()
|
desired_prefix = get_git_revision_short_hash()
|
||||||
storage_client_a = storage.Client.create_anonymous_client()
|
storage_client_a = storage.Client.create_anonymous_client()
|
||||||
base_bucket_name = "shark_tank"
|
base_bucket_name = "amdshark_tank"
|
||||||
base_bucket = storage_client_a.bucket(base_bucket_name)
|
base_bucket = storage_client_a.bucket(base_bucket_name)
|
||||||
dir_blobs = base_bucket.list_blobs(prefix=f"{desired_prefix}")
|
dir_blobs = base_bucket.list_blobs(prefix=f"{desired_prefix}")
|
||||||
for blob in dir_blobs:
|
for blob in dir_blobs:
|
||||||
@@ -186,13 +186,13 @@ def get_sharktank_prefix():
|
|||||||
continue
|
continue
|
||||||
if tank_prefix == "":
|
if tank_prefix == "":
|
||||||
print(
|
print(
|
||||||
f"shark_tank bucket not found matching ({desired_prefix}). Defaulting to nightly."
|
f"amdshark_tank bucket not found matching ({desired_prefix}). Defaulting to nightly."
|
||||||
)
|
)
|
||||||
tank_prefix = "nightly"
|
tank_prefix = "nightly"
|
||||||
return tank_prefix
|
return tank_prefix
|
||||||
|
|
||||||
|
|
||||||
# Downloads the torch model from gs://shark_tank dir.
|
# Downloads the torch model from gs://amdshark_tank dir.
|
||||||
def download_model(
|
def download_model(
|
||||||
model_name,
|
model_name,
|
||||||
dynamic=False,
|
dynamic=False,
|
||||||
@@ -204,7 +204,7 @@ def download_model(
|
|||||||
model_name = model_name.replace("/", "_")
|
model_name = model_name.replace("/", "_")
|
||||||
dyn_str = "_dynamic" if dynamic else ""
|
dyn_str = "_dynamic" if dynamic else ""
|
||||||
os.makedirs(WORKDIR, exist_ok=True)
|
os.makedirs(WORKDIR, exist_ok=True)
|
||||||
shark_args.shark_prefix = get_sharktank_prefix()
|
amdshark_args.amdshark_prefix = get_amdsharktank_prefix()
|
||||||
if import_args["batch_size"] and import_args["batch_size"] != 1:
|
if import_args["batch_size"] and import_args["batch_size"] != 1:
|
||||||
model_dir_name = (
|
model_dir_name = (
|
||||||
model_name
|
model_name
|
||||||
@@ -221,7 +221,7 @@ def download_model(
|
|||||||
model_dir = os.path.join(WORKDIR, model_dir_name)
|
model_dir = os.path.join(WORKDIR, model_dir_name)
|
||||||
|
|
||||||
if not tank_url:
|
if not tank_url:
|
||||||
tank_url = "gs://shark_tank/" + shark_args.shark_prefix
|
tank_url = "gs://amdshark_tank/" + amdshark_args.amdshark_prefix
|
||||||
|
|
||||||
full_gs_url = tank_url.rstrip("/") + "/" + model_dir_name
|
full_gs_url = tank_url.rstrip("/") + "/" + model_dir_name
|
||||||
if not check_dir_exists(
|
if not check_dir_exists(
|
||||||
@@ -232,7 +232,7 @@ def download_model(
|
|||||||
)
|
)
|
||||||
download_public_file(full_gs_url, model_dir)
|
download_public_file(full_gs_url, model_dir)
|
||||||
|
|
||||||
elif shark_args.force_update_tank == True:
|
elif amdshark_args.force_update_tank == True:
|
||||||
print(
|
print(
|
||||||
f"Force-updating artifacts for model {model_name} from: {full_gs_url}"
|
f"Force-updating artifacts for model {model_name} from: {full_gs_url}"
|
||||||
)
|
)
|
||||||
@@ -259,13 +259,13 @@ def download_model(
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
print(f"Model artifact hash not found at {model_dir}.")
|
print(f"Model artifact hash not found at {model_dir}.")
|
||||||
upstream_hash = None
|
upstream_hash = None
|
||||||
if local_hash != upstream_hash and shark_args.update_tank == True:
|
if local_hash != upstream_hash and amdshark_args.update_tank == True:
|
||||||
print(f"Updating artifacts for model {model_name}...")
|
print(f"Updating artifacts for model {model_name}...")
|
||||||
download_public_file(full_gs_url, model_dir)
|
download_public_file(full_gs_url, model_dir)
|
||||||
|
|
||||||
elif local_hash != upstream_hash:
|
elif local_hash != upstream_hash:
|
||||||
print(
|
print(
|
||||||
"Hash does not match upstream in gs://shark_tank/. If you want to use locally generated artifacts, this is working as intended. Otherwise, run with --update_tank."
|
"Hash does not match upstream in gs://amdshark_tank/. If you want to use locally generated artifacts, this is working as intended. Otherwise, run with --update_tank."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
@@ -280,12 +280,12 @@ def download_model(
|
|||||||
f"Verifying that model artifacts were downloaded successfully to {mlir_filename}..."
|
f"Verifying that model artifacts were downloaded successfully to {mlir_filename}..."
|
||||||
)
|
)
|
||||||
if not os.path.exists(mlir_filename):
|
if not os.path.exists(mlir_filename):
|
||||||
from tank.generate_sharktank import gen_shark_files
|
from tank.generate_amdsharktank import gen_amdshark_files
|
||||||
|
|
||||||
print(
|
print(
|
||||||
"The model data was not found. Trying to generate artifacts locally."
|
"The model data was not found. Trying to generate artifacts locally."
|
||||||
)
|
)
|
||||||
gen_shark_files(model_name, frontend, WORKDIR, import_args)
|
gen_amdshark_files(model_name, frontend, WORKDIR, import_args)
|
||||||
|
|
||||||
assert os.path.exists(mlir_filename), f"MLIR not found at {mlir_filename}"
|
assert os.path.exists(mlir_filename), f"MLIR not found at {mlir_filename}"
|
||||||
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
|
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
|
||||||
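Taken together, the changes above only swap the naming: the tank prefix is still resolved against the gs://amdshark_tank bucket (defaulting to "nightly" when no matching prefix exists), and `download_model` still returns the fetched or locally regenerated artifacts. A minimal, hedged sketch of how a caller drives the renamed entry point, mirroring the example scripts added later in this commit (the model name is only illustrative):

```python
# Illustrative sketch, not part of the diff: only the call shape shown in this
# commit's example scripts is assumed.
from amdshark.amdshark_downloader import download_model

# Resolves gs://amdshark_tank/<prefix> (falling back to "nightly") and then
# downloads or locally generates the MLIR artifacts for the requested model.
mlir_model, func_name, inputs, golden_out = download_model(
    "microsoft/MiniLM-L12-H384-uncased",  # placeholder model name
    frontend="torch",
)
print(func_name, len(inputs))
```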
@@ -1,6 +1,6 @@
|
|||||||
from typing import Any, Dict, List, Tuple
|
from typing import Any, Dict, List, Tuple
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from shark.shark_importer import import_with_fx, save_mlir
|
from amdshark.amdshark_importer import import_with_fx, save_mlir
|
||||||
import torchvision.models as models
|
import torchvision.models as models
|
||||||
import copy
|
import copy
|
||||||
import io
|
import io
|
||||||
@@ -13,7 +13,7 @@ from typing import Dict
|
|||||||
import torch_mlir
|
import torch_mlir
|
||||||
|
|
||||||
|
|
||||||
def shark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
|
def amdshark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
|
||||||
mlir_module = torch_mlir.compile(
|
mlir_module = torch_mlir.compile(
|
||||||
fx_g, inputs, output_type="linalg-on-tensors"
|
fx_g, inputs, output_type="linalg-on-tensors"
|
||||||
)
|
)
|
||||||
@@ -22,19 +22,19 @@ def shark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
|
|||||||
bytecode = bytecode_stream.getvalue()
|
bytecode = bytecode_stream.getvalue()
|
||||||
bytecode_path = save_mlir(
|
bytecode_path = save_mlir(
|
||||||
bytecode,
|
bytecode,
|
||||||
model_name="shark_eager_module",
|
model_name="amdshark_eager_module",
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
)
|
)
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module=bytecode_path,
|
mlir_module=bytecode_path,
|
||||||
device=device,
|
device=device,
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
)
|
)
|
||||||
shark_module.compile(extra_args=[])
|
amdshark_module.compile(extra_args=[])
|
||||||
return shark_module
|
return amdshark_module
|
||||||
|
|
||||||
|
|
||||||
def _make_single_op_gm(node, captured_val, compiled_graph):
|
def _make_single_op_gm(node, captured_val, compiled_graph):
|
||||||
@@ -55,7 +55,7 @@ def _make_single_op_gm(node, captured_val, compiled_graph):
|
|||||||
g.output(call)
|
g.output(call)
|
||||||
g.lint()
|
g.lint()
|
||||||
single_node = torch.fx.GraphModule(torch.nn.Module(), g)
|
single_node = torch.fx.GraphModule(torch.nn.Module(), g)
|
||||||
compiled_module = shark_backend(single_node, inputs)
|
compiled_module = amdshark_backend(single_node, inputs)
|
||||||
compiled_graph[node.name] = {
|
compiled_graph[node.name] = {
|
||||||
"module": compiled_module,
|
"module": compiled_module,
|
||||||
"inputs": [i for i in env],
|
"inputs": [i for i in env],
|
||||||
@@ -172,41 +172,41 @@ shape_prop = ShapeProp(fx_graph)
|
|||||||
|
|
||||||
x = shape_prop.propagate(input[0])
|
x = shape_prop.propagate(input[0])
|
||||||
|
|
||||||
shark_graph = compiled_graph(fx_graph, x)
|
amdshark_graph = compiled_graph(fx_graph, x)
|
||||||
|
|
||||||
|
|
||||||
for key in shark_graph:
|
for key in amdshark_graph:
|
||||||
if key.startswith("getitem"):
|
if key.startswith("getitem"):
|
||||||
input_val = shark_graph[key]["input"]
|
input_val = amdshark_graph[key]["input"]
|
||||||
pos = shark_graph[key]["pos"]
|
pos = amdshark_graph[key]["pos"]
|
||||||
if input_val not in shark_graph:
|
if input_val not in amdshark_graph:
|
||||||
shark_graph[key]["result"] = x[input_val][pos].detach()
|
amdshark_graph[key]["result"] = x[input_val][pos].detach()
|
||||||
else:
|
else:
|
||||||
shark_graph[key]["result"] = shark_graph[input_val]["result"][
|
amdshark_graph[key]["result"] = amdshark_graph[input_val]["result"][
|
||||||
pos
|
pos
|
||||||
].detach()
|
].detach()
|
||||||
elif key.startswith("empty"):
|
elif key.startswith("empty"):
|
||||||
operator = shark_graph[key]["target"]
|
operator = amdshark_graph[key]["target"]
|
||||||
args = shark_graph[key]["args"]
|
args = amdshark_graph[key]["args"]
|
||||||
kwargs = shark_graph[key]["kwargs"]
|
kwargs = amdshark_graph[key]["kwargs"]
|
||||||
shark_graph[key]["result"] = operator(*args, **kwargs).detach()
|
amdshark_graph[key]["result"] = operator(*args, **kwargs).detach()
|
||||||
else:
|
else:
|
||||||
input_val = shark_graph[key]["inputs"]
|
input_val = amdshark_graph[key]["inputs"]
|
||||||
input_tensors = []
|
input_tensors = []
|
||||||
for input in input_val:
|
for input in input_val:
|
||||||
if input not in shark_graph:
|
if input not in amdshark_graph:
|
||||||
input_tensors.append(x[input].detach())
|
input_tensors.append(x[input].detach())
|
||||||
else:
|
else:
|
||||||
input_tensors.append(shark_graph[input]["result"])
|
input_tensors.append(amdshark_graph[input]["result"])
|
||||||
|
|
||||||
val = shark_graph[key]["module"]("forward", input_tensors)
|
val = amdshark_graph[key]["module"]("forward", input_tensors)
|
||||||
if isinstance(val, (tuple, list)):
|
if isinstance(val, (tuple, list)):
|
||||||
list_val = []
|
list_val = []
|
||||||
for v in val:
|
for v in val:
|
||||||
list_val.append(torch.from_numpy(v))
|
list_val.append(torch.from_numpy(v))
|
||||||
shark_graph[key]["result"] = list_val
|
amdshark_graph[key]["result"] = list_val
|
||||||
else:
|
else:
|
||||||
shark_graph[key]["result"] = torch.from_numpy(val)
|
amdshark_graph[key]["result"] = torch.from_numpy(val)
|
||||||
|
|
||||||
|
|
||||||
print(shark_graph)
|
print(amdshark_graph)
|
||||||
@@ -4,7 +4,7 @@ import numpy as np
|
|||||||
|
|
||||||
import torch_mlir
|
import torch_mlir
|
||||||
from iree.compiler import compile_file
|
from iree.compiler import compile_file
|
||||||
from shark.shark_importer import import_with_fx, get_f16_inputs, save_mlir
|
from amdshark.amdshark_importer import import_with_fx, get_f16_inputs, save_mlir
|
||||||
|
|
||||||
|
|
||||||
class GenerateConfigFile:
|
class GenerateConfigFile:
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
# Lint as: python3
|
# Lint as: python3
|
||||||
"""SHARK Importer"""
|
"""AMDSHARK Importer"""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
|
|
||||||
def create_hash(file_name):
|
def create_hash(file_name):
|
||||||
with open(file_name, "rb") as f:
|
with open(file_name, "rb") as f:
|
||||||
@@ -28,9 +28,9 @@ supported_frontends = {
}


class SharkImporter:
class AMDSharkImporter:
    """
    SharkImporter converts frontend modules into a
    AMDSharkImporter converts frontend modules into a
    mlir_module. The supported frameworks are tensorflow,
    pytorch, and tf-lite.

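The renamed importer keeps its previous shape; a short hedged sketch of the intended flow, using only the constructor arguments and `import_mlir` keywords that appear elsewhere in this commit (the toy module itself is illustrative):

```python
# Sketch only: mirrors the albert_maskfill example further down in this commit.
import torch

from amdshark.amdshark_importer import AMDSharkImporter
from amdshark.amdshark_inference import AMDSharkInference


class Double(torch.nn.Module):
    def forward(self, x):
        return x * 2.0


inputs = (torch.ones(1, 4),)
mlir_importer = AMDSharkImporter(Double(), inputs, frontend="torch")
# For torch modules the default entry point is "forward".
mlir_module, func_name = mlir_importer.import_mlir(
    is_dynamic=False, tracing_required=True
)

amdshark_module = AMDSharkInference(mlir_module)
amdshark_module.compile()
print(amdshark_module.forward(inputs))
```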
@@ -83,7 +83,7 @@ class SharkImporter:
|
|||||||
# NOTE: The default function for torch is "forward" and tf-lite is "main".
|
# NOTE: The default function for torch is "forward" and tf-lite is "main".
|
||||||
|
|
||||||
def _torch_mlir(self, is_dynamic, tracing_required, mlir_type):
|
def _torch_mlir(self, is_dynamic, tracing_required, mlir_type):
|
||||||
from shark.torch_mlir_utils import get_torch_mlir_module
|
from amdshark.torch_mlir_utils import get_torch_mlir_module
|
||||||
|
|
||||||
return get_torch_mlir_module(
|
return get_torch_mlir_module(
|
||||||
self.module,
|
self.module,
|
||||||
@@ -121,7 +121,7 @@ class SharkImporter:
|
|||||||
is_dynamic=False,
|
is_dynamic=False,
|
||||||
tracing_required=False,
|
tracing_required=False,
|
||||||
func_name="forward",
|
func_name="forward",
|
||||||
save_dir=cmd_opts.tmp_dir, #"./shark_tmp/",
|
save_dir=cmd_opts.tmp_dir, #"./amdshark_tmp/",
|
||||||
mlir_type="linalg",
|
mlir_type="linalg",
|
||||||
):
|
):
|
||||||
if self.frontend in ["torch", "pytorch"]:
|
if self.frontend in ["torch", "pytorch"]:
|
||||||
@@ -773,7 +773,7 @@ def import_with_fx(
|
|||||||
return ts_graph
|
return ts_graph
|
||||||
|
|
||||||
inputs = get_f16_inputs(inputs, is_f16, f16_input_mask)
|
inputs = get_f16_inputs(inputs, is_f16, f16_input_mask)
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
ts_graph,
|
ts_graph,
|
||||||
inputs,
|
inputs,
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -807,7 +807,7 @@ def save_mlir(
|
|||||||
model_name + "_" + frontend + "_" + mlir_dialect + ".mlir"
|
model_name + "_" + frontend + "_" + mlir_dialect + ".mlir"
|
||||||
)
|
)
|
||||||
if dir == "":
|
if dir == "":
|
||||||
dir = cmd_opts.tmp_dir, #os.path.join(".", "shark_tmp")
|
dir = cmd_opts.tmp_dir, #os.path.join(".", "amdshark_tmp")
|
||||||
mlir_path = os.path.join(dir, model_name_mlir)
|
mlir_path = os.path.join(dir, model_name_mlir)
|
||||||
print(f"saving {model_name_mlir} to {dir}")
|
print(f"saving {model_name_mlir} to {dir}")
|
||||||
if not os.path.exists(dir):
|
if not os.path.exists(dir):
|
||||||
@@ -9,15 +9,15 @@
|
|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
from shark.iree_utils.compile_utils import (
|
from amdshark.iree_utils.compile_utils import (
|
||||||
export_iree_module_to_vmfb,
|
export_iree_module_to_vmfb,
|
||||||
load_flatbuffer,
|
load_flatbuffer,
|
||||||
create_dispatch_dirs,
|
create_dispatch_dirs,
|
||||||
compile_benchmark_dirs,
|
compile_benchmark_dirs,
|
||||||
)
|
)
|
||||||
import os
|
import os
|
||||||
from shark.shark_runner import SharkRunner
|
from amdshark.amdshark_runner import AMDSharkRunner
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ dtype_to_np_dtype = {
}


class SharkInference:
class AMDSharkInference:
    """
    Runs prediction or inference on mlir_module.

@@ -47,7 +47,7 @@ class SharkInference:
        The dialect in which the given mlir_module is in.
        Refer to {https://mlir.llvm.org/docs/Dialects/}
    is_benchmark: bool
        Whether this SharkInference module should be benchmark-enabled.
        Whether this AMDSharkInference module should be benchmark-enabled.
    mmap: bool
        Whether to load/run vmfb using mmap. It's `True` by default.

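Since most of the examples in this commit revolve around this class, here is a compact hedged sketch of the renamed entry points; the file path is a placeholder, and only keyword names visible in this diff are used:

```python
# Sketch only: the .mlir path is a placeholder; keyword names match this diff.
import numpy as np

from amdshark.amdshark_inference import AMDSharkInference

# Passing a path avoids the in-RAM duplication warned about in the hunk below;
# a raw MLIR string/bytecode object also works but costs extra memory.
amdshark_module = AMDSharkInference(
    mlir_module="/path/to/model_torch_tm_tensor.mlir",  # placeholder path
    device="cpu",
    mlir_dialect="tm_tensor",
)
amdshark_module.compile(extra_args=[])

inputs = (np.ones((1, 4), dtype=np.float32),)  # inputs are a tuple of np.ndarray
print(amdshark_module("forward", inputs))
```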
@@ -79,27 +79,27 @@ class SharkInference:
|
|||||||
if mlir_module is not None:
|
if mlir_module is not None:
|
||||||
if mlir_module and not os.path.isfile(mlir_module):
|
if mlir_module and not os.path.isfile(mlir_module):
|
||||||
print(
|
print(
|
||||||
"Warning: Initializing SharkInference with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead."
|
"Warning: Initializing AMDSharkInference with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
|
||||||
)
|
)
|
||||||
self.compile_str = True
|
self.compile_str = True
|
||||||
else:
|
else:
|
||||||
self.compile_str = False
|
self.compile_str = False
|
||||||
self.device = shark_args.device if device == "none" else device
|
self.device = amdshark_args.device if device == "none" else device
|
||||||
self.mlir_dialect = mlir_dialect
|
self.mlir_dialect = mlir_dialect
|
||||||
self.is_benchmark = is_benchmark
|
self.is_benchmark = is_benchmark
|
||||||
self.device_idx = device_idx
|
self.device_idx = device_idx
|
||||||
self.dispatch_benchmarks = (
|
self.dispatch_benchmarks = (
|
||||||
shark_args.dispatch_benchmarks
|
amdshark_args.dispatch_benchmarks
|
||||||
if dispatch_benchmark is None
|
if dispatch_benchmark is None
|
||||||
else dispatch_benchmark
|
else dispatch_benchmark
|
||||||
)
|
)
|
||||||
self.dispatch_benchmarks_dir = (
|
self.dispatch_benchmarks_dir = (
|
||||||
shark_args.dispatch_benchmarks_dir
|
amdshark_args.dispatch_benchmarks_dir
|
||||||
if dispatch_benchmark_dir == "temp_dispatch_benchmarks"
|
if dispatch_benchmark_dir == "temp_dispatch_benchmarks"
|
||||||
else dispatch_benchmark_dir
|
else dispatch_benchmark_dir
|
||||||
)
|
)
|
||||||
|
|
||||||
self.shark_runner = None
|
self.amdshark_runner = None
|
||||||
self.mmap = mmap
|
self.mmap = mmap
|
||||||
self.rt_flags = rt_flags
|
self.rt_flags = rt_flags
|
||||||
|
|
||||||
@@ -120,9 +120,9 @@ class SharkInference:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.is_benchmark == True:
|
if self.is_benchmark == True:
|
||||||
from shark.shark_benchmark_runner import SharkBenchmarkRunner
|
from amdshark.amdshark_benchmark_runner import AMDSharkBenchmarkRunner
|
||||||
|
|
||||||
self.shark_runner = SharkBenchmarkRunner(
|
self.amdshark_runner = AMDSharkBenchmarkRunner(
|
||||||
self.mlir_module,
|
self.mlir_module,
|
||||||
self.device,
|
self.device,
|
||||||
self.mlir_dialect,
|
self.mlir_dialect,
|
||||||
@@ -130,7 +130,7 @@ class SharkInference:
|
|||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.shark_runner = SharkRunner(
|
self.amdshark_runner = AMDSharkRunner(
|
||||||
self.mlir_module,
|
self.mlir_module,
|
||||||
self.device,
|
self.device,
|
||||||
self.mlir_dialect,
|
self.mlir_dialect,
|
||||||
@@ -150,19 +150,19 @@ class SharkInference:
|
|||||||
|
|
||||||
# inputs are considered to be tuple of np.array.
|
# inputs are considered to be tuple of np.array.
|
||||||
def __call__(self, function_name: str, inputs: tuple, send_to_host=True):
|
def __call__(self, function_name: str, inputs: tuple, send_to_host=True):
|
||||||
return self.shark_runner.run(
|
return self.amdshark_runner.run(
|
||||||
function_name, inputs, send_to_host, device=self.device
|
function_name, inputs, send_to_host, device=self.device
|
||||||
)
|
)
|
||||||
|
|
||||||
# forward function.
|
# forward function.
|
||||||
def forward(self, inputs: tuple, send_to_host=True):
|
def forward(self, inputs: tuple, send_to_host=True):
|
||||||
return self.shark_runner.run(
|
return self.amdshark_runner.run(
|
||||||
"forward", inputs, send_to_host, device=self.device
|
"forward", inputs, send_to_host, device=self.device
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get all function names defined within the compiled module.
|
# Get all function names defined within the compiled module.
|
||||||
def get_functions_in_module(self):
|
def get_functions_in_module(self):
|
||||||
return self.shark_runner.get_functions_in_module()
|
return self.amdshark_runner.get_functions_in_module()
|
||||||
|
|
||||||
# Captures the static input information from the mlir_module.
|
# Captures the static input information from the mlir_module.
|
||||||
# TODO(pashu123): Generate the input information for dynamic shapes.
|
# TODO(pashu123): Generate the input information for dynamic shapes.
|
||||||
@@ -223,7 +223,7 @@ class SharkInference:
|
|||||||
|
|
||||||
# load and return the module.
|
# load and return the module.
|
||||||
def load_module(self, path, extra_args=[]):
|
def load_module(self, path, extra_args=[]):
|
||||||
self.shark_runner = SharkRunner(
|
self.amdshark_runner = AMDSharkRunner(
|
||||||
device=self.device,
|
device=self.device,
|
||||||
compile_vmfb=False,
|
compile_vmfb=False,
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
@@ -236,8 +236,8 @@ class SharkInference:
|
|||||||
mmap=self.mmap,
|
mmap=self.mmap,
|
||||||
rt_flags=self.rt_flags,
|
rt_flags=self.rt_flags,
|
||||||
)
|
)
|
||||||
self.shark_runner.iree_compilation_module = params["vmfb"]
|
self.amdshark_runner.iree_compilation_module = params["vmfb"]
|
||||||
self.shark_runner.iree_config = params["config"]
|
self.amdshark_runner.iree_config = params["config"]
|
||||||
self.shark_runner.temp_file_to_unlink = params["temp_file_to_unlink"]
|
self.amdshark_runner.temp_file_to_unlink = params["temp_file_to_unlink"]
|
||||||
del params
|
del params
|
||||||
return
|
return
|
||||||
@@ -12,19 +12,19 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from shark.iree_utils.compile_utils import (
|
from amdshark.iree_utils.compile_utils import (
|
||||||
get_iree_compiled_module,
|
get_iree_compiled_module,
|
||||||
get_results,
|
get_results,
|
||||||
export_iree_module_to_vmfb,
|
export_iree_module_to_vmfb,
|
||||||
load_flatbuffer,
|
load_flatbuffer,
|
||||||
)
|
)
|
||||||
from shark.iree_utils._common import check_device_drivers, device_driver_info
|
from amdshark.iree_utils._common import check_device_drivers, device_driver_info
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
# supported dialects by the shark-runtime.
|
# supported dialects by the amdshark-runtime.
|
||||||
supported_dialects = {
|
supported_dialects = {
|
||||||
"linalg",
|
"linalg",
|
||||||
"auto",
|
"auto",
|
||||||
@@ -35,9 +35,9 @@ supported_dialects = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SharkRunner:
|
class AMDSharkRunner:
|
||||||
"""
|
"""
|
||||||
Base class for SharkInference and SharkTrainer
|
Base class for AMDSharkInference and AMDSharkTrainer
|
||||||
used to execute an mlir_module.
|
used to execute an mlir_module.
|
||||||
|
|
||||||
...
|
...
|
||||||
@@ -78,12 +78,12 @@ class SharkRunner:
|
|||||||
if self.mlir_module is not None:
|
if self.mlir_module is not None:
|
||||||
if not os.path.isfile(mlir_module):
|
if not os.path.isfile(mlir_module):
|
||||||
print(
|
print(
|
||||||
"Warning: Initializing SharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead."
|
"Warning: Initializing AMDSharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
|
||||||
)
|
)
|
||||||
self.compile_str = True
|
self.compile_str = True
|
||||||
else:
|
else:
|
||||||
self.compile_str = False
|
self.compile_str = False
|
||||||
self.device = shark_args.device if device == "none" else device
|
self.device = amdshark_args.device if device == "none" else device
|
||||||
self.mlir_dialect = mlir_dialect
|
self.mlir_dialect = mlir_dialect
|
||||||
self.extra_args = extra_args
|
self.extra_args = extra_args
|
||||||
self.device_idx = device_idx
|
self.device_idx = device_idx
|
||||||
@@ -12,10 +12,10 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
from shark.shark_runner import SharkRunner
|
from amdshark.amdshark_runner import AMDSharkRunner
|
||||||
from shark.backward_makefx import MakeFxModule
|
from amdshark.backward_makefx import MakeFxModule
|
||||||
from shark.shark_importer import import_with_fx, save_mlir
|
from amdshark.amdshark_importer import import_with_fx, save_mlir
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import sys
|
import sys
|
||||||
@@ -26,8 +26,8 @@ def print_err(*a):
    print(*a, file=sys.stderr)


class SharkTrainer:
class AMDSharkTrainer:
    """Training pytorch, tensorflow module on shark runtime."""
    """Training pytorch, tensorflow module on amdshark runtime."""

    def __init__(
        self,
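For completeness, a rough sketch of how the renamed trainer is driven. The import path and constructor arguments below are assumptions inferred from the rename pattern (the hunks in this file only show `set_frontend` and `train(num_iters=...)`), and any compilation step the class may require is omitted, so treat this as an illustration of the rename rather than a verified recipe:

```python
# Heavily hedged sketch: the import path and constructor signature are
# assumptions, not confirmed by this diff.
import torch

from amdshark.amdshark_trainer import AMDSharkTrainer  # assumed module path

model = torch.nn.Linear(4, 4)
example_inputs = (torch.ones(1, 4),)

trainer = AMDSharkTrainer(model, example_inputs)  # hypothetical arguments
trainer.set_frontend("pytorch")  # "pytorch" is the default frontend (see the next hunk)
updated_params = trainer.train(num_iters=10)
```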
@@ -48,9 +48,9 @@ class SharkTrainer:
|
|||||||
|
|
||||||
# By default it's the torch frontend.
|
# By default it's the torch frontend.
|
||||||
self.frontend = "pytorch"
|
self.frontend = "pytorch"
|
||||||
self.device = device if device is not None else shark_args.device
|
self.device = device if device is not None else amdshark_args.device
|
||||||
|
|
||||||
self.shark_runner = None
|
self.amdshark_runner = None
|
||||||
|
|
||||||
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
||||||
def set_frontend(self, frontend: str):
|
def set_frontend(self, frontend: str):
|
||||||
@@ -86,18 +86,18 @@ class SharkTrainer:
|
|||||||
)
|
)
|
||||||
mlir_module = save_mlir(
|
mlir_module = save_mlir(
|
||||||
mlir_module,
|
mlir_module,
|
||||||
model_name="shark_model",
|
model_name="amdshark_model",
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
mlir_dialect=mlir_type,
|
mlir_dialect=mlir_type,
|
||||||
)
|
)
|
||||||
self.shark_runner = SharkRunner(
|
self.amdshark_runner = AMDSharkRunner(
|
||||||
mlir_module,
|
mlir_module,
|
||||||
self.device,
|
self.device,
|
||||||
"tm_tensor",
|
"tm_tensor",
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
)
|
)
|
||||||
elif self.frontend in ["tensorflow", "tf", "mhlo", "stablehlo"]:
|
elif self.frontend in ["tensorflow", "tf", "mhlo", "stablehlo"]:
|
||||||
self.shark_runner = SharkRunner(
|
self.amdshark_runner = AMDSharkRunner(
|
||||||
self.model,
|
self.model,
|
||||||
self.input,
|
self.input,
|
||||||
self.dynamic,
|
self.dynamic,
|
||||||
@@ -123,7 +123,7 @@ class SharkTrainer:
|
|||||||
params = [x.numpy() for x in params]
|
params = [x.numpy() for x in params]
|
||||||
print(f"Training started for {num_iters} iterations:")
|
print(f"Training started for {num_iters} iterations:")
|
||||||
for i in tqdm(range(num_iters)):
|
for i in tqdm(range(num_iters)):
|
||||||
params = self.shark_runner.run(
|
params = self.amdshark_runner.run(
|
||||||
"forward", params + self.input, self.frontend
|
"forward", params + self.input, self.frontend
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -131,7 +131,7 @@ class SharkTrainer:
|
|||||||
|
|
||||||
# Function to train tensorflow module.
|
# Function to train tensorflow module.
|
||||||
# Output final loss.
|
# Output final loss.
|
||||||
# TODO(raikonenfnu): Save updated weight/states in SHARK.
|
# TODO(raikonenfnu): Save updated weight/states in AMDSHARK.
|
||||||
def _train_tf(self, num_iters):
|
def _train_tf(self, num_iters):
|
||||||
input_list = []
|
input_list = []
|
||||||
for x in self.input:
|
for x in self.input:
|
||||||
@@ -150,7 +150,7 @@ class SharkTrainer:
|
|||||||
|
|
||||||
print(f"Training started for {num_iters} iterations:")
|
print(f"Training started for {num_iters} iterations:")
|
||||||
for i in tqdm(range(num_iters)):
|
for i in tqdm(range(num_iters)):
|
||||||
outputs = self.shark_runner.forward(input_list, self.frontend)
|
outputs = self.amdshark_runner.forward(input_list, self.frontend)
|
||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
def train(self, num_iters=1):
|
def train(self, num_iters=1):
|
||||||
@@ -71,7 +71,7 @@ class MakeFxModule:
|
|||||||
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
|
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
|
||||||
ts_g = torch.jit.script(fx_g)
|
ts_g = torch.jit.script(fx_g)
|
||||||
temp = tempfile.NamedTemporaryFile(
|
temp = tempfile.NamedTemporaryFile(
|
||||||
suffix="_shark_ts", prefix="temp_ts_"
|
suffix="_amdshark_ts", prefix="temp_ts_"
|
||||||
)
|
)
|
||||||
ts_g.save(temp.name)
|
ts_g.save(temp.name)
|
||||||
new_ts = torch.jit.load(temp.name)
|
new_ts = torch.jit.load(temp.name)
|
||||||
@@ -3,7 +3,7 @@ from typing import List, Optional
|
|||||||
import torch
|
import torch
|
||||||
from torch.fx.experimental.proxy_tensor import make_fx
|
from torch.fx.experimental.proxy_tensor import make_fx
|
||||||
from torch._functorch.compile_utils import strip_overloads
|
from torch._functorch.compile_utils import strip_overloads
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from torch._decomp import get_decompositions
|
from torch._decomp import get_decompositions
|
||||||
from torch.func import functionalize
|
from torch.func import functionalize
|
||||||
import io
|
import io
|
||||||
@@ -93,13 +93,13 @@ def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule) -> bool:
|
|||||||
return unwrapped_tuple
|
return unwrapped_tuple
|
||||||
|
|
||||||
|
|
||||||
class SharkBackend:
|
class AMDSharkBackend:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
|
self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
|
||||||
):
|
):
|
||||||
self.fx_g = fx_g
|
self.fx_g = fx_g
|
||||||
self.inputs = inputs
|
self.inputs = inputs
|
||||||
self.shark_module = None
|
self.amdshark_module = None
|
||||||
self.device: str = options.get("device", "cpu")
|
self.device: str = options.get("device", "cpu")
|
||||||
self.was_unwrapped: bool = False
|
self.was_unwrapped: bool = False
|
||||||
self.none_indices: list = []
|
self.none_indices: list = []
|
||||||
@@ -125,19 +125,19 @@ class SharkBackend:
|
|||||||
bytecode_stream = io.BytesIO()
|
bytecode_stream = io.BytesIO()
|
||||||
mlir_module.operation.write_bytecode(bytecode_stream)
|
mlir_module.operation.write_bytecode(bytecode_stream)
|
||||||
bytecode = bytecode_stream.getvalue()
|
bytecode = bytecode_stream.getvalue()
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module=bytecode,
|
mlir_module=bytecode,
|
||||||
device=self.device,
|
device=self.device,
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
)
|
)
|
||||||
shark_module.compile(extra_args=[])
|
amdshark_module.compile(extra_args=[])
|
||||||
self.shark_module = shark_module
|
self.amdshark_module = amdshark_module
|
||||||
|
|
||||||
def __call__(self, *inputs):
|
def __call__(self, *inputs):
|
||||||
np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
|
np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
|
||||||
np_outs = self.shark_module("forward", np_inputs)
|
np_outs = self.amdshark_module("forward", np_inputs)
|
||||||
if self.was_unwrapped:
|
if self.was_unwrapped:
|
||||||
np_outs = [
|
np_outs = [
|
||||||
np_outs,
|
np_outs,
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
import torch
|
import torch
|
||||||
import shark
|
import amdshark
|
||||||
|
|
||||||
|
|
||||||
def foo(x, a):
|
def foo(x, a):
|
||||||
@@ -9,8 +9,8 @@ def foo(x, a):
|
|||||||
return x + 3
|
return x + 3
|
||||||
|
|
||||||
|
|
||||||
shark_options = {"device": "cpu"}
|
amdshark_options = {"device": "cpu"}
|
||||||
compiled = torch.compile(foo, backend="shark", options=shark_options)
|
compiled = torch.compile(foo, backend="amdshark", options=amdshark_options)
|
||||||
|
|
||||||
input = torch.ones(4)
|
input = torch.ones(4)
|
||||||
|
|
||||||
@@ -22,7 +22,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# standard imports\n",
|
"# standard imports\n",
|
||||||
"import torch\n",
|
"import torch\n",
|
||||||
"from shark.iree_utils import get_iree_compiled_module"
|
"from amdshark.iree_utils import get_iree_compiled_module"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch_mlir import compile, OutputType
|
from torch_mlir import compile, OutputType
|
||||||
|
|
||||||
from shark.iree_utils import get_iree_compiled_module
|
from amdshark.iree_utils import get_iree_compiled_module
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import torchdynamo
|
import torchdynamo
|
||||||
@@ -32,7 +32,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# eager mode imports\n",
|
"# eager mode imports\n",
|
||||||
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
|
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
|
||||||
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
|
"from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
@@ -440,7 +440,7 @@
{
"cell_type": "markdown",
"source": [
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
"There is a convenience class `AMDSharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
@@ -684,9 +684,9 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# eager mode RAII\n",
|
"# eager mode RAII\n",
|
||||||
"from shark.shark_runner import SharkEagerMode\n",
|
"from amdshark.amdshark_runner import AMDSharkEagerMode\n",
|
||||||
"\n",
|
"\n",
|
||||||
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
|
"amdshark_eager_mode = AMDSharkEagerMode(\"cpu\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"t = torch.ones((10, 10))\n",
|
"t = torch.ones((10, 10))\n",
|
||||||
"u = torch.ones((10, 10))\n",
|
"u = torch.ones((10, 10))\n",
|
||||||
@@ -712,7 +712,7 @@
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
"The `AMDSharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `AMDSharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `AMDSharkEagerMode`, or switch backends, you need to `del` the instance:"
],
"metadata": {
"collapsed": false,
@@ -757,8 +757,8 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"del shark_eager_mode\n",
|
"del amdshark_eager_mode\n",
|
||||||
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
|
"amdshark_eager_mode = AMDSharkEagerMode(\"cuda\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"t = torch.ones((10, 10))\n",
|
"t = torch.ones((10, 10))\n",
|
||||||
"u = torch.ones((10, 10))\n",
|
"u = torch.ones((10, 10))\n",
|
||||||
@@ -17,8 +17,8 @@ from torch.utils.cpp_extension import load_inline, include_paths
|
|||||||
from torch_mlir.eager_mode import torch_mlir_tensor
|
from torch_mlir.eager_mode import torch_mlir_tensor
|
||||||
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
|
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
|
||||||
|
|
||||||
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
|
from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
|
||||||
from shark.shark_runner import SharkEagerMode
|
from amdshark.amdshark_runner import AMDSharkEagerMode
|
||||||
|
|
||||||
|
|
||||||
def test_cpu():
|
def test_cpu():
|
||||||
@@ -85,7 +85,7 @@ def test_gpu():
|
|||||||
|
|
||||||
def test_python_mode_ref_backend():
|
def test_python_mode_ref_backend():
|
||||||
# hide this wherever you want?
|
# hide this wherever you want?
|
||||||
_ = SharkEagerMode("refbackend")
|
_ = AMDSharkEagerMode("refbackend")
|
||||||
|
|
||||||
t = torch.ones((10, 10), device="cpu")
|
t = torch.ones((10, 10), device="cpu")
|
||||||
u = torch.ones((10, 10), device="cpu")
|
u = torch.ones((10, 10), device="cpu")
|
||||||
@@ -103,7 +103,7 @@ def test_python_mode_ref_backend():
|
|||||||
|
|
||||||
def test_python_mode_iree_cpu():
|
def test_python_mode_iree_cpu():
|
||||||
# hide this wherever you want?
|
# hide this wherever you want?
|
||||||
_ = SharkEagerMode("cpu")
|
_ = AMDSharkEagerMode("cpu")
|
||||||
|
|
||||||
t = torch.ones((10, 10), device="cpu")
|
t = torch.ones((10, 10), device="cpu")
|
||||||
u = torch.ones((10, 10), device="cpu")
|
u = torch.ones((10, 10), device="cpu")
|
||||||
@@ -121,7 +121,7 @@ def test_python_mode_iree_cpu():
|
|||||||
|
|
||||||
|
|
||||||
def test_python_mode_iree_gpu():
|
def test_python_mode_iree_gpu():
|
||||||
_ = SharkEagerMode("gpu")
|
_ = AMDSharkEagerMode("gpu")
|
||||||
|
|
||||||
t = torch.ones((10, 10), device="cpu")
|
t = torch.ones((10, 10), device="cpu")
|
||||||
u = torch.ones((10, 10), device="cpu")
|
u = torch.ones((10, 10), device="cpu")
|
||||||
@@ -47,7 +47,7 @@ golden_probabilities = torch.nn.functional.softmax(
|
|||||||
|
|
||||||
golden_confidences = golden_confidences.numpy()
|
golden_confidences = golden_confidences.numpy()
|
||||||
|
|
||||||
from shark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
|
from amdshark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
|
||||||
|
|
||||||
input_detached_clone = input_batch.clone()
|
input_detached_clone = input_batch.clone()
|
||||||
eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
|
eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
|
||||||
@@ -62,7 +62,7 @@ probabilities = torch.nn.functional.softmax(
|
|||||||
torch.from_numpy(confidences), dim=0
|
torch.from_numpy(confidences), dim=0
|
||||||
).numpy()
|
).numpy()
|
||||||
|
|
||||||
print("The obtained result via shark is: ", confidences)
|
print("The obtained result via amdshark is: ", confidences)
|
||||||
print("The golden result is:", golden_confidences)
|
print("The golden result is:", golden_confidences)
|
||||||
|
|
||||||
np.testing.assert_allclose(
|
np.testing.assert_allclose(
|
||||||
@@ -3,7 +3,7 @@ import requests
|
|||||||
|
|
||||||
from transformers import CLIPProcessor, TFCLIPModel
|
from transformers import CLIPProcessor, TFCLIPModel
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
# Create a set of inputs
|
# Create a set of inputs
|
||||||
clip_vit_inputs = [
|
clip_vit_inputs = [
|
||||||
@@ -43,7 +43,7 @@ if __name__ == "__main__":
|
|||||||
padding=True,
|
padding=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
CLIPModule(),
|
CLIPModule(),
|
||||||
(
|
(
|
||||||
inputs["input_ids"],
|
inputs["input_ids"],
|
||||||
@@ -51,11 +51,11 @@ if __name__ == "__main__":
|
|||||||
inputs["pixel_values"],
|
inputs["pixel_values"],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
|
|
||||||
print(
|
print(
|
||||||
shark_module.forward(
|
amdshark_module.forward(
|
||||||
(
|
(
|
||||||
inputs["input_ids"],
|
inputs["input_ids"],
|
||||||
inputs["attention_mask"],
|
inputs["attention_mask"],
|
||||||
@@ -7,7 +7,7 @@ import torch
|
|||||||
|
|
||||||
from torch.fx.experimental.proxy_tensor import make_fx
|
from torch.fx.experimental.proxy_tensor import make_fx
|
||||||
from torch._decomp import get_decompositions
|
from torch._decomp import get_decompositions
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
import torch_mlir
|
import torch_mlir
|
||||||
import tempfile
|
import tempfile
|
||||||
import functools
|
import functools
|
||||||
@@ -176,12 +176,12 @@ def compile_through_fx(model, inputs, mlir_loc=None):
|
|||||||
|
|
||||||
mlir_model = str(module)
|
mlir_model = str(module)
|
||||||
func_name = "forward"
|
func_name = "forward"
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_model, device=args.device, mlir_dialect="linalg"
|
mlir_model, device=args.device, mlir_dialect="linalg"
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
|
|
||||||
return shark_module
|
return amdshark_module
|
||||||
|
|
||||||
|
|
||||||
model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
|
model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
|
||||||
@@ -213,22 +213,22 @@ if __name__ == "__main__":
|
|||||||
img_LR = img_LR.to(device)
|
img_LR = img_LR.to(device)
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
shark_module = compile_through_fx(inference, img_LR)
|
amdshark_module = compile_through_fx(inference, img_LR)
|
||||||
shark_output = shark_module.forward((img_LR,))
|
amdshark_output = amdshark_module.forward((img_LR,))
|
||||||
shark_output = torch.from_numpy(shark_output)
|
amdshark_output = torch.from_numpy(amdshark_output)
|
||||||
shark_output = (
|
amdshark_output = (
|
||||||
shark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
|
amdshark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
|
||||||
)
|
)
|
||||||
esrgan_output = (
|
esrgan_output = (
|
||||||
model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
|
model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
|
||||||
)
|
)
|
||||||
# SHARK OUTPUT
|
# AMDSHARK OUTPUT
|
||||||
shark_output = np.transpose(shark_output[[2, 1, 0], :, :], (1, 2, 0))
|
amdshark_output = np.transpose(amdshark_output[[2, 1, 0], :, :], (1, 2, 0))
|
||||||
shark_output = (shark_output * 255.0).round()
|
amdshark_output = (amdshark_output * 255.0).round()
|
||||||
cv2.imwrite(
|
cv2.imwrite(
|
||||||
"OutputImages/{:s}_rlt_shark_output.png".format(base), shark_output
|
"OutputImages/{:s}_rlt_amdshark_output.png".format(base), amdshark_output
|
||||||
)
|
)
|
||||||
print("Generated SHARK's output")
|
print("Generated AMDSHARK's output")
|
||||||
# ESRGAN OUTPUT
|
# ESRGAN OUTPUT
|
||||||
esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
|
esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
|
||||||
esrgan_output = (esrgan_output * 255.0).round()
|
esrgan_output = (esrgan_output * 255.0).round()
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
||||||
import torch
|
import torch
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
from iree.compiler import compile_str
|
from iree.compiler import compile_str
|
||||||
from iree import runtime as ireert
|
from iree import runtime as ireert
|
||||||
import os
|
import os
|
||||||
@@ -35,7 +35,7 @@ if __name__ == "__main__":
|
|||||||
return_tensors="pt",
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
AlbertModule(),
|
AlbertModule(),
|
||||||
inputs,
|
inputs,
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -43,9 +43,9 @@ if __name__ == "__main__":
|
|||||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||||
is_dynamic=False, tracing_required=True
|
is_dynamic=False, tracing_required=True
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(minilm_mlir)
|
amdshark_module = AMDSharkInference(minilm_mlir)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
token_logits = torch.tensor(amdshark_module.forward(inputs))
|
||||||
mask_id = torch.where(
|
mask_id = torch.where(
|
||||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||||
)[1]
|
)[1]
|
||||||
@@ -69,7 +69,7 @@ if __name__ == "__main__":
|
|||||||
encoded_inputs["input_ids"],
|
encoded_inputs["input_ids"],
|
||||||
encoded_inputs["attention_mask"],
|
encoded_inputs["attention_mask"],
|
||||||
)
|
)
|
||||||
token_logits = torch.tensor(shark_module.forward(inputs))
|
token_logits = torch.tensor(amdshark_module.forward(inputs))
|
||||||
mask_id = torch.where(
|
mask_id = torch.where(
|
||||||
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
encoded_inputs["input_ids"] == tokenizer.mask_token_id
|
||||||
)[1]
|
)[1]
|
||||||
@@ -3,8 +3,8 @@ import requests
|
|||||||
|
|
||||||
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
from iree.compiler import tf as tfc
|
from iree.compiler import tf as tfc
|
||||||
from iree.compiler import compile_str
|
from iree.compiler import compile_str
|
||||||
from iree import runtime as ireert
|
from iree import runtime as ireert
|
||||||
@@ -46,7 +46,7 @@ if __name__ == "__main__":
|
|||||||
return_tensors="tf",
|
return_tensors="tf",
|
||||||
)
|
)
|
||||||
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
AlbertModule(),
|
AlbertModule(),
|
||||||
inputs,
|
inputs,
|
||||||
frontend="tf",
|
frontend="tf",
|
||||||
@@ -54,11 +54,11 @@ if __name__ == "__main__":
|
|||||||
minilm_mlir, func_name = mlir_importer.import_mlir(
|
minilm_mlir, func_name = mlir_importer.import_mlir(
|
||||||
is_dynamic=False, tracing_required=False
|
is_dynamic=False, tracing_required=False
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(minilm_mlir, mlir_dialect="mhlo")
|
amdshark_module = AMDSharkInference(minilm_mlir, mlir_dialect="mhlo")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
output_idx = 0
|
output_idx = 0
|
||||||
data_idx = 1
|
data_idx = 1
|
||||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
token_logits = amdshark_module.forward(inputs)[output_idx][data_idx]
|
||||||
mask_id = np.where(
|
mask_id = np.where(
|
||||||
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
|
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
|
||||||
)
|
)
|
||||||
@@ -82,7 +82,7 @@ if __name__ == "__main__":
|
|||||||
encoded_inputs["input_ids"],
|
encoded_inputs["input_ids"],
|
||||||
encoded_inputs["attention_mask"],
|
encoded_inputs["attention_mask"],
|
||||||
)
|
)
|
||||||
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
|
token_logits = amdshark_module.forward(inputs)[output_idx][data_idx]
|
||||||
mask_id = np.where(
|
mask_id = np.where(
|
||||||
tf.squeeze(encoded_inputs["input_ids"])
|
tf.squeeze(encoded_inputs["input_ids"])
|
||||||
== tokenizer.mask_token_id
|
== tokenizer.mask_token_id
|
||||||
14
amdshark/examples/amdshark_inference/bloom_tank.py
Normal file
@@ -0,0 +1,14 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model

mlir_model, func_name, inputs, golden_out = download_model(
    "bloom", frontend="torch"
)

amdshark_module = AMDSharkInference(
    mlir_model, device="cpu", mlir_dialect="tm_tensor"
)
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)
@@ -3,7 +3,7 @@ import requests
|
|||||||
|
|
||||||
from transformers import GPT2Tokenizer, TFGPT2Model
|
from transformers import GPT2Tokenizer, TFGPT2Model
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
# Create a set of inputs
|
# Create a set of inputs
|
||||||
gpt2_inputs = [
|
gpt2_inputs = [
|
||||||
@@ -30,11 +30,11 @@ if __name__ == "__main__":
|
|||||||
text = "I love the distilled version of models."
|
text = "I love the distilled version of models."
|
||||||
|
|
||||||
inputs = tokenizer(text, return_tensors="tf")
|
inputs = tokenizer(text, return_tensors="tf")
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
|
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
print(
|
print(
|
||||||
shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
|
amdshark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
|
||||||
)
|
)
|
||||||
@@ -1,4 +1,4 @@
# SHARK LLaMA
# AMDSHARK LLaMA

## TORCH-MLIR Version

@@ -14,5 +14,5 @@ git clone https://github.com/nod-ai/llama.git
Then in this repository
```
pip install -e .
python llama/shark_model.py
python llama/amdshark_model.py
```
@@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch_mlir
|
import torch_mlir
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_compile import shark_compile_through_fx
|
from amdshark.amdshark_compile import amdshark_compile_through_fx
|
||||||
from MEGABYTE_pytorch import MEGABYTE
|
from MEGABYTE_pytorch import MEGABYTE
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -38,10 +38,10 @@ inputs = [torch.randint(0, 16000, (1, 1024, 4))]
|
|||||||
|
|
||||||
# CURRENTLY IT BAILS OUT HERE BECAUSE OF MISSING OP LOWERINGS :-
|
# CURRENTLY IT BAILS OUT HERE BECAUSE OF MISSING OP LOWERINGS :-
|
||||||
# 1. aten.alias
|
# 1. aten.alias
|
||||||
shark_module, _ = shark_compile_through_fx(
|
amdshark_module, _ = amdshark_compile_through_fx(
|
||||||
model=megaModel,
|
model=megaModel,
|
||||||
inputs=inputs,
|
inputs=inputs,
|
||||||
extended_model_name="mega_shark",
|
extended_model_name="mega_amdshark",
|
||||||
is_f16=False,
|
is_f16=False,
|
||||||
f16_input_mask=None,
|
f16_input_mask=None,
|
||||||
save_dir=os.getcwd(),
|
save_dir=os.getcwd(),
|
||||||
@@ -59,8 +59,8 @@ def print_output_info(output, msg):
|
|||||||
print("\n\t", output.shape)
|
print("\n\t", output.shape)
|
||||||
|
|
||||||
|
|
||||||
ans = shark_module("forward", inputs)
|
ans = amdshark_module("forward", inputs)
|
||||||
print_output_info(torch.from_numpy(ans), "SHARK's output")
|
print_output_info(torch.from_numpy(ans), "AMDSHARK's output")
|
||||||
|
|
||||||
ans = megaModel.forward(*inputs)
|
ans = megaModel.forward(*inputs)
|
||||||
print_output_info(ans, "ORIGINAL Model's output")
|
print_output_info(ans, "ORIGINAL Model's output")
|
||||||
@@ -68,5 +68,5 @@ print_output_info(ans, "ORIGINAL Model's output")
|
|||||||
# and sample from the logits accordingly
|
# and sample from the logits accordingly
|
||||||
# or you can use the generate function
|
# or you can use the generate function
|
||||||
|
|
||||||
# NEED TO LOOK AT THIS LATER IF REQUIRED IN SHARK.
|
# NEED TO LOOK AT THIS LATER IF REQUIRED IN AMDSHARK.
|
||||||
# sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4)
|
# sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4)
|
||||||
31
amdshark/examples/amdshark_inference/mhlo_example.py
Normal file
@@ -0,0 +1,31 @@
from amdshark.amdshark_inference import AMDSharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)

print("Running amdshark on cpu backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")

# Generate the random inputs and feed into the graph.
x = amdshark_module.generate_random_inputs()
amdshark_module.compile()
print(amdshark_module.forward(x))

print("Running amdshark on cuda backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cuda", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))

print("Running amdshark on vulkan backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="vulkan", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))
@@ -1,6 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
|
||||||
@@ -23,13 +23,13 @@ class MiniLMSequenceClassification(torch.nn.Module):
|
|||||||
|
|
||||||
test_input = torch.randint(2, (1, 128))
|
test_input = torch.randint(2, (1, 128))
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
MiniLMSequenceClassification(),
|
MiniLMSequenceClassification(),
|
||||||
(test_input,),
|
(test_input,),
|
||||||
jit_trace=True,
|
jit_trace=True,
|
||||||
benchmark_mode=True,
|
benchmark_mode=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
shark_module.forward((test_input,))
|
amdshark_module.forward((test_input,))
|
||||||
shark_module.benchmark_all((test_input,))
|
amdshark_module.benchmark_all((test_input,))
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from transformers import BertModel, BertTokenizer, TFBertModel
|
from transformers import BertModel, BertTokenizer, TFBertModel
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
MAX_SEQUENCE_LENGTH = 512
|
MAX_SEQUENCE_LENGTH = 512
|
||||||
BATCH_SIZE = 1
|
BATCH_SIZE = 1
|
||||||
@@ -53,9 +53,9 @@ if __name__ == "__main__":
|
|||||||
encoded_input["attention_mask"],
|
encoded_input["attention_mask"],
|
||||||
encoded_input["token_type_ids"],
|
encoded_input["token_type_ids"],
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
BertModule(), test_input, benchmark_mode=True
|
BertModule(), test_input, benchmark_mode=True
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
shark_module.benchmark_all(test_input)
|
amdshark_module.benchmark_all(test_input)
|
||||||
@@ -3,7 +3,7 @@ import torch
|
|||||||
import jax
|
import jax
|
||||||
from typing import Union, Dict, List, Any
|
from typing import Union, Dict, List, Any
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
import io
|
import io
|
||||||
|
|
||||||
NumpyTree = Union[np.ndarray, Dict[str, np.ndarray], List[np.ndarray]]
|
NumpyTree = Union[np.ndarray, Dict[str, np.ndarray], List[np.ndarray]]
|
||||||
@@ -60,11 +60,11 @@ jax_model = get_jax_model()
|
|||||||
mlir = export_jax_to_mlir(jax_model, sample_input)
|
mlir = export_jax_to_mlir(jax_model, sample_input)
|
||||||
|
|
||||||
# Compile and load module.
|
# Compile and load module.
|
||||||
shark_inference = SharkInference(mlir_module=mlir, mlir_dialect="mhlo")
|
amdshark_inference = AMDSharkInference(mlir_module=mlir, mlir_dialect="mhlo")
|
||||||
shark_inference.compile()
|
amdshark_inference.compile()
|
||||||
|
|
||||||
# Run main function.
|
# Run main function.
|
||||||
result = shark_inference("main", jax.tree_util.tree_flatten(sample_input)[0])
|
result = amdshark_inference("main", jax.tree_util.tree_flatten(sample_input)[0])
|
||||||
|
|
||||||
# Run JAX model.
|
# Run JAX model.
|
||||||
reference_result = jax.tree_util.tree_flatten(jax_model(**sample_input))[0]
|
reference_result = jax.tree_util.tree_flatten(jax_model(**sample_input))[0]
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
flax
|
flax
|
||||||
jax[cpu]
|
jax[cpu]
|
||||||
nodai-SHARK
|
nodai-AMDSHARK
|
||||||
orbax
|
orbax
|
||||||
transformers
|
transformers
|
||||||
torch
|
torch
|
||||||
23
amdshark/examples/amdshark_inference/minilm_jit.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
from amdshark.amdshark_downloader import download_model
|
||||||
|
|
||||||
|
|
||||||
|
mlir_model, func_name, inputs, golden_out = download_model(
|
||||||
|
"microsoft/MiniLM-L12-H384-uncased",
|
||||||
|
frontend="torch",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
amdshark_module = AMDSharkInference(mlir_model, device="cpu", mlir_dialect="linalg")
|
||||||
|
amdshark_module.compile()
|
||||||
|
result = amdshark_module.forward(inputs)
|
||||||
|
print("The obtained result via amdshark is: ", result)
|
||||||
|
print("The golden result is:", golden_out)
|
||||||
|
|
||||||
|
|
||||||
|
# Let's generate random inputs, currently supported
|
||||||
|
# for static models.
|
||||||
|
rand_inputs = amdshark_module.generate_random_inputs()
|
||||||
|
rand_results = amdshark_module.forward(rand_inputs)
|
||||||
|
|
||||||
|
print("Running amdshark_module with random_inputs is: ", rand_results)
|
||||||
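The new example above compiles the downloaded MiniLM MLIR for the CPU. As a hedged sketch (not part of the commit), the same artifact can be retargeted simply by changing the `device` argument, using the `download_model` and `AMDSharkInference` calls exactly as shown here and in the `v_diffusion.py` example later in this diff:

```python
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model

# Sketch only: reuse the downloaded MLIR on a different backend.
# "vulkan" mirrors other examples in this diff; any backend supported by the
# local IREE build could be substituted.
mlir_model, func_name, inputs, golden_out = download_model(
    "microsoft/MiniLM-L12-H384-uncased",
    frontend="torch",
)

amdshark_module = AMDSharkInference(mlir_model, device="vulkan", mlir_dialect="linalg")
amdshark_module.compile()
print("Result on vulkan:", amdshark_module.forward(inputs))
```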
@@ -1,6 +1,6 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from transformers import BertModel, BertTokenizer, TFBertModel
|
from transformers import BertModel, BertTokenizer, TFBertModel
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
MAX_SEQUENCE_LENGTH = 512
|
MAX_SEQUENCE_LENGTH = 512
|
||||||
BATCH_SIZE = 1
|
BATCH_SIZE = 1
|
||||||
@@ -48,7 +48,7 @@ if __name__ == "__main__":
|
|||||||
tf.convert_to_tensor(encoded_input[key]), 0
|
tf.convert_to_tensor(encoded_input[key]), 0
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
BertModule(),
|
BertModule(),
|
||||||
(
|
(
|
||||||
encoded_input["input_ids"],
|
encoded_input["input_ids"],
|
||||||
@@ -56,11 +56,11 @@ if __name__ == "__main__":
|
|||||||
encoded_input["token_type_ids"],
|
encoded_input["token_type_ids"],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
|
|
||||||
print(
|
print(
|
||||||
shark_module.forward(
|
amdshark_module.forward(
|
||||||
(
|
(
|
||||||
encoded_input["input_ids"],
|
encoded_input["input_ids"],
|
||||||
encoded_input["attention_mask"],
|
encoded_input["attention_mask"],
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
import torchvision.models as models
|
import torchvision.models as models
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
|
|
||||||
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
|
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
|
||||||
|
|
||||||
@@ -21,7 +21,7 @@ class ResnestModule(torch.nn.Module):
|
|||||||
input = torch.randn(1, 3, 224, 224)
|
input = torch.randn(1, 3, 224, 224)
|
||||||
|
|
||||||
|
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
ResnestModule(),
|
ResnestModule(),
|
||||||
(input,),
|
(input,),
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -33,7 +33,7 @@ mlir_importer = SharkImporter(
|
|||||||
|
|
||||||
print(golden_out)
|
print(golden_out)
|
||||||
|
|
||||||
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg")
|
amdshark_module = AMDSharkInference(vision_mlir, mlir_dialect="linalg")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
result = shark_module.forward((input,))
|
result = amdshark_module.forward((input,))
|
||||||
print("Obtained result", result)
|
print("Obtained result", result)
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -49,21 +49,21 @@ module = torch_mlir.compile(
|
|||||||
mlir_model = module
|
mlir_model = module
|
||||||
func_name = "forward"
|
func_name = "forward"
|
||||||
|
|
||||||
shark_module = SharkInference(mlir_model, device="cuda", mlir_dialect="linalg")
|
amdshark_module = AMDSharkInference(mlir_model, device="cuda", mlir_dialect="linalg")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
|
|
||||||
|
|
||||||
def shark_result(x):
|
def amdshark_result(x):
|
||||||
x_ny = x.cpu().detach().numpy()
|
x_ny = x.cpu().detach().numpy()
|
||||||
inputs = (x_ny,)
|
inputs = (x_ny,)
|
||||||
result = shark_module.forward(inputs)
|
result = amdshark_module.forward(inputs)
|
||||||
return torch.from_numpy(result)
|
return torch.from_numpy(result)
|
||||||
|
|
||||||
|
|
||||||
observed_out = shark_result(test_input_fp16)
|
observed_out = amdshark_result(test_input_fp16)
|
||||||
|
|
||||||
print("Golden result:", actual_out_fp16)
|
print("Golden result:", actual_out_fp16)
|
||||||
print("SHARK result:", observed_out)
|
print("AMDSHARK result:", observed_out)
|
||||||
|
|
||||||
actual_out_fp16 = actual_out_fp16.to(device=torch.device("cpu"))
|
actual_out_fp16 = actual_out_fp16.to(device=torch.device("cpu"))
|
||||||
|
|
||||||
@@ -4,8 +4,8 @@ import torch
|
|||||||
import torchvision.models as models
|
import torchvision.models as models
|
||||||
from torchvision import transforms
|
from torchvision import transforms
|
||||||
import sys
|
import sys
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_downloader import download_model
|
from amdshark.amdshark_downloader import download_model
|
||||||
|
|
||||||
|
|
||||||
################################## Preprocessing inputs and model ############
|
################################## Preprocessing inputs and model ############
|
||||||
@@ -70,13 +70,13 @@ mlir_model, func_name, inputs, golden_out = download_model(
|
|||||||
"resnet50", frontend="torch"
|
"resnet50", frontend="torch"
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(mlir_model, mlir_dialect="linalg")
|
amdshark_module = AMDSharkInference(mlir_model, mlir_dialect="linalg")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
path = shark_module.save_module()
|
path = amdshark_module.save_module()
|
||||||
shark_module.load_module(path)
|
amdshark_module.load_module(path)
|
||||||
result = shark_module("forward", (img.detach().numpy(),))
|
result = amdshark_module("forward", (img.detach().numpy(),))
|
||||||
|
|
||||||
print("The top 3 results obtained via shark_runner is:")
|
print("The top 3 results obtained via amdshark_runner is:")
|
||||||
print(top3_possibilities(torch.from_numpy(result)))
|
print(top3_possibilities(torch.from_numpy(result)))
|
||||||
|
|
||||||
print()
|
print()
|
||||||
@@ -34,8 +34,8 @@ import subprocess
|
|||||||
|
|
||||||
from torch.fx.experimental.proxy_tensor import make_fx
|
from torch.fx.experimental.proxy_tensor import make_fx
|
||||||
from torch._decomp import get_decompositions
|
from torch._decomp import get_decompositions
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_downloader import download_public_file
|
from amdshark.amdshark_downloader import download_public_file
|
||||||
from transformers import (
|
from transformers import (
|
||||||
BloomTokenizerFast,
|
BloomTokenizerFast,
|
||||||
BloomForSequenceClassification,
|
BloomForSequenceClassification,
|
||||||
@@ -77,13 +77,13 @@ class ShardedBloom:
|
|||||||
module = f_.read()
|
module = f_.read()
|
||||||
f_.close()
|
f_.close()
|
||||||
module = bytes(module, "utf-8")
|
module = bytes(module, "utf-8")
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
module,
|
module,
|
||||||
device=device,
|
device=device,
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
device_idx=device_idx,
|
device_idx=device_idx,
|
||||||
)
|
)
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{self.src_folder}/{layer_name}",
|
module_name=f"{self.src_folder}/{layer_name}",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -92,14 +92,14 @@ class ShardedBloom:
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
"",
|
"",
|
||||||
device=device,
|
device=device,
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
device_idx=device_idx,
|
device_idx=device_idx,
|
||||||
)
|
)
|
||||||
|
|
||||||
return shark_module
|
return amdshark_module
|
||||||
|
|
||||||
def init_layers(self, device, replace=False, device_idx=[0]):
|
def init_layers(self, device, replace=False, device_idx=[0]):
|
||||||
if device_idx is not None:
|
if device_idx is not None:
|
||||||
@@ -311,7 +311,7 @@ def _prepare_attn_mask(
|
|||||||
|
|
||||||
def download_model(destination_folder, model_name):
|
def download_model(destination_folder, model_name):
|
||||||
download_public_file(
|
download_public_file(
|
||||||
f"gs://shark_tank/sharded_bloom/{model_name}/", destination_folder
|
f"gs://amdshark_tank/sharded_bloom/{model_name}/", destination_folder
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -2,7 +2,7 @@ import sys
|
|||||||
import os
|
import os
|
||||||
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
|
||||||
import re
|
import re
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
@@ -142,7 +142,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
mlir_str = bytes(mlir_str, "utf-8")
|
mlir_str = bytes(mlir_str, "utf-8")
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_str,
|
mlir_str,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
@@ -150,7 +150,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if will_compile:
|
if will_compile:
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{working_dir}/word_embeddings",
|
module_name=f"{working_dir}/word_embeddings",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -159,8 +159,8 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.load_module(f"{working_dir}/word_embeddings.vmfb")
|
amdshark_module.load_module(f"{working_dir}/word_embeddings.vmfb")
|
||||||
input_embeds = shark_module(
|
input_embeds = amdshark_module(
|
||||||
inputs=(input_ids,), function_name="forward"
|
inputs=(input_ids,), function_name="forward"
|
||||||
)
|
)
|
||||||
input_embeds = torch.tensor(input_embeds).float()
|
input_embeds = torch.tensor(input_embeds).float()
|
||||||
@@ -175,7 +175,7 @@ if __name__ == "__main__":
|
|||||||
mlir_str = f.read()
|
mlir_str = f.read()
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_str,
|
mlir_str,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
@@ -183,7 +183,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if will_compile:
|
if will_compile:
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{working_dir}/word_embeddings_layernorm",
|
module_name=f"{working_dir}/word_embeddings_layernorm",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -192,10 +192,10 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.load_module(
|
amdshark_module.load_module(
|
||||||
f"{working_dir}/word_embeddings_layernorm.vmfb"
|
f"{working_dir}/word_embeddings_layernorm.vmfb"
|
||||||
)
|
)
|
||||||
hidden_states = shark_module(
|
hidden_states = amdshark_module(
|
||||||
inputs=(input_embeds,), function_name="forward"
|
inputs=(input_embeds,), function_name="forward"
|
||||||
)
|
)
|
||||||
hidden_states = torch.tensor(hidden_states).float()
|
hidden_states = torch.tensor(hidden_states).float()
|
||||||
@@ -243,7 +243,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
mlir_str = bytes(mlir_str, "utf-8")
|
mlir_str = bytes(mlir_str, "utf-8")
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_str,
|
mlir_str,
|
||||||
device=device,
|
device=device,
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
@@ -251,7 +251,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if will_compile:
|
if will_compile:
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{working_dir}/bloom_block_{layer_name}",
|
module_name=f"{working_dir}/bloom_block_{layer_name}",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -260,11 +260,11 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.load_module(
|
amdshark_module.load_module(
|
||||||
f"{working_dir}/bloom_block_{layer_name}.vmfb"
|
f"{working_dir}/bloom_block_{layer_name}.vmfb"
|
||||||
)
|
)
|
||||||
|
|
||||||
output = shark_module(
|
output = amdshark_module(
|
||||||
inputs=(
|
inputs=(
|
||||||
hidden_states.detach().numpy(),
|
hidden_states.detach().numpy(),
|
||||||
alibi.detach().numpy(),
|
alibi.detach().numpy(),
|
||||||
@@ -290,7 +290,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
mlir_str = bytes(mlir_str, "utf-8")
|
mlir_str = bytes(mlir_str, "utf-8")
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_str,
|
mlir_str,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
@@ -298,7 +298,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if will_compile:
|
if will_compile:
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{working_dir}/ln_f",
|
module_name=f"{working_dir}/ln_f",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -307,11 +307,11 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.load_module(f"{working_dir}/ln_f.vmfb")
|
amdshark_module.load_module(f"{working_dir}/ln_f.vmfb")
|
||||||
|
|
||||||
hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt")
|
hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt")
|
||||||
|
|
||||||
hidden_states = shark_module(
|
hidden_states = amdshark_module(
|
||||||
inputs=(hidden_states,), function_name="forward"
|
inputs=(hidden_states,), function_name="forward"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -347,7 +347,7 @@ if __name__ == "__main__":
|
|||||||
logits = lm_head(torch.tensor(hidden_states).float())
|
logits = lm_head(torch.tensor(hidden_states).float())
|
||||||
|
|
||||||
else:
|
else:
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_str,
|
mlir_str,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
mlir_dialect="tm_tensor",
|
mlir_dialect="tm_tensor",
|
||||||
@@ -355,7 +355,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
|
|
||||||
if will_compile:
|
if will_compile:
|
||||||
shark_module.save_module(
|
amdshark_module.save_module(
|
||||||
module_name=f"{working_dir}/lm_head",
|
module_name=f"{working_dir}/lm_head",
|
||||||
extra_args=[
|
extra_args=[
|
||||||
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
|
||||||
@@ -364,9 +364,9 @@ if __name__ == "__main__":
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module.load_module(f"{working_dir}/lm_head.vmfb")
|
amdshark_module.load_module(f"{working_dir}/lm_head.vmfb")
|
||||||
|
|
||||||
logits = shark_module(
|
logits = amdshark_module(
|
||||||
inputs=(hidden_states,), function_name="forward"
|
inputs=(hidden_states,), function_name="forward"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -52,8 +52,8 @@ import sys
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
|
|
||||||
|
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
@@ -349,7 +349,7 @@ input_dlrm = (dense_inp, vs0, *vsi)
|
|||||||
|
|
||||||
golden_output = dlrm_model(dense_inp, vs0, *vsi)
|
golden_output = dlrm_model(dense_inp, vs0, *vsi)
|
||||||
|
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
dlrm_model,
|
dlrm_model,
|
||||||
input_dlrm,
|
input_dlrm,
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -359,11 +359,11 @@ mlir_importer = SharkImporter(
|
|||||||
tracing_required=True
|
tracing_required=True
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
dlrm_mlir, device="vulkan", mlir_dialect="linalg"
|
dlrm_mlir, device="vulkan", mlir_dialect="linalg"
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
result = shark_module.forward(input_dlrm)
|
result = amdshark_module.forward(input_dlrm)
|
||||||
np.testing.assert_allclose(
|
np.testing.assert_allclose(
|
||||||
golden_output.detach().numpy(), result, rtol=1e-02, atol=1e-03
|
golden_output.detach().numpy(), result, rtol=1e-02, atol=1e-03
|
||||||
)
|
)
|
||||||
@@ -15,8 +15,8 @@ from torchrec.models.dlrm import (
|
|||||||
SparseArch,
|
SparseArch,
|
||||||
OverArch,
|
OverArch,
|
||||||
)
|
)
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
torch.manual_seed(0)
|
torch.manual_seed(0)
|
||||||
@@ -70,7 +70,7 @@ def to_list(key_jagged, combined_keys):
|
|||||||
return combined_list
|
return combined_list
|
||||||
|
|
||||||
|
|
||||||
class SparseArchShark(nn.Module):
|
class SparseArchAMDShark(nn.Module):
|
||||||
def create_emb(self, embedding_dim, num_embeddings_list):
|
def create_emb(self, embedding_dim, num_embeddings_list):
|
||||||
embedding_list = nn.ModuleList()
|
embedding_list = nn.ModuleList()
|
||||||
for i in range(0, num_embeddings_list.size):
|
for i in range(0, num_embeddings_list.size):
|
||||||
@@ -91,7 +91,7 @@ class SparseArchShark(nn.Module):
|
|||||||
total_features,
|
total_features,
|
||||||
num_embeddings_list,
|
num_embeddings_list,
|
||||||
):
|
):
|
||||||
super(SparseArchShark, self).__init__()
|
super(SparseArchAMDShark, self).__init__()
|
||||||
self.embedding_dim = embedding_dim
|
self.embedding_dim = embedding_dim
|
||||||
self.num_features = total_features
|
self.num_features = total_features
|
||||||
self.embedding_list = self.create_emb(
|
self.embedding_list = self.create_emb(
|
||||||
@@ -150,7 +150,7 @@ def test_sparse_arch() -> None:
|
|||||||
),
|
),
|
||||||
offsets=offsets,
|
offsets=offsets,
|
||||||
)
|
)
|
||||||
sparse_archi = SparseArchShark(D, 3, np.array([10, 10]))
|
sparse_archi = SparseArchAMDShark(D, 3, np.array([10, 10]))
|
||||||
sparse_archi.embedding_list[0].weight = w1
|
sparse_archi.embedding_list[0].weight = w1
|
||||||
sparse_archi.embedding_list[1].weight = w2
|
sparse_archi.embedding_list[1].weight = w2
|
||||||
inputs = to_list(features, {"f1": 0, "f3": 0, "f2": 1})
|
inputs = to_list(features, {"f1": 0, "f3": 0, "f2": 1})
|
||||||
@@ -169,7 +169,7 @@ def test_sparse_arch() -> None:
|
|||||||
test_sparse_arch()
|
test_sparse_arch()
|
||||||
|
|
||||||
|
|
||||||
class DLRMShark(nn.Module):
|
class DLRMAMDShark(nn.Module):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
embedding_dim,
|
embedding_dim,
|
||||||
@@ -181,7 +181,7 @@ class DLRMShark(nn.Module):
|
|||||||
) -> None:
|
) -> None:
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.sparse_arch: SparseArchShark = SparseArchShark(
|
self.sparse_arch: SparseArchAMDShark = SparseArchAMDShark(
|
||||||
embedding_dim, total_features, num_embeddings_list
|
embedding_dim, total_features, num_embeddings_list
|
||||||
)
|
)
|
||||||
num_sparse_features: int = total_features
|
num_sparse_features: int = total_features
|
||||||
@@ -250,7 +250,7 @@ def test_dlrm() -> None:
|
|||||||
dense_arch_layer_sizes=[20, D],
|
dense_arch_layer_sizes=[20, D],
|
||||||
over_arch_layer_sizes=[5, 1],
|
over_arch_layer_sizes=[5, 1],
|
||||||
)
|
)
|
||||||
sparse_nn_nod = DLRMShark(
|
sparse_nn_nod = DLRMAMDShark(
|
||||||
embedding_dim=8,
|
embedding_dim=8,
|
||||||
total_features=3,
|
total_features=3,
|
||||||
num_embeddings_list=np.array([100, 100]),
|
num_embeddings_list=np.array([100, 100]),
|
||||||
@@ -283,7 +283,7 @@ def test_dlrm() -> None:
|
|||||||
# print(logits_nod)
|
# print(logits_nod)
|
||||||
|
|
||||||
# Import the module and print.
|
# Import the module and print.
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
sparse_nn_nod,
|
sparse_nn_nod,
|
||||||
(dense_features, *x),
|
(dense_features, *x),
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -293,11 +293,11 @@ def test_dlrm() -> None:
|
|||||||
tracing_required=True
|
tracing_required=True
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
dlrm_mlir, device="cpu", mlir_dialect="linalg"
|
dlrm_mlir, device="cpu", mlir_dialect="linalg"
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
result = shark_module.forward(inputs)
|
result = amdshark_module.forward(inputs)
|
||||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||||
|
|
||||||
torch.allclose(
|
torch.allclose(
|
||||||
@@ -3,7 +3,7 @@ import requests
|
|||||||
|
|
||||||
from transformers import T5Tokenizer, TFT5Model
|
from transformers import T5Tokenizer, TFT5Model
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
# Create a set of inputs
|
# Create a set of inputs
|
||||||
t5_inputs = [
|
t5_inputs = [
|
||||||
@@ -29,7 +29,7 @@ if __name__ == "__main__":
|
|||||||
text = "I love the distilled version of models."
|
text = "I love the distilled version of models."
|
||||||
inputs = tokenizer(text, return_tensors="tf").input_ids
|
inputs = tokenizer(text, return_tensors="tf").input_ids
|
||||||
|
|
||||||
shark_module = SharkInference(T5Module(), (inputs, inputs))
|
amdshark_module = AMDSharkInference(T5Module(), (inputs, inputs))
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
print(shark_module.forward((inputs, inputs)))
|
print(amdshark_module.forward((inputs, inputs)))
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
import torchvision.models as models
|
import torchvision.models as models
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
|
|
||||||
class VisionModule(torch.nn.Module):
|
class VisionModule(torch.nn.Module):
|
||||||
@@ -35,9 +35,9 @@ vision_models_list = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
for i, vision_model in enumerate(vision_models_list):
|
for i, vision_model in enumerate(vision_models_list):
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
VisionModule(vision_model),
|
VisionModule(vision_model),
|
||||||
(input,),
|
(input,),
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
shark_module.forward((input,))
|
amdshark_module.forward((input,))
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
|
|
||||||
|
|
||||||
class UnetModule(torch.nn.Module):
|
class UnetModule(torch.nn.Module):
|
||||||
@@ -23,7 +23,7 @@ class UnetModule(torch.nn.Module):
|
|||||||
|
|
||||||
input = torch.randn(1, 3, 224, 224)
|
input = torch.randn(1, 3, 224, 224)
|
||||||
|
|
||||||
mlir_importer = SharkImporter(
|
mlir_importer = AMDSharkImporter(
|
||||||
UnetModule(),
|
UnetModule(),
|
||||||
(input,),
|
(input,),
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
@@ -33,7 +33,7 @@ mlir_importer = SharkImporter(
|
|||||||
tracing_required=False
|
tracing_required=False
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg")
|
amdshark_module = AMDSharkInference(vision_mlir, mlir_dialect="linalg")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
result = shark_module.forward((input,))
|
result = amdshark_module.forward((input,))
|
||||||
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pipeline_shark_stable_diffusion_upscale import (
|
from pipeline_amdshark_stable_diffusion_upscale import (
|
||||||
SharkStableDiffusionUpscalePipeline,
|
AMDSharkStableDiffusionUpscalePipeline,
|
||||||
)
|
)
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
model_id = "stabilityai/stable-diffusion-x4-upscaler"
|
model_id = "stabilityai/stable-diffusion-x4-upscaler"
|
||||||
pipeline = SharkStableDiffusionUpscalePipeline(model_id)
|
pipeline = AMDSharkStableDiffusionUpscalePipeline(model_id)
|
||||||
|
|
||||||
# let's download an image
|
# let's download an image
|
||||||
url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"
|
url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"
|
||||||
@@ -32,13 +32,13 @@ def get_clip_mlir(model_name="clip_text", extra_args=[]):
|
|||||||
return self.text_encoder(input)[0]
|
return self.text_encoder(input)[0]
|
||||||
|
|
||||||
clip_model = CLIPText()
|
clip_model = CLIPText()
|
||||||
shark_clip = compile_through_fx(
|
amdshark_clip = compile_through_fx(
|
||||||
clip_model,
|
clip_model,
|
||||||
model_input["clip"],
|
model_input["clip"],
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
)
|
)
|
||||||
return shark_clip
|
return amdshark_clip
|
||||||
|
|
||||||
|
|
||||||
def get_vae_mlir(model_name="vae", extra_args=[]):
|
def get_vae_mlir(model_name="vae", extra_args=[]):
|
||||||
@@ -55,13 +55,13 @@ def get_vae_mlir(model_name="vae", extra_args=[]):
|
|||||||
return x
|
return x
|
||||||
|
|
||||||
vae = VaeModel()
|
vae = VaeModel()
|
||||||
shark_vae = compile_through_fx(
|
amdshark_vae = compile_through_fx(
|
||||||
vae,
|
vae,
|
||||||
model_input["vae"],
|
model_input["vae"],
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
)
|
)
|
||||||
return shark_vae
|
return amdshark_vae
|
||||||
|
|
||||||
|
|
||||||
def get_unet_mlir(model_name="unet", extra_args=[]):
|
def get_unet_mlir(model_name="unet", extra_args=[]):
|
||||||
@@ -87,7 +87,7 @@ def get_unet_mlir(model_name="unet", extra_args=[]):
|
|||||||
|
|
||||||
unet = UnetModel()
|
unet = UnetModel()
|
||||||
f16_input_mask = (True, True, True, False)
|
f16_input_mask = (True, True, True, False)
|
||||||
shark_unet = compile_through_fx(
|
amdshark_unet = compile_through_fx(
|
||||||
unet,
|
unet,
|
||||||
model_input["unet"],
|
model_input["unet"],
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
@@ -95,4 +95,4 @@ def get_unet_mlir(model_name="unet", extra_args=[]):
|
|||||||
f16_input_mask=f16_input_mask,
|
f16_input_mask=f16_input_mask,
|
||||||
extra_args=extra_args,
|
extra_args=extra_args,
|
||||||
)
|
)
|
||||||
return shark_unet
|
return amdshark_unet
|
||||||
@@ -5,7 +5,7 @@ from model_wrappers import (
|
|||||||
get_clip_mlir,
|
get_clip_mlir,
|
||||||
)
|
)
|
||||||
from upscaler_args import args
|
from upscaler_args import args
|
||||||
from utils import get_shark_model
|
from utils import get_amdshark_model
|
||||||
|
|
||||||
BATCH_SIZE = len(args.prompts)
|
BATCH_SIZE = len(args.prompts)
|
||||||
if BATCH_SIZE != 1:
|
if BATCH_SIZE != 1:
|
||||||
@@ -24,25 +24,25 @@ clip_flag = [
|
|||||||
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
|
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
|
||||||
]
|
]
|
||||||
|
|
||||||
bucket = "gs://shark_tank/stable_diffusion/"
|
bucket = "gs://amdshark_tank/stable_diffusion/"
|
||||||
|
|
||||||
|
|
||||||
def get_unet():
|
def get_unet():
|
||||||
model_name = "upscaler_unet"
|
model_name = "upscaler_unet"
|
||||||
if args.import_mlir:
|
if args.import_mlir:
|
||||||
return get_unet_mlir(model_name, unet_flag)
|
return get_unet_mlir(model_name, unet_flag)
|
||||||
return get_shark_model(bucket, model_name, unet_flag)
|
return get_amdshark_model(bucket, model_name, unet_flag)
|
||||||
|
|
||||||
|
|
||||||
def get_vae():
|
def get_vae():
|
||||||
model_name = "upscaler_vae"
|
model_name = "upscaler_vae"
|
||||||
if args.import_mlir:
|
if args.import_mlir:
|
||||||
return get_vae_mlir(model_name, vae_flag)
|
return get_vae_mlir(model_name, vae_flag)
|
||||||
return get_shark_model(bucket, model_name, vae_flag)
|
return get_amdshark_model(bucket, model_name, vae_flag)
|
||||||
|
|
||||||
|
|
||||||
def get_clip():
|
def get_clip():
|
||||||
model_name = "upscaler_clip"
|
model_name = "upscaler_clip"
|
||||||
if args.import_mlir:
|
if args.import_mlir:
|
||||||
return get_clip_mlir(model_name, clip_flag)
|
return get_clip_mlir(model_name, clip_flag)
|
||||||
return get_shark_model(bucket, model_name, clip_flag)
|
return get_amdshark_model(bucket, model_name, clip_flag)
|
||||||
@@ -46,13 +46,13 @@ def preprocess(image):
|
|||||||
return image
|
return image
|
||||||
|
|
||||||
|
|
||||||
def shark_run_wrapper(model, *args):
|
def amdshark_run_wrapper(model, *args):
|
||||||
np_inputs = tuple([x.detach().numpy() for x in args])
|
np_inputs = tuple([x.detach().numpy() for x in args])
|
||||||
outputs = model("forward", np_inputs)
|
outputs = model("forward", np_inputs)
|
||||||
return torch.from_numpy(outputs)
|
return torch.from_numpy(outputs)
|
||||||
|
|
||||||
|
|
||||||
class SharkStableDiffusionUpscalePipeline:
|
class AMDSharkStableDiffusionUpscalePipeline:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model_id,
|
model_id,
|
||||||
@@ -131,7 +131,7 @@ class SharkStableDiffusionUpscalePipeline:
|
|||||||
# else:
|
# else:
|
||||||
# attention_mask = None
|
# attention_mask = None
|
||||||
|
|
||||||
text_embeddings = shark_run_wrapper(
|
text_embeddings = amdshark_run_wrapper(
|
||||||
self.text_encoder, text_input_ids.to(device)
|
self.text_encoder, text_input_ids.to(device)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -180,7 +180,7 @@ class SharkStableDiffusionUpscalePipeline:
|
|||||||
# else:
|
# else:
|
||||||
# attention_mask = None
|
# attention_mask = None
|
||||||
|
|
||||||
uncond_embeddings = shark_run_wrapper(
|
uncond_embeddings = amdshark_run_wrapper(
|
||||||
self.text_encoder,
|
self.text_encoder,
|
||||||
uncond_input.input_ids.to(device),
|
uncond_input.input_ids.to(device),
|
||||||
)
|
)
|
||||||
@@ -227,7 +227,7 @@ class SharkStableDiffusionUpscalePipeline:
|
|||||||
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents with 0.18215->0.08333
|
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents with 0.18215->0.08333
|
||||||
def decode_latents(self, latents):
|
def decode_latents(self, latents):
|
||||||
latents = 1 / 0.08333 * latents
|
latents = 1 / 0.08333 * latents
|
||||||
image = shark_run_wrapper(self.vae, latents)
|
image = amdshark_run_wrapper(self.vae, latents)
|
||||||
image = (image / 2 + 0.5).clamp(0, 1)
|
image = (image / 2 + 0.5).clamp(0, 1)
|
||||||
# we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
|
# we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
|
||||||
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
||||||
@@ -445,7 +445,7 @@ class SharkStableDiffusionUpscalePipeline:
|
|||||||
timestep = torch.tensor([t]).to(torch.float32)
|
timestep = torch.tensor([t]).to(torch.float32)
|
||||||
|
|
||||||
# predict the noise residual
|
# predict the noise residual
|
||||||
noise_pred = shark_run_wrapper(
|
noise_pred = amdshark_run_wrapper(
|
||||||
self.unet,
|
self.unet,
|
||||||
latent_model_input.half(),
|
latent_model_input.half(),
|
||||||
timestep,
|
timestep,
|
||||||
@@ -59,7 +59,7 @@ p.add_argument(
|
|||||||
"--import_mlir",
|
"--import_mlir",
|
||||||
default=False,
|
default=False,
|
||||||
action=argparse.BooleanOptionalAction,
|
action=argparse.BooleanOptionalAction,
|
||||||
help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.",
|
help="imports the model from torch module to amdshark_module otherwise downloads the model from amdshark_tank.",
|
||||||
)
|
)
|
||||||
|
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
@@ -1,16 +1,16 @@
|
|||||||
import os
|
import os
|
||||||
import torch
|
import torch
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from upscaler_args import args
|
from upscaler_args import args
|
||||||
from shark.shark_importer import import_with_fx
|
from amdshark.amdshark_importer import import_with_fx
|
||||||
from shark.iree_utils.vulkan_utils import (
|
from amdshark.iree_utils.vulkan_utils import (
|
||||||
set_iree_vulkan_runtime_flags,
|
set_iree_vulkan_runtime_flags,
|
||||||
get_vulkan_target_triple,
|
get_vulkan_target_triple,
|
||||||
get_iree_vulkan_runtime_flags,
|
get_iree_vulkan_runtime_flags,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _compile_module(shark_module, model_name, extra_args=[]):
|
def _compile_module(amdshark_module, model_name, extra_args=[]):
|
||||||
if args.load_vmfb or args.save_vmfb:
|
if args.load_vmfb or args.save_vmfb:
|
||||||
device = (
|
device = (
|
||||||
args.device
|
args.device
|
||||||
@@ -21,7 +21,7 @@ def _compile_module(shark_module, model_name, extra_args=[]):
|
|||||||
vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb")
|
vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb")
|
||||||
if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
|
if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
|
||||||
print(f"loading existing vmfb from: {vmfb_path}")
|
print(f"loading existing vmfb from: {vmfb_path}")
|
||||||
shark_module.load_module(vmfb_path, extra_args=extra_args)
|
amdshark_module.load_module(vmfb_path, extra_args=extra_args)
|
||||||
else:
|
else:
|
||||||
if args.save_vmfb:
|
if args.save_vmfb:
|
||||||
print("Saving to {}".format(vmfb_path))
|
print("Saving to {}".format(vmfb_path))
|
||||||
@@ -31,48 +31,48 @@ def _compile_module(shark_module, model_name, extra_args=[]):
|
|||||||
vmfb_path
|
vmfb_path
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
path = shark_module.save_module(
|
path = amdshark_module.save_module(
|
||||||
os.getcwd(), extended_name, extra_args
|
os.getcwd(), extended_name, extra_args
|
||||||
)
|
)
|
||||||
shark_module.load_module(path, extra_args=extra_args)
|
amdshark_module.load_module(path, extra_args=extra_args)
|
||||||
else:
|
else:
|
||||||
shark_module.compile(extra_args)
|
amdshark_module.compile(extra_args)
|
||||||
return shark_module
|
return amdshark_module
|
||||||
|
|
||||||
|
|
||||||
# Downloads the model from shark_tank and returns the shark_module.
|
# Downloads the model from amdshark_tank and returns the amdshark_module.
|
||||||
def get_shark_model(tank_url, model_name, extra_args=[]):
|
def get_amdshark_model(tank_url, model_name, extra_args=[]):
|
||||||
from shark.shark_downloader import download_model
|
from amdshark.amdshark_downloader import download_model
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
|
|
||||||
# Set local shark_tank cache directory.
|
# Set local amdshark_tank cache directory.
|
||||||
# shark_args.local_tank_cache = args.local_tank_cache
|
# amdshark_args.local_tank_cache = args.local_tank_cache
|
||||||
|
|
||||||
mlir_model, func_name, inputs, golden_out = download_model(
|
mlir_model, func_name, inputs, golden_out = download_model(
|
||||||
model_name,
|
model_name,
|
||||||
tank_url=tank_url,
|
tank_url=tank_url,
|
||||||
frontend="torch",
|
frontend="torch",
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_model, device=args.device, mlir_dialect="linalg"
|
mlir_model, device=args.device, mlir_dialect="linalg"
|
||||||
)
|
)
|
||||||
return _compile_module(shark_module, model_name, extra_args)
|
return _compile_module(amdshark_module, model_name, extra_args)
|
||||||
|
|
||||||
|
|
||||||
# Converts the torch-module into a shark_module.
|
# Converts the torch-module into an amdshark_module.
|
||||||
def compile_through_fx(
|
def compile_through_fx(
|
||||||
model, inputs, model_name, is_f16=False, f16_input_mask=None, extra_args=[]
|
model, inputs, model_name, is_f16=False, f16_input_mask=None, extra_args=[]
|
||||||
):
|
):
|
||||||
mlir_module, func_name = import_with_fx(
|
mlir_module, func_name = import_with_fx(
|
||||||
model, inputs, is_f16, f16_input_mask
|
model, inputs, is_f16, f16_input_mask
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module,
|
mlir_module,
|
||||||
device=args.device,
|
device=args.device,
|
||||||
mlir_dialect="linalg",
|
mlir_dialect="linalg",
|
||||||
)
|
)
|
||||||
|
|
||||||
return _compile_module(shark_module, model_name, extra_args)
|
return _compile_module(amdshark_module, model_name, extra_args)
|
||||||
|
|
||||||
|
|
||||||
def set_iree_runtime_flags():
|
def set_iree_runtime_flags():
|
||||||
@@ -112,7 +112,7 @@ def get_device_mapping(driver, key_combination=3):
|
|||||||
Returns:
|
Returns:
|
||||||
dict: maps the device names a user can input to the desired combination of name/path.
|
dict: maps the device names a user can input to the desired combination of name/path.
|
||||||
"""
|
"""
|
||||||
from shark.iree_utils._common import iree_device_map
|
from amdshark.iree_utils._common import iree_device_map
|
||||||
|
|
||||||
driver = iree_device_map(driver)
|
driver = iree_device_map(driver)
|
||||||
device_list = get_all_devices(driver)
|
device_list = get_all_devices(driver)
|
||||||
@@ -205,7 +205,7 @@ def set_init_device_flags():
|
|||||||
# Utility to get list of devices available.
|
# Utility to get list of devices available.
|
||||||
def get_available_devices():
|
def get_available_devices():
|
||||||
def get_devices_by_name(driver_name):
|
def get_devices_by_name(driver_name):
|
||||||
from shark.iree_utils._common import iree_device_map
|
from amdshark.iree_utils._common import iree_device_map
|
||||||
|
|
||||||
device_list = []
|
device_list = []
|
||||||
try:
|
try:
|
||||||
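A hedged usage sketch of the helper above: `compile_through_fx` takes an arbitrary torch module plus example inputs and returns a compiled amdshark_module. The toy module, its input, the tuple-wrapping of the inputs, and the local `utils` import are assumptions for illustration only; the `module("forward", ...)` calling convention follows the `amdshark_run_wrapper` helper earlier in this diff.

```python
import torch

# Assumption: run from the upscaler example directory so its utils module resolves.
from utils import compile_through_fx

# Hypothetical toy module and input, purely for illustration.
class ToyModule(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x)

toy_input = torch.randn(1, 16)

# compile_through_fx traces the module with import_with_fx and hands the MLIR
# to AMDSharkInference via _compile_module, as defined above. Passing the
# inputs as a tuple is an assumption based on the importer examples.
toy_module = compile_through_fx(ToyModule(), (toy_input,), model_name="toy_relu")

# Invoke the compiled module the same way amdshark_run_wrapper does:
# function name plus a tuple of numpy inputs.
result = toy_module("forward", (toy_input.numpy(),))
print(result)
```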
15
amdshark/examples/amdshark_inference/v_diffusion.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
from amdshark.amdshark_downloader import download_model
|
||||||
|
|
||||||
|
|
||||||
|
mlir_model, func_name, inputs, golden_out = download_model(
|
||||||
|
"v_diffusion", frontend="torch"
|
||||||
|
)
|
||||||
|
|
||||||
|
amdshark_module = AMDSharkInference(
|
||||||
|
mlir_model, device="vulkan", mlir_dialect="linalg"
|
||||||
|
)
|
||||||
|
amdshark_module.compile()
|
||||||
|
result = amdshark_module.forward(inputs)
|
||||||
|
print("The obtained result via amdshark is: ", result)
|
||||||
|
print("The golden result is:", golden_out)
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch.nn.utils import stateless
|
from torch.nn.utils import stateless
|
||||||
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
||||||
from shark.shark_trainer import SharkTrainer
|
from amdshark.amdshark_trainer import AMDSharkTrainer
|
||||||
|
|
||||||
|
|
||||||
class MiniLMSequenceClassification(torch.nn.Module):
|
class MiniLMSequenceClassification(torch.nn.Module):
|
||||||
@@ -42,7 +42,7 @@ def forward(params, buffers, args):
|
|||||||
return params, buffers
|
return params, buffers
|
||||||
|
|
||||||
|
|
||||||
shark_module = SharkTrainer(mod, inp)
|
amdshark_module = AMDSharkTrainer(mod, inp)
|
||||||
shark_module.compile(forward)
|
amdshark_module.compile(forward)
|
||||||
shark_module.train(num_iters=2)
|
amdshark_module.train(num_iters=2)
|
||||||
print("training done")
|
print("training done")
|
||||||
@@ -3,8 +3,8 @@ import os
|
|||||||
import time
|
import time
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
||||||
from shark.shark_trainer import SharkTrainer
|
from amdshark.amdshark_trainer import AMDSharkTrainer
|
||||||
from shark.parser import parser
|
from amdshark.parser import parser
|
||||||
from urllib import request
|
from urllib import request
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -28,7 +28,7 @@ if __name__ == "__main__":
|
|||||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||||
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
|
||||||
]
|
]
|
||||||
file_link = "https://storage.googleapis.com/shark_tank/users/stanley/bert_tf_training.mlir"
|
file_link = "https://storage.googleapis.com/amdshark_tank/users/stanley/bert_tf_training.mlir"
|
||||||
response = request.urlretrieve(file_link, load_args.download_mlir_path)
|
response = request.urlretrieve(file_link, load_args.download_mlir_path)
|
||||||
sample_input_tensors = [
|
sample_input_tensors = [
|
||||||
tf.convert_to_tensor(val, dtype=tf.int32)
|
tf.convert_to_tensor(val, dtype=tf.int32)
|
||||||
@@ -41,7 +41,7 @@ if __name__ == "__main__":
|
|||||||
)
|
)
|
||||||
with open(load_args.download_mlir_path, "rb") as input_file:
|
with open(load_args.download_mlir_path, "rb") as input_file:
|
||||||
bert_mlir = input_file.read()
|
bert_mlir = input_file.read()
|
||||||
shark_module = SharkTrainer(
|
amdshark_module = AMDSharkTrainer(
|
||||||
bert_mlir,
|
bert_mlir,
|
||||||
(
|
(
|
||||||
sample_input_tensors,
|
sample_input_tensors,
|
||||||
@@ -50,10 +50,10 @@ if __name__ == "__main__":
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("mhlo")
|
amdshark_module.set_frontend("mhlo")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
start = time.time()
|
start = time.time()
|
||||||
print(shark_module.train(num_iter))
|
print(amdshark_module.train(num_iter))
|
||||||
end = time.time()
|
end = time.time()
|
||||||
total_time = end - start
|
total_time = end - start
|
||||||
print("time: " + str(total_time))
|
print("time: " + str(total_time))
|
||||||
@@ -8,7 +8,7 @@ from official.nlp.modeling import layers
|
|||||||
from official.nlp.modeling import networks
|
from official.nlp.modeling import networks
|
||||||
from official.nlp.modeling.models import bert_classifier
|
from official.nlp.modeling.models import bert_classifier
|
||||||
|
|
||||||
from shark.shark_trainer import SharkTrainer
|
from amdshark.amdshark_trainer import AMDSharkTrainer
|
||||||
|
|
||||||
|
|
||||||
tf.random.set_seed(0)
|
tf.random.set_seed(0)
|
||||||
@@ -79,7 +79,7 @@ if __name__ == "__main__":
|
|||||||
for val in predict_sample_input
|
for val in predict_sample_input
|
||||||
]
|
]
|
||||||
num_iter = 10
|
num_iter = 10
|
||||||
shark_module = SharkTrainer(
|
amdshark_module = AMDSharkTrainer(
|
||||||
BertModule(),
|
BertModule(),
|
||||||
(
|
(
|
||||||
sample_input_tensors,
|
sample_input_tensors,
|
||||||
@@ -88,10 +88,10 @@ if __name__ == "__main__":
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
shark_module.set_frontend("tensorflow")
|
amdshark_module.set_frontend("tensorflow")
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
start = time.time()
|
start = time.time()
|
||||||
print(shark_module.train(num_iter))
|
print(amdshark_module.train(num_iter))
|
||||||
end = time.time()
|
end = time.time()
|
||||||
total_time = end - start
|
total_time = end - start
|
||||||
print("time: " + str(total_time))
|
print("time: " + str(total_time))
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch.nn.utils import _stateless
|
from torch.nn.utils import _stateless
|
||||||
from shark.shark_trainer import SharkTrainer
|
from amdshark.amdshark_trainer import AMDSharkTrainer
|
||||||
|
|
||||||
|
|
||||||
class Foo(torch.nn.Module):
|
class Foo(torch.nn.Module):
|
||||||
@@ -37,8 +37,8 @@ def forward(params, buffers, args):
|
|||||||
|
|
||||||
# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)
|
# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)
|
||||||
|
|
||||||
shark_module = SharkTrainer(mod, inp)
|
amdshark_module = AMDSharkTrainer(mod, inp)
|
||||||
# Pass the training function in case of torch
|
# Pass the training function in case of torch
|
||||||
shark_module.compile(training_fn=forward)
|
amdshark_module.compile(training_fn=forward)
|
||||||
|
|
||||||
shark_module.train(num_iters=10)
|
amdshark_module.train(num_iters=10)
|
||||||
@@ -5,10 +5,10 @@
|
|||||||
<details>
|
<details>
|
||||||
<summary>Installation (Linux)</summary>
|
<summary>Installation (Linux)</summary>
|
||||||
|
|
||||||
### Activate shark.venv Virtual Environment
|
### Activate amdshark.venv Virtual Environment
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
source shark.venv/bin/activate
|
source amdshark.venv/bin/activate
|
||||||
|
|
||||||
# Some older pip installs may not be able to handle the recent PyTorch deps
|
# Some older pip installs may not be able to handle the recent PyTorch deps
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
@@ -2,10 +2,10 @@
|
|||||||
|
|
||||||
## Installation (Linux)
|
## Installation (Linux)
|
||||||
|
|
||||||
### Activate shark.venv Virtual Environment
|
### Activate amdshark.venv Virtual Environment
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
source shark.venv/bin/activate
|
source amdshark.venv/bin/activate
|
||||||
|
|
||||||
# Some older pip installs may not be able to handle the recent PyTorch deps
|
# Some older pip installs may not be able to handle the recent PyTorch deps
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
@@ -23,7 +23,7 @@ pip install accelerate transformers ftfy
|
|||||||
|
|
||||||
Please cherry-pick this branch of torch-mlir: https://github.com/vivekkhandelwal1/torch-mlir/tree/sd-ops
|
Please cherry-pick this branch of torch-mlir: https://github.com/vivekkhandelwal1/torch-mlir/tree/sd-ops
|
||||||
and build it locally. You can find the instructions for using a locally built Torch-MLIR,
|
and build it locally. You can find the instructions for using a locally built Torch-MLIR,
|
||||||
here: https://github.com/nod-ai/SHARK-Studio#how-to-use-your-locally-built-iree--torch-mlir-with-shark
|
here: https://github.com/nod-ai/AMDSHARK-Studio#how-to-use-your-locally-built-iree--torch-mlir-with-amdshark
|
||||||
|
|
||||||
## Run the Stable diffusion fine tuning
|
## Run the Stable diffusion fine tuning
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ from torch_mlir.dynamo import make_simple_dynamo_backend
|
|||||||
import torch._dynamo as dynamo
|
import torch._dynamo as dynamo
|
||||||
from torch.fx.experimental.proxy_tensor import make_fx
|
from torch.fx.experimental.proxy_tensor import make_fx
|
||||||
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
|
|
||||||
torch._dynamo.config.verbose = True
|
torch._dynamo.config.verbose = True
|
||||||
|
|
||||||
@@ -476,8 +476,8 @@ class UnetModel(torch.nn.Module):
|
|||||||
return self.unet.forward(x, y, z, return_dict=False)[0]
|
return self.unet.forward(x, y, z, return_dict=False)[0]
|
||||||
|
|
||||||
|
|
||||||
shark_vae = VaeModel()
|
amdshark_vae = VaeModel()
|
||||||
shark_unet = UnetModel()
|
amdshark_unet = UnetModel()
|
||||||
|
|
||||||
####### Creating our training data ########
|
####### Creating our training data ########
|
||||||
|
|
||||||
@@ -638,14 +638,14 @@ def refbackend_torchdynamo_backend(
|
|||||||
mlir_module.operation.write_bytecode(bytecode_stream)
|
mlir_module.operation.write_bytecode(bytecode_stream)
|
||||||
bytecode = bytecode_stream.getvalue()
|
bytecode = bytecode_stream.getvalue()
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module=bytecode, device=args.device, mlir_dialect="tm_tensor"
|
mlir_module=bytecode, device=args.device, mlir_dialect="tm_tensor"
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
|
|
||||||
def compiled_callable(*inputs):
|
def compiled_callable(*inputs):
|
||||||
inputs = [x.numpy() for x in inputs]
|
inputs = [x.numpy() for x in inputs]
|
||||||
result = shark_module("forward", inputs)
|
result = amdshark_module("forward", inputs)
|
||||||
if was_unwrapped:
|
if was_unwrapped:
|
||||||
result = [
|
result = [
|
||||||
result,
|
result,
|
||||||
@@ -709,7 +709,7 @@ optimizer = torch.optim.AdamW(
|
|||||||
# Training function
|
# Training function
|
||||||
def train_func(batch_pixel_values, batch_input_ids):
|
def train_func(batch_pixel_values, batch_input_ids):
|
||||||
# Convert images to latent space
|
# Convert images to latent space
|
||||||
latents = shark_vae(batch_pixel_values).sample().detach()
|
latents = amdshark_vae(batch_pixel_values).sample().detach()
|
||||||
latents = latents * 0.18215
|
latents = latents * 0.18215
|
||||||
|
|
||||||
# Sample noise that we'll add to the latents
|
# Sample noise that we'll add to the latents
|
||||||
@@ -731,7 +731,7 @@ def train_func(batch_pixel_values, batch_input_ids):
|
|||||||
encoder_hidden_states = text_encoder(batch_input_ids)[0]
|
encoder_hidden_states = text_encoder(batch_input_ids)[0]
|
||||||
|
|
||||||
# Predict the noise residual
|
# Predict the noise residual
|
||||||
noise_pred = shark_unet(
|
noise_pred = amdshark_unet(
|
||||||
noisy_latents,
|
noisy_latents,
|
||||||
timesteps,
|
timesteps,
|
||||||
encoder_hidden_states,
|
encoder_hidden_states,
|
||||||
@@ -31,7 +31,7 @@ from torch_mlir_e2e_test.eager_backends.refbackend import (
|
|||||||
NUMPY_TO_TORCH_DTYPE_DICT,
|
NUMPY_TO_TORCH_DTYPE_DICT,
|
||||||
)
|
)
|
||||||
|
|
||||||
from shark.iree_utils.compile_utils import (
|
from amdshark.iree_utils.compile_utils import (
|
||||||
get_iree_compiled_module,
|
get_iree_compiled_module,
|
||||||
IREE_DEVICE_MAP,
|
IREE_DEVICE_MAP,
|
||||||
)
|
)
|
||||||
@@ -157,7 +157,7 @@ def device_driver_info(device):
|
|||||||
f"Required drivers for {device} not found. {device_driver_err_map[device]['debug']} "
|
f"Required drivers for {device} not found. {device_driver_err_map[device]['debug']} "
|
||||||
f"Please install the required drivers{device_driver_err_map[device]['solution']} "
|
f"Please install the required drivers{device_driver_err_map[device]['solution']} "
|
||||||
f"For further assistance please reach out to the community on discord [https://discord.com/invite/RUqY2h2s9u]"
|
f"For further assistance please reach out to the community on discord [https://discord.com/invite/RUqY2h2s9u]"
|
||||||
f" and/or file a bug at https://github.com/nod-ai/SHARK-Studio/issues"
|
f" and/or file a bug at https://github.com/nod-ai/AMDSHARK-Studio/issues"
|
||||||
)
|
)
|
||||||
return err_msg
|
return err_msg
|
||||||
else:
|
else:
|
||||||
@@ -12,8 +12,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from shark.iree_utils._common import run_cmd, iree_device_map
|
from amdshark.iree_utils._common import run_cmd, iree_device_map
|
||||||
from shark.iree_utils.cpu_utils import get_cpu_count
|
from amdshark.iree_utils.cpu_utils import get_cpu_count
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -132,7 +132,7 @@ def run_benchmark_module(benchmark_cl):
|
|||||||
benchmark_path = benchmark_cl[0]
|
benchmark_path = benchmark_cl[0]
|
||||||
assert os.path.exists(
|
assert os.path.exists(
|
||||||
benchmark_path
|
benchmark_path
|
||||||
), "Cannot find iree_benchmark_module, Please contact SHARK maintainer on discord."
|
), "Cannot find iree_benchmark_module, Please contact AMDSHARK maintainer on discord."
|
||||||
bench_stdout, bench_stderr = run_cmd(" ".join(benchmark_cl))
|
bench_stdout, bench_stderr = run_cmd(" ".join(benchmark_cl))
|
||||||
try:
|
try:
|
||||||
regex_split = re.compile("(\d+[.]*\d*)( *)([a-zA-Z]+)")
|
regex_split = re.compile("(\d+[.]*\d*)( *)([a-zA-Z]+)")
|
||||||
@@ -20,7 +20,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import iree.runtime as ireert
|
import iree.runtime as ireert
|
||||||
import iree.compiler as ireec
|
import iree.compiler as ireec
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
|
|
||||||
from .trace import DetailLogger
|
from .trace import DetailLogger
|
||||||
from ._common import iree_device_map, iree_target_map
|
from ._common import iree_device_map, iree_target_map
|
||||||
@@ -34,7 +34,7 @@ def get_iree_device_args(device, extra_args=[]):
|
|||||||
device, device_num = clean_device_info(device)
|
device, device_num = clean_device_info(device)
|
||||||
|
|
||||||
if "cpu" in device:
|
if "cpu" in device:
|
||||||
from shark.iree_utils.cpu_utils import get_iree_cpu_args
|
from amdshark.iree_utils.cpu_utils import get_iree_cpu_args
|
||||||
|
|
||||||
u_kernel_flag = ["--iree-llvmcpu-enable-ukernels"]
|
u_kernel_flag = ["--iree-llvmcpu-enable-ukernels"]
|
||||||
stack_size_flag = ["--iree-llvmcpu-stack-allocation-limit=256000"]
|
stack_size_flag = ["--iree-llvmcpu-stack-allocation-limit=256000"]
|
||||||
@@ -45,25 +45,25 @@ def get_iree_device_args(device, extra_args=[]):
|
|||||||
+ stack_size_flag
|
+ stack_size_flag
|
||||||
)
|
)
|
||||||
if device == "cuda":
|
if device == "cuda":
|
||||||
from shark.iree_utils.gpu_utils import get_iree_gpu_args
|
from amdshark.iree_utils.gpu_utils import get_iree_gpu_args
|
||||||
|
|
||||||
return get_iree_gpu_args()
|
return get_iree_gpu_args()
|
||||||
if device == "vulkan":
|
if device == "vulkan":
|
||||||
from shark.iree_utils.vulkan_utils import get_iree_vulkan_args
|
from amdshark.iree_utils.vulkan_utils import get_iree_vulkan_args
|
||||||
|
|
||||||
return get_iree_vulkan_args(
|
return get_iree_vulkan_args(
|
||||||
device_num=device_num, extra_args=extra_args
|
device_num=device_num, extra_args=extra_args
|
||||||
)
|
)
|
||||||
if device == "metal":
|
if device == "metal":
|
||||||
from shark.iree_utils.metal_utils import get_iree_metal_args
|
from amdshark.iree_utils.metal_utils import get_iree_metal_args
|
||||||
|
|
||||||
return get_iree_metal_args(extra_args=extra_args)
|
return get_iree_metal_args(extra_args=extra_args)
|
||||||
if device == "rocm":
|
if device == "rocm":
|
||||||
from shark.iree_utils.gpu_utils import get_iree_rocm_args
|
from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
|
||||||
|
|
||||||
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args)
|
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args)
|
||||||
if device == "hip":
|
if device == "hip":
|
||||||
from shark.iree_utils.gpu_utils import get_iree_rocm_args
|
from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
|
||||||
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args, hip_driver=True)
|
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args, hip_driver=True)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
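The hunks above cover the per-device flag dispatch in get_iree_device_args, now importing its helpers from the amdshark namespace. A minimal usage sketch, assuming the function lives in amdshark.iree_utils.compile_utils as the surrounding import hunks suggest:

    # Hypothetical usage; the module path is inferred from the import hunks above.
    from amdshark.iree_utils.compile_utils import get_iree_device_args

    # "cpu", "cuda", "vulkan", "metal", "rocm" and "hip" each have a branch above;
    # any other device string falls through to an empty flag list.
    cpu_flags = get_iree_device_args("cpu")
    print(cpu_flags)  # e.g. target-triple, ukernel and stack-allocation flags

Each backend helper is imported lazily inside its branch, so for example a CPU-only environment never pulls in the Vulkan or ROCm utilities.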
@@ -137,19 +137,19 @@ def get_iree_common_args(debug=False):
|
|||||||
|
|
||||||
|
|
||||||
# Args that are suitable only for certain models or groups of models.
|
# Args that are suitable only for certain models or groups of models.
|
||||||
# shark_args are passed down from pytests to control which models compile with these flags,
|
# amdshark_args are passed down from pytests to control which models compile with these flags,
|
||||||
# but they can also be set in shark/parser.py
|
# but they can also be set in amdshark/parser.py
|
||||||
def get_model_specific_args():
|
def get_model_specific_args():
|
||||||
ms_args = []
|
ms_args = []
|
||||||
if shark_args.enable_conv_transform == True:
|
if amdshark_args.enable_conv_transform == True:
|
||||||
ms_args += [
|
ms_args += [
|
||||||
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-convert-conv-nchw-to-nhwc))"
|
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-convert-conv-nchw-to-nhwc))"
|
||||||
]
|
]
|
||||||
if shark_args.enable_img2col_transform == True:
|
if amdshark_args.enable_img2col_transform == True:
|
||||||
ms_args += [
|
ms_args += [
|
||||||
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-convert-conv2d-to-img2col))"
|
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-convert-conv2d-to-img2col))"
|
||||||
]
|
]
|
||||||
if shark_args.use_winograd == True:
|
if amdshark_args.use_winograd == True:
|
||||||
ms_args += [
|
ms_args += [
|
||||||
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-linalg-ext-convert-conv2d-to-winograd))"
|
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-linalg-ext-convert-conv2d-to-winograd))"
|
||||||
]
|
]
|
||||||
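get_model_specific_args, shown above, turns individual amdshark_args toggles into IREE preprocessing pipelines. A sketch of flipping one toggle programmatically; the module path is an assumption, matching the other compile_utils hunks:

    from amdshark.parser import amdshark_args
    from amdshark.iree_utils.compile_utils import get_model_specific_args  # assumed location

    # Normally driven from pytest, per the comment above; set directly here for illustration.
    amdshark_args.enable_conv_transform = True
    print(get_model_specific_args())
    # ['--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-convert-conv-nchw-to-nhwc))']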
@@ -262,7 +262,7 @@ def compile_benchmark_dirs(bench_dir, device, dispatch_benchmarks):
|
|||||||
benchmark_file.write(f"DISPATCH: {d_}\n")
|
benchmark_file.write(f"DISPATCH: {d_}\n")
|
||||||
benchmark_file.write(str(iter_per_second) + "\n")
|
benchmark_file.write(str(iter_per_second) + "\n")
|
||||||
benchmark_file.write(
|
benchmark_file.write(
|
||||||
"SHARK BENCHMARK RESULT: "
|
"AMDSHARK BENCHMARK RESULT: "
|
||||||
+ str(1 / (iter_per_second * 0.001))
|
+ str(1 / (iter_per_second * 0.001))
|
||||||
+ "\n"
|
+ "\n"
|
||||||
)
|
)
|
||||||
@@ -323,7 +323,7 @@ def compile_module_to_flatbuffer(
|
|||||||
args += get_iree_common_args(debug=debug)
|
args += get_iree_common_args(debug=debug)
|
||||||
args += get_model_specific_args()
|
args += get_model_specific_args()
|
||||||
args += extra_args
|
args += extra_args
|
||||||
args += shark_args.additional_compile_args
|
args += amdshark_args.additional_compile_args
|
||||||
|
|
||||||
if frontend in ["tensorflow", "tf"]:
|
if frontend in ["tensorflow", "tf"]:
|
||||||
input_type = "auto"
|
input_type = "auto"
|
||||||
@@ -382,7 +382,7 @@ def get_iree_module(
|
|||||||
]
|
]
|
||||||
haldevice = haldriver.create_device(
|
haldevice = haldriver.create_device(
|
||||||
hal_device_id,
|
hal_device_id,
|
||||||
allocators=shark_args.device_allocator,
|
allocators=amdshark_args.device_allocator,
|
||||||
)
|
)
|
||||||
config = ireert.Config(device=haldevice)
|
config = ireert.Config(device=haldevice)
|
||||||
config.id = hal_device_id
|
config.id = hal_device_id
|
||||||
@@ -433,7 +433,7 @@ def load_vmfb_using_mmap(
|
|||||||
]
|
]
|
||||||
haldevice = haldriver.create_device(
|
haldevice = haldriver.create_device(
|
||||||
hal_device_id,
|
hal_device_id,
|
||||||
allocators=shark_args.device_allocator,
|
allocators=amdshark_args.device_allocator,
|
||||||
)
|
)
|
||||||
dl.log(f"ireert.create_device()")
|
dl.log(f"ireert.create_device()")
|
||||||
config = ireert.Config(device=haldevice)
|
config = ireert.Config(device=haldevice)
|
||||||
@@ -452,9 +452,9 @@ def load_vmfb_using_mmap(
|
|||||||
# Now load vmfb.
|
# Now load vmfb.
|
||||||
# Two scenarios we have here :-
|
# Two scenarios we have here :-
|
||||||
# 1. We either have the vmfb already saved and therefore pass the path of it.
|
# 1. We either have the vmfb already saved and therefore pass the path of it.
|
||||||
# (This would arise if we're invoking `load_module` from a SharkInference obj)
|
# (This would arise if we're invoking `load_module` from a AMDSharkInference obj)
|
||||||
# OR 2. We are compiling on the fly, therefore we have the flatbuffer blob to play with.
|
# OR 2. We are compiling on the fly, therefore we have the flatbuffer blob to play with.
|
||||||
# (This would arise if we're invoking `compile` from a SharkInference obj)
|
# (This would arise if we're invoking `compile` from a AMDSharkInference obj)
|
||||||
temp_file_to_unlink = None
|
temp_file_to_unlink = None
|
||||||
if isinstance(flatbuffer_blob_or_path, Path):
|
if isinstance(flatbuffer_blob_or_path, Path):
|
||||||
flatbuffer_blob_or_path = flatbuffer_blob_or_path.__str__()
|
flatbuffer_blob_or_path = flatbuffer_blob_or_path.__str__()
|
||||||
@@ -486,7 +486,7 @@ def load_vmfb_using_mmap(
|
|||||||
)
|
)
|
||||||
ctx = ireert.SystemContext(config=config, vm_modules=vm_modules)
|
ctx = ireert.SystemContext(config=config, vm_modules=vm_modules)
|
||||||
dl.log(f"ireert.SystemContext created")
|
dl.log(f"ireert.SystemContext created")
|
||||||
for flag in shark_args.additional_runtime_args:
|
for flag in amdshark_args.additional_runtime_args:
|
||||||
ireert.flags.parse_flags(flag)
|
ireert.flags.parse_flags(flag)
|
||||||
dl.log(f"module initialized")
|
dl.log(f"module initialized")
|
||||||
mmaped_vmfb = getattr(ctx.modules, mmaped_vmfb.name)
|
mmaped_vmfb = getattr(ctx.modules, mmaped_vmfb.name)
|
||||||
@@ -650,7 +650,7 @@ def get_results(
|
|||||||
haldriver = ireert.get_driver("rocm")
|
haldriver = ireert.get_driver("rocm")
|
||||||
haldevice = haldriver.create_device(
|
haldevice = haldriver.create_device(
|
||||||
config.id,
|
config.id,
|
||||||
allocators=shark_args.device_allocator,
|
allocators=amdshark_args.device_allocator,
|
||||||
)
|
)
|
||||||
for input_array in input:
|
for input_array in input:
|
||||||
dl.log(f"Load to device: {input_array.shape}")
|
dl.log(f"Load to device: {input_array.shape}")
|
||||||
@@ -688,7 +688,7 @@ def get_results(
|
|||||||
def get_iree_runtime_config(device):
|
def get_iree_runtime_config(device):
|
||||||
device = iree_device_map(device)
|
device = iree_device_map(device)
|
||||||
haldriver = ireert.get_driver(device)
|
haldriver = ireert.get_driver(device)
|
||||||
if "metal" in device and shark_args.device_allocator == "caching":
|
if "metal" in device and amdshark_args.device_allocator == "caching":
|
||||||
print(
|
print(
|
||||||
"[WARNING] metal devices can not have a `caching` allocator."
|
"[WARNING] metal devices can not have a `caching` allocator."
|
||||||
"\nUsing default allocator `None`"
|
"\nUsing default allocator `None`"
|
||||||
@@ -696,7 +696,7 @@ def get_iree_runtime_config(device):
|
|||||||
haldevice = haldriver.create_device_by_uri(
|
haldevice = haldriver.create_device_by_uri(
|
||||||
device,
|
device,
|
||||||
# metal devices have a failure with caching allocators atm; blocking this until it gets fixed upstream.
|
# metal devices have a failure with caching allocators atm; blocking this until it gets fixed upstream.
|
||||||
allocators=shark_args.device_allocator
|
allocators=amdshark_args.device_allocator
|
||||||
if "metal" not in device
|
if "metal" not in device
|
||||||
else None,
|
else None,
|
||||||
)
|
)
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
import functools
|
import functools
|
||||||
import subprocess
|
import subprocess
|
||||||
import platform
|
import platform
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
|
|
||||||
|
|
||||||
def get_cpu_count():
|
def get_cpu_count():
|
||||||
@@ -44,7 +44,7 @@ def get_iree_cpu_args():
|
|||||||
elif os_name == "Windows":
|
elif os_name == "Windows":
|
||||||
target_triple = "x86_64-pc-windows-msvc"
|
target_triple = "x86_64-pc-windows-msvc"
|
||||||
else:
|
else:
|
||||||
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
|
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dAMDSHARK team please :)"
|
||||||
raise Exception(error_message)
|
raise Exception(error_message)
|
||||||
print(f"Target triple found:{target_triple}")
|
print(f"Target triple found:{target_triple}")
|
||||||
return [
|
return [
|
||||||
@@ -59,7 +59,7 @@ def get_iree_cpu_rt_args():
|
|||||||
default = default if default <= 8 else default - 2
|
default = default if default <= 8 else default - 2
|
||||||
cpu_count = (
|
cpu_count = (
|
||||||
default
|
default
|
||||||
if shark_args.task_topology_max_group_count is None
|
if amdshark_args.task_topology_max_group_count is None
|
||||||
else shark_args.task_topology_max_group_count
|
else amdshark_args.task_topology_max_group_count
|
||||||
)
|
)
|
||||||
return [f"--task_topology_max_group_count={cpu_count}"]
|
return [f"--task_topology_max_group_count={cpu_count}"]
|
||||||
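get_iree_cpu_rt_args, above, derives the task-topology worker count from the CPU count (minus two above eight) unless amdshark_args overrides it. A sketch of the override path, assuming the function lives next to get_iree_cpu_args in amdshark.iree_utils.cpu_utils:

    from amdshark.parser import amdshark_args
    from amdshark.iree_utils.cpu_utils import get_iree_cpu_rt_args  # assumed location

    amdshark_args.task_topology_max_group_count = 4  # override the derived default
    print(get_iree_cpu_rt_args())  # ['--task_topology_max_group_count=4']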
@@ -19,8 +19,8 @@ import iree.runtime as ireert
|
|||||||
import ctypes
|
import ctypes
|
||||||
import sys
|
import sys
|
||||||
from subprocess import CalledProcessError
|
from subprocess import CalledProcessError
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
from shark.iree_utils._common import run_cmd
|
from amdshark.iree_utils._common import run_cmd
|
||||||
|
|
||||||
# TODO: refactor to rocm and cuda utils
|
# TODO: refactor to rocm and cuda utils
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ def get_iree_gpu_args():
|
|||||||
if (
|
if (
|
||||||
sm_arch
|
sm_arch
|
||||||
in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86", "sm_89"]
|
in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86", "sm_89"]
|
||||||
) and (shark_args.enable_tf32 == True):
|
) and (amdshark_args.enable_tf32 == True):
|
||||||
return [
|
return [
|
||||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
|
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
|
||||||
]
|
]
|
||||||
@@ -16,10 +16,10 @@
|
|||||||
|
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
from shark.iree_utils._common import run_cmd
|
from amdshark.iree_utils._common import run_cmd
|
||||||
import iree.runtime as ireert
|
import iree.runtime as ireert
|
||||||
from sys import platform
|
from sys import platform
|
||||||
from shark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
|
from amdshark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
|
||||||
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
@@ -81,7 +81,7 @@ def get_metal_triple_flag(device_name="", device_num=0, extra_args=[]):
|
|||||||
return f"-iree-metal-target-platform={triple}"
|
return f"-iree-metal-target-platform={triple}"
|
||||||
print(
|
print(
|
||||||
"""Optimized kernel for your target device is not added yet.
|
"""Optimized kernel for your target device is not added yet.
|
||||||
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
|
Contact AMDSHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
|
||||||
or pull up an issue."""
|
or pull up an issue."""
|
||||||
)
|
)
|
||||||
print(f"Target : {metal_device}")
|
print(f"Target : {metal_device}")
|
||||||
@@ -20,7 +20,7 @@ import time
|
|||||||
|
|
||||||
|
|
||||||
def _enable_detail_trace() -> bool:
|
def _enable_detail_trace() -> bool:
|
||||||
return os.getenv("SHARK_DETAIL_TRACE", "0") == "1"
|
return os.getenv("AMDSHARK_DETAIL_TRACE", "0") == "1"
|
||||||
|
|
||||||
|
|
||||||
class DetailLogger:
|
class DetailLogger:
|
||||||
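As the hunk above shows, detail tracing now keys off the AMDSHARK_DETAIL_TRACE environment variable rather than SHARK_DETAIL_TRACE. A minimal sketch of enabling it for one run:

    import os

    # "1" turns on DetailLogger output; any other value (or unset) leaves it off.
    # It just needs to be set before the tracing helpers read it.
    os.environ["AMDSHARK_DETAIL_TRACE"] = "1"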
@@ -16,11 +16,11 @@
|
|||||||
|
|
||||||
import functools
|
import functools
|
||||||
from os import linesep
|
from os import linesep
|
||||||
from shark.iree_utils._common import run_cmd
|
from amdshark.iree_utils._common import run_cmd
|
||||||
import iree.runtime as ireert
|
import iree.runtime as ireert
|
||||||
from sys import platform
|
from sys import platform
|
||||||
from shark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
|
from amdshark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
@@ -174,7 +174,7 @@ def get_vulkan_triple_flag(device_name="", device_num=0, extra_args=[]):
|
|||||||
return f"--iree-vulkan-target-triple={triple}"
|
return f"--iree-vulkan-target-triple={triple}"
|
||||||
print(
|
print(
|
||||||
"""Optimized kernel for your target device is not added yet.
|
"""Optimized kernel for your target device is not added yet.
|
||||||
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
|
Contact AMDSHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
|
||||||
or pull up an issue."""
|
or pull up an issue."""
|
||||||
)
|
)
|
||||||
print(f"Target : {vulkan_device}")
|
print(f"Target : {vulkan_device}")
|
||||||
@@ -208,9 +208,9 @@ def get_iree_vulkan_args(device_num=0, extra_args=[]):
|
|||||||
@functools.cache
|
@functools.cache
|
||||||
def get_iree_vulkan_runtime_flags():
|
def get_iree_vulkan_runtime_flags():
|
||||||
vulkan_runtime_flags = [
|
vulkan_runtime_flags = [
|
||||||
f"--vulkan_validation_layers={'true' if shark_args.vulkan_debug_utils else 'false'}",
|
f"--vulkan_validation_layers={'true' if amdshark_args.vulkan_debug_utils else 'false'}",
|
||||||
f"--vulkan_debug_verbosity={'4' if shark_args.vulkan_debug_utils else '0'}"
|
f"--vulkan_debug_verbosity={'4' if amdshark_args.vulkan_debug_utils else '0'}"
|
||||||
f"--vulkan-robust-buffer-access={'true' if shark_args.vulkan_debug_utils else 'false'}",
|
f"--vulkan-robust-buffer-access={'true' if amdshark_args.vulkan_debug_utils else 'false'}",
|
||||||
]
|
]
|
||||||
return vulkan_runtime_flags
|
return vulkan_runtime_flags
|
||||||
|
|
||||||
@@ -18,7 +18,7 @@ This function takes the model mlir file and the tuned config file as input,
|
|||||||
and output a new mlir file with lowering configs annotated on certain ops.
|
and output a new mlir file with lowering configs annotated on certain ops.
|
||||||
There are two ways to utilize the function:
|
There are two ways to utilize the function:
|
||||||
1. Call model_annotation function within another python script
|
1. Call model_annotation function within another python script
|
||||||
from shark.model_annotation import model_annotation
|
from amdshark.model_annotation import model_annotation
|
||||||
with create_context() as ctx:
|
with create_context() as ctx:
|
||||||
module = model_annotation(ctx, input_contents=..., config_path=..., search_op=...)
|
module = model_annotation(ctx, input_contents=..., config_path=..., search_op=...)
|
||||||
2. Run model_annotation.py directly
|
2. Run model_annotation.py directly
|
||||||
@@ -29,13 +29,13 @@ class SplitStrToListAction(argparse.Action):
|
|||||||
setattr(namespace, self.dest, shlex.split(" "))
|
setattr(namespace, self.dest, shlex.split(" "))
|
||||||
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="SHARK runner.")
|
parser = argparse.ArgumentParser(description="AMDSHARK runner.")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--device",
|
"--device",
|
||||||
type=str,
|
type=str,
|
||||||
default="cpu",
|
default="cpu",
|
||||||
help="Device on which shark_runner runs. options are cpu, cuda, and vulkan",
|
help="Device on which amdshark_runner runs. options are cpu, cuda, and vulkan",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--additional_compile_args",
|
"--additional_compile_args",
|
||||||
@@ -82,26 +82,26 @@ parser.add_argument(
|
|||||||
help="When enabled, pytest bench results will include ONNX benchmark results.",
|
help="When enabled, pytest bench results will include ONNX benchmark results.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--shark_prefix",
|
"--amdshark_prefix",
|
||||||
default=None,
|
default=None,
|
||||||
help="gs://shark_tank/<this_flag>/model_directories",
|
help="gs://amdshark_tank/<this_flag>/model_directories",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--update_tank",
|
"--update_tank",
|
||||||
default=True,
|
default=True,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="When enabled, SHARK downloader will update local shark_tank if local hash is different from latest upstream hash.",
|
help="When enabled, AMDSHARK downloader will update local amdshark_tank if local hash is different from latest upstream hash.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--force_update_tank",
|
"--force_update_tank",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="When enabled, SHARK downloader will force an update of local shark_tank artifacts for each request.",
|
help="When enabled, AMDSHARK downloader will force an update of local amdshark_tank artifacts for each request.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--local_tank_cache",
|
"--local_tank_cache",
|
||||||
default=None,
|
default=None,
|
||||||
help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
|
help="Specify where to save downloaded amdshark_tank artifacts. If this is not set, the default is ~/.local/amdshark_tank/.",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -167,4 +167,4 @@ parser.add_argument(
|
|||||||
help="Flag for disabling vulkan validation layers when benchmarking.",
|
help="Flag for disabling vulkan validation layers when benchmarking.",
|
||||||
)
|
)
|
||||||
|
|
||||||
shark_args, unknown = parser.parse_known_args()
|
amdshark_args, unknown = parser.parse_known_args()
|
||||||
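The renamed parser ends by exposing its parsed flags as amdshark_args, which the rest of this diff imports via from amdshark.parser import amdshark_args. A small sketch of reading two of the flags defined above:

    from amdshark.parser import amdshark_args

    print(amdshark_args.device)    # defaults to "cpu"
    if amdshark_args.update_tank:  # defaults to True
        print("local amdshark_tank artifacts refresh when the upstream hash changes")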
@@ -13,25 +13,25 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
from iree.runtime import query_available_drivers, get_driver
|
from iree.runtime import query_available_drivers, get_driver
|
||||||
from shark.shark_downloader import download_model
|
from amdshark.amdshark_downloader import download_model
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import argparse
|
import argparse
|
||||||
from shark.iree_utils._common import _IREE_DEVICE_MAP
|
from amdshark.iree_utils._common import _IREE_DEVICE_MAP
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
from shark.shark_runner import supported_dialects
|
from amdshark.amdshark_runner import supported_dialects
|
||||||
import logging
|
import logging
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
from concurrent.futures.thread import ThreadPoolExecutor
|
from concurrent.futures.thread import ThreadPoolExecutor
|
||||||
import time
|
import time
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
IREE_TO_SHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()}
|
IREE_TO_AMDSHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()}
|
||||||
|
|
||||||
|
|
||||||
def stress_test_compiled_model(
|
def stress_test_compiled_model(
|
||||||
shark_module_path: str,
|
amdshark_module_path: str,
|
||||||
function_name: str,
|
function_name: str,
|
||||||
device: str,
|
device: str,
|
||||||
inputs: List[np.ndarray],
|
inputs: List[np.ndarray],
|
||||||
@@ -50,14 +50,14 @@ def stress_test_compiled_model(
|
|||||||
# We are using execution in a separate thread in order to be able
|
# We are using execution in a separate thread in order to be able
|
||||||
# to wait with a timeout on the inference operation.
|
# to wait with a timeout on the inference operation.
|
||||||
module_executor = ThreadPoolExecutor(1)
|
module_executor = ThreadPoolExecutor(1)
|
||||||
shark_module = module_executor.submit(
|
amdshark_module = module_executor.submit(
|
||||||
SharkInference,
|
AMDSharkInference,
|
||||||
mlir_module=bytes(),
|
mlir_module=bytes(),
|
||||||
function_name=function_name,
|
function_name=function_name,
|
||||||
device=device,
|
device=device,
|
||||||
).result()
|
).result()
|
||||||
module_executor.submit(
|
module_executor.submit(
|
||||||
shark_module.load_module, shark_module_path
|
amdshark_module.load_module, amdshark_module_path
|
||||||
).result()
|
).result()
|
||||||
input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs]
|
input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs]
|
||||||
golden_output_batches = np.repeat(golden_out, batch_size, axis=0)
|
golden_output_batches = np.repeat(golden_out, batch_size, axis=0)
|
||||||
@@ -67,7 +67,7 @@ def stress_test_compiled_model(
|
|||||||
first_iteration_output = None
|
first_iteration_output = None
|
||||||
for i in range(max_iterations):
|
for i in range(max_iterations):
|
||||||
output = module_executor.submit(
|
output = module_executor.submit(
|
||||||
shark_module.forward, input_batches
|
amdshark_module.forward, input_batches
|
||||||
).result(inference_timeout_seconds)
|
).result(inference_timeout_seconds)
|
||||||
if first_iteration_output is None:
|
if first_iteration_output is None:
|
||||||
np.testing.assert_array_almost_equal_nulp(
|
np.testing.assert_array_almost_equal_nulp(
|
||||||
@@ -100,9 +100,9 @@ def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
|
|||||||
devices = []
|
devices = []
|
||||||
if device_types is None:
|
if device_types is None:
|
||||||
device_types = [
|
device_types = [
|
||||||
IREE_TO_SHARK_DRIVER_MAP[name]
|
IREE_TO_AMDSHARK_DRIVER_MAP[name]
|
||||||
for name in query_available_drivers()
|
for name in query_available_drivers()
|
||||||
if name in IREE_TO_SHARK_DRIVER_MAP
|
if name in IREE_TO_AMDSHARK_DRIVER_MAP
|
||||||
]
|
]
|
||||||
for device_type in device_types:
|
for device_type in device_types:
|
||||||
driver = get_driver(_IREE_DEVICE_MAP[device_type])
|
driver = get_driver(_IREE_DEVICE_MAP[device_type])
|
||||||
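query_devices, edited above, enumerates devices per driver using the inverted _IREE_DEVICE_MAP. A usage sketch, assuming it lives in amdshark.stress_test (the module the test later in the diff resolves via importlib):

    # Hypothetical usage; module path assumed from the importlib lookup further down.
    from amdshark.stress_test import query_devices

    print(query_devices())         # every device for every driver IREE reports
    print(query_devices(["cpu"]))  # restrict enumeration to one device type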
@@ -121,19 +121,19 @@ def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
|
|||||||
def compile_stress_test_module(
|
def compile_stress_test_module(
|
||||||
device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str
|
device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
shark_module_paths = []
|
amdshark_module_paths = []
|
||||||
for device_type in device_types:
|
for device_type in device_types:
|
||||||
logging.info(
|
logging.info(
|
||||||
f"Compiling stress test model for device type {device_type}."
|
f"Compiling stress test model for device type {device_type}."
|
||||||
)
|
)
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_model,
|
mlir_model,
|
||||||
func_name,
|
func_name,
|
||||||
mlir_dialect=mlir_dialect,
|
mlir_dialect=mlir_dialect,
|
||||||
device=device_type,
|
device=device_type,
|
||||||
)
|
)
|
||||||
shark_module_paths.append(shark_module.save_module())
|
amdshark_module_paths.append(amdshark_module.save_module())
|
||||||
return shark_module_paths
|
return amdshark_module_paths
|
||||||
|
|
||||||
|
|
||||||
def stress_test(
|
def stress_test(
|
||||||
@@ -169,21 +169,21 @@ def stress_test(
|
|||||||
# This needs to run in a subprocess because when compiling for CUDA,
|
# This needs to run in a subprocess because when compiling for CUDA,
|
||||||
# some stuff gets initialized and cuInit will fail in a forked process
|
# some stuff gets initialized and cuInit will fail in a forked process
|
||||||
# later. It should be just compiling, but alas.
|
# later. It should be just compiling, but alas.
|
||||||
shark_module_paths_set = executor.submit(
|
amdshark_module_paths_set = executor.submit(
|
||||||
compile_stress_test_module,
|
compile_stress_test_module,
|
||||||
device_types_set,
|
device_types_set,
|
||||||
mlir_model,
|
mlir_model,
|
||||||
func_name,
|
func_name,
|
||||||
mlir_dialect,
|
mlir_dialect,
|
||||||
).result()
|
).result()
|
||||||
device_type_shark_module_path_map = {
|
device_type_amdshark_module_path_map = {
|
||||||
device_type: module_path
|
device_type: module_path
|
||||||
for device_type, module_path in zip(
|
for device_type, module_path in zip(
|
||||||
device_types_set, shark_module_paths_set
|
device_types_set, amdshark_module_paths_set
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
device_name_shark_module_path_map = {
|
device_name_amdshark_module_path_map = {
|
||||||
device_name: device_type_shark_module_path_map[
|
device_name: device_type_amdshark_module_path_map[
|
||||||
get_device_type(device_name)
|
get_device_type(device_name)
|
||||||
]
|
]
|
||||||
for device_name in device_names
|
for device_name in device_names
|
||||||
@@ -193,7 +193,7 @@ def stress_test(
|
|||||||
# in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module`
|
# in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module`
|
||||||
# in a forked process will hang.
|
# in a forked process will hang.
|
||||||
with multiprocessing.Pool(
|
with multiprocessing.Pool(
|
||||||
len(device_name_shark_module_path_map) * oversubscription_factor
|
len(device_name_amdshark_module_path_map) * oversubscription_factor
|
||||||
) as process_pool:
|
) as process_pool:
|
||||||
process_pool.starmap(
|
process_pool.starmap(
|
||||||
stress_test_compiled_model,
|
stress_test_compiled_model,
|
||||||
@@ -212,7 +212,7 @@ def stress_test(
|
|||||||
stress_test_index,
|
stress_test_index,
|
||||||
)
|
)
|
||||||
for stress_test_index, (device_name, module_path) in enumerate(
|
for stress_test_index, (device_name, module_path) in enumerate(
|
||||||
list(device_name_shark_module_path_map.items())
|
list(device_name_amdshark_module_path_map.items())
|
||||||
* oversubscription_factor
|
* oversubscription_factor
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
# RUN: %PYTHON %s
|
# RUN: %PYTHON %s
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from shark.shark_importer import SharkImporter
|
from amdshark.amdshark_importer import AMDSharkImporter
|
||||||
import pytest
|
import pytest
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
from shark.shark_inference import SharkInference
|
from amdshark.amdshark_inference import AMDSharkInference
|
||||||
from shark.tflite_utils import TFLitePreprocessor
|
from amdshark.tflite_utils import TFLitePreprocessor
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
|
||||||
@@ -66,32 +66,32 @@ class AlbertTfliteModuleTester:
|
|||||||
self.save_vmfb = save_vmfb
|
self.save_vmfb = save_vmfb
|
||||||
|
|
||||||
def create_and_check_module(self):
|
def create_and_check_module(self):
|
||||||
shark_args.save_mlir = self.save_mlir
|
amdshark_args.save_mlir = self.save_mlir
|
||||||
shark_args.save_vmfb = self.save_vmfb
|
amdshark_args.save_vmfb = self.save_vmfb
|
||||||
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
|
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
|
||||||
|
|
||||||
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
|
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
|
||||||
inputs = tflite_preprocessor.get_inputs()
|
inputs = tflite_preprocessor.get_inputs()
|
||||||
tflite_interpreter = tflite_preprocessor.get_interpreter()
|
tflite_interpreter = tflite_preprocessor.get_interpreter()
|
||||||
|
|
||||||
my_shark_importer = SharkImporter(
|
my_amdshark_importer = AMDSharkImporter(
|
||||||
module=tflite_interpreter,
|
module=tflite_interpreter,
|
||||||
inputs=inputs,
|
inputs=inputs,
|
||||||
frontend="tflite",
|
frontend="tflite",
|
||||||
raw_model_file=raw_model_file_path,
|
raw_model_file=raw_model_file_path,
|
||||||
)
|
)
|
||||||
mlir_model, func_name = my_shark_importer.import_mlir()
|
mlir_model, func_name = my_amdshark_importer.import_mlir()
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module=mlir_model,
|
mlir_module=mlir_model,
|
||||||
function_name=func_name,
|
function_name=func_name,
|
||||||
device=self.device,
|
device=self.device,
|
||||||
mlir_dialect="tflite",
|
mlir_dialect="tflite",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Case1: Use shark_importer default generate inputs
|
# Case1: Use amdshark_importer default generate inputs
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
mlir_results = shark_module.forward(inputs)
|
mlir_results = amdshark_module.forward(inputs)
|
||||||
## post process results for compare
|
## post process results for compare
|
||||||
input_details, output_details = tflite_preprocessor.get_model_details()
|
input_details, output_details = tflite_preprocessor.get_model_details()
|
||||||
mlir_results = list(mlir_results)
|
mlir_results = list(mlir_results)
|
||||||
@@ -105,14 +105,14 @@ class AlbertTfliteModuleTester:
|
|||||||
input_details, output_details = tflite_preprocessor.get_model_details()
|
input_details, output_details = tflite_preprocessor.get_model_details()
|
||||||
inputs = generate_inputs(input_details) # new inputs
|
inputs = generate_inputs(input_details) # new inputs
|
||||||
|
|
||||||
shark_module = SharkInference(
|
amdshark_module = AMDSharkInference(
|
||||||
mlir_module=mlir_model,
|
mlir_module=mlir_model,
|
||||||
function_name=func_name,
|
function_name=func_name,
|
||||||
device=self.device,
|
device=self.device,
|
||||||
mlir_dialect="tflite",
|
mlir_dialect="tflite",
|
||||||
)
|
)
|
||||||
shark_module.compile()
|
amdshark_module.compile()
|
||||||
mlir_results = shark_module.forward(inputs)
|
mlir_results = amdshark_module.forward(inputs)
|
||||||
## post process results for compare
|
## post process results for compare
|
||||||
tflite_results = tflite_preprocessor.get_golden_output()
|
tflite_results = tflite_preprocessor.get_golden_output()
|
||||||
compare_results(mlir_results, tflite_results, output_details)
|
compare_results(mlir_results, tflite_results, output_details)
|
||||||
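The test above exercises the renamed import path end to end: TFLitePreprocessor fetches the model, AMDSharkImporter lowers it to MLIR, and AMDSharkInference compiles and runs it. A condensed sketch of that flow, with the constructor arguments copied from the hunks above (the device is a placeholder):

    from amdshark.amdshark_importer import AMDSharkImporter
    from amdshark.amdshark_inference import AMDSharkInference
    from amdshark.tflite_utils import TFLitePreprocessor

    pre = TFLitePreprocessor(model_name="albert_lite_base")
    importer = AMDSharkImporter(
        module=pre.get_interpreter(),
        inputs=pre.get_inputs(),
        frontend="tflite",
        raw_model_file=pre.get_raw_model_file(),
    )
    mlir_model, func_name = importer.import_mlir()

    module = AMDSharkInference(
        mlir_module=mlir_model,
        function_name=func_name,
        device="cpu",            # placeholder device
        mlir_dialect="tflite",
    )
    module.compile()
    results = module.forward(pre.get_inputs())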
@@ -22,7 +22,7 @@ def test_stress_test():
|
|||||||
subprocess.check_call(
|
subprocess.check_call(
|
||||||
[
|
[
|
||||||
sys.executable,
|
sys.executable,
|
||||||
importlib.util.find_spec("shark.stress_test").origin,
|
importlib.util.find_spec("amdshark.stress_test").origin,
|
||||||
"--model=squeezenet1_0",
|
"--model=squeezenet1_0",
|
||||||
"--devices",
|
"--devices",
|
||||||
"cpu",
|
"cpu",
|
||||||
@@ -96,7 +96,7 @@ class TFLitePreprocessor:
|
|||||||
|
|
||||||
print("Setting up for TMP_WORK_DIR")
|
print("Setting up for TMP_WORK_DIR")
|
||||||
self.workdir = os.path.join(
|
self.workdir = os.path.join(
|
||||||
os.path.dirname(__file__), "./../gen_shark_tank"
|
os.path.dirname(__file__), "./../gen_amdshark_tank"
|
||||||
)
|
)
|
||||||
os.makedirs(self.workdir, exist_ok=True)
|
os.makedirs(self.workdir, exist_ok=True)
|
||||||
print(f"TMP_WORK_DIR = {self.workdir}")
|
print(f"TMP_WORK_DIR = {self.workdir}")
|
||||||
@@ -28,7 +28,7 @@ from torch_mlir.eager_mode.torch_mlir_tensor import (
|
|||||||
no_dispatch,
|
no_dispatch,
|
||||||
)
|
)
|
||||||
from torch_mlir.eager_mode import torch_mlir_tensor
|
from torch_mlir.eager_mode import torch_mlir_tensor
|
||||||
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
|
from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
|
||||||
|
|
||||||
|
|
||||||
backend = EagerModeIREELinalgOnTensorsBackend("cpu")
|
backend = EagerModeIREELinalgOnTensorsBackend("cpu")
|
||||||
@@ -16,7 +16,7 @@ from torch_mlir.ir import StringAttr
|
|||||||
import torch_mlir
|
import torch_mlir
|
||||||
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
|
||||||
import tempfile
|
import tempfile
|
||||||
from shark.parser import shark_args
|
from amdshark.parser import amdshark_args
|
||||||
import io
|
import io
|
||||||
|
|
||||||
mlir_type_mapping_dict = {
|
mlir_type_mapping_dict = {
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
# -*- mode: python ; coding: utf-8 -*-
|
# -*- mode: python ; coding: utf-8 -*-
|
||||||
from apps.shark_studio.studio_imports import pathex, datas, hiddenimports
|
from apps.amdshark_studio.studio_imports import pathex, datas, hiddenimports
|
||||||
|
|
||||||
binaries = []
|
binaries = []
|
||||||
|
|
||||||
@@ -32,7 +32,7 @@ exe = EXE(
|
|||||||
a.zipfiles,
|
a.zipfiles,
|
||||||
a.datas,
|
a.datas,
|
||||||
[],
|
[],
|
||||||
name='nodai_shark_studio',
|
name='nodai_amdshark_studio',
|
||||||
debug=False,
|
debug=False,
|
||||||
bootloader_ignore_signals=False,
|
bootloader_ignore_signals=False,
|
||||||
strip=False,
|
strip=False,
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
import os
|
import os
|
||||||
import PIL
|
import PIL
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
get_generated_imgs_path,
|
get_generated_imgs_path,
|
||||||
)
|
)
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -6,13 +6,13 @@ import warnings
|
|||||||
import json
|
import json
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
from apps.shark_studio.modules.timer import startup_timer
|
from apps.amdshark_studio.modules.timer import startup_timer
|
||||||
|
|
||||||
from apps.shark_studio.web.utils.tmp_configs import (
|
from apps.amdshark_studio.web.utils.tmp_configs import (
|
||||||
config_tmp,
|
config_tmp,
|
||||||
clear_tmp_mlir,
|
clear_tmp_mlir,
|
||||||
clear_tmp_imgs,
|
clear_tmp_imgs,
|
||||||
shark_tmp,
|
amdshark_tmp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -30,12 +30,12 @@ def imports():
|
|||||||
|
|
||||||
startup_timer.record("import gradio")
|
startup_timer.record("import gradio")
|
||||||
|
|
||||||
import apps.shark_studio.web.utils.globals as global_obj
|
import apps.amdshark_studio.web.utils.globals as global_obj
|
||||||
|
|
||||||
global_obj._init()
|
global_obj._init()
|
||||||
startup_timer.record("initialize globals")
|
startup_timer.record("initialize globals")
|
||||||
|
|
||||||
from apps.shark_studio.modules import (
|
from apps.amdshark_studio.modules import (
|
||||||
img_processing,
|
img_processing,
|
||||||
) # noqa: F401
|
) # noqa: F401
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@ def imports():
|
|||||||
|
|
||||||
def initialize():
|
def initialize():
|
||||||
configure_sigint_handler()
|
configure_sigint_handler()
|
||||||
# Setup to use shark_tmp for gradio's temporary image files and clear any
|
# Setup to use amdshark_tmp for gradio's temporary image files and clear any
|
||||||
# existing temporary images there if they exist. Then we can import gradio.
|
# existing temporary images there if they exist. Then we can import gradio.
|
||||||
# It has to be in this order or gradio ignores what we've set up.
|
# It has to be in this order or gradio ignores what we've set up.
|
||||||
|
|
||||||
@@ -52,7 +52,7 @@ def initialize():
|
|||||||
# clear_tmp_mlir()
|
# clear_tmp_mlir()
|
||||||
clear_tmp_imgs()
|
clear_tmp_imgs()
|
||||||
|
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
create_model_folders,
|
create_model_folders,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -83,7 +83,7 @@ def dumpstacks():
|
|||||||
code.append(f"""File: "{filename}", line {lineno}, in {name}""")
|
code.append(f"""File: "{filename}", line {lineno}, in {name}""")
|
||||||
if line:
|
if line:
|
||||||
code.append(" " + line.strip())
|
code.append(" " + line.strip())
|
||||||
with open(os.path.join(shark_tmp, "stack_dump.log"), "w") as f:
|
with open(os.path.join(amdshark_tmp, "stack_dump.log"), "w") as f:
|
||||||
f.write("\n".join(code))
|
f.write("\n".join(code))
|
||||||
|
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ def setup_middleware(app):
|
|||||||
|
|
||||||
def configure_cors_middleware(app):
|
def configure_cors_middleware(app):
|
||||||
from starlette.middleware.cors import CORSMiddleware
|
from starlette.middleware.cors import CORSMiddleware
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
|
|
||||||
cors_options = {
|
cors_options = {
|
||||||
"allow_methods": ["*"],
|
"allow_methods": ["*"],
|
||||||
@@ -2,13 +2,13 @@ from turbine_models.custom_models import stateless_llama
|
|||||||
from turbine_models.model_runner import vmfbRunner
|
from turbine_models.model_runner import vmfbRunner
|
||||||
from turbine_models.gen_external_params.gen_external_params import gen_external_params
|
from turbine_models.gen_external_params.gen_external_params import gen_external_params
|
||||||
import time
|
import time
|
||||||
from shark.iree_utils.compile_utils import compile_module_to_flatbuffer
|
from amdshark.iree_utils.compile_utils import compile_module_to_flatbuffer
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
get_resource_path,
|
get_resource_path,
|
||||||
get_checkpoints_path,
|
get_checkpoints_path,
|
||||||
)
|
)
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
from apps.shark_studio.api.utils import parse_device
|
from apps.amdshark_studio.api.utils import parse_device
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
import iree.runtime as ireert
|
import iree.runtime as ireert
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
@@ -366,7 +366,7 @@ def get_mfma_spec_path(target_chip, save_dir):
|
|||||||
def llm_chat_api(InputData: dict):
|
def llm_chat_api(InputData: dict):
|
||||||
from datetime import datetime as dt
|
from datetime import datetime as dt
|
||||||
|
|
||||||
import apps.shark_studio.web.utils.globals as global_obj
|
import apps.amdshark_studio.web.utils.globals as global_obj
|
||||||
|
|
||||||
print(f"Input keys : {InputData.keys()}")
|
print(f"Input keys : {InputData.keys()}")
|
||||||
|
|
||||||
@@ -12,26 +12,26 @@ from tqdm.auto import tqdm
|
|||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from random import randint
|
from random import randint
|
||||||
from turbine_models.custom_models.sd_inference.sd_pipeline import SharkSDPipeline
|
from turbine_models.custom_models.sd_inference.sd_pipeline import AMDSharkSDPipeline
|
||||||
from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
|
from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
|
||||||
SharkSDXLPipeline,
|
AMDSharkSDXLPipeline,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
from apps.shark_studio.api.controlnet import control_adapter_map
|
from apps.amdshark_studio.api.controlnet import control_adapter_map
|
||||||
from apps.shark_studio.api.utils import parse_device
|
from apps.amdshark_studio.api.utils import parse_device
|
||||||
from apps.shark_studio.web.utils.state import status_label
|
from apps.amdshark_studio.web.utils.state import status_label
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
safe_name,
|
safe_name,
|
||||||
get_resource_path,
|
get_resource_path,
|
||||||
get_checkpoints_path,
|
get_checkpoints_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
from apps.shark_studio.modules.img_processing import (
|
from apps.amdshark_studio.modules.img_processing import (
|
||||||
save_output_img,
|
save_output_img,
|
||||||
)
|
)
|
||||||
|
|
||||||
from apps.shark_studio.modules.ckpt_processing import (
|
from apps.amdshark_studio.modules.ckpt_processing import (
|
||||||
preprocessCKPT,
|
preprocessCKPT,
|
||||||
save_irpa,
|
save_irpa,
|
||||||
)
|
)
|
||||||
@@ -114,10 +114,10 @@ class StableDiffusion:
|
|||||||
self.turbine_pipe = custom_module.StudioPipeline
|
self.turbine_pipe = custom_module.StudioPipeline
|
||||||
self.model_map = custom_module.MODEL_MAP
|
self.model_map = custom_module.MODEL_MAP
|
||||||
elif self.is_sdxl:
|
elif self.is_sdxl:
|
||||||
self.turbine_pipe = SharkSDXLPipeline
|
self.turbine_pipe = AMDSharkSDXLPipeline
|
||||||
self.model_map = EMPTY_SDXL_MAP
|
self.model_map = EMPTY_SDXL_MAP
|
||||||
else:
|
else:
|
||||||
self.turbine_pipe = SharkSDPipeline
|
self.turbine_pipe = AMDSharkSDPipeline
|
||||||
self.model_map = EMPTY_SD_MAP
|
self.model_map = EMPTY_SD_MAP
|
||||||
max_length = 64
|
max_length = 64
|
||||||
target_backend, self.rt_device, triple = parse_device(device, target_triple)
|
target_backend, self.rt_device, triple = parse_device(device, target_triple)
|
||||||
@@ -273,7 +273,7 @@ class StableDiffusion:
|
|||||||
return img
|
return img
|
||||||
|
|
||||||
|
|
||||||
def shark_sd_fn_dict_input(
|
def amdshark_sd_fn_dict_input(
|
||||||
sd_kwargs: dict,
|
sd_kwargs: dict,
|
||||||
):
|
):
|
||||||
print("\n[LOG] Submitting Request...")
|
print("\n[LOG] Submitting Request...")
|
||||||
@@ -312,11 +312,11 @@ def shark_sd_fn_dict_input(
|
|||||||
)
|
)
|
||||||
return None, ""
|
return None, ""
|
||||||
|
|
||||||
generated_imgs = yield from shark_sd_fn(**sd_kwargs)
|
generated_imgs = yield from amdshark_sd_fn(**sd_kwargs)
|
||||||
return generated_imgs
|
return generated_imgs
|
||||||
|
|
||||||
|
|
||||||
def shark_sd_fn(
|
def amdshark_sd_fn(
|
||||||
prompt,
|
prompt,
|
||||||
negative_prompt,
|
negative_prompt,
|
||||||
sd_init_image: list,
|
sd_init_image: list,
|
||||||
@@ -346,8 +346,8 @@ def shark_sd_fn(
|
|||||||
sd_init_image = [sd_init_image]
|
sd_init_image = [sd_init_image]
|
||||||
is_img2img = True if sd_init_image[0] is not None else False
|
is_img2img = True if sd_init_image[0] is not None else False
|
||||||
|
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
import apps.shark_studio.web.utils.globals as global_obj
|
import apps.amdshark_studio.web.utils.globals as global_obj
|
||||||
|
|
||||||
adapters = {}
|
adapters = {}
|
||||||
is_controlled = False
|
is_controlled = False
|
||||||
@@ -466,7 +466,7 @@ def shark_sd_fn(
|
|||||||
|
|
||||||
def unload_sd():
|
def unload_sd():
|
||||||
print("Unloading models.")
|
print("Unloading models.")
|
||||||
import apps.shark_studio.web.utils.globals as global_obj
|
import apps.amdshark_studio.web.utils.globals as global_obj
|
||||||
|
|
||||||
global_obj.clear_cache()
|
global_obj.clear_cache()
|
||||||
gc.collect()
|
gc.collect()
|
||||||
@@ -489,8 +489,8 @@ def safe_name(name):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
import apps.shark_studio.web.utils.globals as global_obj
|
import apps.amdshark_studio.web.utils.globals as global_obj
|
||||||
|
|
||||||
global_obj._init()
|
global_obj._init()
|
||||||
|
|
||||||
@@ -501,5 +501,5 @@ if __name__ == "__main__":
|
|||||||
for arg in vars(cmd_opts):
|
for arg in vars(cmd_opts):
|
||||||
if arg in sd_kwargs:
|
if arg in sd_kwargs:
|
||||||
sd_kwargs[arg] = getattr(cmd_opts, arg)
|
sd_kwargs[arg] = getattr(cmd_opts, arg)
|
||||||
for i in shark_sd_fn_dict_input(sd_kwargs):
|
for i in amdshark_sd_fn_dict_input(sd_kwargs):
|
||||||
print(i)
|
print(i)
|
||||||
@@ -8,11 +8,11 @@ from random import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
from cpuinfo import get_cpu_info
|
from cpuinfo import get_cpu_info
|
||||||
|
|
||||||
# TODO: migrate these utils to studio
|
# TODO: migrate these utils to studio
|
||||||
from shark.iree_utils.vulkan_utils import (
|
from amdshark.iree_utils.vulkan_utils import (
|
||||||
set_iree_vulkan_runtime_flags,
|
set_iree_vulkan_runtime_flags,
|
||||||
get_vulkan_target_triple,
|
get_vulkan_target_triple,
|
||||||
get_iree_vulkan_runtime_flags,
|
get_iree_vulkan_runtime_flags,
|
||||||
@@ -21,7 +21,7 @@ from shark.iree_utils.vulkan_utils import (
|
|||||||
|
|
||||||
def get_available_devices():
|
def get_available_devices():
|
||||||
def get_devices_by_name(driver_name):
|
def get_devices_by_name(driver_name):
|
||||||
from shark.iree_utils._common import iree_device_map
|
from amdshark.iree_utils._common import iree_device_map
|
||||||
|
|
||||||
device_list = []
|
device_list = []
|
||||||
try:
|
try:
|
||||||
@@ -59,7 +59,7 @@ def get_available_devices():
|
|||||||
cpu_device = get_devices_by_name("cpu-task")
|
cpu_device = get_devices_by_name("cpu-task")
|
||||||
available_devices.extend(cpu_device)
|
available_devices.extend(cpu_device)
|
||||||
|
|
||||||
from shark.iree_utils.vulkan_utils import (
|
from amdshark.iree_utils.vulkan_utils import (
|
||||||
get_all_vulkan_devices,
|
get_all_vulkan_devices,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -116,7 +116,7 @@ def set_init_device_flags():
|
|||||||
elif "metal" in cmd_opts.device:
|
elif "metal" in cmd_opts.device:
|
||||||
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
|
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
|
||||||
if not cmd_opts.iree_metal_target_platform:
|
if not cmd_opts.iree_metal_target_platform:
|
||||||
from shark.iree_utils.metal_utils import get_metal_target_triple
|
from amdshark.iree_utils.metal_utils import get_metal_target_triple
|
||||||
|
|
||||||
triple = get_metal_target_triple(device_name)
|
triple = get_metal_target_triple(device_name)
|
||||||
if triple is not None:
|
if triple is not None:
|
||||||
@@ -146,7 +146,7 @@ def set_iree_runtime_flags():
|
|||||||
|
|
||||||
|
|
||||||
def parse_device(device_str, target_override=""):
|
def parse_device(device_str, target_override=""):
|
||||||
from shark.iree_utils.compile_utils import (
|
from amdshark.iree_utils.compile_utils import (
|
||||||
clean_device_info,
|
clean_device_info,
|
||||||
get_iree_target_triple,
|
get_iree_target_triple,
|
||||||
iree_target_map,
|
iree_target_map,
|
||||||
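parse_device, whose imports are updated above, is what the StableDiffusion class earlier in the diff uses to split a device string into a compile backend, a runtime device, and a target triple. A minimal sketch of that call; the device string is a placeholder:

    from apps.amdshark_studio.api.utils import parse_device  # import path shown earlier in the diff

    # Returns (target_backend, runtime_device, target_triple).
    target_backend, rt_device, triple = parse_device("rocm")
    print(target_backend, rt_device, triple)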
@@ -192,7 +192,7 @@ def get_rocm_target_chip(device_str):
|
|||||||
if key in device_str:
|
if key in device_str:
|
||||||
return rocm_chip_map[key]
|
return rocm_chip_map[key]
|
||||||
raise AssertionError(
|
raise AssertionError(
|
||||||
f"Device {device_str} not recognized. Please file an issue at https://github.com/nod-ai/SHARK-Studio/issues."
|
f"Device {device_str} not recognized. Please file an issue at https://github.com/nod-ai/AMDSHARK-Studio/issues."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -225,7 +225,7 @@ def get_device_mapping(driver, key_combination=3):
|
|||||||
dict: map to possible device names user can input mapped to desired
|
dict: map to possible device names user can input mapped to desired
|
||||||
combination of name/path.
|
combination of name/path.
|
||||||
"""
|
"""
|
||||||
from shark.iree_utils._common import iree_device_map
|
from amdshark.iree_utils._common import iree_device_map
|
||||||
|
|
||||||
driver = iree_device_map(driver)
|
driver = iree_device_map(driver)
|
||||||
device_list = get_all_devices(driver)
|
device_list = get_all_devices(driver)
|
||||||
@@ -256,7 +256,7 @@ def get_opt_flags(model, precision="fp16"):
|
|||||||
f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
|
f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
|
||||||
)
|
)
|
||||||
if "rocm" in cmd_opts.device:
|
if "rocm" in cmd_opts.device:
|
||||||
from shark.iree_utils.gpu_utils import get_iree_rocm_args
|
from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
|
||||||
|
|
||||||
rocm_args = get_iree_rocm_args()
|
rocm_args = get_iree_rocm_args()
|
||||||
iree_flags.extend(rocm_args)
|
iree_flags.extend(rocm_args)
|
||||||
@@ -301,7 +301,7 @@ def map_device_to_name_path(device, key_combination=3):
|
|||||||
return device_mapping
|
return device_mapping
|
||||||
|
|
||||||
def get_devices_by_name(driver_name):
|
def get_devices_by_name(driver_name):
|
||||||
from shark.iree_utils._common import iree_device_map
|
from amdshark.iree_utils._common import iree_device_map
|
||||||
|
|
||||||
device_list = []
|
device_list = []
|
||||||
try:
|
try:
|
||||||
@@ -332,7 +332,7 @@ def map_device_to_name_path(device, key_combination=3):
|
|||||||
set_iree_runtime_flags()
|
set_iree_runtime_flags()
|
||||||
|
|
||||||
available_devices = []
|
available_devices = []
|
||||||
from shark.iree_utils.vulkan_utils import (
|
from amdshark.iree_utils.vulkan_utils import (
|
||||||
get_all_vulkan_devices,
|
get_all_vulkan_devices,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -12,7 +12,7 @@ from pathlib import Path
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from omegaconf import OmegaConf
|
from omegaconf import OmegaConf
|
||||||
from diffusers import StableDiffusionPipeline
|
from diffusers import StableDiffusionPipeline
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
|
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
|
||||||
download_from_original_stable_diffusion_ckpt,
|
download_from_original_stable_diffusion_ckpt,
|
||||||
create_vae_diffusers_config,
|
create_vae_diffusers_config,
|
||||||
@@ -5,7 +5,7 @@ import json
|
|||||||
import safetensors
|
import safetensors
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from safetensors.torch import load_file
|
from safetensors.torch import load_file
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
get_checkpoint_pathfile,
|
get_checkpoint_pathfile,
|
||||||
get_path_stem,
|
get_path_stem,
|
||||||
)
|
)
|
||||||
@@ -25,11 +25,11 @@ resampler_list = resamplers.keys()
|
|||||||
|
|
||||||
# save output images and the inputs corresponding to it.
|
# save output images and the inputs corresponding to it.
|
||||||
def save_output_img(output_img, img_seed, extra_info=None):
|
def save_output_img(output_img, img_seed, extra_info=None):
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
get_generated_imgs_path,
|
get_generated_imgs_path,
|
||||||
get_generated_imgs_todays_subdir,
|
get_generated_imgs_todays_subdir,
|
||||||
)
|
)
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
|
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
|
||||||
|
|
||||||
if extra_info is None:
|
if extra_info is None:
|
||||||
extra_info = {}
|
extra_info = {}
|
||||||
@@ -30,8 +30,8 @@ def logger_test(x):
|
|||||||
|
|
||||||
def read_sd_logs():
|
def read_sd_logs():
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
with open("shark_tmp/sd.log", "r") as f:
|
with open("amdshark_tmp/sd.log", "r") as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
sys.stdout = Logger("shark_tmp/sd.log", filter="[LOG]")
|
sys.stdout = Logger("amdshark_tmp/sd.log", filter="[LOG]")
|
||||||
@@ -1,14 +1,14 @@
|
|||||||
from shark.iree_utils.compile_utils import (
|
from amdshark.iree_utils.compile_utils import (
|
||||||
get_iree_compiled_module,
|
get_iree_compiled_module,
|
||||||
load_vmfb_using_mmap,
|
load_vmfb_using_mmap,
|
||||||
clean_device_info,
|
clean_device_info,
|
||||||
get_iree_target_triple,
|
get_iree_target_triple,
|
||||||
)
|
)
|
||||||
from apps.shark_studio.web.utils.file_utils import (
|
from apps.amdshark_studio.web.utils.file_utils import (
|
||||||
get_checkpoints_path,
|
get_checkpoints_path,
|
||||||
get_resource_path,
|
get_resource_path,
|
||||||
)
|
)
|
||||||
from apps.shark_studio.modules.shared_cmd_opts import (
|
from apps.amdshark_studio.modules.shared_cmd_opts import (
|
||||||
cmd_opts,
|
cmd_opts,
|
||||||
)
|
)
|
||||||
from iree import runtime as ireert
|
from iree import runtime as ireert
|
||||||
@@ -17,7 +17,7 @@ import gc
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
class SharkPipelineBase:
|
class AMDSharkPipelineBase:
|
||||||
# This class is a lightweight base for managing an
|
# This class is a lightweight base for managing an
|
||||||
# inference API class. It should provide methods for:
|
# inference API class. It should provide methods for:
|
||||||
# - compiling a set (model map) of torch IR modules
|
# - compiling a set (model map) of torch IR modules
|
||||||
@@ -224,7 +224,7 @@ def get_unweighted_text_embeddings(
|
|||||||
text_embedding = text_embedding[:, 1:-1]
|
text_embedding = text_embedding[:, 1:-1]
|
||||||
|
|
||||||
text_embeddings.append(text_embedding)
|
text_embeddings.append(text_embedding)
|
||||||
# SHARK: Convert the result to tensor
|
# AMDSHARK: Convert the result to tensor
|
||||||
# text_embeddings = torch.concat(text_embeddings, axis=1)
|
# text_embeddings = torch.concat(text_embeddings, axis=1)
|
||||||
text_embeddings_np = np.concatenate(np.array(text_embeddings))
|
text_embeddings_np = np.concatenate(np.array(text_embeddings))
|
||||||
text_embeddings = torch.from_numpy(text_embeddings_np)
|
text_embeddings = torch.from_numpy(text_embeddings_np)
|
||||||
Some files were not shown because too many files have changed in this diff.