Migration to AMDShark (#2182)

Signed-off-by: pdhirajkumarprasad <dhirajp@amd.com>
Author: pdhirajkumarprasad
Date: 2025-11-20 12:52:07 +05:30
Committed by: GitHub
Parent: dba2c8a567
Commit: fe03539901

232 changed files with 1719 additions and 1719 deletions


@@ -10,7 +10,7 @@ jobs:
 runs-on: ubuntu-latest
 # Don't run this in everyone's forks.
-if: github.repository == 'nod-ai/SHARK'
+if: github.repository == 'nod-ai/AMDSHARK'
 steps:
 - name: Checking out repository
@@ -18,7 +18,7 @@ jobs:
 with:
 token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
 - name: Run scrape releases script
-run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
+run: python ./build_tools/scrape_releases.py nod-ai AMDSHARK > /tmp/index.html
 shell: bash
 - run: git fetch --all
 - run: git switch github-pages


@@ -40,9 +40,9 @@ jobs:
 GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
 with:
 tag: ${{ env.tag_name }}
-name: nod.ai SHARK ${{ env.tag_name }}
+name: nod.ai AMDSHARK ${{ env.tag_name }}
 body: |
-Automatic snapshot release of nod.ai SHARK.
+Automatic snapshot release of nod.ai AMDSHARK.
 draft: true
 prerelease: true
@@ -51,12 +51,12 @@ jobs:
 run: |
 ./setup_venv.ps1
 python process_skipfiles.py
-$env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
+$env:AMDSHARK_PACKAGE_VERSION=${{ env.package_version }}
 pip install -e .
 pip freeze -l
-pyinstaller .\apps\shark_studio\shark_studio.spec
+pyinstaller .\apps\amdshark_studio\amdshark_studio.spec
-mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
+mv ./dist/nodai_amdshark_studio.exe ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe
-signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
+signtool sign /f c:\g\amdshark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe
 - name: Upload Release Assets
 id: upload-release-assets


@@ -1,19 +1,19 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-name: Validate Shark Studio
+name: Validate AMDShark Studio
 on:
 push:
 branches: [ main ]
 paths-ignore:
 - '**.md'
-- 'shark/examples/**'
+- 'amdshark/examples/**'
 pull_request:
 branches: [ main ]
 paths-ignore:
 - '**.md'
-- 'shark/examples/**'
+- 'amdshark/examples/**'
 workflow_dispatch:
 # Ensure that only a single job or workflow using the same
@@ -66,7 +66,7 @@ jobs:
 run: |
 # black format check
 black --version
-black --check apps/shark_studio
+black --check apps/amdshark_studio
 # stop the build if there are Python syntax errors or undefined names
 flake8 . --statistics
 # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
@@ -77,9 +77,9 @@ jobs:
 if: matrix.suite == 'cpu'
 run: |
 cd $GITHUB_WORKSPACE
-python${{ matrix.python-version }} -m venv shark.venv
+python${{ matrix.python-version }} -m venv amdshark.venv
-source shark.venv/bin/activate
+source amdshark.venv/bin/activate
 pip install -r requirements.txt --no-cache-dir
 pip install -e .
 # Disabled due to hang when exporting test llama2
-# python apps/shark_studio/tests/api_test.py
+# python apps/amdshark_studio/tests/api_test.py

.gitignore (8 changes)

@@ -164,15 +164,15 @@ cython_debug/
 # vscode related
 .vscode
-# Shark related artifacts
+# AMDShark related artifacts
 *venv/
-shark_tmp/
+amdshark_tmp/
 *.vmfb
 .use-iree
 tank/dict_configs.py
 *.csv
 reproducers/
-apps/shark_studio/web/configs
+apps/amdshark_studio/web/configs
 # ORT related artefacts
 cache_models/
@@ -189,7 +189,7 @@ variants.json
 # models folder
 apps/stable_diffusion/web/models/
-# model artifacts (SHARK)
+# model artifacts (AMDSHARK)
 *.tempfile
 *.mlir
 *.vmfb

.gitmodules (6 changes)

@@ -1,4 +1,4 @@
-[submodule "inference/thirdparty/shark-runtime"]
+[submodule "inference/thirdparty/amdshark-runtime"]
-path = inference/thirdparty/shark-runtime
+path = inference/thirdparty/amdshark-runtime
 url = https://github.com/nod-ai/SRT.git
-branch = shark-06032022
+branch = amdshark-06032022

README.md (118 changes)

@@ -1,12 +1,12 @@
-# SHARK
+# AMDSHARK
 High Performance Machine Learning Distribution
 <h2>NOTE: This project is not currently maintained.</h2>
-*The latest versions of this project are developments towards a refactor on top of IREE-Turbine. Until further notice, make sure you use an .exe release or a checkout of the `SHARK-1.0` branch, for a working SHARK-Studio*
+*The latest versions of this project are developments towards a refactor on top of IREE-Turbine. Until further notice, make sure you use an .exe release or a checkout of the `AMDSHARK-1.0` branch, for a working AMDSHARK-Studio*
-[![Nightly Release](https://github.com/nod-ai/SHARK-Studio/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK-Studio/actions/workflows/nightly.yml)
+[![Nightly Release](https://github.com/nod-ai/AMDSHARK-Studio/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/AMDSHARK-Studio/actions/workflows/nightly.yml)
 <details>
 <summary>Prerequisites - Drivers </summary>
@@ -25,11 +25,11 @@ Other users please ensure you have your latest vendor drivers and Vulkan SDK fro
-### Quick Start for SHARK Stable Diffusion for Windows 10/11 Users
+### Quick Start for AMDSHARK Stable Diffusion for Windows 10/11 Users
-Install the Driver from [Prerequisites](https://github.com/nod-ai/SHARK-Studio#install-your-hardware-drivers) above
+Install the Driver from [Prerequisites](https://github.com/nod-ai/AMDSHARK-Studio#install-your-hardware-drivers) above
-Download the [stable release](https://github.com/nod-ai/SHARK-Studio/releases/latest) or the most recent [SHARK 1.0 pre-release](https://github.com/nod-ai/SHARK-Studio/releases).
+Download the [stable release](https://github.com/nod-ai/AMDSHARK-Studio/releases/latest) or the most recent [AMDSHARK 1.0 pre-release](https://github.com/nod-ai/AMDSHARK-Studio/releases).
 Double click the .exe, or [run from the command line](#running) (recommended), and you should have the [UI](http://localhost:8080/) in the browser.
@@ -67,16 +67,16 @@ Enjoy.
 ## Check out the code
 ```shell
-git clone https://github.com/nod-ai/SHARK.git
+git clone https://github.com/nod-ai/AMDSHARK.git
-cd SHARK
+cd AMDSHARK
 ```
 ## Switch to the Correct Branch (IMPORTANT!)
-Currently SHARK is being rebuilt for [Turbine](https://github.com/iree-org/iree-turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation, So for now you'll need switch over to the `SHARK-1.0` branch and use the stable code.
+Currently AMDSHARK is being rebuilt for [Turbine](https://github.com/iree-org/iree-turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation, So for now you'll need switch over to the `AMDSHARK-1.0` branch and use the stable code.
 ```shell
-git checkout SHARK-1.0
+git checkout AMDSHARK-1.0
 ```
 The following setup instructions assume you are on this branch.
@@ -92,7 +92,7 @@ The following setup instructions assume you are on this branch.
 set-executionpolicy remotesigned
 ```
-#### Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...)
+#### Setup venv and install necessary packages (torch-mlir, nodLabs/AMDShark, ...)
 ```powershell
 ./setup_venv.ps1 #You can re-run this script to get the latest version
 ```
@@ -101,20 +101,20 @@ set-executionpolicy remotesigned
 ```shell
 ./setup_venv.sh
-source shark1.venv/bin/activate
+source amdshark1.venv/bin/activate
 ```
 ### Run Stable Diffusion on your device - WebUI
 #### Windows 10/11 Users
 ```powershell
-(shark1.venv) PS C:\g\shark> cd .\apps\stable_diffusion\web\
+(amdshark1.venv) PS C:\g\amdshark> cd .\apps\stable_diffusion\web\
-(shark1.venv) PS C:\g\shark\apps\stable_diffusion\web> python .\index.py
+(amdshark1.venv) PS C:\g\amdshark\apps\stable_diffusion\web> python .\index.py
 ```
 #### Linux / macOS Users
 ```shell
-(shark1.venv) > cd apps/stable_diffusion/web
+(amdshark1.venv) > cd apps/stable_diffusion/web
-(shark1.venv) > python index.py
+(amdshark1.venv) > python index.py
 ```
 #### Access Stable Diffusion on http://localhost:8080/?__theme=dark
@@ -128,7 +128,7 @@ source shark1.venv/bin/activate
 #### Windows 10/11 Users
 ```powershell
-(shark1.venv) PS C:\g\shark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
+(amdshark1.venv) PS C:\g\amdshark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
 ```
 #### Linux / macOS Users
@@ -156,7 +156,7 @@ Here are some samples generated:
 ![a photo of a crab playing a trumpet](https://user-images.githubusercontent.com/74956/204933258-252e7240-8548-45f7-8253-97647d38313d.jpg)
-Find us on [SHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
+Find us on [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
 <details>
@@ -168,8 +168,8 @@ This step sets up a new VirtualEnv for Python
 ```shell
 python --version #Check you have 3.11 on Linux, macOS or Windows Powershell
-python -m venv shark_venv
+python -m venv amdshark_venv
-source shark_venv/bin/activate # Use shark_venv/Scripts/activate on Windows
+source amdshark_venv/bin/activate # Use amdshark_venv/Scripts/activate on Windows
 # If you are using conda create and activate a new conda env
@@ -179,15 +179,15 @@ python -m pip install --upgrade pip
 *macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"
-### Install SHARK
+### Install AMDSHARK
-This step pip installs SHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11
+This step pip installs AMDSHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11
 ```shell
-pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+pip install nodai-amdshark -f https://nod-ai.github.io/AMDSHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 ```
-### Run shark tank model tests.
+### Run amdshark tank model tests.
 ```shell
 pytest tank/test_models.py
 ```
@@ -196,7 +196,7 @@ See tank/README.md for a more detailed walkthrough of our pytest suite and CLI.
 ### Download and run Resnet50 sample
 ```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
+curl -O https://raw.githubusercontent.com/nod-ai/AMDSHARK/main/amdshark/examples/amdshark_inference/resnet50_script.py
 #Install deps for test script
 pip install --pre torch torchvision torchaudio tqdm pillow gsutil --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
@@ -204,7 +204,7 @@ python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
 ### Download and run BERT (MiniLM) sample
 ```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
+curl -O https://raw.githubusercontent.com/nod-ai/AMDSHARK/main/amdshark/examples/amdshark_inference/minilm_jit.py
 #Install deps for test script
 pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
 python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
@@ -222,34 +222,34 @@ Set `USE_IREE=1` to use upstream IREE
 # PYTHON=python3.11 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh
 ```
-### Run any of the hundreds of SHARK tank models via the test framework
+### Run any of the hundreds of AMDSHARK tank models via the test framework
 ```shell
-python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
+python -m amdshark.examples.amdshark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
 # Or a pytest
 pytest tank/test_models.py -k "MiniLM"
 ```
-### How to use your locally built IREE / Torch-MLIR with SHARK
+### How to use your locally built IREE / Torch-MLIR with AMDSHARK
 If you are a *Torch-mlir developer or an IREE developer* and want to test local changes you can uninstall
 the provided packages with `pip uninstall torch-mlir` and / or `pip uninstall iree-compiler iree-runtime` and build locally
 with Python bindings and set your PYTHONPATH as mentioned [here](https://github.com/iree-org/iree/tree/main/docs/api_docs/python#install-iree-binaries)
 for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
 for Torch-MLIR.
-How to use your locally built Torch-MLIR with SHARK:
+How to use your locally built Torch-MLIR with AMDSHARK:
 ```shell
-1.) Run `./setup_venv.sh in SHARK` and activate `shark.venv` virtual env.
+1.) Run `./setup_venv.sh in AMDSHARK` and activate `amdshark.venv` virtual env.
 2.) Run `pip uninstall torch-mlir`.
 3.) Go to your local Torch-MLIR directory.
 4.) Activate mlir_venv virtual envirnoment.
 5.) Run `pip uninstall -r requirements.txt`.
 6.) Run `pip install -r requirements.txt`.
 7.) Build Torch-MLIR.
-8.) Activate shark.venv virtual environment from the Torch-MLIR directory.
+8.) Activate amdshark.venv virtual environment from the Torch-MLIR directory.
 8.) Run `export PYTHONPATH=`pwd`/build/tools/torch-mlir/python_packages/torch_mlir:`pwd`/examples` in the Torch-MLIR directory.
-9.) Go to the SHARK directory.
+9.) Go to the AMDSHARK directory.
 ```
-Now the SHARK will use your locally build Torch-MLIR repo.
+Now the AMDSHARK will use your locally build Torch-MLIR repo.
 ## Benchmarking Dispatches
@@ -263,10 +263,10 @@ pytest -k "MiniLM and torch and static and cuda" --benchmark_dispatches=All -s -
 ```
 The given command will populate `<dispatch_benchmarks_dir>/<model_name>/` with an `ordered_dispatches.txt` that lists and orders the dispatches and their latencies, as well as folders for each dispatch that contain .mlir, .vmfb, and results of the benchmark for that dispatch.
-if you want to instead incorporate this into a python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` commands when initializing `SharkInference`, and the benchmarks will be generated when compiled. E.G:
+if you want to instead incorporate this into a python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` commands when initializing `AMDSharkInference`, and the benchmarks will be generated when compiled. E.G:
 ```
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_model,
 device=args.device,
 mlir_dialect="tm_tensor",
@@ -285,34 +285,34 @@ Output will include:
 - A .txt file containing benchmark output
-See tank/README.md for further instructions on how to run model tests and benchmarks from the SHARK tank.
+See tank/README.md for further instructions on how to run model tests and benchmarks from the AMDSHARK tank.
 </details>
 <details>
 <summary>API Reference</summary>
-### Shark Inference API
+### AMDShark Inference API
 ```
-from shark.shark_importer import SharkImporter
+from amdshark.amdshark_importer import AMDSharkImporter
-# SharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
+# AMDSharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
-mlir_importer = SharkImporter(
+mlir_importer = AMDSharkImporter(
 torch_module,
 (input),
 frontend="torch", #tf, #tf-lite
 )
 torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
-# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
+# AMDSharkInference accepts mlir in linalg, mhlo, and tosa dialect.
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
-shark_module = SharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
+amdshark_module = AMDSharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
-shark_module.compile()
+amdshark_module.compile()
-result = shark_module.forward((input))
+result = amdshark_module.forward((input))
 ```
@@ -320,7 +320,7 @@ result = shark_module.forward((input))
 ### Example demonstrating running MHLO IR.
 ```
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
 import numpy as np
 mhlo_ir = r"""builtin.module {
@@ -333,22 +333,22 @@ mhlo_ir = r"""builtin.module {
 arg0 = np.ones((1, 4)).astype(np.float32)
 arg1 = np.ones((4, 1)).astype(np.float32)
-shark_module = SharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
+amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
-shark_module.compile()
+amdshark_module.compile()
-result = shark_module.forward((arg0, arg1))
+result = amdshark_module.forward((arg0, arg1))
 ```
 </details>
 ## Examples Using the REST API
-* [Setting up SHARK for use with Blender](./docs/shark_sd_blender.md)
+* [Setting up AMDSHARK for use with Blender](./docs/amdshark_sd_blender.md)
-* [Setting up SHARK for use with Koboldcpp](./docs/shark_sd_koboldcpp.md)
+* [Setting up AMDSHARK for use with Koboldcpp](./docs/amdshark_sd_koboldcpp.md)
 ## Supported and Validated Models
-SHARK is maintained to support the latest innovations in ML Models:
+AMDSHARK is maintained to support the latest innovations in ML Models:
-| TF HuggingFace Models | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
+| TF HuggingFace Models | AMDSHARK-CPU | AMDSHARK-CUDA | AMDSHARK-METAL |
 |---------------------|----------|----------|-------------|
 | BERT | :green_heart: | :green_heart: | :green_heart: |
 | DistilBERT | :green_heart: | :green_heart: | :green_heart: |
@@ -358,12 +358,12 @@ SHARK is maintained to support the latest innovations in ML Models:
 | Vision Transformer | :green_heart: | :green_heart: | :green_heart: |
 | ResNet50 | :green_heart: | :green_heart: | :green_heart: |
-For a complete list of the models supported in SHARK, please refer to [tank/README.md](https://github.com/nod-ai/SHARK-Studio/blob/main/tank/README.md).
+For a complete list of the models supported in AMDSHARK, please refer to [tank/README.md](https://github.com/nod-ai/AMDSHARK-Studio/blob/main/tank/README.md).
 ## Communication Channels
-* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
+* [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the AMDSHARK team and other users
-* [GitHub issues](https://github.com/nod-ai/SHARK-Studio/issues): Feature requests, bugs etc
+* [GitHub issues](https://github.com/nod-ai/AMDSHARK-Studio/issues): Feature requests, bugs etc
 ## Related Projects
@@ -385,10 +385,10 @@ For a complete list of the models supported in SHARK, please refer to [tank/READ
 * Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
 * [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
 * Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
-* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) SHARK and IREE is enabled by and heavily relies on [MLIR](https://mlir.llvm.org).
+* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) AMDSHARK and IREE is enabled by and heavily relies on [MLIR](https://mlir.llvm.org).
 </details>
 ## License
-nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
+nod.ai AMDSHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
 See [LICENSE](LICENSE) for more information.

amdshark/__init__.py (new file, 28 lines)

@@ -0,0 +1,28 @@
+import importlib
+import logging
+
+from torch._dynamo import register_backend
+
+log = logging.getLogger(__name__)
+
+
+@register_backend
+def amdshark(model, inputs, *, options):
+    try:
+        from amdshark.dynamo_backend.utils import AMDSharkBackend
+    except ImportError:
+        log.exception(
+            "Unable to import AMDSHARK - High Performance Machine Learning Distribution"
+            "Please install the right version of AMDSHARK that matches the PyTorch version being used. "
+            "Refer to https://github.com/nod-ai/AMDSHARK-Studio/ for details."
+        )
+        raise
+    return AMDSharkBackend(model, inputs, options)
+
+
+def has_amdshark():
+    try:
+        importlib.import_module("amdshark")
+        return True
+    except ImportError:
+        return False
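
The new `amdshark/__init__.py` registers the backend with TorchDynamo under the function's own name, so it can be selected as `backend="amdshark"`. A minimal usage sketch, assuming an AMDSHARK install that provides `AMDSharkBackend`; the model is a placeholder, and the empty `options` dict is only there because the backend declares a keyword-only `options` parameter (the keys it accepts are not shown in this diff):

```python
import torch

# Placeholder model; any torch.nn.Module works here.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

# "amdshark" resolves to the function registered above via @register_backend.
# Passing options (even an empty dict) makes torch.compile forward it to the
# backend's keyword-only `options` argument.
compiled = torch.compile(model, backend="amdshark", options={})

out = compiled(torch.randn(2, 8))  # compilation happens on the first call
```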


@@ -12,17 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from shark.shark_runner import SharkRunner
+from amdshark.amdshark_runner import AMDSharkRunner
-from shark.iree_utils.compile_utils import (
+from amdshark.iree_utils.compile_utils import (
 export_iree_module_to_vmfb,
 load_flatbuffer,
 get_iree_runtime_config,
 )
-from shark.iree_utils.benchmark_utils import (
+from amdshark.iree_utils.benchmark_utils import (
 build_benchmark_args,
 run_benchmark_module,
 )
-from shark.parser import shark_args
+from amdshark.parser import amdshark_args
 from datetime import datetime
 import time
 from typing import Optional
@@ -67,8 +67,8 @@ def check_requirements(frontend):
 return has_pkgs
-class SharkBenchmarkRunner(SharkRunner):
+class AMDSharkBenchmarkRunner(AMDSharkRunner):
-# SharkRunner derived class with Benchmarking capabilities.
+# AMDSharkRunner derived class with Benchmarking capabilities.
 def __init__(
 self,
 mlir_module: bytes,
@@ -76,8 +76,8 @@ class SharkBenchmarkRunner(SharkRunner):
 mlir_dialect: str = "linalg",
 extra_args: list = [],
 ):
-self.device = shark_args.device if device == "none" else device
+self.device = amdshark_args.device if device == "none" else device
-self.enable_tf32 = shark_args.enable_tf32
+self.enable_tf32 = amdshark_args.enable_tf32
 self.frontend_model = None
 self.vmfb_file = None
 self.mlir_dialect = mlir_dialect
@@ -86,12 +86,12 @@ class SharkBenchmarkRunner(SharkRunner):
 self.temp_file_to_unlink = None
 if not os.path.isfile(mlir_module):
 print(
-"Warning: Initializing SharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead."
+"Warning: Initializing AMDSharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
 )
 self.compile_str = True
 else:
 self.compile_str = False
-SharkRunner.__init__(
+AMDSharkRunner.__init__(
 self,
 mlir_module,
 device,
@@ -157,13 +157,13 @@ class SharkBenchmarkRunner(SharkRunner):
 frontend_model.cpu()
 input.cpu()
-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 frontend_model.forward(input)
 if device == "cuda":
 torch.cuda.reset_peak_memory_stats()
 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = frontend_model.forward(input)
 end = time.time()
 if device == "cuda":
@@ -176,14 +176,14 @@ class SharkBenchmarkRunner(SharkRunner):
 device_peak_b = None
 print(
-f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"Torch benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 if device == "cuda":
 # Set device to CPU so we don't run into segfaults exiting pytest subprocesses.
 torch_device = torch.device("cpu")
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 "", # host_peak_b (CPU usage) is not reported by PyTorch.
 _bytes_to_mb_str(device_peak_b),
 ]
@@ -217,13 +217,13 @@ class SharkBenchmarkRunner(SharkRunner):
 )[:2]
 frontend_model = model
-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 frontend_model.forward(*input)
 if tf_device == TF_GPU_DEVICE:
 tf.config.experimental.reset_memory_stats(tf_device)
 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = frontend_model.forward(*input)
 end = time.time()
 if tf_device == TF_GPU_DEVICE:
@@ -235,11 +235,11 @@ class SharkBenchmarkRunner(SharkRunner):
 device_peak_b = None
 print(
-f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"TF benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 "", # host_peak_b (CPU usage) is not reported by TensorFlow.
 _bytes_to_mb_str(device_peak_b),
 ]
@@ -248,7 +248,7 @@ class SharkBenchmarkRunner(SharkRunner):
 iter_per_second, host_peak_b, device_peak_b = run_benchmark_module(
 self.benchmark_cl
 )
-print(f"Shark-IREE-C benchmark:{iter_per_second} iter/second")
+print(f"AMDShark-IREE-C benchmark:{iter_per_second} iter/second")
 return [
 f"{iter_per_second}",
 f"{1000/iter_per_second}",
@@ -258,25 +258,25 @@ class SharkBenchmarkRunner(SharkRunner):
 def benchmark_python(self, inputs):
 input_list = [x for x in inputs]
-for i in range(shark_args.num_warmup_iterations):
+for i in range(amdshark_args.num_warmup_iterations):
 self.run("forward", input_list)
 begin = time.time()
-for i in range(shark_args.num_iterations):
+for i in range(amdshark_args.num_iterations):
 out = self.run("forward", input_list)
 end = time.time()
 print(
-f"Shark-IREE Python benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"AMDShark-IREE Python benchmark:{amdshark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
-f"{shark_args.num_iterations/(end-begin)}",
+f"{amdshark_args.num_iterations/(end-begin)}",
-f"{((end-begin)/shark_args.num_iterations)*1000}",
+f"{((end-begin)/amdshark_args.num_iterations)*1000}",
 ]
 def benchmark_onnx(self, modelname, inputs):
 if self.device == "cuda":
 print(
-"Currently GPU benchmarking on ONNX is not supported in SHARK."
+"Currently GPU benchmarking on ONNX is not supported in AMDSHARK."
 )
 return ["N/A", "N/A"]
 else:
@@ -325,7 +325,7 @@ for currently supported models. Exiting benchmark ONNX."
 num_threads,
 batch_sizes,
 sequence_lengths,
-shark_args.num_iterations,
+amdshark_args.num_iterations,
 input_counts,
 optimize_onnx,
 validate_onnx,
@@ -340,7 +340,7 @@ for currently supported models. Exiting benchmark ONNX."
 onnx_args,
 )
 print(
-f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
+f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{amdshark_args.num_iterations}"
 )
 return [
 result[0]["QPS"],
@@ -408,13 +408,13 @@ for currently supported models. Exiting benchmark ONNX."
 ]
 # "frontend" must be the first element.
 if self.mode == "native":
-engines = ["shark_python", "shark_iree_c"]
+engines = ["amdshark_python", "amdshark_iree_c"]
 if self.mode == "baseline":
 engines = ["frontend"]
 if self.mode == "all":
-engines = ["frontend", "shark_python", "shark_iree_c"]
+engines = ["frontend", "amdshark_python", "amdshark_iree_c"]
-if shark_args.onnx_bench == True:
+if amdshark_args.onnx_bench == True:
 engines.append("onnxruntime")
 if not os.path.exists("bench_results.csv"):
@@ -428,7 +428,7 @@ for currently supported models. Exiting benchmark ONNX."
 bench_info["model"] = modelname
 bench_info["batch_size"] = str(import_args["batch_size"])
 bench_info["dialect"] = self.mlir_dialect
-bench_info["iterations"] = shark_args.num_iterations
+bench_info["iterations"] = amdshark_args.num_iterations
 if dynamic == True:
 bench_info["shape_type"] = "dynamic"
 else:
@@ -462,8 +462,8 @@ for currently supported models. Exiting benchmark ONNX."
 self.frontend_result = None
 continue
-elif e == "shark_python":
+elif e == "amdshark_python":
-engine_result["engine"] = "shark_python"
+engine_result["engine"] = "amdshark_python"
 (
 engine_result["iter/sec"],
 engine_result["ms/iter"],
@@ -475,8 +475,8 @@ for currently supported models. Exiting benchmark ONNX."
 self.frontend_result, engine_result["ms/iter"]
 )
-elif e == "shark_iree_c":
+elif e == "amdshark_iree_c":
-engine_result["engine"] = "shark_iree_c"
+engine_result["engine"] = "amdshark_iree_c"
 (
 engine_result["iter/sec"],
 engine_result["ms/iter"],
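
All of the benchmarking hunks above follow the same shape: a handful of untimed warm-up iterations, then a timed loop whose duration is converted into iterations per second and milliseconds per iteration. A standalone sketch of that pattern, with a generic callable and placeholder iteration counts rather than the `amdshark_args` values used in the runner:

```python
import time

def benchmark(run_once, num_warmup_iterations=5, num_iterations=20):
    # Warm-up runs are excluded from timing so caches and allocators settle.
    for _ in range(num_warmup_iterations):
        run_once()
    begin = time.time()
    for _ in range(num_iterations):
        run_once()
    end = time.time()
    # The runner reports the same two figures: iter/second and ms/iter.
    return num_iterations / (end - begin), ((end - begin) / num_iterations) * 1000
```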


@@ -1,7 +1,7 @@
 import os
 import tempfile
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
-from shark.shark_importer import import_with_fx, save_mlir
+from amdshark.amdshark_importer import import_with_fx, save_mlir
 import torch
 import torch_mlir
 from torch_mlir.compiler_utils import run_pipeline_with_repro_report
@@ -40,37 +40,37 @@ brevitas_matmul_rhs_group_quant_library = [
 def load_vmfb(extended_model_name, device, mlir_dialect, extra_args=[]):
 vmfb_path = os.path.join(os.getcwd(), extended_model_name + ".vmfb")
-shark_module = None
+amdshark_module = None
 if os.path.isfile(vmfb_path):
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 None,
 device=device,
 mlir_dialect=mlir_dialect,
 )
 print(f"loading existing vmfb from: {vmfb_path}")
-shark_module.load_module(vmfb_path, extra_args=extra_args)
+amdshark_module.load_module(vmfb_path, extra_args=extra_args)
-return shark_module
+return amdshark_module
 def compile_module(
-shark_module, extended_model_name, generate_vmfb, extra_args=[]
+amdshark_module, extended_model_name, generate_vmfb, extra_args=[]
 ):
 if generate_vmfb:
 vmfb_path = os.path.join(os.getcwd(), extended_model_name + ".vmfb")
 if os.path.isfile(vmfb_path):
 print(f"loading existing vmfb from: {vmfb_path}")
-shark_module.load_module(vmfb_path, extra_args=extra_args)
+amdshark_module.load_module(vmfb_path, extra_args=extra_args)
 else:
 print(
 "No vmfb found. Compiling and saving to {}".format(vmfb_path)
 )
-path = shark_module.save_module(
+path = amdshark_module.save_module(
 os.getcwd(), extended_model_name, extra_args
 )
-shark_module.load_module(path, extra_args=extra_args)
+amdshark_module.load_module(path, extra_args=extra_args)
 else:
-shark_module.compile(extra_args)
+amdshark_module.compile(extra_args)
-return shark_module
+return amdshark_module
 def compile_int_precision(
@@ -139,7 +139,7 @@ def compile_int_precision(
 del mlir_module
 print(f"Elided IR written for {extended_model_name}")
 return bytecode_path
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_module=bytecode_path, device=device, mlir_dialect="tm_tensor"
 )
 extra_args = [
@@ -150,7 +150,7 @@ def compile_int_precision(
 ]
 return (
 compile_module(
-shark_module,
+amdshark_module,
 extended_model_name=extended_model_name,
 generate_vmfb=generate_vmfb,
 extra_args=extra_args,
@@ -159,7 +159,7 @@ def compile_int_precision(
 )
-def shark_compile_through_fx(
+def amdshark_compile_through_fx(
 model,
 inputs,
 extended_model_name,
@@ -174,22 +174,22 @@ def shark_compile_through_fx(
 ):
 is_f16 = precision == "fp16"
 if generate_or_load_vmfb:
-shark_module = load_vmfb(
+amdshark_module = load_vmfb(
 extended_model_name=extended_model_name,
 device=device,
 mlir_dialect=mlir_dialect,
 extra_args=extra_args,
 )
-if shark_module:
+if amdshark_module:
 return (
-shark_module,
+amdshark_module,
 None,
 )
-from shark.parser import shark_args
+from amdshark.parser import amdshark_args
 if "cuda" in device:
-shark_args.enable_tf32 = True
+amdshark_args.enable_tf32 = True
 if precision in ["int4", "int8"]:
 mlir_module = compile_int_precision(
@@ -225,14 +225,14 @@ def shark_compile_through_fx(
 mlir_dialect=mlir_dialect,
 )
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_module,
 device=device,
 mlir_dialect=mlir_dialect,
 )
 return (
 compile_module(
-shark_module,
+amdshark_module,
 extended_model_name,
 generate_vmfb=generate_or_load_vmfb,
 extra_args=extra_args,


@@ -1,8 +1,8 @@
 # Lint as: python3
-"""SHARK Downloader"""
+"""AMDSHARK Downloader"""
-# Requirements : Put shark_tank in SHARK directory
+# Requirements : Put amdshark_tank in AMDSHARK directory
-# /SHARK
+# /AMDSHARK
-# /gen_shark_tank
+# /gen_amdshark_tank
 # /tflite
 # /albert_lite_base
 # /...model_name...
@@ -17,7 +17,7 @@ import os
 from tqdm.std import tqdm
 import sys
 from pathlib import Path
-from shark.parser import shark_args
+from amdshark.parser import amdshark_args
 from google.cloud import storage
@@ -83,8 +83,8 @@ input_type_to_np_dtype = {
 # Save the model in the home local so it needn't be fetched everytime in the CI.
 home = str(Path.home())
-alt_path = os.path.join(os.path.dirname(__file__), "../gen_shark_tank/")
+alt_path = os.path.join(os.path.dirname(__file__), "../gen_amdshark_tank/")
-custom_path = shark_args.local_tank_cache
+custom_path = amdshark_args.local_tank_cache
 if custom_path is not None:
 if not os.path.exists(custom_path):
@@ -92,17 +92,17 @@ if custom_path is not None:
 WORKDIR = custom_path
-print(f"Using {WORKDIR} as local shark_tank cache directory.")
+print(f"Using {WORKDIR} as local amdshark_tank cache directory.")
 elif os.path.exists(alt_path):
 WORKDIR = alt_path
 print(
-f"Using {WORKDIR} as shark_tank directory. Delete this directory if you aren't working from locally generated shark_tank."
+f"Using {WORKDIR} as amdshark_tank directory. Delete this directory if you aren't working from locally generated amdshark_tank."
 )
 else:
-WORKDIR = os.path.join(home, ".local/shark_tank/")
+WORKDIR = os.path.join(home, ".local/amdshark_tank/")
 print(
-f"shark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
+f"amdshark_tank local cache is located at {WORKDIR} . You may change this by setting the --local_tank_cache= flag"
 )
 os.makedirs(WORKDIR, exist_ok=True)
@@ -150,8 +150,8 @@ def _internet_connected():
 def get_git_revision_short_hash() -> str:
 import subprocess
-if shark_args.shark_prefix is not None:
+if amdshark_args.amdshark_prefix is not None:
-prefix_kw = shark_args.shark_prefix
+prefix_kw = amdshark_args.amdshark_prefix
 else:
 import json
@@ -160,11 +160,11 @@ def get_git_revision_short_hash() -> str:
 with open(src, "r") as f:
 data = json.loads(f.read())
 prefix_kw = data["version"]
-print(f"Checking for updates from gs://shark_tank/{prefix_kw}")
+print(f"Checking for updates from gs://amdshark_tank/{prefix_kw}")
 return prefix_kw
-def get_sharktank_prefix():
+def get_amdsharktank_prefix():
 tank_prefix = ""
 if not _internet_connected():
 print(
@@ -174,7 +174,7 @@ def get_sharktank_prefix():
 else:
 desired_prefix = get_git_revision_short_hash()
 storage_client_a = storage.Client.create_anonymous_client()
-base_bucket_name = "shark_tank"
+base_bucket_name = "amdshark_tank"
 base_bucket = storage_client_a.bucket(base_bucket_name)
 dir_blobs = base_bucket.list_blobs(prefix=f"{desired_prefix}")
 for blob in dir_blobs:
@@ -186,13 +186,13 @@ def get_sharktank_prefix():
 continue
 if tank_prefix == "":
 print(
-f"shark_tank bucket not found matching ({desired_prefix}). Defaulting to nightly."
+f"amdshark_tank bucket not found matching ({desired_prefix}). Defaulting to nightly."
 )
 tank_prefix = "nightly"
 return tank_prefix
-# Downloads the torch model from gs://shark_tank dir.
+# Downloads the torch model from gs://amdshark_tank dir.
 def download_model(
 model_name,
 dynamic=False,
@@ -204,7 +204,7 @@ def download_model(
 model_name = model_name.replace("/", "_")
 dyn_str = "_dynamic" if dynamic else ""
 os.makedirs(WORKDIR, exist_ok=True)
-shark_args.shark_prefix = get_sharktank_prefix()
+amdshark_args.amdshark_prefix = get_amdsharktank_prefix()
 if import_args["batch_size"] and import_args["batch_size"] != 1:
 model_dir_name = (
 model_name
@@ -221,7 +221,7 @@ def download_model(
 model_dir = os.path.join(WORKDIR, model_dir_name)
 if not tank_url:
-tank_url = "gs://shark_tank/" + shark_args.shark_prefix
+tank_url = "gs://amdshark_tank/" + amdshark_args.amdshark_prefix
 full_gs_url = tank_url.rstrip("/") + "/" + model_dir_name
 if not check_dir_exists(
@@ -232,7 +232,7 @@ def download_model(
 )
 download_public_file(full_gs_url, model_dir)
-elif shark_args.force_update_tank == True:
+elif amdshark_args.force_update_tank == True:
 print(
 f"Force-updating artifacts for model {model_name} from: {full_gs_url}"
 )
@@ -259,13 +259,13 @@ def download_model(
 except FileNotFoundError:
 print(f"Model artifact hash not found at {model_dir}.")
 upstream_hash = None
-if local_hash != upstream_hash and shark_args.update_tank == True:
+if local_hash != upstream_hash and amdshark_args.update_tank == True:
 print(f"Updating artifacts for model {model_name}...")
 download_public_file(full_gs_url, model_dir)
 elif local_hash != upstream_hash:
 print(
-"Hash does not match upstream in gs://shark_tank/. If you want to use locally generated artifacts, this is working as intended. Otherwise, run with --update_tank."
+"Hash does not match upstream in gs://amdshark_tank/. If you want to use locally generated artifacts, this is working as intended. Otherwise, run with --update_tank."
 )
 else:
 print(
@@ -280,12 +280,12 @@ def download_model(
 f"Verifying that model artifacts were downloaded successfully to {mlir_filename}..."
 )
 if not os.path.exists(mlir_filename):
-from tank.generate_sharktank import gen_shark_files
+from tank.generate_amdsharktank import gen_amdshark_files
 print(
 "The model data was not found. Trying to generate artifacts locally."
 )
-gen_shark_files(model_name, frontend, WORKDIR, import_args)
+gen_amdshark_files(model_name, frontend, WORKDIR, import_args)
 assert os.path.exists(mlir_filename), f"MLIR not found at {mlir_filename}"
 function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))


@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Tuple
 from collections import defaultdict
-from shark.shark_importer import import_with_fx, save_mlir
+from amdshark.amdshark_importer import import_with_fx, save_mlir
 import torchvision.models as models
 import copy
 import io
@@ -13,7 +13,7 @@ from typing import Dict
 import torch_mlir
-def shark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
+def amdshark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
 mlir_module = torch_mlir.compile(
 fx_g, inputs, output_type="linalg-on-tensors"
 )
@@ -22,19 +22,19 @@ def shark_backend(fx_g: torch.fx.GraphModule, inputs, device: str = "cpu"):
 bytecode = bytecode_stream.getvalue()
 bytecode_path = save_mlir(
 bytecode,
-model_name="shark_eager_module",
+model_name="amdshark_eager_module",
 frontend="torch",
 mlir_dialect="tm_tensor",
 )
-from shark.shark_inference import SharkInference
+from amdshark.amdshark_inference import AMDSharkInference
-shark_module = SharkInference(
+amdshark_module = AMDSharkInference(
 mlir_module=bytecode_path,
 device=device,
 mlir_dialect="tm_tensor",
 )
-shark_module.compile(extra_args=[])
+amdshark_module.compile(extra_args=[])
-return shark_module
+return amdshark_module
 def _make_single_op_gm(node, captured_val, compiled_graph):
@@ -55,7 +55,7 @@ def _make_single_op_gm(node, captured_val, compiled_graph):
 g.output(call)
 g.lint()
 single_node = torch.fx.GraphModule(torch.nn.Module(), g)
-compiled_module = shark_backend(single_node, inputs)
+compiled_module = amdshark_backend(single_node, inputs)
 compiled_graph[node.name] = {
 "module": compiled_module,
 "inputs": [i for i in env],
@@ -172,41 +172,41 @@ shape_prop = ShapeProp(fx_graph)
 x = shape_prop.propagate(input[0])
-shark_graph = compiled_graph(fx_graph, x)
+amdshark_graph = compiled_graph(fx_graph, x)
-for key in shark_graph:
+for key in amdshark_graph:
 if key.startswith("getitem"):
-input_val = shark_graph[key]["input"]
+input_val = amdshark_graph[key]["input"]
-pos = shark_graph[key]["pos"]
+pos = amdshark_graph[key]["pos"]
-if input_val not in shark_graph:
+if input_val not in amdshark_graph:
-shark_graph[key]["result"] = x[input_val][pos].detach()
+amdshark_graph[key]["result"] = x[input_val][pos].detach()
 else:
-shark_graph[key]["result"] = shark_graph[input_val]["result"][
+amdshark_graph[key]["result"] = amdshark_graph[input_val]["result"][
 pos
 ].detach()
 elif key.startswith("empty"):
-operator = shark_graph[key]["target"]
+operator = amdshark_graph[key]["target"]
-args = shark_graph[key]["args"]
+args = amdshark_graph[key]["args"]
-kwargs = shark_graph[key]["kwargs"]
+kwargs = amdshark_graph[key]["kwargs"]
-shark_graph[key]["result"] = operator(*args, **kwargs).detach()
+amdshark_graph[key]["result"] = operator(*args, **kwargs).detach()
 else:
-input_val = shark_graph[key]["inputs"]
+input_val = amdshark_graph[key]["inputs"]
 input_tensors = []
 for input in input_val:
-if input not in shark_graph:
+if input not in amdshark_graph:
 input_tensors.append(x[input].detach())
 else:
-input_tensors.append(shark_graph[input]["result"])
+input_tensors.append(amdshark_graph[input]["result"])
-val = shark_graph[key]["module"]("forward", input_tensors)
+val = amdshark_graph[key]["module"]("forward", input_tensors)
 if isinstance(val, (tuple, list)):
 list_val = []
 for v in val:
 list_val.append(torch.from_numpy(v))
-shark_graph[key]["result"] = list_val
+amdshark_graph[key]["result"] = list_val
 else:
-shark_graph[key]["result"] = torch.from_numpy(val)
+amdshark_graph[key]["result"] = torch.from_numpy(val)
-print(shark_graph)
+print(amdshark_graph)


@@ -4,7 +4,7 @@ import numpy as np
 import torch_mlir
 from iree.compiler import compile_file
-from shark.shark_importer import import_with_fx, get_f16_inputs, save_mlir
+from amdshark.amdshark_importer import import_with_fx, get_f16_inputs, save_mlir
 class GenerateConfigFile:


@@ -1,12 +1,12 @@
 # Lint as: python3
-"""SHARK Importer"""
+"""AMDSHARK Importer"""
 import sys
 import tempfile
 import os
 import hashlib
-from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
 def create_hash(file_name):
 with open(file_name, "rb") as f:
@@ -28,9 +28,9 @@ supported_frontends = {
 }
-class SharkImporter:
+class AMDSharkImporter:
 """
-SharkImporter converts frontend modules into a
+AMDSharkImporter converts frontend modules into a
 mlir_module. The supported frameworks are tensorflow,
 pytorch, and tf-lite.
@@ -83,7 +83,7 @@ class SharkImporter:
 # NOTE: The default function for torch is "forward" and tf-lite is "main".
 def _torch_mlir(self, is_dynamic, tracing_required, mlir_type):
-from shark.torch_mlir_utils import get_torch_mlir_module
+from amdshark.torch_mlir_utils import get_torch_mlir_module
 return get_torch_mlir_module(
 self.module,
@@ -121,7 +121,7 @@ class SharkImporter:
is_dynamic=False, is_dynamic=False,
tracing_required=False, tracing_required=False,
func_name="forward", func_name="forward",
save_dir=cmd_opts.tmp_dir, #"./shark_tmp/", save_dir=cmd_opts.tmp_dir, #"./amdshark_tmp/",
mlir_type="linalg", mlir_type="linalg",
): ):
if self.frontend in ["torch", "pytorch"]: if self.frontend in ["torch", "pytorch"]:
@@ -773,7 +773,7 @@ def import_with_fx(
return ts_graph return ts_graph
inputs = get_f16_inputs(inputs, is_f16, f16_input_mask) inputs = get_f16_inputs(inputs, is_f16, f16_input_mask)
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
ts_graph, ts_graph,
inputs, inputs,
frontend="torch", frontend="torch",
@@ -807,7 +807,7 @@ def save_mlir(
model_name + "_" + frontend + "_" + mlir_dialect + ".mlir" model_name + "_" + frontend + "_" + mlir_dialect + ".mlir"
) )
if dir == "": if dir == "":
dir = cmd_opts.tmp_dir  # os.path.join(".", "shark_tmp") dir = cmd_opts.tmp_dir  # os.path.join(".", "amdshark_tmp")
mlir_path = os.path.join(dir, model_name_mlir) mlir_path = os.path.join(dir, model_name_mlir)
print(f"saving {model_name_mlir} to {dir}") print(f"saving {model_name_mlir} to {dir}")
if not os.path.exists(dir): if not os.path.exists(dir):
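For orientation, the renamed importer above is exercised elsewhere in this diff (resnest, dlrm, and albert examples); a minimal sketch of that flow, using a throwaway placeholder module for illustration:
```
# Minimal sketch of the renamed importer API; SmallModule is a placeholder.
import torch
from amdshark.amdshark_importer import AMDSharkImporter


class SmallModule(torch.nn.Module):
    def forward(self, x):
        return x * 2


example_input = torch.ones(1, 4)
mlir_importer = AMDSharkImporter(
    SmallModule(),
    (example_input,),
    frontend="torch",
)
# import_mlir returns the imported MLIR module and the entry function name
# ("forward" for the torch frontend, "main" for tf-lite, per the note above).
mlir_module, func_name = mlir_importer.import_mlir(
    is_dynamic=False, tracing_required=True
)
```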

View File

@@ -9,15 +9,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from shark.iree_utils.compile_utils import ( from amdshark.iree_utils.compile_utils import (
export_iree_module_to_vmfb, export_iree_module_to_vmfb,
load_flatbuffer, load_flatbuffer,
create_dispatch_dirs, create_dispatch_dirs,
compile_benchmark_dirs, compile_benchmark_dirs,
) )
import os import os
from shark.shark_runner import SharkRunner from amdshark.amdshark_runner import AMDSharkRunner
from shark.parser import shark_args from amdshark.parser import amdshark_args
import numpy as np import numpy as np
@@ -30,7 +30,7 @@ dtype_to_np_dtype = {
} }
class SharkInference: class AMDSharkInference:
""" """
Runs prediction or inference on mlir_module. Runs prediction or inference on mlir_module.
@@ -47,7 +47,7 @@ class SharkInference:
The dialect in which the given mlir_module is in. The dialect in which the given mlir_module is in.
Refer to {https://mlir.llvm.org/docs/Dialects/} Refer to {https://mlir.llvm.org/docs/Dialects/}
is_benchmark: bool is_benchmark: bool
Whether this SharkInference module should be benchmark-enabled. Whether this AMDSharkInference module should be benchmark-enabled.
mmap: bool mmap: bool
Whether to load/run vmfb using mmap. It's `True` by default. Whether to load/run vmfb using mmap. It's `True` by default.
@@ -79,27 +79,27 @@ class SharkInference:
if mlir_module is not None: if mlir_module is not None:
if mlir_module and not os.path.isfile(mlir_module): if mlir_module and not os.path.isfile(mlir_module):
print( print(
"Warning: Initializing SharkInference with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead." "Warning: Initializing AMDSharkInference with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
) )
self.compile_str = True self.compile_str = True
else: else:
self.compile_str = False self.compile_str = False
self.device = shark_args.device if device == "none" else device self.device = amdshark_args.device if device == "none" else device
self.mlir_dialect = mlir_dialect self.mlir_dialect = mlir_dialect
self.is_benchmark = is_benchmark self.is_benchmark = is_benchmark
self.device_idx = device_idx self.device_idx = device_idx
self.dispatch_benchmarks = ( self.dispatch_benchmarks = (
shark_args.dispatch_benchmarks amdshark_args.dispatch_benchmarks
if dispatch_benchmark is None if dispatch_benchmark is None
else dispatch_benchmark else dispatch_benchmark
) )
self.dispatch_benchmarks_dir = ( self.dispatch_benchmarks_dir = (
shark_args.dispatch_benchmarks_dir amdshark_args.dispatch_benchmarks_dir
if dispatch_benchmark_dir == "temp_dispatch_benchmarks" if dispatch_benchmark_dir == "temp_dispatch_benchmarks"
else dispatch_benchmark_dir else dispatch_benchmark_dir
) )
self.shark_runner = None self.amdshark_runner = None
self.mmap = mmap self.mmap = mmap
self.rt_flags = rt_flags self.rt_flags = rt_flags
@@ -120,9 +120,9 @@ class SharkInference:
) )
if self.is_benchmark == True: if self.is_benchmark == True:
from shark.shark_benchmark_runner import SharkBenchmarkRunner from amdshark.amdshark_benchmark_runner import AMDSharkBenchmarkRunner
self.shark_runner = SharkBenchmarkRunner( self.amdshark_runner = AMDSharkBenchmarkRunner(
self.mlir_module, self.mlir_module,
self.device, self.device,
self.mlir_dialect, self.mlir_dialect,
@@ -130,7 +130,7 @@ class SharkInference:
) )
else: else:
self.shark_runner = SharkRunner( self.amdshark_runner = AMDSharkRunner(
self.mlir_module, self.mlir_module,
self.device, self.device,
self.mlir_dialect, self.mlir_dialect,
@@ -150,19 +150,19 @@ class SharkInference:
# inputs are considered to be tuple of np.array. # inputs are considered to be tuple of np.array.
def __call__(self, function_name: str, inputs: tuple, send_to_host=True): def __call__(self, function_name: str, inputs: tuple, send_to_host=True):
return self.shark_runner.run( return self.amdshark_runner.run(
function_name, inputs, send_to_host, device=self.device function_name, inputs, send_to_host, device=self.device
) )
# forward function. # forward function.
def forward(self, inputs: tuple, send_to_host=True): def forward(self, inputs: tuple, send_to_host=True):
return self.shark_runner.run( return self.amdshark_runner.run(
"forward", inputs, send_to_host, device=self.device "forward", inputs, send_to_host, device=self.device
) )
# Get all function names defined within the compiled module. # Get all function names defined within the compiled module.
def get_functions_in_module(self): def get_functions_in_module(self):
return self.shark_runner.get_functions_in_module() return self.amdshark_runner.get_functions_in_module()
# Captures the static input information from the mlir_module. # Captures the static input information from the mlir_module.
# TODO(pashu123): Generate the input information for dynamic shapes. # TODO(pashu123): Generate the input information for dynamic shapes.
@@ -223,7 +223,7 @@ class SharkInference:
# load and return the module. # load and return the module.
def load_module(self, path, extra_args=[]): def load_module(self, path, extra_args=[]):
self.shark_runner = SharkRunner( self.amdshark_runner = AMDSharkRunner(
device=self.device, device=self.device,
compile_vmfb=False, compile_vmfb=False,
extra_args=extra_args, extra_args=extra_args,
@@ -236,8 +236,8 @@ class SharkInference:
mmap=self.mmap, mmap=self.mmap,
rt_flags=self.rt_flags, rt_flags=self.rt_flags,
) )
self.shark_runner.iree_compilation_module = params["vmfb"] self.amdshark_runner.iree_compilation_module = params["vmfb"]
self.shark_runner.iree_config = params["config"] self.amdshark_runner.iree_config = params["config"]
self.shark_runner.temp_file_to_unlink = params["temp_file_to_unlink"] self.amdshark_runner.temp_file_to_unlink = params["temp_file_to_unlink"]
del params del params
return return
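The renamed inference wrapper is driven the same way as before; a minimal sketch continuing from the importer example earlier in this diff (so `mlir_module` and `example_input` are the placeholders defined there):
```
# Continues the importer sketch above: compile and run the imported module.
from amdshark.amdshark_inference import AMDSharkInference

amdshark_module = AMDSharkInference(mlir_module, device="cpu", mlir_dialect="linalg")
amdshark_module.compile()
# Inputs are passed as a tuple of np.array, as noted in the __call__ hunk above.
result = amdshark_module.forward((example_input.numpy(),))

# Compiled artifacts can be saved and reloaded (see the resnet50 example below).
path = amdshark_module.save_module()
amdshark_module.load_module(path)
```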

View File

@@ -12,19 +12,19 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from shark.iree_utils.compile_utils import ( from amdshark.iree_utils.compile_utils import (
get_iree_compiled_module, get_iree_compiled_module,
get_results, get_results,
export_iree_module_to_vmfb, export_iree_module_to_vmfb,
load_flatbuffer, load_flatbuffer,
) )
from shark.iree_utils._common import check_device_drivers, device_driver_info from amdshark.iree_utils._common import check_device_drivers, device_driver_info
from shark.parser import shark_args from amdshark.parser import amdshark_args
import os import os
import sys import sys
# Dialects supported by the shark-runtime. # Dialects supported by the amdshark-runtime.
supported_dialects = { supported_dialects = {
"linalg", "linalg",
"auto", "auto",
@@ -35,9 +35,9 @@ supported_dialects = {
} }
class SharkRunner: class AMDSharkRunner:
""" """
Base class for SharkInference and SharkTrainer Base class for AMDSharkInference and AMDSharkTrainer
used to execute an mlir_module. used to execute an mlir_module.
... ...
@@ -78,12 +78,12 @@ class SharkRunner:
if self.mlir_module is not None: if self.mlir_module is not None:
if not os.path.isfile(mlir_module): if not os.path.isfile(mlir_module):
print( print(
"Warning: Initializing SharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize SharkInference with a path to a MLIR module on your hard disk instead." "Warning: Initializing AMDSharkRunner with a mlir string/bytecode object will duplicate the model in RAM at compile time. To avoid this, initialize AMDSharkInference with a path to a MLIR module on your hard disk instead."
) )
self.compile_str = True self.compile_str = True
else: else:
self.compile_str = False self.compile_str = False
self.device = shark_args.device if device == "none" else device self.device = amdshark_args.device if device == "none" else device
self.mlir_dialect = mlir_dialect self.mlir_dialect = mlir_dialect
self.extra_args = extra_args self.extra_args = extra_args
self.device_idx = device_idx self.device_idx = device_idx
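AMDSharkRunner is normally constructed indirectly by AMDSharkInference or AMDSharkTrainer, as the hunks above show; a sketch of the direct call path, reusing the placeholders from the importer example and mirroring the argument order used in those hunks:
```
# Sketch of driving the runner directly; argument order mirrors the hunks above.
from amdshark.amdshark_runner import AMDSharkRunner

runner = AMDSharkRunner(
    mlir_module,   # module imported in the earlier sketch (placeholder)
    "cpu",         # device
    "linalg",      # mlir_dialect
    extra_args=[],
)
# (function_name, inputs, send_to_host), as in the AMDSharkInference.__call__ hunk.
outputs = runner.run("forward", (example_input.numpy(),), True)
```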

View File

@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from shark.parser import shark_args from amdshark.parser import amdshark_args
from shark.shark_runner import SharkRunner from amdshark.amdshark_runner import AMDSharkRunner
from shark.backward_makefx import MakeFxModule from amdshark.backward_makefx import MakeFxModule
from shark.shark_importer import import_with_fx, save_mlir from amdshark.amdshark_importer import import_with_fx, save_mlir
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import sys import sys
@@ -26,8 +26,8 @@ def print_err(*a):
print(*a, file=sys.stderr) print(*a, file=sys.stderr)
class SharkTrainer: class AMDSharkTrainer:
"""Training pytorch, tensorflow module on shark runtime.""" """Training pytorch, tensorflow module on amdshark runtime."""
def __init__( def __init__(
self, self,
@@ -48,9 +48,9 @@ class SharkTrainer:
# By default it's the torch frontend. # By default it's the torch frontend.
self.frontend = "pytorch" self.frontend = "pytorch"
self.device = device if device is not None else shark_args.device self.device = device if device is not None else amdshark_args.device
self.shark_runner = None self.amdshark_runner = None
# Sets the frontend i.e `pytorch` or `tensorflow`. # Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str): def set_frontend(self, frontend: str):
@@ -86,18 +86,18 @@ class SharkTrainer:
) )
mlir_module = save_mlir( mlir_module = save_mlir(
mlir_module, mlir_module,
model_name="shark_model", model_name="amdshark_model",
frontend="torch", frontend="torch",
mlir_dialect=mlir_type, mlir_dialect=mlir_type,
) )
self.shark_runner = SharkRunner( self.amdshark_runner = AMDSharkRunner(
mlir_module, mlir_module,
self.device, self.device,
"tm_tensor", "tm_tensor",
extra_args=extra_args, extra_args=extra_args,
) )
elif self.frontend in ["tensorflow", "tf", "mhlo", "stablehlo"]: elif self.frontend in ["tensorflow", "tf", "mhlo", "stablehlo"]:
self.shark_runner = SharkRunner( self.amdshark_runner = AMDSharkRunner(
self.model, self.model,
self.input, self.input,
self.dynamic, self.dynamic,
@@ -123,7 +123,7 @@ class SharkTrainer:
params = [x.numpy() for x in params] params = [x.numpy() for x in params]
print(f"Training started for {num_iters} iterations:") print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)): for i in tqdm(range(num_iters)):
params = self.shark_runner.run( params = self.amdshark_runner.run(
"forward", params + self.input, self.frontend "forward", params + self.input, self.frontend
) )
@@ -131,7 +131,7 @@ class SharkTrainer:
# Function to train tensorflow module. # Function to train tensorflow module.
# Output final loss. # Output final loss.
# TODO(raikonenfnu): Save updated weight/states in SHARK. # TODO(raikonenfnu): Save updated weight/states in AMDSHARK.
def _train_tf(self, num_iters): def _train_tf(self, num_iters):
input_list = [] input_list = []
for x in self.input: for x in self.input:
@@ -150,7 +150,7 @@ class SharkTrainer:
print(f"Training started for {num_iters} iterations:") print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)): for i in tqdm(range(num_iters)):
outputs = self.shark_runner.forward(input_list, self.frontend) outputs = self.amdshark_runner.forward(input_list, self.frontend)
return outputs return outputs
def train(self, num_iters=1): def train(self, num_iters=1):
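The trainer's constructor arguments are elided by the diff context above, so the following is a hypothetical sketch only; `set_frontend` and `train` are the methods visible in the hunks, while the constructor signature is an assumption:
```
# Hypothetical sketch; the constructor arguments are assumptions (elided by the diff).
from amdshark.amdshark_trainer import AMDSharkTrainer

trainer = AMDSharkTrainer(model, example_inputs)  # assumed signature
trainer.set_frontend("pytorch")                   # or "tensorflow", per set_frontend above
trainer.train(num_iters=10)                       # runs the forward loop on the amdshark runtime
```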

View File

@@ -71,7 +71,7 @@ class MakeFxModule:
fx_g = self.change_fx_graph_return_to_tuple(fx_g) fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g) ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile( temp = tempfile.NamedTemporaryFile(
suffix="_shark_ts", prefix="temp_ts_" suffix="_amdshark_ts", prefix="temp_ts_"
) )
ts_g.save(temp.name) ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name) new_ts = torch.jit.load(temp.name)

View File

@@ -3,7 +3,7 @@ from typing import List, Optional
import torch import torch
from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.proxy_tensor import make_fx
from torch._functorch.compile_utils import strip_overloads from torch._functorch.compile_utils import strip_overloads
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from torch._decomp import get_decompositions from torch._decomp import get_decompositions
from torch.func import functionalize from torch.func import functionalize
import io import io
@@ -93,13 +93,13 @@ def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule) -> bool:
return unwrapped_tuple return unwrapped_tuple
class SharkBackend: class AMDSharkBackend:
def __init__( def __init__(
self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
): ):
self.fx_g = fx_g self.fx_g = fx_g
self.inputs = inputs self.inputs = inputs
self.shark_module = None self.amdshark_module = None
self.device: str = options.get("device", "cpu") self.device: str = options.get("device", "cpu")
self.was_unwrapped: bool = False self.was_unwrapped: bool = False
self.none_indices: list = [] self.none_indices: list = []
@@ -125,19 +125,19 @@ class SharkBackend:
bytecode_stream = io.BytesIO() bytecode_stream = io.BytesIO()
mlir_module.operation.write_bytecode(bytecode_stream) mlir_module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue() bytecode = bytecode_stream.getvalue()
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_module=bytecode, mlir_module=bytecode,
device=self.device, device=self.device,
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
) )
shark_module.compile(extra_args=[]) amdshark_module.compile(extra_args=[])
self.shark_module = shark_module self.amdshark_module = amdshark_module
def __call__(self, *inputs): def __call__(self, *inputs):
np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs] np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
np_outs = self.shark_module("forward", np_inputs) np_outs = self.amdshark_module("forward", np_inputs)
if self.was_unwrapped: if self.was_unwrapped:
np_outs = [ np_outs = [
np_outs, np_outs,

View File

@@ -1,5 +1,5 @@
import torch import torch
import shark import amdshark
def foo(x, a): def foo(x, a):
@@ -9,8 +9,8 @@ def foo(x, a):
return x + 3 return x + 3
shark_options = {"device": "cpu"} amdshark_options = {"device": "cpu"}
compiled = torch.compile(foo, backend="shark", options=shark_options) compiled = torch.compile(foo, backend="amdshark", options=amdshark_options)
input = torch.ones(4) input = torch.ones(4)
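The torch.compile example above is truncated by the diff context; a hypothetical final line showing how the compiled callable would be invoked (the second argument is an assumption, since foo's body is only partially shown):
```
# Hypothetical invocation; the value passed for `a` is an assumption.
print(compiled(input, 1))
```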

View File

@@ -22,7 +22,7 @@
"source": [ "source": [
"# standard imports\n", "# standard imports\n",
"import torch\n", "import torch\n",
"from shark.iree_utils import get_iree_compiled_module" "from amdshark.iree_utils import get_iree_compiled_module"
] ]
}, },
{ {

View File

@@ -1,7 +1,7 @@
import torch import torch
from torch_mlir import compile, OutputType from torch_mlir import compile, OutputType
from shark.iree_utils import get_iree_compiled_module from amdshark.iree_utils import get_iree_compiled_module
try: try:
import torchdynamo import torchdynamo

View File

@@ -32,7 +32,7 @@
"source": [ "source": [
"# eager mode imports\n", "# eager mode imports\n",
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n", "from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend" "from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
@@ -440,7 +440,7 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [ "source": [
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:" "There is a convenience class `AMDSharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
@@ -684,9 +684,9 @@
], ],
"source": [ "source": [
"# eager mode RAII\n", "# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n", "from amdshark.amdshark_runner import AMDSharkEagerMode\n",
"\n", "\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n", "amdshark_eager_mode = AMDSharkEagerMode(\"cpu\")\n",
"\n", "\n",
"t = torch.ones((10, 10))\n", "t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n", "u = torch.ones((10, 10))\n",
@@ -712,7 +712,7 @@
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [ "source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:" "The `AMDSharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `AMDSharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `AMDSharkEagerMode`, or switch backends, you need to `del` the instance:"
], ],
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
@@ -757,8 +757,8 @@
} }
], ],
"source": [ "source": [
"del shark_eager_mode\n", "del amdshark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n", "amdshark_eager_mode = AMDSharkEagerMode(\"cuda\")\n",
"\n", "\n",
"t = torch.ones((10, 10))\n", "t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n", "u = torch.ones((10, 10))\n",

View File

@@ -17,8 +17,8 @@ from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode from amdshark.amdshark_runner import AMDSharkEagerMode
def test_cpu(): def test_cpu():
@@ -85,7 +85,7 @@ def test_gpu():
def test_python_mode_ref_backend(): def test_python_mode_ref_backend():
# hide this wherever you want? # hide this wherever you want?
_ = SharkEagerMode("refbackend") _ = AMDSharkEagerMode("refbackend")
t = torch.ones((10, 10), device="cpu") t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu") u = torch.ones((10, 10), device="cpu")
@@ -103,7 +103,7 @@ def test_python_mode_ref_backend():
def test_python_mode_iree_cpu(): def test_python_mode_iree_cpu():
# hide this wherever you want? # hide this wherever you want?
_ = SharkEagerMode("cpu") _ = AMDSharkEagerMode("cpu")
t = torch.ones((10, 10), device="cpu") t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu") u = torch.ones((10, 10), device="cpu")
@@ -121,7 +121,7 @@ def test_python_mode_iree_cpu():
def test_python_mode_iree_gpu(): def test_python_mode_iree_gpu():
_ = SharkEagerMode("gpu") _ = AMDSharkEagerMode("gpu")
t = torch.ones((10, 10), device="cpu") t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu") u = torch.ones((10, 10), device="cpu")

View File

@@ -47,7 +47,7 @@ golden_probabilities = torch.nn.functional.softmax(
golden_confidences = golden_confidences.numpy() golden_confidences = golden_confidences.numpy()
from shark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor from amdshark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
input_detached_clone = input_batch.clone() input_detached_clone = input_batch.clone()
eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone) eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
@@ -62,7 +62,7 @@ probabilities = torch.nn.functional.softmax(
torch.from_numpy(confidences), dim=0 torch.from_numpy(confidences), dim=0
).numpy() ).numpy()
print("The obtained result via shark is: ", confidences) print("The obtained result via amdshark is: ", confidences)
print("The golden result is:", golden_confidences) print("The golden result is:", golden_confidences)
np.testing.assert_allclose( np.testing.assert_allclose(

View File

@@ -3,7 +3,7 @@ import requests
from transformers import CLIPProcessor, TFCLIPModel from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf import tensorflow as tf
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
# Create a set of inputs # Create a set of inputs
clip_vit_inputs = [ clip_vit_inputs = [
@@ -43,7 +43,7 @@ if __name__ == "__main__":
padding=True, padding=True,
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
CLIPModule(), CLIPModule(),
( (
inputs["input_ids"], inputs["input_ids"],
@@ -51,11 +51,11 @@ if __name__ == "__main__":
inputs["pixel_values"], inputs["pixel_values"],
), ),
) )
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
print( print(
shark_module.forward( amdshark_module.forward(
( (
inputs["input_ids"], inputs["input_ids"],
inputs["attention_mask"], inputs["attention_mask"],

View File

@@ -7,7 +7,7 @@ import torch
from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
import torch_mlir import torch_mlir
import tempfile import tempfile
import functools import functools
@@ -176,12 +176,12 @@ def compile_through_fx(model, inputs, mlir_loc=None):
mlir_model = str(module) mlir_model = str(module)
func_name = "forward" func_name = "forward"
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_model, device=args.device, mlir_dialect="linalg" mlir_model, device=args.device, mlir_dialect="linalg"
) )
shark_module.compile() amdshark_module.compile()
return shark_module return amdshark_module
model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
@@ -213,22 +213,22 @@ if __name__ == "__main__":
img_LR = img_LR.to(device) img_LR = img_LR.to(device)
with torch.no_grad(): with torch.no_grad():
shark_module = compile_through_fx(inference, img_LR) amdshark_module = compile_through_fx(inference, img_LR)
shark_output = shark_module.forward((img_LR,)) amdshark_output = amdshark_module.forward((img_LR,))
shark_output = torch.from_numpy(shark_output) amdshark_output = torch.from_numpy(amdshark_output)
shark_output = ( amdshark_output = (
shark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy() amdshark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
) )
esrgan_output = ( esrgan_output = (
model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy() model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
) )
# SHARK OUTPUT # AMDSHARK OUTPUT
shark_output = np.transpose(shark_output[[2, 1, 0], :, :], (1, 2, 0)) amdshark_output = np.transpose(amdshark_output[[2, 1, 0], :, :], (1, 2, 0))
shark_output = (shark_output * 255.0).round() amdshark_output = (amdshark_output * 255.0).round()
cv2.imwrite( cv2.imwrite(
"OutputImages/{:s}_rlt_shark_output.png".format(base), shark_output "OutputImages/{:s}_rlt_amdshark_output.png".format(base), amdshark_output
) )
print("Generated SHARK's output") print("Generated AMDSHARK's output")
# ESRGAN OUTPUT # ESRGAN OUTPUT
esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0)) esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
esrgan_output = (esrgan_output * 255.0).round() esrgan_output = (esrgan_output * 255.0).round()

View File

@@ -1,7 +1,7 @@
from transformers import AutoModelForMaskedLM, AutoTokenizer from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch import torch
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
from iree.compiler import compile_str from iree.compiler import compile_str
from iree import runtime as ireert from iree import runtime as ireert
import os import os
@@ -35,7 +35,7 @@ if __name__ == "__main__":
return_tensors="pt", return_tensors="pt",
) )
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"]) inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
AlbertModule(), AlbertModule(),
inputs, inputs,
frontend="torch", frontend="torch",
@@ -43,9 +43,9 @@ if __name__ == "__main__":
minilm_mlir, func_name = mlir_importer.import_mlir( minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=True is_dynamic=False, tracing_required=True
) )
shark_module = SharkInference(minilm_mlir) amdshark_module = AMDSharkInference(minilm_mlir)
shark_module.compile() amdshark_module.compile()
token_logits = torch.tensor(shark_module.forward(inputs)) token_logits = torch.tensor(amdshark_module.forward(inputs))
mask_id = torch.where( mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1] )[1]
@@ -69,7 +69,7 @@ if __name__ == "__main__":
encoded_inputs["input_ids"], encoded_inputs["input_ids"],
encoded_inputs["attention_mask"], encoded_inputs["attention_mask"],
) )
token_logits = torch.tensor(shark_module.forward(inputs)) token_logits = torch.tensor(amdshark_module.forward(inputs))
mask_id = torch.where( mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1] )[1]

View File

@@ -3,8 +3,8 @@ import requests
from transformers import TFAutoModelForMaskedLM, AutoTokenizer from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf import tensorflow as tf
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
from iree.compiler import tf as tfc from iree.compiler import tf as tfc
from iree.compiler import compile_str from iree.compiler import compile_str
from iree import runtime as ireert from iree import runtime as ireert
@@ -46,7 +46,7 @@ if __name__ == "__main__":
return_tensors="tf", return_tensors="tf",
) )
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"]) inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
AlbertModule(), AlbertModule(),
inputs, inputs,
frontend="tf", frontend="tf",
@@ -54,11 +54,11 @@ if __name__ == "__main__":
minilm_mlir, func_name = mlir_importer.import_mlir( minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=False is_dynamic=False, tracing_required=False
) )
shark_module = SharkInference(minilm_mlir, mlir_dialect="mhlo") amdshark_module = AMDSharkInference(minilm_mlir, mlir_dialect="mhlo")
shark_module.compile() amdshark_module.compile()
output_idx = 0 output_idx = 0
data_idx = 1 data_idx = 1
token_logits = shark_module.forward(inputs)[output_idx][data_idx] token_logits = amdshark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where( mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
) )
@@ -82,7 +82,7 @@ if __name__ == "__main__":
encoded_inputs["input_ids"], encoded_inputs["input_ids"],
encoded_inputs["attention_mask"], encoded_inputs["attention_mask"],
) )
token_logits = shark_module.forward(inputs)[output_idx][data_idx] token_logits = amdshark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where( mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"]) tf.squeeze(encoded_inputs["input_ids"])
== tokenizer.mask_token_id == tokenizer.mask_token_id

View File

@@ -0,0 +1,14 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"bloom", frontend="torch"
)
amdshark_module = AMDSharkInference(
mlir_model, device="cpu", mlir_dialect="tm_tensor"
)
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)

View File

@@ -3,7 +3,7 @@ import requests
from transformers import GPT2Tokenizer, TFGPT2Model from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf import tensorflow as tf
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
# Create a set of inputs # Create a set of inputs
gpt2_inputs = [ gpt2_inputs = [
@@ -30,11 +30,11 @@ if __name__ == "__main__":
text = "I love the distilled version of models." text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf") inputs = tokenizer(text, return_tensors="tf")
shark_module = SharkInference( amdshark_module = AMDSharkInference(
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]) GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
) )
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
print( print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"])) amdshark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
) )

View File

@@ -1,4 +1,4 @@
# SHARK LLaMA # AMDSHARK LLaMA
## TORCH-MLIR Version ## TORCH-MLIR Version
@@ -14,5 +14,5 @@ git clone https://github.com/nod-ai/llama.git
Then in this repository Then in this repository
``` ```
pip install -e . pip install -e .
python llama/shark_model.py python llama/amdshark_model.py
``` ```

View File

@@ -1,7 +1,7 @@
import torch import torch
import torch_mlir import torch_mlir
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_compile import shark_compile_through_fx from amdshark.amdshark_compile import amdshark_compile_through_fx
from MEGABYTE_pytorch import MEGABYTE from MEGABYTE_pytorch import MEGABYTE
import os import os
@@ -38,10 +38,10 @@ inputs = [torch.randint(0, 16000, (1, 1024, 4))]
# CURRENTLY IT BAILS OUT HERE BECAUSE OF MISSING OP LOWERINGS :- # CURRENTLY IT BAILS OUT HERE BECAUSE OF MISSING OP LOWERINGS :-
# 1. aten.alias # 1. aten.alias
shark_module, _ = shark_compile_through_fx( amdshark_module, _ = amdshark_compile_through_fx(
model=megaModel, model=megaModel,
inputs=inputs, inputs=inputs,
extended_model_name="mega_shark", extended_model_name="mega_amdshark",
is_f16=False, is_f16=False,
f16_input_mask=None, f16_input_mask=None,
save_dir=os.getcwd(), save_dir=os.getcwd(),
@@ -59,8 +59,8 @@ def print_output_info(output, msg):
print("\n\t", output.shape) print("\n\t", output.shape)
ans = shark_module("forward", inputs) ans = amdshark_module("forward", inputs)
print_output_info(torch.from_numpy(ans), "SHARK's output") print_output_info(torch.from_numpy(ans), "AMDSHARK's output")
ans = megaModel.forward(*inputs) ans = megaModel.forward(*inputs)
print_output_info(ans, "ORIGINAL Model's output") print_output_info(ans, "ORIGINAL Model's output")
@@ -68,5 +68,5 @@ print_output_info(ans, "ORIGINAL Model's output")
# and sample from the logits accordingly # and sample from the logits accordingly
# or you can use the generate function # or you can use the generate function
# NEED TO LOOK AT THIS LATER IF REQUIRED IN SHARK. # NEED TO LOOK AT THIS LATER IF REQUIRED IN AMDSHARK.
# sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4) # sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4)

View File

@@ -0,0 +1,31 @@
from amdshark.amdshark_inference import AMDSharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
print("Running amdshark on cpu backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
# Generate the random inputs and feed into the graph.
x = amdshark_module.generate_random_inputs()
amdshark_module.compile()
print(amdshark_module.forward(x))
print("Running amdshark on cuda backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cuda", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))
print("Running amdshark on vulkan backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="vulkan", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))

View File

@@ -1,6 +1,6 @@
import torch import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
torch.manual_seed(0) torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased") tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
@@ -23,13 +23,13 @@ class MiniLMSequenceClassification(torch.nn.Module):
test_input = torch.randint(2, (1, 128)) test_input = torch.randint(2, (1, 128))
shark_module = SharkInference( amdshark_module = AMDSharkInference(
MiniLMSequenceClassification(), MiniLMSequenceClassification(),
(test_input,), (test_input,),
jit_trace=True, jit_trace=True,
benchmark_mode=True, benchmark_mode=True,
) )
shark_module.compile() amdshark_module.compile()
shark_module.forward((test_input,)) amdshark_module.forward((test_input,))
shark_module.benchmark_all((test_input,)) amdshark_module.benchmark_all((test_input,))

View File

@@ -1,6 +1,6 @@
import tensorflow as tf import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
MAX_SEQUENCE_LENGTH = 512 MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1 BATCH_SIZE = 1
@@ -53,9 +53,9 @@ if __name__ == "__main__":
encoded_input["attention_mask"], encoded_input["attention_mask"],
encoded_input["token_type_ids"], encoded_input["token_type_ids"],
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
BertModule(), test_input, benchmark_mode=True BertModule(), test_input, benchmark_mode=True
) )
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
shark_module.benchmark_all(test_input) amdshark_module.benchmark_all(test_input)

View File

@@ -3,7 +3,7 @@ import torch
import jax import jax
from typing import Union, Dict, List, Any from typing import Union, Dict, List, Any
import numpy as np import numpy as np
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
import io import io
NumpyTree = Union[np.ndarray, Dict[str, np.ndarray], List[np.ndarray]] NumpyTree = Union[np.ndarray, Dict[str, np.ndarray], List[np.ndarray]]
@@ -60,11 +60,11 @@ jax_model = get_jax_model()
mlir = export_jax_to_mlir(jax_model, sample_input) mlir = export_jax_to_mlir(jax_model, sample_input)
# Compile and load module. # Compile and load module.
shark_inference = SharkInference(mlir_module=mlir, mlir_dialect="mhlo") amdshark_inference = AMDSharkInference(mlir_module=mlir, mlir_dialect="mhlo")
shark_inference.compile() amdshark_inference.compile()
# Run main function. # Run main function.
result = shark_inference("main", jax.tree_util.tree_flatten(sample_input)[0]) result = amdshark_inference("main", jax.tree_util.tree_flatten(sample_input)[0])
# Run JAX model. # Run JAX model.
reference_result = jax.tree_util.tree_flatten(jax_model(**sample_input))[0] reference_result = jax.tree_util.tree_flatten(jax_model(**sample_input))[0]

View File

@@ -1,6 +1,6 @@
flax flax
jax[cpu] jax[cpu]
nodai-SHARK nodai-AMDSHARK
orbax orbax
transformers transformers
torch torch

View File

@@ -0,0 +1,23 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"microsoft/MiniLM-L12-H384-uncased",
frontend="torch",
)
amdshark_module = AMDSharkInference(mlir_model, device="cpu", mlir_dialect="linalg")
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)
# Let's generate random inputs, currently supported
# for static models.
rand_inputs = amdshark_module.generate_random_inputs()
rand_results = amdshark_module.forward(rand_inputs)
print("Running amdshark_module with random_inputs is: ", rand_results)

View File

@@ -1,6 +1,6 @@
import tensorflow as tf import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
MAX_SEQUENCE_LENGTH = 512 MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1 BATCH_SIZE = 1
@@ -48,7 +48,7 @@ if __name__ == "__main__":
tf.convert_to_tensor(encoded_input[key]), 0 tf.convert_to_tensor(encoded_input[key]), 0
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
BertModule(), BertModule(),
( (
encoded_input["input_ids"], encoded_input["input_ids"],
@@ -56,11 +56,11 @@ if __name__ == "__main__":
encoded_input["token_type_ids"], encoded_input["token_type_ids"],
), ),
) )
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
print( print(
shark_module.forward( amdshark_module.forward(
( (
encoded_input["input_ids"], encoded_input["input_ids"],
encoded_input["attention_mask"], encoded_input["attention_mask"],

View File

@@ -1,7 +1,7 @@
import torch import torch
import torchvision.models as models import torchvision.models as models
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True) torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
@@ -21,7 +21,7 @@ class ResnestModule(torch.nn.Module):
input = torch.randn(1, 3, 224, 224) input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
ResnestModule(), ResnestModule(),
(input,), (input,),
frontend="torch", frontend="torch",
@@ -33,7 +33,7 @@ mlir_importer = SharkImporter(
print(golden_out) print(golden_out)
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg") amdshark_module = AMDSharkInference(vision_mlir, mlir_dialect="linalg")
shark_module.compile() amdshark_module.compile()
result = shark_module.forward((input,)) result = amdshark_module.forward((input,))
print("Obtained result", result) print("Obtained result", result)

View File

@@ -1,5 +1,5 @@
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.parser import shark_args from amdshark.parser import amdshark_args
import torch import torch
import numpy as np import numpy as np
@@ -49,21 +49,21 @@ module = torch_mlir.compile(
mlir_model = module mlir_model = module
func_name = "forward" func_name = "forward"
shark_module = SharkInference(mlir_model, device="cuda", mlir_dialect="linalg") amdshark_module = AMDSharkInference(mlir_model, device="cuda", mlir_dialect="linalg")
shark_module.compile() amdshark_module.compile()
def shark_result(x): def amdshark_result(x):
x_ny = x.cpu().detach().numpy() x_ny = x.cpu().detach().numpy()
inputs = (x_ny,) inputs = (x_ny,)
result = shark_module.forward(inputs) result = amdshark_module.forward(inputs)
return torch.from_numpy(result) return torch.from_numpy(result)
observed_out = shark_result(test_input_fp16) observed_out = amdshark_result(test_input_fp16)
print("Golden result:", actual_out_fp16) print("Golden result:", actual_out_fp16)
print("SHARK result:", observed_out) print("AMDSHARK result:", observed_out)
actual_out_fp16 = actual_out_fp16.to(device=torch.device("cpu")) actual_out_fp16 = actual_out_fp16.to(device=torch.device("cpu"))

View File

@@ -4,8 +4,8 @@ import torch
import torchvision.models as models import torchvision.models as models
from torchvision import transforms from torchvision import transforms
import sys import sys
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_downloader import download_model from amdshark.amdshark_downloader import download_model
################################## Preprocessing inputs and model ############ ################################## Preprocessing inputs and model ############
@@ -70,13 +70,13 @@ mlir_model, func_name, inputs, golden_out = download_model(
"resnet50", frontend="torch" "resnet50", frontend="torch"
) )
shark_module = SharkInference(mlir_model, mlir_dialect="linalg") amdshark_module = AMDSharkInference(mlir_model, mlir_dialect="linalg")
shark_module.compile() amdshark_module.compile()
path = shark_module.save_module() path = amdshark_module.save_module()
shark_module.load_module(path) amdshark_module.load_module(path)
result = shark_module("forward", (img.detach().numpy(),)) result = amdshark_module("forward", (img.detach().numpy(),))
print("The top 3 results obtained via shark_runner is:") print("The top 3 results obtained via amdshark_runner is:")
print(top3_possibilities(torch.from_numpy(result))) print(top3_possibilities(torch.from_numpy(result)))
print() print()

View File

@@ -34,8 +34,8 @@ import subprocess
from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_downloader import download_public_file from amdshark.amdshark_downloader import download_public_file
from transformers import ( from transformers import (
BloomTokenizerFast, BloomTokenizerFast,
BloomForSequenceClassification, BloomForSequenceClassification,
@@ -77,13 +77,13 @@ class ShardedBloom:
module = f_.read() module = f_.read()
f_.close() f_.close()
module = bytes(module, "utf-8") module = bytes(module, "utf-8")
shark_module = SharkInference( amdshark_module = AMDSharkInference(
module, module,
device=device, device=device,
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
device_idx=device_idx, device_idx=device_idx,
) )
shark_module.save_module( amdshark_module.save_module(
module_name=f"{self.src_folder}/{layer_name}", module_name=f"{self.src_folder}/{layer_name}",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -92,14 +92,14 @@ class ShardedBloom:
], ],
) )
else: else:
shark_module = SharkInference( amdshark_module = AMDSharkInference(
"", "",
device=device, device=device,
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
device_idx=device_idx, device_idx=device_idx,
) )
return shark_module return amdshark_module
def init_layers(self, device, replace=False, device_idx=[0]): def init_layers(self, device, replace=False, device_idx=[0]):
if device_idx is not None: if device_idx is not None:
@@ -311,7 +311,7 @@ def _prepare_attn_mask(
def download_model(destination_folder, model_name): def download_model(destination_folder, model_name):
download_public_file( download_public_file(
f"gs://shark_tank/sharded_bloom/{model_name}/", destination_folder f"gs://amdshark_tank/sharded_bloom/{model_name}/", destination_folder
) )

View File

@@ -2,7 +2,7 @@ import sys
import os import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
import re import re
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
import torch import torch
import torch.nn as nn import torch.nn as nn
from collections import OrderedDict from collections import OrderedDict
@@ -142,7 +142,7 @@ if __name__ == "__main__":
mlir_str = bytes(mlir_str, "utf-8") mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_str, mlir_str,
device="cpu", device="cpu",
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
@@ -150,7 +150,7 @@ if __name__ == "__main__":
) )
if will_compile: if will_compile:
shark_module.save_module( amdshark_module.save_module(
module_name=f"{working_dir}/word_embeddings", module_name=f"{working_dir}/word_embeddings",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -159,8 +159,8 @@ if __name__ == "__main__":
], ],
) )
shark_module.load_module(f"{working_dir}/word_embeddings.vmfb") amdshark_module.load_module(f"{working_dir}/word_embeddings.vmfb")
input_embeds = shark_module( input_embeds = amdshark_module(
inputs=(input_ids,), function_name="forward" inputs=(input_ids,), function_name="forward"
) )
input_embeds = torch.tensor(input_embeds).float() input_embeds = torch.tensor(input_embeds).float()
@@ -175,7 +175,7 @@ if __name__ == "__main__":
mlir_str = f.read() mlir_str = f.read()
f.close() f.close()
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_str, mlir_str,
device="cpu", device="cpu",
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
@@ -183,7 +183,7 @@ if __name__ == "__main__":
) )
if will_compile: if will_compile:
shark_module.save_module( amdshark_module.save_module(
module_name=f"{working_dir}/word_embeddings_layernorm", module_name=f"{working_dir}/word_embeddings_layernorm",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -192,10 +192,10 @@ if __name__ == "__main__":
], ],
) )
shark_module.load_module( amdshark_module.load_module(
f"{working_dir}/word_embeddings_layernorm.vmfb" f"{working_dir}/word_embeddings_layernorm.vmfb"
) )
hidden_states = shark_module( hidden_states = amdshark_module(
inputs=(input_embeds,), function_name="forward" inputs=(input_embeds,), function_name="forward"
) )
hidden_states = torch.tensor(hidden_states).float() hidden_states = torch.tensor(hidden_states).float()
@@ -243,7 +243,7 @@ if __name__ == "__main__":
mlir_str = bytes(mlir_str, "utf-8") mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_str, mlir_str,
device=device, device=device,
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
@@ -251,7 +251,7 @@ if __name__ == "__main__":
) )
if will_compile: if will_compile:
shark_module.save_module( amdshark_module.save_module(
module_name=f"{working_dir}/bloom_block_{layer_name}", module_name=f"{working_dir}/bloom_block_{layer_name}",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -260,11 +260,11 @@ if __name__ == "__main__":
], ],
) )
shark_module.load_module( amdshark_module.load_module(
f"{working_dir}/bloom_block_{layer_name}.vmfb" f"{working_dir}/bloom_block_{layer_name}.vmfb"
) )
output = shark_module( output = amdshark_module(
inputs=( inputs=(
hidden_states.detach().numpy(), hidden_states.detach().numpy(),
alibi.detach().numpy(), alibi.detach().numpy(),
@@ -290,7 +290,7 @@ if __name__ == "__main__":
mlir_str = bytes(mlir_str, "utf-8") mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_str, mlir_str,
device="cpu", device="cpu",
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
@@ -298,7 +298,7 @@ if __name__ == "__main__":
) )
if will_compile: if will_compile:
shark_module.save_module( amdshark_module.save_module(
module_name=f"{working_dir}/ln_f", module_name=f"{working_dir}/ln_f",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -307,11 +307,11 @@ if __name__ == "__main__":
], ],
) )
shark_module.load_module(f"{working_dir}/ln_f.vmfb") amdshark_module.load_module(f"{working_dir}/ln_f.vmfb")
hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt") hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt")
hidden_states = shark_module( hidden_states = amdshark_module(
inputs=(hidden_states,), function_name="forward" inputs=(hidden_states,), function_name="forward"
) )
@@ -347,7 +347,7 @@ if __name__ == "__main__":
logits = lm_head(torch.tensor(hidden_states).float()) logits = lm_head(torch.tensor(hidden_states).float())
else: else:
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_str, mlir_str,
device="cpu", device="cpu",
mlir_dialect="tm_tensor", mlir_dialect="tm_tensor",
@@ -355,7 +355,7 @@ if __name__ == "__main__":
) )
if will_compile: if will_compile:
shark_module.save_module( amdshark_module.save_module(
module_name=f"{working_dir}/lm_head", module_name=f"{working_dir}/lm_head",
extra_args=[ extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary", "--iree-vm-bytecode-module-output-format=flatbuffer-binary",
@@ -364,9 +364,9 @@ if __name__ == "__main__":
], ],
) )
shark_module.load_module(f"{working_dir}/lm_head.vmfb") amdshark_module.load_module(f"{working_dir}/lm_head.vmfb")
logits = shark_module( logits = amdshark_module(
inputs=(hidden_states,), function_name="forward" inputs=(hidden_states,), function_name="forward"
) )

View File

@@ -52,8 +52,8 @@ import sys
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
torch.manual_seed(0) torch.manual_seed(0)
@@ -349,7 +349,7 @@ input_dlrm = (dense_inp, vs0, *vsi)
golden_output = dlrm_model(dense_inp, vs0, *vsi) golden_output = dlrm_model(dense_inp, vs0, *vsi)
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
dlrm_model, dlrm_model,
input_dlrm, input_dlrm,
frontend="torch", frontend="torch",
@@ -359,11 +359,11 @@ mlir_importer = SharkImporter(
tracing_required=True tracing_required=True
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
dlrm_mlir, device="vulkan", mlir_dialect="linalg" dlrm_mlir, device="vulkan", mlir_dialect="linalg"
) )
shark_module.compile() amdshark_module.compile()
result = shark_module.forward(input_dlrm) result = amdshark_module.forward(input_dlrm)
np.testing.assert_allclose( np.testing.assert_allclose(
golden_output.detach().numpy(), result, rtol=1e-02, atol=1e-03 golden_output.detach().numpy(), result, rtol=1e-02, atol=1e-03
) )

View File

@@ -15,8 +15,8 @@ from torchrec.models.dlrm import (
SparseArch, SparseArch,
OverArch, OverArch,
) )
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
import numpy as np import numpy as np
torch.manual_seed(0) torch.manual_seed(0)
@@ -70,7 +70,7 @@ def to_list(key_jagged, combined_keys):
return combined_list return combined_list
class SparseArchShark(nn.Module): class SparseArchAMDShark(nn.Module):
def create_emb(self, embedding_dim, num_embeddings_list): def create_emb(self, embedding_dim, num_embeddings_list):
embedding_list = nn.ModuleList() embedding_list = nn.ModuleList()
for i in range(0, num_embeddings_list.size): for i in range(0, num_embeddings_list.size):
@@ -91,7 +91,7 @@ class SparseArchShark(nn.Module):
total_features, total_features,
num_embeddings_list, num_embeddings_list,
): ):
super(SparseArchShark, self).__init__() super(SparseArchAMDShark, self).__init__()
self.embedding_dim = embedding_dim self.embedding_dim = embedding_dim
self.num_features = total_features self.num_features = total_features
self.embedding_list = self.create_emb( self.embedding_list = self.create_emb(
@@ -150,7 +150,7 @@ def test_sparse_arch() -> None:
), ),
offsets=offsets, offsets=offsets,
) )
sparse_archi = SparseArchShark(D, 3, np.array([10, 10])) sparse_archi = SparseArchAMDShark(D, 3, np.array([10, 10]))
sparse_archi.embedding_list[0].weight = w1 sparse_archi.embedding_list[0].weight = w1
sparse_archi.embedding_list[1].weight = w2 sparse_archi.embedding_list[1].weight = w2
inputs = to_list(features, {"f1": 0, "f3": 0, "f2": 1}) inputs = to_list(features, {"f1": 0, "f3": 0, "f2": 1})
@@ -169,7 +169,7 @@ def test_sparse_arch() -> None:
test_sparse_arch() test_sparse_arch()
class DLRMShark(nn.Module): class DLRMAMDShark(nn.Module):
def __init__( def __init__(
self, self,
embedding_dim, embedding_dim,
@@ -181,7 +181,7 @@ class DLRMShark(nn.Module):
) -> None: ) -> None:
super().__init__() super().__init__()
self.sparse_arch: SparseArchShark = SparseArchShark( self.sparse_arch: SparseArchAMDShark = SparseArchAMDShark(
embedding_dim, total_features, num_embeddings_list embedding_dim, total_features, num_embeddings_list
) )
num_sparse_features: int = total_features num_sparse_features: int = total_features
@@ -250,7 +250,7 @@ def test_dlrm() -> None:
dense_arch_layer_sizes=[20, D], dense_arch_layer_sizes=[20, D],
over_arch_layer_sizes=[5, 1], over_arch_layer_sizes=[5, 1],
) )
sparse_nn_nod = DLRMShark( sparse_nn_nod = DLRMAMDShark(
embedding_dim=8, embedding_dim=8,
total_features=3, total_features=3,
num_embeddings_list=np.array([100, 100]), num_embeddings_list=np.array([100, 100]),
@@ -283,7 +283,7 @@ def test_dlrm() -> None:
# print(logits_nod) # print(logits_nod)
# Import the module and print. # Import the module and print.
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
sparse_nn_nod, sparse_nn_nod,
(dense_features, *x), (dense_features, *x),
frontend="torch", frontend="torch",
@@ -293,11 +293,11 @@ def test_dlrm() -> None:
tracing_required=True tracing_required=True
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
dlrm_mlir, device="cpu", mlir_dialect="linalg" dlrm_mlir, device="cpu", mlir_dialect="linalg"
) )
shark_module.compile() amdshark_module.compile()
result = shark_module.forward(inputs) result = amdshark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03) np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
torch.allclose( torch.allclose(

View File

@@ -3,7 +3,7 @@ import requests
from transformers import T5Tokenizer, TFT5Model from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf import tensorflow as tf
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
# Create a set of inputs # Create a set of inputs
t5_inputs = [ t5_inputs = [
@@ -29,7 +29,7 @@ if __name__ == "__main__":
text = "I love the distilled version of models." text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf").input_ids inputs = tokenizer(text, return_tensors="tf").input_ids
shark_module = SharkInference(T5Module(), (inputs, inputs)) amdshark_module = AMDSharkInference(T5Module(), (inputs, inputs))
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
print(shark_module.forward((inputs, inputs))) print(amdshark_module.forward((inputs, inputs)))

View File

@@ -1,6 +1,6 @@
import torch import torch
import torchvision.models as models import torchvision.models as models
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
class VisionModule(torch.nn.Module): class VisionModule(torch.nn.Module):
@@ -35,9 +35,9 @@ vision_models_list = [
] ]
for i, vision_model in enumerate(vision_models_list): for i, vision_model in enumerate(vision_models_list):
shark_module = SharkInference( amdshark_module = AMDSharkInference(
VisionModule(vision_model), VisionModule(vision_model),
(input,), (input,),
) )
shark_module.compile() amdshark_module.compile()
shark_module.forward((input,)) amdshark_module.forward((input,))

View File

@@ -1,7 +1,7 @@
import torch import torch
import numpy as np import numpy as np
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
class UnetModule(torch.nn.Module): class UnetModule(torch.nn.Module):
@@ -23,7 +23,7 @@ class UnetModule(torch.nn.Module):
input = torch.randn(1, 3, 224, 224) input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter( mlir_importer = AMDSharkImporter(
UnetModule(), UnetModule(),
(input,), (input,),
frontend="torch", frontend="torch",
@@ -33,7 +33,7 @@ mlir_importer = SharkImporter(
tracing_required=False tracing_required=False
) )
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg") amdshark_module = AMDSharkInference(vision_mlir, mlir_dialect="linalg")
shark_module.compile() amdshark_module.compile()
result = shark_module.forward((input,)) result = amdshark_module.forward((input,))
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03) np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)

View File

@@ -1,13 +1,13 @@
import requests import requests
from PIL import Image from PIL import Image
from io import BytesIO from io import BytesIO
from pipeline_shark_stable_diffusion_upscale import ( from pipeline_amdshark_stable_diffusion_upscale import (
SharkStableDiffusionUpscalePipeline, AMDSharkStableDiffusionUpscalePipeline,
) )
import torch import torch
model_id = "stabilityai/stable-diffusion-x4-upscaler" model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipeline = SharkStableDiffusionUpscalePipeline(model_id) pipeline = AMDSharkStableDiffusionUpscalePipeline(model_id)
# let's download an image # let's download an image
url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png" url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"

View File

@@ -32,13 +32,13 @@ def get_clip_mlir(model_name="clip_text", extra_args=[]):
return self.text_encoder(input)[0] return self.text_encoder(input)[0]
clip_model = CLIPText() clip_model = CLIPText()
shark_clip = compile_through_fx( amdshark_clip = compile_through_fx(
clip_model, clip_model,
model_input["clip"], model_input["clip"],
model_name=model_name, model_name=model_name,
extra_args=extra_args, extra_args=extra_args,
) )
return shark_clip return amdshark_clip
def get_vae_mlir(model_name="vae", extra_args=[]): def get_vae_mlir(model_name="vae", extra_args=[]):
@@ -55,13 +55,13 @@ def get_vae_mlir(model_name="vae", extra_args=[]):
return x return x
vae = VaeModel() vae = VaeModel()
shark_vae = compile_through_fx( amdshark_vae = compile_through_fx(
vae, vae,
model_input["vae"], model_input["vae"],
model_name=model_name, model_name=model_name,
extra_args=extra_args, extra_args=extra_args,
) )
return shark_vae return amdshark_vae
def get_unet_mlir(model_name="unet", extra_args=[]): def get_unet_mlir(model_name="unet", extra_args=[]):
@@ -87,7 +87,7 @@ def get_unet_mlir(model_name="unet", extra_args=[]):
unet = UnetModel() unet = UnetModel()
f16_input_mask = (True, True, True, False) f16_input_mask = (True, True, True, False)
shark_unet = compile_through_fx( amdshark_unet = compile_through_fx(
unet, unet,
model_input["unet"], model_input["unet"],
model_name=model_name, model_name=model_name,
@@ -95,4 +95,4 @@ def get_unet_mlir(model_name="unet", extra_args=[]):
f16_input_mask=f16_input_mask, f16_input_mask=f16_input_mask,
extra_args=extra_args, extra_args=extra_args,
) )
return shark_unet return amdshark_unet

View File

@@ -5,7 +5,7 @@ from model_wrappers import (
get_clip_mlir, get_clip_mlir,
) )
from upscaler_args import args from upscaler_args import args
from utils import get_shark_model from utils import get_amdshark_model
BATCH_SIZE = len(args.prompts) BATCH_SIZE = len(args.prompts)
if BATCH_SIZE != 1: if BATCH_SIZE != 1:
@@ -24,25 +24,25 @@ clip_flag = [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))" "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
] ]
bucket = "gs://shark_tank/stable_diffusion/" bucket = "gs://amdshark_tank/stable_diffusion/"
def get_unet(): def get_unet():
model_name = "upscaler_unet" model_name = "upscaler_unet"
if args.import_mlir: if args.import_mlir:
return get_unet_mlir(model_name, unet_flag) return get_unet_mlir(model_name, unet_flag)
return get_shark_model(bucket, model_name, unet_flag) return get_amdshark_model(bucket, model_name, unet_flag)
def get_vae(): def get_vae():
model_name = "upscaler_vae" model_name = "upscaler_vae"
if args.import_mlir: if args.import_mlir:
return get_vae_mlir(model_name, vae_flag) return get_vae_mlir(model_name, vae_flag)
return get_shark_model(bucket, model_name, vae_flag) return get_amdshark_model(bucket, model_name, vae_flag)
def get_clip(): def get_clip():
model_name = "upscaler_clip" model_name = "upscaler_clip"
if args.import_mlir: if args.import_mlir:
return get_clip_mlir(model_name, clip_flag) return get_clip_mlir(model_name, clip_flag)
return get_shark_model(bucket, model_name, clip_flag) return get_amdshark_model(bucket, model_name, clip_flag)

View File

@@ -46,13 +46,13 @@ def preprocess(image):
return image return image
def shark_run_wrapper(model, *args): def amdshark_run_wrapper(model, *args):
np_inputs = tuple([x.detach().numpy() for x in args]) np_inputs = tuple([x.detach().numpy() for x in args])
outputs = model("forward", np_inputs) outputs = model("forward", np_inputs)
return torch.from_numpy(outputs) return torch.from_numpy(outputs)
class SharkStableDiffusionUpscalePipeline: class AMDSharkStableDiffusionUpscalePipeline:
def __init__( def __init__(
self, self,
model_id, model_id,
@@ -131,7 +131,7 @@ class SharkStableDiffusionUpscalePipeline:
# else: # else:
# attention_mask = None # attention_mask = None
text_embeddings = shark_run_wrapper( text_embeddings = amdshark_run_wrapper(
self.text_encoder, text_input_ids.to(device) self.text_encoder, text_input_ids.to(device)
) )
@@ -180,7 +180,7 @@ class SharkStableDiffusionUpscalePipeline:
# else: # else:
# attention_mask = None # attention_mask = None
uncond_embeddings = shark_run_wrapper( uncond_embeddings = amdshark_run_wrapper(
self.text_encoder, self.text_encoder,
uncond_input.input_ids.to(device), uncond_input.input_ids.to(device),
) )
@@ -227,7 +227,7 @@ class SharkStableDiffusionUpscalePipeline:
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents with 0.18215->0.08333 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.decode_latents with 0.18215->0.08333
def decode_latents(self, latents): def decode_latents(self, latents):
latents = 1 / 0.08333 * latents latents = 1 / 0.08333 * latents
image = shark_run_wrapper(self.vae, latents) image = amdshark_run_wrapper(self.vae, latents)
image = (image / 2 + 0.5).clamp(0, 1) image = (image / 2 + 0.5).clamp(0, 1)
# we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
image = image.cpu().permute(0, 2, 3, 1).float().numpy() image = image.cpu().permute(0, 2, 3, 1).float().numpy()
@@ -445,7 +445,7 @@ class SharkStableDiffusionUpscalePipeline:
timestep = torch.tensor([t]).to(torch.float32) timestep = torch.tensor([t]).to(torch.float32)
# predict the noise residual # predict the noise residual
noise_pred = shark_run_wrapper( noise_pred = amdshark_run_wrapper(
self.unet, self.unet,
latent_model_input.half(), latent_model_input.half(),
timestep, timestep,

View File

@@ -59,7 +59,7 @@ p.add_argument(
"--import_mlir", "--import_mlir",
default=False, default=False,
action=argparse.BooleanOptionalAction, action=argparse.BooleanOptionalAction,
help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.", help="imports the model from torch module to amdshark_module otherwise downloads the model from amdshark_tank.",
) )
p.add_argument( p.add_argument(

View File

@@ -1,16 +1,16 @@
import os import os
import torch import torch
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from upscaler_args import args from upscaler_args import args
from shark.shark_importer import import_with_fx from amdshark.amdshark_importer import import_with_fx
from shark.iree_utils.vulkan_utils import ( from amdshark.iree_utils.vulkan_utils import (
set_iree_vulkan_runtime_flags, set_iree_vulkan_runtime_flags,
get_vulkan_target_triple, get_vulkan_target_triple,
get_iree_vulkan_runtime_flags, get_iree_vulkan_runtime_flags,
) )
def _compile_module(shark_module, model_name, extra_args=[]): def _compile_module(amdshark_module, model_name, extra_args=[]):
if args.load_vmfb or args.save_vmfb: if args.load_vmfb or args.save_vmfb:
device = ( device = (
args.device args.device
@@ -21,7 +21,7 @@ def _compile_module(shark_module, model_name, extra_args=[]):
vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb") vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb")
if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb: if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
print(f"loading existing vmfb from: {vmfb_path}") print(f"loading existing vmfb from: {vmfb_path}")
shark_module.load_module(vmfb_path, extra_args=extra_args) amdshark_module.load_module(vmfb_path, extra_args=extra_args)
else: else:
if args.save_vmfb: if args.save_vmfb:
print("Saving to {}".format(vmfb_path)) print("Saving to {}".format(vmfb_path))
@@ -31,48 +31,48 @@ def _compile_module(shark_module, model_name, extra_args=[]):
vmfb_path vmfb_path
) )
) )
path = shark_module.save_module( path = amdshark_module.save_module(
os.getcwd(), extended_name, extra_args os.getcwd(), extended_name, extra_args
) )
shark_module.load_module(path, extra_args=extra_args) amdshark_module.load_module(path, extra_args=extra_args)
else: else:
shark_module.compile(extra_args) amdshark_module.compile(extra_args)
return shark_module return amdshark_module
# Downloads the model from shark_tank and returns the shark_module. # Downloads the model from amdshark_tank and returns the amdshark_module.
def get_shark_model(tank_url, model_name, extra_args=[]): def get_amdshark_model(tank_url, model_name, extra_args=[]):
from shark.shark_downloader import download_model from amdshark.amdshark_downloader import download_model
from shark.parser import shark_args from amdshark.parser import amdshark_args
# Set local shark_tank cache directory. # Set local amdshark_tank cache directory.
# shark_args.local_tank_cache = args.local_tank_cache # amdshark_args.local_tank_cache = args.local_tank_cache
mlir_model, func_name, inputs, golden_out = download_model( mlir_model, func_name, inputs, golden_out = download_model(
model_name, model_name,
tank_url=tank_url, tank_url=tank_url,
frontend="torch", frontend="torch",
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_model, device=args.device, mlir_dialect="linalg" mlir_model, device=args.device, mlir_dialect="linalg"
) )
return _compile_module(shark_module, model_name, extra_args) return _compile_module(amdshark_module, model_name, extra_args)
# Converts the torch-module into a shark_module. # Converts the torch-module into an amdshark_module.
def compile_through_fx( def compile_through_fx(
model, inputs, model_name, is_f16=False, f16_input_mask=None, extra_args=[] model, inputs, model_name, is_f16=False, f16_input_mask=None, extra_args=[]
): ):
mlir_module, func_name = import_with_fx( mlir_module, func_name = import_with_fx(
model, inputs, is_f16, f16_input_mask model, inputs, is_f16, f16_input_mask
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_module, mlir_module,
device=args.device, device=args.device,
mlir_dialect="linalg", mlir_dialect="linalg",
) )
return _compile_module(shark_module, model_name, extra_args) return _compile_module(amdshark_module, model_name, extra_args)
def set_iree_runtime_flags(): def set_iree_runtime_flags():
@@ -112,7 +112,7 @@ def get_device_mapping(driver, key_combination=3):
Returns: Returns:
dict: map of possible device names a user can input to the desired combination of name/path. dict: map of possible device names a user can input to the desired combination of name/path.
""" """
from shark.iree_utils._common import iree_device_map from amdshark.iree_utils._common import iree_device_map
driver = iree_device_map(driver) driver = iree_device_map(driver)
device_list = get_all_devices(driver) device_list = get_all_devices(driver)
@@ -205,7 +205,7 @@ def set_init_device_flags():
# Utility to get list of devices available. # Utility to get list of devices available.
def get_available_devices(): def get_available_devices():
def get_devices_by_name(driver_name): def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map from amdshark.iree_utils._common import iree_device_map
device_list = [] device_list = []
try: try:

View File

@@ -0,0 +1,15 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"v_diffusion", frontend="torch"
)
amdshark_module = AMDSharkInference(
mlir_model, device="vulkan", mlir_dialect="linalg"
)
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)

View File

@@ -1,7 +1,7 @@
import torch import torch
from torch.nn.utils import stateless from torch.nn.utils import stateless
from transformers import AutoTokenizer, AutoModelForSequenceClassification from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_trainer import SharkTrainer from amdshark.amdshark_trainer import AMDSharkTrainer
class MiniLMSequenceClassification(torch.nn.Module): class MiniLMSequenceClassification(torch.nn.Module):
@@ -42,7 +42,7 @@ def forward(params, buffers, args):
return params, buffers return params, buffers
shark_module = SharkTrainer(mod, inp) amdshark_module = AMDSharkTrainer(mod, inp)
shark_module.compile(forward) amdshark_module.compile(forward)
shark_module.train(num_iters=2) amdshark_module.train(num_iters=2)
print("training done") print("training done")

View File

@@ -3,8 +3,8 @@ import os
import time import time
import tensorflow as tf import tensorflow as tf
from shark.shark_trainer import SharkTrainer from amdshark.amdshark_trainer import AMDSharkTrainer
from shark.parser import parser from amdshark.parser import parser
from urllib import request from urllib import request
parser.add_argument( parser.add_argument(
@@ -28,7 +28,7 @@ if __name__ == "__main__":
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)), np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)), np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
] ]
file_link = "https://storage.googleapis.com/shark_tank/users/stanley/bert_tf_training.mlir" file_link = "https://storage.googleapis.com/amdshark_tank/users/stanley/bert_tf_training.mlir"
response = request.urlretrieve(file_link, load_args.download_mlir_path) response = request.urlretrieve(file_link, load_args.download_mlir_path)
sample_input_tensors = [ sample_input_tensors = [
tf.convert_to_tensor(val, dtype=tf.int32) tf.convert_to_tensor(val, dtype=tf.int32)
@@ -41,7 +41,7 @@ if __name__ == "__main__":
) )
with open(load_args.download_mlir_path, "rb") as input_file: with open(load_args.download_mlir_path, "rb") as input_file:
bert_mlir = input_file.read() bert_mlir = input_file.read()
shark_module = SharkTrainer( amdshark_module = AMDSharkTrainer(
bert_mlir, bert_mlir,
( (
sample_input_tensors, sample_input_tensors,
@@ -50,10 +50,10 @@ if __name__ == "__main__":
), ),
), ),
) )
shark_module.set_frontend("mhlo") amdshark_module.set_frontend("mhlo")
shark_module.compile() amdshark_module.compile()
start = time.time() start = time.time()
print(shark_module.train(num_iter)) print(amdshark_module.train(num_iter))
end = time.time() end = time.time()
total_time = end - start total_time = end - start
print("time: " + str(total_time)) print("time: " + str(total_time))

View File

@@ -8,7 +8,7 @@ from official.nlp.modeling import layers
from official.nlp.modeling import networks from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier from official.nlp.modeling.models import bert_classifier
from shark.shark_trainer import SharkTrainer from amdshark.amdshark_trainer import AMDSharkTrainer
tf.random.set_seed(0) tf.random.set_seed(0)
@@ -79,7 +79,7 @@ if __name__ == "__main__":
for val in predict_sample_input for val in predict_sample_input
] ]
num_iter = 10 num_iter = 10
shark_module = SharkTrainer( amdshark_module = AMDSharkTrainer(
BertModule(), BertModule(),
( (
sample_input_tensors, sample_input_tensors,
@@ -88,10 +88,10 @@ if __name__ == "__main__":
), ),
), ),
) )
shark_module.set_frontend("tensorflow") amdshark_module.set_frontend("tensorflow")
shark_module.compile() amdshark_module.compile()
start = time.time() start = time.time()
print(shark_module.train(num_iter)) print(amdshark_module.train(num_iter))
end = time.time() end = time.time()
total_time = end - start total_time = end - start
print("time: " + str(total_time)) print("time: " + str(total_time))

View File

@@ -1,6 +1,6 @@
import torch import torch
from torch.nn.utils import _stateless from torch.nn.utils import _stateless
from shark.shark_trainer import SharkTrainer from amdshark.amdshark_trainer import AMDSharkTrainer
class Foo(torch.nn.Module): class Foo(torch.nn.Module):
@@ -37,8 +37,8 @@ def forward(params, buffers, args):
# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp) # fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)
shark_module = SharkTrainer(mod, inp) amdshark_module = AMDSharkTrainer(mod, inp)
# Pass the training function in case of torch # Pass the training function in case of torch
shark_module.compile(training_fn=forward) amdshark_module.compile(training_fn=forward)
shark_module.train(num_iters=10) amdshark_module.train(num_iters=10)

View File

@@ -5,10 +5,10 @@
<details> <details>
<summary>Installation (Linux)</summary> <summary>Installation (Linux)</summary>
### Activate shark.venv Virtual Environment ### Activate amdshark.venv Virtual Environment
```shell ```shell
source shark.venv/bin/activate source amdshark.venv/bin/activate
# Some older pip installs may not be able to handle the recent PyTorch deps # Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip python -m pip install --upgrade pip

View File

@@ -2,10 +2,10 @@
## Installation (Linux) ## Installation (Linux)
### Activate shark.venv Virtual Environment ### Activate amdshark.venv Virtual Environment
```shell ```shell
source shark.venv/bin/activate source amdshark.venv/bin/activate
# Some older pip installs may not be able to handle the recent PyTorch deps # Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip python -m pip install --upgrade pip
@@ -23,7 +23,7 @@ pip install accelerate transformers ftfy
Please cherry-pick this branch of torch-mlir: https://github.com/vivekkhandelwal1/torch-mlir/tree/sd-ops Please cherry-pick this branch of torch-mlir: https://github.com/vivekkhandelwal1/torch-mlir/tree/sd-ops
and build it locally. You can find the instructions for using a locally built Torch-MLIR and build it locally. You can find the instructions for using a locally built Torch-MLIR
here: https://github.com/nod-ai/SHARK-Studio#how-to-use-your-locally-built-iree--torch-mlir-with-shark here: https://github.com/nod-ai/AMDSHARK-Studio#how-to-use-your-locally-built-iree--torch-mlir-with-amdshark
## Run the Stable diffusion fine tuning ## Run the Stable diffusion fine tuning

View File

@@ -24,7 +24,7 @@ from torch_mlir.dynamo import make_simple_dynamo_backend
import torch._dynamo as dynamo import torch._dynamo as dynamo
from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.proxy_tensor import make_fx
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
torch._dynamo.config.verbose = True torch._dynamo.config.verbose = True
@@ -476,8 +476,8 @@ class UnetModel(torch.nn.Module):
return self.unet.forward(x, y, z, return_dict=False)[0] return self.unet.forward(x, y, z, return_dict=False)[0]
shark_vae = VaeModel() amdshark_vae = VaeModel()
shark_unet = UnetModel() amdshark_unet = UnetModel()
####### Creating our training data ######## ####### Creating our training data ########
@@ -638,14 +638,14 @@ def refbackend_torchdynamo_backend(
mlir_module.operation.write_bytecode(bytecode_stream) mlir_module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue() bytecode = bytecode_stream.getvalue()
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_module=bytecode, device=args.device, mlir_dialect="tm_tensor" mlir_module=bytecode, device=args.device, mlir_dialect="tm_tensor"
) )
shark_module.compile() amdshark_module.compile()
def compiled_callable(*inputs): def compiled_callable(*inputs):
inputs = [x.numpy() for x in inputs] inputs = [x.numpy() for x in inputs]
result = shark_module("forward", inputs) result = amdshark_module("forward", inputs)
if was_unwrapped: if was_unwrapped:
result = [ result = [
result, result,
@@ -709,7 +709,7 @@ optimizer = torch.optim.AdamW(
# Training function # Training function
def train_func(batch_pixel_values, batch_input_ids): def train_func(batch_pixel_values, batch_input_ids):
# Convert images to latent space # Convert images to latent space
latents = shark_vae(batch_pixel_values).sample().detach() latents = amdshark_vae(batch_pixel_values).sample().detach()
latents = latents * 0.18215 latents = latents * 0.18215
# Sample noise that we'll add to the latents # Sample noise that we'll add to the latents
@@ -731,7 +731,7 @@ def train_func(batch_pixel_values, batch_input_ids):
encoder_hidden_states = text_encoder(batch_input_ids)[0] encoder_hidden_states = text_encoder(batch_input_ids)[0]
# Predict the noise residual # Predict the noise residual
noise_pred = shark_unet( noise_pred = amdshark_unet(
noisy_latents, noisy_latents,
timesteps, timesteps,
encoder_hidden_states, encoder_hidden_states,

View File

@@ -31,7 +31,7 @@ from torch_mlir_e2e_test.eager_backends.refbackend import (
NUMPY_TO_TORCH_DTYPE_DICT, NUMPY_TO_TORCH_DTYPE_DICT,
) )
from shark.iree_utils.compile_utils import ( from amdshark.iree_utils.compile_utils import (
get_iree_compiled_module, get_iree_compiled_module,
IREE_DEVICE_MAP, IREE_DEVICE_MAP,
) )

View File

@@ -157,7 +157,7 @@ def device_driver_info(device):
f"Required drivers for {device} not found. {device_driver_err_map[device]['debug']} " f"Required drivers for {device} not found. {device_driver_err_map[device]['debug']} "
f"Please install the required drivers{device_driver_err_map[device]['solution']} " f"Please install the required drivers{device_driver_err_map[device]['solution']} "
f"For further assistance please reach out to the community on discord [https://discord.com/invite/RUqY2h2s9u]" f"For further assistance please reach out to the community on discord [https://discord.com/invite/RUqY2h2s9u]"
f" and/or file a bug at https://github.com/nod-ai/SHARK-Studio/issues" f" and/or file a bug at https://github.com/nod-ai/AMDSHARK-Studio/issues"
) )
return err_msg return err_msg
else: else:

View File

@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from shark.iree_utils._common import run_cmd, iree_device_map from amdshark.iree_utils._common import run_cmd, iree_device_map
from shark.iree_utils.cpu_utils import get_cpu_count from amdshark.iree_utils.cpu_utils import get_cpu_count
import numpy as np import numpy as np
import os import os
import re import re
@@ -132,7 +132,7 @@ def run_benchmark_module(benchmark_cl):
benchmark_path = benchmark_cl[0] benchmark_path = benchmark_cl[0]
assert os.path.exists( assert os.path.exists(
benchmark_path benchmark_path
), "Cannot find iree_benchmark_module, Please contact SHARK maintainer on discord." ), "Cannot find iree_benchmark_module, Please contact AMDSHARK maintainer on discord."
bench_stdout, bench_stderr = run_cmd(" ".join(benchmark_cl)) bench_stdout, bench_stderr = run_cmd(" ".join(benchmark_cl))
try: try:
regex_split = re.compile("(\d+[.]*\d*)( *)([a-zA-Z]+)") regex_split = re.compile("(\d+[.]*\d*)( *)([a-zA-Z]+)")
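As a side note on the hunk above, the regular expression splits a latency token into its numeric value and unit before the stats are aggregated. The following minimal sketch shows what the groups capture; the sample string "3.42 ms" is only an assumption about the benchmark tool's per-measurement output format.

```python
# Standalone sketch of the latency parsing used above; "3.42 ms" is an assumed
# example of what iree_benchmark_module prints for a single measurement.
import re

regex_split = re.compile(r"(\d+[.]*\d*)( *)([a-zA-Z]+)")
match = regex_split.match("3.42 ms")
if match:
    value, _spaces, unit = match.groups()
    print(float(value), unit)  # -> 3.42 ms
```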

View File

@@ -20,7 +20,7 @@ from pathlib import Path
import iree.runtime as ireert import iree.runtime as ireert
import iree.compiler as ireec import iree.compiler as ireec
from shark.parser import shark_args from amdshark.parser import amdshark_args
from .trace import DetailLogger from .trace import DetailLogger
from ._common import iree_device_map, iree_target_map from ._common import iree_device_map, iree_target_map
@@ -34,7 +34,7 @@ def get_iree_device_args(device, extra_args=[]):
device, device_num = clean_device_info(device) device, device_num = clean_device_info(device)
if "cpu" in device: if "cpu" in device:
from shark.iree_utils.cpu_utils import get_iree_cpu_args from amdshark.iree_utils.cpu_utils import get_iree_cpu_args
u_kernel_flag = ["--iree-llvmcpu-enable-ukernels"] u_kernel_flag = ["--iree-llvmcpu-enable-ukernels"]
stack_size_flag = ["--iree-llvmcpu-stack-allocation-limit=256000"] stack_size_flag = ["--iree-llvmcpu-stack-allocation-limit=256000"]
@@ -45,25 +45,25 @@ def get_iree_device_args(device, extra_args=[]):
+ stack_size_flag + stack_size_flag
) )
if device == "cuda": if device == "cuda":
from shark.iree_utils.gpu_utils import get_iree_gpu_args from amdshark.iree_utils.gpu_utils import get_iree_gpu_args
return get_iree_gpu_args() return get_iree_gpu_args()
if device == "vulkan": if device == "vulkan":
from shark.iree_utils.vulkan_utils import get_iree_vulkan_args from amdshark.iree_utils.vulkan_utils import get_iree_vulkan_args
return get_iree_vulkan_args( return get_iree_vulkan_args(
device_num=device_num, extra_args=extra_args device_num=device_num, extra_args=extra_args
) )
if device == "metal": if device == "metal":
from shark.iree_utils.metal_utils import get_iree_metal_args from amdshark.iree_utils.metal_utils import get_iree_metal_args
return get_iree_metal_args(extra_args=extra_args) return get_iree_metal_args(extra_args=extra_args)
if device == "rocm": if device == "rocm":
from shark.iree_utils.gpu_utils import get_iree_rocm_args from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args) return get_iree_rocm_args(device_num=device_num, extra_args=extra_args)
if device == "hip": if device == "hip":
from shark.iree_utils.gpu_utils import get_iree_rocm_args from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
return get_iree_rocm_args(device_num=device_num, extra_args=extra_args, hip_driver=True) return get_iree_rocm_args(device_num=device_num, extra_args=extra_args, hip_driver=True)
return [] return []
@@ -137,19 +137,19 @@ def get_iree_common_args(debug=False):
# Args that are suitable only for certain models or groups of models. # Args that are suitable only for certain models or groups of models.
# shark_args are passed down from pytests to control which models compile with these flags, # amdshark_args are passed down from pytests to control which models compile with these flags,
# but they can also be set in shark/parser.py # but they can also be set in amdshark/parser.py
def get_model_specific_args(): def get_model_specific_args():
ms_args = [] ms_args = []
if shark_args.enable_conv_transform == True: if amdshark_args.enable_conv_transform == True:
ms_args += [ ms_args += [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-convert-conv-nchw-to-nhwc))" "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-convert-conv-nchw-to-nhwc))"
] ]
if shark_args.enable_img2col_transform == True: if amdshark_args.enable_img2col_transform == True:
ms_args += [ ms_args += [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-convert-conv2d-to-img2col))" "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-convert-conv2d-to-img2col))"
] ]
if shark_args.use_winograd == True: if amdshark_args.use_winograd == True:
ms_args += [ ms_args += [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-linalg-ext-convert-conv2d-to-winograd))" "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-linalg-ext-convert-conv2d-to-winograd))"
] ]
@@ -262,7 +262,7 @@ def compile_benchmark_dirs(bench_dir, device, dispatch_benchmarks):
benchmark_file.write(f"DISPATCH: {d_}\n") benchmark_file.write(f"DISPATCH: {d_}\n")
benchmark_file.write(str(iter_per_second) + "\n") benchmark_file.write(str(iter_per_second) + "\n")
benchmark_file.write( benchmark_file.write(
"SHARK BENCHMARK RESULT: " "AMDSHARK BENCHMARK RESULT: "
+ str(1 / (iter_per_second * 0.001)) + str(1 / (iter_per_second * 0.001))
+ "\n" + "\n"
) )
@@ -323,7 +323,7 @@ def compile_module_to_flatbuffer(
args += get_iree_common_args(debug=debug) args += get_iree_common_args(debug=debug)
args += get_model_specific_args() args += get_model_specific_args()
args += extra_args args += extra_args
args += shark_args.additional_compile_args args += amdshark_args.additional_compile_args
if frontend in ["tensorflow", "tf"]: if frontend in ["tensorflow", "tf"]:
input_type = "auto" input_type = "auto"
@@ -382,7 +382,7 @@ def get_iree_module(
] ]
haldevice = haldriver.create_device( haldevice = haldriver.create_device(
hal_device_id, hal_device_id,
allocators=shark_args.device_allocator, allocators=amdshark_args.device_allocator,
) )
config = ireert.Config(device=haldevice) config = ireert.Config(device=haldevice)
config.id = hal_device_id config.id = hal_device_id
@@ -433,7 +433,7 @@ def load_vmfb_using_mmap(
] ]
haldevice = haldriver.create_device( haldevice = haldriver.create_device(
hal_device_id, hal_device_id,
allocators=shark_args.device_allocator, allocators=amdshark_args.device_allocator,
) )
dl.log(f"ireert.create_device()") dl.log(f"ireert.create_device()")
config = ireert.Config(device=haldevice) config = ireert.Config(device=haldevice)
@@ -452,9 +452,9 @@ def load_vmfb_using_mmap(
# Now load vmfb. # Now load vmfb.
# There are two scenarios here: # There are two scenarios here:
# 1. We either have the vmfb already saved and therefore pass the path of it. # 1. We either have the vmfb already saved and therefore pass the path of it.
# (This would arise if we're invoking `load_module` from a SharkInference obj) # (This would arise if we're invoking `load_module` from an AMDSharkInference obj)
# OR 2. We are compiling on the fly, therefore we have the flatbuffer blob to play with. # OR 2. We are compiling on the fly, therefore we have the flatbuffer blob to play with.
# (This would arise if we're invoking `compile` from a SharkInference obj) # (This would arise if we're invoking `compile` from an AMDSharkInference obj)
temp_file_to_unlink = None temp_file_to_unlink = None
if isinstance(flatbuffer_blob_or_path, Path): if isinstance(flatbuffer_blob_or_path, Path):
flatbuffer_blob_or_path = flatbuffer_blob_or_path.__str__() flatbuffer_blob_or_path = flatbuffer_blob_or_path.__str__()
@@ -486,7 +486,7 @@ def load_vmfb_using_mmap(
) )
ctx = ireert.SystemContext(config=config, vm_modules=vm_modules) ctx = ireert.SystemContext(config=config, vm_modules=vm_modules)
dl.log(f"ireert.SystemContext created") dl.log(f"ireert.SystemContext created")
for flag in shark_args.additional_runtime_args: for flag in amdshark_args.additional_runtime_args:
ireert.flags.parse_flags(flag) ireert.flags.parse_flags(flag)
dl.log(f"module initialized") dl.log(f"module initialized")
mmaped_vmfb = getattr(ctx.modules, mmaped_vmfb.name) mmaped_vmfb = getattr(ctx.modules, mmaped_vmfb.name)
@@ -650,7 +650,7 @@ def get_results(
haldriver = ireert.get_driver("rocm") haldriver = ireert.get_driver("rocm")
haldevice = haldriver.create_device( haldevice = haldriver.create_device(
config.id, config.id,
allocators=shark_args.device_allocator, allocators=amdshark_args.device_allocator,
) )
for input_array in input: for input_array in input:
dl.log(f"Load to device: {input_array.shape}") dl.log(f"Load to device: {input_array.shape}")
@@ -688,7 +688,7 @@ def get_results(
def get_iree_runtime_config(device): def get_iree_runtime_config(device):
device = iree_device_map(device) device = iree_device_map(device)
haldriver = ireert.get_driver(device) haldriver = ireert.get_driver(device)
if "metal" in device and shark_args.device_allocator == "caching": if "metal" in device and amdshark_args.device_allocator == "caching":
print( print(
"[WARNING] metal devices can not have a `caching` allocator." "[WARNING] metal devices can not have a `caching` allocator."
"\nUsing default allocator `None`" "\nUsing default allocator `None`"
@@ -696,7 +696,7 @@ def get_iree_runtime_config(device):
haldevice = haldriver.create_device_by_uri( haldevice = haldriver.create_device_by_uri(
device, device,
# metal devices have a failure with caching allocators at the moment; blocking this until it gets fixed upstream. # metal devices have a failure with caching allocators at the moment; blocking this until it gets fixed upstream.
allocators=shark_args.device_allocator allocators=amdshark_args.device_allocator
if "metal" not in device if "metal" not in device
else None, else None,
) )

View File

@@ -17,7 +17,7 @@
import functools import functools
import subprocess import subprocess
import platform import platform
from shark.parser import shark_args from amdshark.parser import amdshark_args
def get_cpu_count(): def get_cpu_count():
@@ -44,7 +44,7 @@ def get_iree_cpu_args():
elif os_name == "Windows": elif os_name == "Windows":
target_triple = "x86_64-pc-windows-msvc" target_triple = "x86_64-pc-windows-msvc"
else: else:
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)" error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dAMDSHARK team please :)"
raise Exception(error_message) raise Exception(error_message)
print(f"Target triple found:{target_triple}") print(f"Target triple found:{target_triple}")
return [ return [
@@ -59,7 +59,7 @@ def get_iree_cpu_rt_args():
default = default if default <= 8 else default - 2 default = default if default <= 8 else default - 2
cpu_count = ( cpu_count = (
default default
if shark_args.task_topology_max_group_count is None if amdshark_args.task_topology_max_group_count is None
else shark_args.task_topology_max_group_count else amdshark_args.task_topology_max_group_count
) )
return [f"--task_topology_max_group_count={cpu_count}"] return [f"--task_topology_max_group_count={cpu_count}"]

View File

@@ -19,8 +19,8 @@ import iree.runtime as ireert
import ctypes import ctypes
import sys import sys
from subprocess import CalledProcessError from subprocess import CalledProcessError
from shark.parser import shark_args from amdshark.parser import amdshark_args
from shark.iree_utils._common import run_cmd from amdshark.iree_utils._common import run_cmd
# TODO: refactor to rocm and cuda utils # TODO: refactor to rocm and cuda utils
@@ -35,7 +35,7 @@ def get_iree_gpu_args():
if ( if (
sm_arch sm_arch
in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86", "sm_89"] in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86", "sm_89"]
) and (shark_args.enable_tf32 == True): ) and (amdshark_args.enable_tf32 == True):
return [ return [
f"--iree-hal-cuda-llvm-target-arch={sm_arch}", f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
] ]

View File

@@ -16,10 +16,10 @@
import functools import functools
from shark.iree_utils._common import run_cmd from amdshark.iree_utils._common import run_cmd
import iree.runtime as ireert import iree.runtime as ireert
from sys import platform from sys import platform
from shark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag from amdshark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
@functools.cache @functools.cache
@@ -81,7 +81,7 @@ def get_metal_triple_flag(device_name="", device_num=0, extra_args=[]):
return f"-iree-metal-target-platform={triple}" return f"-iree-metal-target-platform={triple}"
print( print(
"""Optimized kernel for your target device is not added yet. """Optimized kernel for your target device is not added yet.
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] Contact AMDSHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
or file an issue.""" or file an issue."""
) )
print(f"Target : {metal_device}") print(f"Target : {metal_device}")

View File

@@ -20,7 +20,7 @@ import time
def _enable_detail_trace() -> bool: def _enable_detail_trace() -> bool:
return os.getenv("SHARK_DETAIL_TRACE", "0") == "1" return os.getenv("AMDSHARK_DETAIL_TRACE", "0") == "1"
class DetailLogger: class DetailLogger:

View File

@@ -16,11 +16,11 @@
import functools import functools
from os import linesep from os import linesep
from shark.iree_utils._common import run_cmd from amdshark.iree_utils._common import run_cmd
import iree.runtime as ireert import iree.runtime as ireert
from sys import platform from sys import platform
from shark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag from amdshark.iree_utils.vulkan_target_env_utils import get_vulkan_target_env_flag
from shark.parser import shark_args from amdshark.parser import amdshark_args
@functools.cache @functools.cache
@@ -174,7 +174,7 @@ def get_vulkan_triple_flag(device_name="", device_num=0, extra_args=[]):
return f"--iree-vulkan-target-triple={triple}" return f"--iree-vulkan-target-triple={triple}"
print( print(
"""Optimized kernel for your target device is not added yet. """Optimized kernel for your target device is not added yet.
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] Contact AMDSHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
or file an issue.""" or file an issue."""
) )
print(f"Target : {vulkan_device}") print(f"Target : {vulkan_device}")
@@ -208,9 +208,9 @@ def get_iree_vulkan_args(device_num=0, extra_args=[]):
@functools.cache @functools.cache
def get_iree_vulkan_runtime_flags(): def get_iree_vulkan_runtime_flags():
vulkan_runtime_flags = [ vulkan_runtime_flags = [
f"--vulkan_validation_layers={'true' if shark_args.vulkan_debug_utils else 'false'}", f"--vulkan_validation_layers={'true' if amdshark_args.vulkan_debug_utils else 'false'}",
f"--vulkan_debug_verbosity={'4' if shark_args.vulkan_debug_utils else '0'}" f"--vulkan_debug_verbosity={'4' if amdshark_args.vulkan_debug_utils else '0'}"
f"--vulkan-robust-buffer-access={'true' if shark_args.vulkan_debug_utils else 'false'}", f"--vulkan-robust-buffer-access={'true' if amdshark_args.vulkan_debug_utils else 'false'}",
] ]
return vulkan_runtime_flags return vulkan_runtime_flags

View File

@@ -18,7 +18,7 @@ This function takes the model mlir file and the tuned config file as input,
and outputs a new mlir file with lowering configs annotated on certain ops. and outputs a new mlir file with lowering configs annotated on certain ops.
There are two ways to utilize the function: There are two ways to utilize the function:
1. Call model_annotation function within another python script 1. Call model_annotation function within another python script
from shark.model_annotation import model_annotation from amdshark.model_annotation import model_annotation
with create_context() as ctx: with create_context() as ctx:
module = model_annotation(ctx, input_contents=..., config_path=..., search_op=...) module = model_annotation(ctx, input_contents=..., config_path=..., search_op=...)
2. Run model_annotation.py directly 2. Run model_annotation.py directly
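A slightly fleshed-out sketch of the first path might look as follows. This is hypothetical: the file names are placeholders, and `create_context` is assumed to be importable from the same module as shown in the docstring above rather than verified against the current API.

```python
# Hypothetical usage sketch based on the docstring above; file names and the
# origin of create_context are assumptions, not verified against the API.
from amdshark.model_annotation import create_context, model_annotation

with create_context() as ctx:
    with open("model.mlir") as f:
        module = model_annotation(
            ctx,
            input_contents=f.read(),          # MLIR text of the model
            config_path="tuned_config.json",  # tuned lowering configs
            search_op="matmul",               # which ops to annotate
        )
    # Write the annotated module back out for the compiler to pick up.
    with open("model_annotated.mlir", "w") as out:
        out.write(str(module))
```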

View File

@@ -29,13 +29,13 @@ class SplitStrToListAction(argparse.Action):
setattr(namespace, self.dest, shlex.split(" ")) setattr(namespace, self.dest, shlex.split(" "))
parser = argparse.ArgumentParser(description="SHARK runner.") parser = argparse.ArgumentParser(description="AMDSHARK runner.")
parser.add_argument( parser.add_argument(
"--device", "--device",
type=str, type=str,
default="cpu", default="cpu",
help="Device on which shark_runner runs. options are cpu, cuda, and vulkan", help="Device on which amdshark_runner runs. options are cpu, cuda, and vulkan",
) )
parser.add_argument( parser.add_argument(
"--additional_compile_args", "--additional_compile_args",
@@ -82,26 +82,26 @@ parser.add_argument(
help="When enabled, pytest bench results will include ONNX benchmark results.", help="When enabled, pytest bench results will include ONNX benchmark results.",
) )
parser.add_argument( parser.add_argument(
"--shark_prefix", "--amdshark_prefix",
default=None, default=None,
help="gs://shark_tank/<this_flag>/model_directories", help="gs://amdshark_tank/<this_flag>/model_directories",
) )
parser.add_argument( parser.add_argument(
"--update_tank", "--update_tank",
default=True, default=True,
action="store_true", action="store_true",
help="When enabled, SHARK downloader will update local shark_tank if local hash is different from latest upstream hash.", help="When enabled, AMDSHARK downloader will update local amdshark_tank if local hash is different from latest upstream hash.",
) )
parser.add_argument( parser.add_argument(
"--force_update_tank", "--force_update_tank",
default=False, default=False,
action="store_true", action="store_true",
help="When enabled, SHARK downloader will force an update of local shark_tank artifacts for each request.", help="When enabled, AMDSHARK downloader will force an update of local amdshark_tank artifacts for each request.",
) )
parser.add_argument( parser.add_argument(
"--local_tank_cache", "--local_tank_cache",
default=None, default=None,
help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.", help="Specify where to save downloaded amdshark_tank artifacts. If this is not set, the default is ~/.local/amdshark_tank/.",
) )
parser.add_argument( parser.add_argument(
@@ -167,4 +167,4 @@ parser.add_argument(
help="Flag for disabling vulkan validation layers when benchmarking.", help="Flag for disabling vulkan validation layers when benchmarking.",
) )
shark_args, unknown = parser.parse_known_args() amdshark_args, unknown = parser.parse_known_args()
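Because the parser uses `parse_known_args`, any module can import `amdshark_args` and read these flags without owning the command line. A small sketch of that consumption pattern, limited to flags visible in this hunk, is below.

```python
# Minimal sketch: downstream code imports the parsed namespace directly,
# mirroring usages such as amdshark_args.additional_compile_args elsewhere
# in this diff. Only flags defined in the hunk above are referenced.
from amdshark.parser import amdshark_args

print("device:", amdshark_args.device)
print("local tank cache:", amdshark_args.local_tank_cache)
if amdshark_args.update_tank:
    print("local amdshark_tank artifacts will be refreshed when stale")
```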

View File

@@ -13,25 +13,25 @@
# limitations under the License. # limitations under the License.
from iree.runtime import query_available_drivers, get_driver from iree.runtime import query_available_drivers, get_driver
from shark.shark_downloader import download_model from amdshark.amdshark_downloader import download_model
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import numpy as np import numpy as np
import argparse import argparse
from shark.iree_utils._common import _IREE_DEVICE_MAP from amdshark.iree_utils._common import _IREE_DEVICE_MAP
import multiprocessing import multiprocessing
from shark.shark_runner import supported_dialects from amdshark.amdshark_runner import supported_dialects
import logging import logging
from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ProcessPoolExecutor
from concurrent.futures.thread import ThreadPoolExecutor from concurrent.futures.thread import ThreadPoolExecutor
import time import time
import numpy as np import numpy as np
IREE_TO_SHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()} IREE_TO_AMDSHARK_DRIVER_MAP = {v: k for k, v in _IREE_DEVICE_MAP.items()}
def stress_test_compiled_model( def stress_test_compiled_model(
shark_module_path: str, amdshark_module_path: str,
function_name: str, function_name: str,
device: str, device: str,
inputs: List[np.ndarray], inputs: List[np.ndarray],
@@ -50,14 +50,14 @@ def stress_test_compiled_model(
# We are using execution in a separate thread in order to be able # We are using execution in a separate thread in order to be able
# to wait with a timeout on the inference operation. # to wait with a timeout on the inference operation.
module_executor = ThreadPoolExecutor(1) module_executor = ThreadPoolExecutor(1)
shark_module = module_executor.submit( amdshark_module = module_executor.submit(
SharkInference, AMDSharkInference,
mlir_module=bytes(), mlir_module=bytes(),
function_name=function_name, function_name=function_name,
device=device, device=device,
).result() ).result()
module_executor.submit( module_executor.submit(
shark_module.load_module, shark_module_path amdshark_module.load_module, amdshark_module_path
).result() ).result()
input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs] input_batches = [np.repeat(arr, batch_size, axis=0) for arr in inputs]
golden_output_batches = np.repeat(golden_out, batch_size, axis=0) golden_output_batches = np.repeat(golden_out, batch_size, axis=0)
@@ -67,7 +67,7 @@ def stress_test_compiled_model(
first_iteration_output = None first_iteration_output = None
for i in range(max_iterations): for i in range(max_iterations):
output = module_executor.submit( output = module_executor.submit(
shark_module.forward, input_batches amdshark_module.forward, input_batches
).result(inference_timeout_seconds) ).result(inference_timeout_seconds)
if first_iteration_output is None: if first_iteration_output is None:
np.testing.assert_array_almost_equal_nulp( np.testing.assert_array_almost_equal_nulp(
@@ -100,9 +100,9 @@ def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
devices = [] devices = []
if device_types is None: if device_types is None:
device_types = [ device_types = [
IREE_TO_SHARK_DRIVER_MAP[name] IREE_TO_AMDSHARK_DRIVER_MAP[name]
for name in query_available_drivers() for name in query_available_drivers()
if name in IREE_TO_SHARK_DRIVER_MAP if name in IREE_TO_AMDSHARK_DRIVER_MAP
] ]
for device_type in device_types: for device_type in device_types:
driver = get_driver(_IREE_DEVICE_MAP[device_type]) driver = get_driver(_IREE_DEVICE_MAP[device_type])
@@ -121,19 +121,19 @@ def query_devices(device_types: Optional[List[str]] = None) -> List[str]:
def compile_stress_test_module( def compile_stress_test_module(
device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str device_types: List[str], mlir_model: str, func_name: str, mlir_dialect: str
) -> List[str]: ) -> List[str]:
shark_module_paths = [] amdshark_module_paths = []
for device_type in device_types: for device_type in device_types:
logging.info( logging.info(
f"Compiling stress test model for device type {device_type}." f"Compiling stress test model for device type {device_type}."
) )
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_model, mlir_model,
func_name, func_name,
mlir_dialect=mlir_dialect, mlir_dialect=mlir_dialect,
device=device_type, device=device_type,
) )
shark_module_paths.append(shark_module.save_module()) amdshark_module_paths.append(amdshark_module.save_module())
return shark_module_paths return amdshark_module_paths
def stress_test( def stress_test(
@@ -169,21 +169,21 @@ def stress_test(
# This needs to run in a subprocess because when compiling for CUDA, # This needs to run in a subprocess because when compiling for CUDA,
# some state gets initialized and cuInit will fail in a forked process # some state gets initialized and cuInit will fail in a forked process
# later. It should be just compiling, but alas. # later. It should be just compiling, but alas.
shark_module_paths_set = executor.submit( amdshark_module_paths_set = executor.submit(
compile_stress_test_module, compile_stress_test_module,
device_types_set, device_types_set,
mlir_model, mlir_model,
func_name, func_name,
mlir_dialect, mlir_dialect,
).result() ).result()
device_type_shark_module_path_map = { device_type_amdshark_module_path_map = {
device_type: module_path device_type: module_path
for device_type, module_path in zip( for device_type, module_path in zip(
device_types_set, shark_module_paths_set device_types_set, amdshark_module_paths_set
) )
} }
device_name_shark_module_path_map = { device_name_amdshark_module_path_map = {
device_name: device_type_shark_module_path_map[ device_name: device_type_amdshark_module_path_map[
get_device_type(device_name) get_device_type(device_name)
] ]
for device_name in device_names for device_name in device_names
@@ -193,7 +193,7 @@ def stress_test(
# in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module` # in IREE and a subsequent call to `iree.runtime.SystemContext.add_vm_module`
# in a forked process will hang. # in a forked process will hang.
with multiprocessing.Pool( with multiprocessing.Pool(
len(device_name_shark_module_path_map) * oversubscription_factor len(device_name_amdshark_module_path_map) * oversubscription_factor
) as process_pool: ) as process_pool:
process_pool.starmap( process_pool.starmap(
stress_test_compiled_model, stress_test_compiled_model,
@@ -212,7 +212,7 @@ def stress_test(
stress_test_index, stress_test_index,
) )
for stress_test_index, (device_name, module_path) in enumerate( for stress_test_index, (device_name, module_path) in enumerate(
list(device_name_shark_module_path_map.items()) list(device_name_amdshark_module_path_map.items())
* oversubscription_factor * oversubscription_factor
) )
], ],
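The threading and multiprocessing comments in this file describe two workarounds: inference runs on a worker thread so it can be awaited with a timeout, and compilation runs in a subprocess so CUDA initialization does not poison later forks. A stripped-down, self-contained sketch of the timeout pattern follows; the sleeping function is only a stand-in for a compiled module call.

```python
# Self-contained sketch of the wait-with-timeout pattern described above.
# slow_forward is a placeholder for amdshark_module.forward(...).
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError


def slow_forward(x):
    time.sleep(0.1)  # stand-in for a potentially hanging inference call
    return x * 2


executor = ThreadPoolExecutor(max_workers=1)
future = executor.submit(slow_forward, 21)
try:
    print("result:", future.result(timeout=5.0))  # raises TimeoutError on hang
except TimeoutError:
    print("inference timed out")
finally:
    executor.shutdown(wait=False)
```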

View File

@@ -1,10 +1,10 @@
# RUN: %PYTHON %s # RUN: %PYTHON %s
import numpy as np import numpy as np
from shark.shark_importer import SharkImporter from amdshark.amdshark_importer import AMDSharkImporter
import pytest import pytest
from shark.parser import shark_args from amdshark.parser import amdshark_args
from shark.shark_inference import SharkInference from amdshark.amdshark_inference import AMDSharkInference
from shark.tflite_utils import TFLitePreprocessor from amdshark.tflite_utils import TFLitePreprocessor
import sys import sys
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite" # model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
@@ -66,32 +66,32 @@ class AlbertTfliteModuleTester:
self.save_vmfb = save_vmfb self.save_vmfb = save_vmfb
def create_and_check_module(self): def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir amdshark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb amdshark_args.save_vmfb = self.save_vmfb
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base") tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
raw_model_file_path = tflite_preprocessor.get_raw_model_file() raw_model_file_path = tflite_preprocessor.get_raw_model_file()
inputs = tflite_preprocessor.get_inputs() inputs = tflite_preprocessor.get_inputs()
tflite_interpreter = tflite_preprocessor.get_interpreter() tflite_interpreter = tflite_preprocessor.get_interpreter()
my_shark_importer = SharkImporter( my_amdshark_importer = AMDSharkImporter(
module=tflite_interpreter, module=tflite_interpreter,
inputs=inputs, inputs=inputs,
frontend="tflite", frontend="tflite",
raw_model_file=raw_model_file_path, raw_model_file=raw_model_file_path,
) )
mlir_model, func_name = my_shark_importer.import_mlir() mlir_model, func_name = my_amdshark_importer.import_mlir()
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_module=mlir_model, mlir_module=mlir_model,
function_name=func_name, function_name=func_name,
device=self.device, device=self.device,
mlir_dialect="tflite", mlir_dialect="tflite",
) )
# Case 1: Use shark_importer's default generated inputs # Case 1: Use amdshark_importer's default generated inputs
shark_module.compile() amdshark_module.compile()
mlir_results = shark_module.forward(inputs) mlir_results = amdshark_module.forward(inputs)
## post process results for compare ## post process results for compare
input_details, output_details = tflite_preprocessor.get_model_details() input_details, output_details = tflite_preprocessor.get_model_details()
mlir_results = list(mlir_results) mlir_results = list(mlir_results)
@@ -105,14 +105,14 @@ class AlbertTfliteModuleTester:
input_details, output_details = tflite_preprocessor.get_model_details() input_details, output_details = tflite_preprocessor.get_model_details()
inputs = generate_inputs(input_details) # new inputs inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference( amdshark_module = AMDSharkInference(
mlir_module=mlir_model, mlir_module=mlir_model,
function_name=func_name, function_name=func_name,
device=self.device, device=self.device,
mlir_dialect="tflite", mlir_dialect="tflite",
) )
shark_module.compile() amdshark_module.compile()
mlir_results = shark_module.forward(inputs) mlir_results = amdshark_module.forward(inputs)
## post process results for compare ## post process results for compare
tflite_results = tflite_preprocessor.get_golden_output() tflite_results = tflite_preprocessor.get_golden_output()
compare_results(mlir_results, tflite_results, output_details) compare_results(mlir_results, tflite_results, output_details)
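For reference, the renamed TFLite import-and-compile flow exercised by this test reads roughly as the sketch below. Class, method, and keyword names are taken from the hunks above; the device string is an illustrative placeholder.

    from amdshark.tflite_utils import TFLitePreprocessor
    from amdshark.amdshark_importer import AMDSharkImporter
    from amdshark.amdshark_inference import AMDSharkInference

    # Fetch the TFLite model and its default inputs.
    tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
    inputs = tflite_preprocessor.get_inputs()

    # Import the TFLite interpreter into MLIR via the renamed importer.
    importer = AMDSharkImporter(
        module=tflite_preprocessor.get_interpreter(),
        inputs=inputs,
        frontend="tflite",
        raw_model_file=tflite_preprocessor.get_raw_model_file(),
    )
    mlir_model, func_name = importer.import_mlir()

    # Compile and run through the renamed inference wrapper.
    amdshark_module = AMDSharkInference(
        mlir_module=mlir_model,
        function_name=func_name,
        device="cpu",  # placeholder device
        mlir_dialect="tflite",
    )
    amdshark_module.compile()
    mlir_results = amdshark_module.forward(inputs)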


@@ -22,7 +22,7 @@ def test_stress_test():
subprocess.check_call( subprocess.check_call(
[ [
sys.executable, sys.executable,
importlib.util.find_spec("shark.stress_test").origin, importlib.util.find_spec("amdshark.stress_test").origin,
"--model=squeezenet1_0", "--model=squeezenet1_0",
"--devices", "--devices",
"cpu", "cpu",


@@ -96,7 +96,7 @@ class TFLitePreprocessor:
print("Setting up for TMP_WORK_DIR") print("Setting up for TMP_WORK_DIR")
self.workdir = os.path.join( self.workdir = os.path.join(
os.path.dirname(__file__), "./../gen_shark_tank" os.path.dirname(__file__), "./../gen_amdshark_tank"
) )
os.makedirs(self.workdir, exist_ok=True) os.makedirs(self.workdir, exist_ok=True)
print(f"TMP_WORK_DIR = {self.workdir}") print(f"TMP_WORK_DIR = {self.workdir}")


@@ -28,7 +28,7 @@ from torch_mlir.eager_mode.torch_mlir_tensor import (
no_dispatch, no_dispatch,
) )
from torch_mlir.eager_mode import torch_mlir_tensor from torch_mlir.eager_mode import torch_mlir_tensor
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
backend = EagerModeIREELinalgOnTensorsBackend("cpu") backend = EagerModeIREELinalgOnTensorsBackend("cpu")


@@ -16,7 +16,7 @@ from torch_mlir.ir import StringAttr
import torch_mlir import torch_mlir
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
import tempfile import tempfile
from shark.parser import shark_args from amdshark.parser import amdshark_args
import io import io
mlir_type_mapping_dict = { mlir_type_mapping_dict = {


@@ -1,5 +1,5 @@
# -*- mode: python ; coding: utf-8 -*- # -*- mode: python ; coding: utf-8 -*-
from apps.shark_studio.studio_imports import pathex, datas, hiddenimports from apps.amdshark_studio.studio_imports import pathex, datas, hiddenimports
binaries = [] binaries = []
@@ -32,7 +32,7 @@ exe = EXE(
a.zipfiles, a.zipfiles,
a.datas, a.datas,
[], [],
name='nodai_shark_studio', name='nodai_amdshark_studio',
debug=False, debug=False,
bootloader_ignore_signals=False, bootloader_ignore_signals=False,
strip=False, strip=False,


@@ -2,7 +2,7 @@
import os import os
import PIL import PIL
import numpy as np import numpy as np
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
get_generated_imgs_path, get_generated_imgs_path,
) )
from datetime import datetime from datetime import datetime


@@ -6,13 +6,13 @@ import warnings
import json import json
from threading import Thread from threading import Thread
from apps.shark_studio.modules.timer import startup_timer from apps.amdshark_studio.modules.timer import startup_timer
from apps.shark_studio.web.utils.tmp_configs import ( from apps.amdshark_studio.web.utils.tmp_configs import (
config_tmp, config_tmp,
clear_tmp_mlir, clear_tmp_mlir,
clear_tmp_imgs, clear_tmp_imgs,
shark_tmp, amdshark_tmp,
) )
@@ -30,12 +30,12 @@ def imports():
startup_timer.record("import gradio") startup_timer.record("import gradio")
import apps.shark_studio.web.utils.globals as global_obj import apps.amdshark_studio.web.utils.globals as global_obj
global_obj._init() global_obj._init()
startup_timer.record("initialize globals") startup_timer.record("initialize globals")
from apps.shark_studio.modules import ( from apps.amdshark_studio.modules import (
img_processing, img_processing,
) # noqa: F401 ) # noqa: F401
@@ -44,7 +44,7 @@ def imports():
def initialize(): def initialize():
configure_sigint_handler() configure_sigint_handler()
# Setup to use shark_tmp for gradio's temporary image files and clear any # Setup to use amdshark_tmp for gradio's temporary image files and clear any
# existing temporary images there if they exist. Then we can import gradio. # existing temporary images there if they exist. Then we can import gradio.
# It has to be in this order or gradio ignores what we've set up. # It has to be in this order or gradio ignores what we've set up.
@@ -52,7 +52,7 @@ def initialize():
# clear_tmp_mlir() # clear_tmp_mlir()
clear_tmp_imgs() clear_tmp_imgs()
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
create_model_folders, create_model_folders,
) )
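The ordering constraint called out in the comments above can be summarized with a small sketch: the amdshark_tmp setup and image cleanup have to run before gradio is imported, otherwise gradio ignores the configured temporary directory. Names are taken from the hunks above; calling config_tmp() at exactly this point, and with no arguments, is an assumption.

    from apps.amdshark_studio.web.utils.tmp_configs import config_tmp, clear_tmp_imgs

    def initialize():
        # Assumed: config_tmp() routes temporary files to amdshark_tmp.
        config_tmp()
        # Shown above: stale generated images are cleared before gradio loads.
        clear_tmp_imgs()
        # Only now is gradio imported, so it picks up the tmp setup.
        import gradio  # noqa: F401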
@@ -83,7 +83,7 @@ def dumpstacks():
code.append(f"""File: "{filename}", line {lineno}, in {name}""") code.append(f"""File: "{filename}", line {lineno}, in {name}""")
if line: if line:
code.append(" " + line.strip()) code.append(" " + line.strip())
with open(os.path.join(shark_tmp, "stack_dump.log"), "w") as f: with open(os.path.join(amdshark_tmp, "stack_dump.log"), "w") as f:
f.write("\n".join(code)) f.write("\n".join(code))
@@ -100,7 +100,7 @@ def setup_middleware(app):
def configure_cors_middleware(app): def configure_cors_middleware(app):
from starlette.middleware.cors import CORSMiddleware from starlette.middleware.cors import CORSMiddleware
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
cors_options = { cors_options = {
"allow_methods": ["*"], "allow_methods": ["*"],


@@ -2,13 +2,13 @@ from turbine_models.custom_models import stateless_llama
from turbine_models.model_runner import vmfbRunner from turbine_models.model_runner import vmfbRunner
from turbine_models.gen_external_params.gen_external_params import gen_external_params from turbine_models.gen_external_params.gen_external_params import gen_external_params
import time import time
from shark.iree_utils.compile_utils import compile_module_to_flatbuffer from amdshark.iree_utils.compile_utils import compile_module_to_flatbuffer
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
get_resource_path, get_resource_path,
get_checkpoints_path, get_checkpoints_path,
) )
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.api.utils import parse_device from apps.amdshark_studio.api.utils import parse_device
from urllib.request import urlopen from urllib.request import urlopen
import iree.runtime as ireert import iree.runtime as ireert
from itertools import chain from itertools import chain
@@ -366,7 +366,7 @@ def get_mfma_spec_path(target_chip, save_dir):
def llm_chat_api(InputData: dict): def llm_chat_api(InputData: dict):
from datetime import datetime as dt from datetime import datetime as dt
import apps.shark_studio.web.utils.globals as global_obj import apps.amdshark_studio.web.utils.globals as global_obj
print(f"Input keys : {InputData.keys()}") print(f"Input keys : {InputData.keys()}")


@@ -12,26 +12,26 @@ from tqdm.auto import tqdm
from pathlib import Path from pathlib import Path
from random import randint from random import randint
from turbine_models.custom_models.sd_inference.sd_pipeline import SharkSDPipeline from turbine_models.custom_models.sd_inference.sd_pipeline import AMDSharkSDPipeline
from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import ( from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
SharkSDXLPipeline, AMDSharkSDXLPipeline,
) )
from apps.shark_studio.api.controlnet import control_adapter_map from apps.amdshark_studio.api.controlnet import control_adapter_map
from apps.shark_studio.api.utils import parse_device from apps.amdshark_studio.api.utils import parse_device
from apps.shark_studio.web.utils.state import status_label from apps.amdshark_studio.web.utils.state import status_label
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
safe_name, safe_name,
get_resource_path, get_resource_path,
get_checkpoints_path, get_checkpoints_path,
) )
from apps.shark_studio.modules.img_processing import ( from apps.amdshark_studio.modules.img_processing import (
save_output_img, save_output_img,
) )
from apps.shark_studio.modules.ckpt_processing import ( from apps.amdshark_studio.modules.ckpt_processing import (
preprocessCKPT, preprocessCKPT,
save_irpa, save_irpa,
) )
@@ -114,10 +114,10 @@ class StableDiffusion:
self.turbine_pipe = custom_module.StudioPipeline self.turbine_pipe = custom_module.StudioPipeline
self.model_map = custom_module.MODEL_MAP self.model_map = custom_module.MODEL_MAP
elif self.is_sdxl: elif self.is_sdxl:
self.turbine_pipe = SharkSDXLPipeline self.turbine_pipe = AMDSharkSDXLPipeline
self.model_map = EMPTY_SDXL_MAP self.model_map = EMPTY_SDXL_MAP
else: else:
self.turbine_pipe = SharkSDPipeline self.turbine_pipe = AMDSharkSDPipeline
self.model_map = EMPTY_SD_MAP self.model_map = EMPTY_SD_MAP
max_length = 64 max_length = 64
target_backend, self.rt_device, triple = parse_device(device, target_triple) target_backend, self.rt_device, triple = parse_device(device, target_triple)
@@ -273,7 +273,7 @@ class StableDiffusion:
return img return img
def shark_sd_fn_dict_input( def amdshark_sd_fn_dict_input(
sd_kwargs: dict, sd_kwargs: dict,
): ):
print("\n[LOG] Submitting Request...") print("\n[LOG] Submitting Request...")
@@ -312,11 +312,11 @@ def shark_sd_fn_dict_input(
) )
return None, "" return None, ""
generated_imgs = yield from shark_sd_fn(**sd_kwargs) generated_imgs = yield from amdshark_sd_fn(**sd_kwargs)
return generated_imgs return generated_imgs
def shark_sd_fn( def amdshark_sd_fn(
prompt, prompt,
negative_prompt, negative_prompt,
sd_init_image: list, sd_init_image: list,
@@ -346,8 +346,8 @@ def shark_sd_fn(
sd_init_image = [sd_init_image] sd_init_image = [sd_init_image]
is_img2img = True if sd_init_image[0] is not None else False is_img2img = True if sd_init_image[0] is not None else False
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj import apps.amdshark_studio.web.utils.globals as global_obj
adapters = {} adapters = {}
is_controlled = False is_controlled = False
@@ -466,7 +466,7 @@ def shark_sd_fn(
def unload_sd(): def unload_sd():
print("Unloading models.") print("Unloading models.")
import apps.shark_studio.web.utils.globals as global_obj import apps.amdshark_studio.web.utils.globals as global_obj
global_obj.clear_cache() global_obj.clear_cache()
gc.collect() gc.collect()
@@ -489,8 +489,8 @@ def safe_name(name):
if __name__ == "__main__": if __name__ == "__main__":
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj import apps.amdshark_studio.web.utils.globals as global_obj
global_obj._init() global_obj._init()
@@ -501,5 +501,5 @@ if __name__ == "__main__":
for arg in vars(cmd_opts): for arg in vars(cmd_opts):
if arg in sd_kwargs: if arg in sd_kwargs:
sd_kwargs[arg] = getattr(cmd_opts, arg) sd_kwargs[arg] = getattr(cmd_opts, arg)
for i in shark_sd_fn_dict_input(sd_kwargs): for i in amdshark_sd_fn_dict_input(sd_kwargs):
print(i) print(i)


@@ -8,11 +8,11 @@ from random import (
) )
from pathlib import Path from pathlib import Path
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from cpuinfo import get_cpu_info from cpuinfo import get_cpu_info
# TODO: migrate these utils to studio # TODO: migrate these utils to studio
from shark.iree_utils.vulkan_utils import ( from amdshark.iree_utils.vulkan_utils import (
set_iree_vulkan_runtime_flags, set_iree_vulkan_runtime_flags,
get_vulkan_target_triple, get_vulkan_target_triple,
get_iree_vulkan_runtime_flags, get_iree_vulkan_runtime_flags,
@@ -21,7 +21,7 @@ from shark.iree_utils.vulkan_utils import (
def get_available_devices(): def get_available_devices():
def get_devices_by_name(driver_name): def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map from amdshark.iree_utils._common import iree_device_map
device_list = [] device_list = []
try: try:
@@ -59,7 +59,7 @@ def get_available_devices():
cpu_device = get_devices_by_name("cpu-task") cpu_device = get_devices_by_name("cpu-task")
available_devices.extend(cpu_device) available_devices.extend(cpu_device)
from shark.iree_utils.vulkan_utils import ( from amdshark.iree_utils.vulkan_utils import (
get_all_vulkan_devices, get_all_vulkan_devices,
) )
@@ -116,7 +116,7 @@ def set_init_device_flags():
elif "metal" in cmd_opts.device: elif "metal" in cmd_opts.device:
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device) device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
if not cmd_opts.iree_metal_target_platform: if not cmd_opts.iree_metal_target_platform:
from shark.iree_utils.metal_utils import get_metal_target_triple from amdshark.iree_utils.metal_utils import get_metal_target_triple
triple = get_metal_target_triple(device_name) triple = get_metal_target_triple(device_name)
if triple is not None: if triple is not None:
@@ -146,7 +146,7 @@ def set_iree_runtime_flags():
def parse_device(device_str, target_override=""): def parse_device(device_str, target_override=""):
from shark.iree_utils.compile_utils import ( from amdshark.iree_utils.compile_utils import (
clean_device_info, clean_device_info,
get_iree_target_triple, get_iree_target_triple,
iree_target_map, iree_target_map,
@@ -192,7 +192,7 @@ def get_rocm_target_chip(device_str):
if key in device_str: if key in device_str:
return rocm_chip_map[key] return rocm_chip_map[key]
raise AssertionError( raise AssertionError(
f"Device {device_str} not recognized. Please file an issue at https://github.com/nod-ai/SHARK-Studio/issues." f"Device {device_str} not recognized. Please file an issue at https://github.com/nod-ai/AMDSHARK-Studio/issues."
) )
@@ -225,7 +225,7 @@ def get_device_mapping(driver, key_combination=3):
dict: map to possible device names user can input mapped to desired dict: map to possible device names user can input mapped to desired
combination of name/path. combination of name/path.
""" """
from shark.iree_utils._common import iree_device_map from amdshark.iree_utils._common import iree_device_map
driver = iree_device_map(driver) driver = iree_device_map(driver)
device_list = get_all_devices(driver) device_list = get_all_devices(driver)
@@ -256,7 +256,7 @@ def get_opt_flags(model, precision="fp16"):
f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}" f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
) )
if "rocm" in cmd_opts.device: if "rocm" in cmd_opts.device:
from shark.iree_utils.gpu_utils import get_iree_rocm_args from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
rocm_args = get_iree_rocm_args() rocm_args = get_iree_rocm_args()
iree_flags.extend(rocm_args) iree_flags.extend(rocm_args)
@@ -301,7 +301,7 @@ def map_device_to_name_path(device, key_combination=3):
return device_mapping return device_mapping
def get_devices_by_name(driver_name): def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map from amdshark.iree_utils._common import iree_device_map
device_list = [] device_list = []
try: try:
@@ -332,7 +332,7 @@ def map_device_to_name_path(device, key_combination=3):
set_iree_runtime_flags() set_iree_runtime_flags()
available_devices = [] available_devices = []
from shark.iree_utils.vulkan_utils import ( from amdshark.iree_utils.vulkan_utils import (
get_all_vulkan_devices, get_all_vulkan_devices,
) )
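As a usage sketch of the renamed device helpers above, parse_device() returns a (target backend, runtime device, target triple) triple, which is how the StableDiffusion API earlier in this diff consumes it. The device string below is an illustrative placeholder.

    from apps.amdshark_studio.api.utils import parse_device

    # "rocm" is a placeholder; any device string handled by the helpers above works.
    target_backend, rt_device, triple = parse_device("rocm", target_override="")
    print(target_backend, rt_device, triple)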


@@ -12,7 +12,7 @@ from pathlib import Path
from tqdm import tqdm from tqdm import tqdm
from omegaconf import OmegaConf from omegaconf import OmegaConf
from diffusers import StableDiffusionPipeline from diffusers import StableDiffusionPipeline
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import ( from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
download_from_original_stable_diffusion_ckpt, download_from_original_stable_diffusion_ckpt,
create_vae_diffusers_config, create_vae_diffusers_config,


@@ -5,7 +5,7 @@ import json
import safetensors import safetensors
from dataclasses import dataclass from dataclasses import dataclass
from safetensors.torch import load_file from safetensors.torch import load_file
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
get_checkpoint_pathfile, get_checkpoint_pathfile,
get_path_stem, get_path_stem,
) )


@@ -25,11 +25,11 @@ resampler_list = resamplers.keys()
# save output images and the inputs corresponding to it. # save output images and the inputs corresponding to it.
def save_output_img(output_img, img_seed, extra_info=None): def save_output_img(output_img, img_seed, extra_info=None):
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
get_generated_imgs_path, get_generated_imgs_path,
get_generated_imgs_todays_subdir, get_generated_imgs_todays_subdir,
) )
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
if extra_info is None: if extra_info is None:
extra_info = {} extra_info = {}
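A minimal illustrative call to the renamed image-saving helper above; the image, seed, and metadata values are placeholders, and the extra_info keys are an assumption.

    from PIL import Image
    from apps.amdshark_studio.modules.img_processing import save_output_img

    # Placeholder image and seed purely for illustration.
    img = Image.new("RGB", (64, 64))
    save_output_img(img, img_seed=0, extra_info={"prompt": "example"})  # extra_info keys assumed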


@@ -30,8 +30,8 @@ def logger_test(x):
def read_sd_logs(): def read_sd_logs():
sys.stdout.flush() sys.stdout.flush()
with open("shark_tmp/sd.log", "r") as f: with open("amdshark_tmp/sd.log", "r") as f:
return f.read() return f.read()
sys.stdout = Logger("shark_tmp/sd.log", filter="[LOG]") sys.stdout = Logger("amdshark_tmp/sd.log", filter="[LOG]")
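Once the stdout redirect above is in place, the renamed log can be exercised roughly as below; the assumption is that the Logger filter keeps only lines containing the "[LOG]" tag in amdshark_tmp/sd.log.

    # Anything printed with the "[LOG]" tag is captured by the redirected stdout.
    print("[LOG] Submitting Request...")

    # read_sd_logs() (defined above) flushes stdout and returns the log contents.
    latest = read_sd_logs()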


@@ -1,14 +1,14 @@
from shark.iree_utils.compile_utils import ( from amdshark.iree_utils.compile_utils import (
get_iree_compiled_module, get_iree_compiled_module,
load_vmfb_using_mmap, load_vmfb_using_mmap,
clean_device_info, clean_device_info,
get_iree_target_triple, get_iree_target_triple,
) )
from apps.shark_studio.web.utils.file_utils import ( from apps.amdshark_studio.web.utils.file_utils import (
get_checkpoints_path, get_checkpoints_path,
get_resource_path, get_resource_path,
) )
from apps.shark_studio.modules.shared_cmd_opts import ( from apps.amdshark_studio.modules.shared_cmd_opts import (
cmd_opts, cmd_opts,
) )
from iree import runtime as ireert from iree import runtime as ireert
@@ -17,7 +17,7 @@ import gc
import os import os
class SharkPipelineBase: class AMDSharkPipelineBase:
# This class is a lightweight base for managing an # This class is a lightweight base for managing an
# inference API class. It should provide methods for: # inference API class. It should provide methods for:
# - compiling a set (model map) of torch IR modules # - compiling a set (model map) of torch IR modules


@@ -224,7 +224,7 @@ def get_unweighted_text_embeddings(
text_embedding = text_embedding[:, 1:-1] text_embedding = text_embedding[:, 1:-1]
text_embeddings.append(text_embedding) text_embeddings.append(text_embedding)
# SHARK: Convert the result to tensor # AMDSHARK: Convert the result to tensor
# text_embeddings = torch.concat(text_embeddings, axis=1) # text_embeddings = torch.concat(text_embeddings, axis=1)
text_embeddings_np = np.concatenate(np.array(text_embeddings)) text_embeddings_np = np.concatenate(np.array(text_embeddings))
text_embeddings = torch.from_numpy(text_embeddings_np) text_embeddings = torch.from_numpy(text_embeddings_np)

Some files were not shown because too many files have changed in this diff.