Compare commits


1 Commit

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| gpetters94 | e4499cb472 | Remove nightly signing again | 2024-05-16 05:43:56 -04:00 |
237 changed files with 2613 additions and 2680 deletions


@@ -10,15 +10,15 @@ jobs:
runs-on: ubuntu-latest
# Don't run this in everyone's forks.
if: github.repository == 'nod-ai/AMD-SHARK-Studio'
if: github.repository == 'nod-ai/SHARK'
steps:
- name: Checking out repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
uses: actions/checkout@v2
with:
token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
- name: Run scrape releases script
run: python ./build_tools/scrape_releases.py nod-ai AMD-SHARK-Studio > /tmp/index.html
run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
shell: bash
- run: git fetch --all
- run: git switch github-pages
@@ -31,7 +31,7 @@ jobs:
- run: git diff --cached --exit-code || git commit -m "Update releases."
- name: GitHub Push
uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
uses: ad-m/github-push-action@v0.6.0
with:
github_token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
branch: github-pages


@@ -17,9 +17,9 @@ jobs:
python-version: ["3.11"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
@@ -35,14 +35,14 @@ jobs:
- name: Create Release
id: create_release
uses: ncipollo/release-action@440c8c1cb0ed28b9f43e4d1d670870f059653174 # v1.16.0
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
tag: ${{ env.tag_name }}
name: nod.ai AMDSHARK ${{ env.tag_name }}
tag_name: ${{ env.tag_name }}
release_name: nod.ai SHARK ${{ env.tag_name }}
body: |
Automatic snapshot release of nod.ai AMDSHARK.
Automatic snapshot release of nod.ai SHARK.
draft: true
prerelease: true
@@ -51,16 +51,16 @@ jobs:
run: |
./setup_venv.ps1
python process_skipfiles.py
$env:AMDSHARK_PACKAGE_VERSION=${{ env.package_version }}
$env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
pip install -e .
pip freeze -l
pyinstaller .\apps\amdshark_studio\amdshark_studio.spec
mv ./dist/nodai_amdshark_studio.exe ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe
signtool sign /f c:\g\amdshark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_amdshark_studio_${{ env.package_version_ }}.exe
pyinstaller .\apps\shark_studio\shark_studio.spec
mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
#signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
- name: Upload Release Assets
id: upload-release-assets
uses: dwenegar/upload-release-assets@fe47e06814723c7b1bea3a7e46cf93d5f020d0c3 # v3
uses: dwenegar/upload-release-assets@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
@@ -70,7 +70,7 @@ jobs:
- name: Publish Release
id: publish_release
uses: eregon/publish-release@01df127f5e9a3c26935118e22e738d95b59d10ce # v1.0.6
uses: eregon/publish-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:


@@ -1,19 +1,19 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Validate AMDShark Studio
name: Validate Shark Studio
on:
push:
branches: [ main ]
paths-ignore:
- '**.md'
- 'amdshark/examples/**'
- 'shark/examples/**'
pull_request:
branches: [ main ]
paths-ignore:
- '**.md'
- 'amdshark/examples/**'
- 'shark/examples/**'
workflow_dispatch:
# Ensure that only a single job or workflow using the same
@@ -39,7 +39,7 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/checkout@v3
- name: Set Environment Variables
run: |
@@ -51,7 +51,7 @@ jobs:
echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
uses: actions/setup-python@v4
with:
python-version: '${{ matrix.python-version }}'
@@ -66,7 +66,7 @@ jobs:
run: |
# black format check
black --version
black --check apps/amdshark_studio
black --check apps/shark_studio
# stop the build if there are Python syntax errors or undefined names
flake8 . --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
@@ -77,9 +77,10 @@ jobs:
if: matrix.suite == 'cpu'
run: |
cd $GITHUB_WORKSPACE
python${{ matrix.python-version }} -m venv amdshark.venv
source amdshark.venv/bin/activate
python${{ matrix.python-version }} -m venv shark.venv
source shark.venv/bin/activate
pip install -r requirements.txt --no-cache-dir
pip install -e .
# Disabled due to hang when exporting test llama2
# python apps/amdshark_studio/tests/api_test.py
pip uninstall -y torch
pip install torch==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
python apps/shark_studio/tests/api_test.py

.gitignore

@@ -164,15 +164,14 @@ cython_debug/
# vscode related
.vscode
# AMDShark related artifacts
# Shark related artefacts
*venv/
amdshark_tmp/
shark_tmp/
*.vmfb
.use-iree
tank/dict_configs.py
*.csv
reproducers/
apps/amdshark_studio/web/configs
# ORT related artefacts
cache_models/
@@ -189,11 +188,6 @@ variants.json
# models folder
apps/stable_diffusion/web/models/
# model artifacts (AMDSHARK)
*.tempfile
*.mlir
*.vmfb
# Stencil annotators.
stencil_annotator/

.gitmodules

@@ -1,4 +1,4 @@
[submodule "inference/thirdparty/amdshark-runtime"]
path = inference/thirdparty/amdshark-runtime
[submodule "inference/thirdparty/shark-runtime"]
path = inference/thirdparty/shark-runtime
url = https://github.com/nod-ai/SRT.git
branch = shark-06032022

README.md

@@ -1,12 +1,12 @@
# AMDSHARK
# SHARK
High Performance Machine Learning Distribution
<h2>NOTE: This project is not currently maintained.</h2>
*We are currently rebuilding SHARK to take advantage of [Turbine](https://github.com/nod-ai/SHARK-Turbine). Until that is complete, make sure you use an .exe release or a checkout of the `SHARK-1.0` branch for a working SHARK*
*The latest versions of this project are developments towards a refactor on top of IREE-Turbine. Until further notice, make sure you use an .exe release or a checkout of the `AMDSHARK-1.0` branch for a working AMDSHARK-Studio*
[![Nightly Release](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[![Validate torch-models on Shark Runtime](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)
[![Nightly Release](https://github.com/nod-ai/AMD-SHARK-Studio/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/AMD-SHARK-Studio/actions/workflows/nightly.yml)
<details>
<summary>Prerequisites - Drivers </summary>
@@ -25,11 +25,11 @@ Other users please ensure you have your latest vendor drivers and Vulkan SDK fro
### Quick Start for AMDSHARK Stable Diffusion for Windows 10/11 Users
### Quick Start for SHARK Stable Diffusion for Windows 10/11 Users
Install the Driver from [Prerequisites](https://github.com/nod-ai/AMD-SHARK-Studio#install-your-hardware-drivers) above
Install the Driver from [Prerequisites](https://github.com/nod-ai/SHARK#install-your-hardware-drivers) above
Download the [stable release](https://github.com/nod-ai/AMD-SHARK-Studio/releases/latest) or the most recent [AMDSHARK 1.0 pre-release](https://github.com/nod-ai/AMD-SHARK-Studio/releases).
Download the [stable release](https://github.com/nod-ai/shark/releases/latest) or the most recent [SHARK 1.0 pre-release](https://github.com/nod-ai/shark/releases).
Double click the .exe, or [run from the command line](#running) (recommended), and you should have the [UI](http://localhost:8080/) in the browser.
@@ -67,16 +67,16 @@ Enjoy.
## Check out the code
```shell
git clone https://github.com/nod-ai/AMD-SHARK-Studio.git
cd AMD-SHARK-Studio
git clone https://github.com/nod-ai/SHARK.git
cd SHARK
```
## Switch to the Correct Branch (IMPORTANT!)
Currently AMDSHARK is being rebuilt for [Turbine](https://github.com/iree-org/iree-turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation. So for now you'll need to switch over to the `AMDSHARK-1.0` branch and use the stable code.
Currently SHARK is being rebuilt for [Turbine](https://github.com/nod-ai/SHARK-Turbine) on the `main` branch. For now you are strongly discouraged from using `main` unless you are working on the rebuild effort, and should not expect the code there to produce a working application for Image Generation. So for now you'll need to switch over to the `SHARK-1.0` branch and use the stable code.
```shell
git checkout AMDSHARK-1.0
git checkout SHARK-1.0
```
The following setup instructions assume you are on this branch.
@@ -92,7 +92,7 @@ The following setup instructions assume you are on this branch.
set-executionpolicy remotesigned
```
#### Setup venv and install necessary packages (torch-mlir, nodLabs/AMDShark, ...)
#### Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...)
```powershell
./setup_venv.ps1 #You can re-run this script to get the latest version
```
@@ -101,20 +101,20 @@ set-executionpolicy remotesigned
```shell
./setup_venv.sh
source amdshark1.venv/bin/activate
source shark1.venv/bin/activate
```
### Run Stable Diffusion on your device - WebUI
#### Windows 10/11 Users
```powershell
(amdshark1.venv) PS C:\g\amdshark> cd .\apps\stable_diffusion\web\
(amdshark1.venv) PS C:\g\amdshark\apps\stable_diffusion\web> python .\index.py
(shark1.venv) PS C:\g\shark> cd .\apps\stable_diffusion\web\
(shark1.venv) PS C:\g\shark\apps\stable_diffusion\web> python .\index.py
```
#### Linux / macOS Users
```shell
(amdshark1.venv) > cd apps/stable_diffusion/web
(amdshark1.venv) > python index.py
(shark1.venv) > cd apps/stable_diffusion/web
(shark1.venv) > python index.py
```
#### Access Stable Diffusion on http://localhost:8080/?__theme=dark
@@ -128,7 +128,7 @@ source amdshark1.venv/bin/activate
#### Windows 10/11 Users
```powershell
(amdshark1.venv) PS C:\g\amdshark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
(shark1.venv) PS C:\g\shark> python .\apps\stable_diffusion\scripts\main.py --app="txt2img" --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
```
#### Linux / macOS Users
@@ -156,7 +156,7 @@ Here are some samples generated:
![a photo of a crab playing a trumpet](https://user-images.githubusercontent.com/74956/204933258-252e7240-8548-45f7-8253-97647d38313d.jpg)
Find us on [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
Find us on [SHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
<details>
@@ -168,8 +168,8 @@ This step sets up a new VirtualEnv for Python
```shell
python --version #Check you have 3.11 on Linux, macOS or Windows Powershell
python -m venv amdshark_venv
source amdshark_venv/bin/activate # Use amdshark_venv/Scripts/activate on Windows
python -m venv shark_venv
source shark_venv/bin/activate # Use shark_venv/Scripts/activate on Windows
# If you are using conda create and activate a new conda env
@@ -179,15 +179,15 @@ python -m pip install --upgrade pip
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"
### Install AMD-SHARK
### Install SHARK
This step pip-installs AMD-SHARK and related packages (Python 3.8, 3.10, and 3.11 on Linux; Python 3.11 on macOS and Windows)
This step pip-installs SHARK and related packages (Python 3.8, 3.10, and 3.11 on Linux; Python 3.11 on macOS and Windows)
```shell
pip install nodai-amdshark -f https://nod-ai.github.io/AMD-SHARK-Studio/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```
### Run amdshark tank model tests.
### Run shark tank model tests.
```shell
pytest tank/test_models.py
```
@@ -196,7 +196,7 @@ See tank/README.md for a more detailed walkthrough of our pytest suite and CLI.
### Download and run Resnet50 sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/AMD-SHARK-Studio/main/amdshark/examples/amdshark_inference/resnet50_script.py
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
#Install deps for test script
pip install --pre torch torchvision torchaudio tqdm pillow gsutil --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
@@ -204,7 +204,7 @@ python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
### Download and run BERT (MiniLM) sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/AMD-SHARK-Studio/main/amdshark/examples/amdshark_inference/minilm_jit.py
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
#Install deps for test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
@@ -222,34 +222,34 @@ Set `USE_IREE=1` to use upstream IREE
# PYTHON=python3.11 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh
```
### Run any of the hundreds of AMDSHARK tank models via the test framework
### Run any of the hundreds of SHARK tank models via the test framework
```shell
python -m amdshark.examples.amdshark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
# Or a pytest
pytest tank/test_models.py -k "MiniLM"
```
### How to use your locally built IREE / Torch-MLIR with AMDSHARK
### How to use your locally built IREE / Torch-MLIR with SHARK
If you are a *Torch-mlir developer or an IREE developer* and want to test local changes, you can uninstall
the provided packages with `pip uninstall torch-mlir` and/or `pip uninstall iree-compiler iree-runtime`, build locally
with Python bindings, and set your PYTHONPATH as mentioned [here](https://github.com/iree-org/iree/tree/main/docs/api_docs/python#install-iree-binaries)
for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
for Torch-MLIR.
How to use your locally built Torch-MLIR with AMDSHARK:
How to use your locally built Torch-MLIR with SHARK:
```shell
1.) Run `./setup_venv.sh` in AMDSHARK and activate the `amdshark.venv` virtual env.
1.) Run `./setup_venv.sh` in SHARK and activate the `shark.venv` virtual env.
2.) Run `pip uninstall torch-mlir`.
3.) Go to your local Torch-MLIR directory.
4.) Activate the mlir_venv virtual environment.
5.) Run `pip uninstall -r requirements.txt`.
6.) Run `pip install -r requirements.txt`.
7.) Build Torch-MLIR.
8.) Activate amdshark.venv virtual environment from the Torch-MLIR directory.
8.) Activate shark.venv virtual environment from the Torch-MLIR directory.
9.) Run ``export PYTHONPATH=`pwd`/build/tools/torch-mlir/python_packages/torch_mlir:`pwd`/examples`` in the Torch-MLIR directory.
10.) Go to the AMDSHARK directory.
10.) Go to the SHARK directory.
```
Now AMDSHARK will use your locally built Torch-MLIR repo.
Now SHARK will use your locally built Torch-MLIR repo.
## Benchmarking Dispatches
@@ -263,10 +263,10 @@ pytest -k "MiniLM and torch and static and cuda" --benchmark_dispatches=All -s -
```
The given command will populate `<dispatch_benchmarks_dir>/<model_name>/` with an `ordered_dispatches.txt` that lists and orders the dispatches and their latencies, as well as folders for each dispatch that contain .mlir, .vmfb, and results of the benchmark for that dispatch.
If you want to instead incorporate this into a Python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` arguments when initializing `AMDSharkInference`, and the benchmarks will be generated when compiled. E.g.:
If you want to instead incorporate this into a Python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` arguments when initializing `SharkInference`, and the benchmarks will be generated when compiled. E.g.:
```
amdshark_module = AMDSharkInference(
shark_module = SharkInference(
mlir_model,
device=args.device,
mlir_dialect="tm_tensor",
@@ -285,34 +285,34 @@ Output will include:
- A .txt file containing benchmark output
See tank/README.md for further instructions on how to run model tests and benchmarks from the AMDSHARK tank.
See tank/README.md for further instructions on how to run model tests and benchmarks from the SHARK tank.
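For reference, here is a minimal sketch of the pattern described above, assuming `SharkInference` accepts the `dispatch_benchmarks` and `dispatch_benchmarks_dir` keyword arguments exactly as named (on the AMDSHARK branch the class is `AMDSharkInference`):

```python
from shark.shark_inference import SharkInference

# mlir_model is an MLIR module obtained as in the importer examples below.
shark_module = SharkInference(
    mlir_model,
    device="cuda",
    mlir_dialect="tm_tensor",
    dispatch_benchmarks="All",                           # benchmark every dispatch
    dispatch_benchmarks_dir="temp_dispatch_benchmarks",  # output directory
)
shark_module.compile()  # benchmark artifacts are produced at compile time
```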
</details>
<details>
<summary>API Reference</summary>
### AMDShark Inference API
### Shark Inference API
```
from amdshark.amdshark_importer import AMDSharkImporter
from shark.shark_importer import SharkImporter
# AMDSharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
# SharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
mlir_importer = AMDSharkImporter(
mlir_importer = SharkImporter(
torch_module,
(input),
frontend="torch", #tf, #tf-lite
)
torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
# AMDSharkInference accepts mlir in linalg, mhlo, and tosa dialect.
# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
from amdshark.amdshark_inference import AMDSharkInference
amdshark_module = AMDSharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
amdshark_module.compile()
result = amdshark_module.forward((input))
from shark.shark_inference import SharkInference
shark_module = SharkInference(torch_mlir, device="cpu", mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input))
```
@@ -320,7 +320,7 @@ result = amdshark_module.forward((input))
### Example demonstrating running MHLO IR.
```
from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_inference import SharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
@@ -333,22 +333,22 @@ mhlo_ir = r"""builtin.module {
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
amdshark_module.compile()
result = amdshark_module.forward((arg0, arg1))
shark_module = SharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
shark_module.compile()
result = shark_module.forward((arg0, arg1))
```
</details>
## Examples Using the REST API
* [Setting up AMDSHARK for use with Blender](./docs/amdshark_sd_blender.md)
* [Setting up AMDSHARK for use with Koboldcpp](./docs/amdshark_sd_koboldcpp.md)
* [Setting up SHARK for use with Blender](./docs/shark_sd_blender.md)
* [Setting up SHARK for use with Koboldcpp](./docs/shark_sd_koboldcpp.md)
## Supported and Validated Models
AMDSHARK is maintained to support the latest innovations in ML Models:
SHARK is maintained to support the latest innovations in ML Models:
| TF HuggingFace Models | AMDSHARK-CPU | AMDSHARK-CUDA | AMDSHARK-METAL |
| TF HuggingFace Models | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------|----------|-------------|
| BERT | :green_heart: | :green_heart: | :green_heart: |
| DistilBERT | :green_heart: | :green_heart: | :green_heart: |
@@ -358,12 +358,12 @@ AMDSHARK is maintained to support the latest innovations in ML Models:
| Vision Transformer | :green_heart: | :green_heart: | :green_heart: |
| ResNet50 | :green_heart: | :green_heart: | :green_heart: |
For a complete list of the models supported in AMDSHARK, please refer to [tank/README.md](https://github.com/nod-ai/AMD-SHARK-Studio/blob/main/tank/README.md).
For a complete list of the models supported in SHARK, please refer to [tank/README.md](https://github.com/nod-ai/SHARK/blob/main/tank/README.md).
## Communication Channels
* [AMDSHARK Discord server](https://discord.gg/RUqY2h2s9u): Real-time discussions with the AMDSHARK team and other users
* [GitHub issues](https://github.com/nod-ai/AMD-SHARK-Studio/issues): Feature requests, bugs, etc.
* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real-time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs, etc.
## Related Projects
@@ -385,10 +385,10 @@ For a complete list of the models supported in AMDSHARK, please refer to [tank/R
* Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
* Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) AMDSHARK and IREE are enabled by and heavily rely on [MLIR](https://mlir.llvm.org).
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31) SHARK and IREE are enabled by and heavily rely on [MLIR](https://mlir.llvm.org).
</details>
## License
nod.ai AMDSHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
See [LICENSE](LICENSE) for more information.


@@ -1,28 +0,0 @@
import importlib
import logging
from torch._dynamo import register_backend
log = logging.getLogger(__name__)
@register_backend
def amdshark(model, inputs, *, options):
try:
from amdshark.dynamo_backend.utils import AMDSharkBackend
except ImportError:
log.exception(
"Unable to import AMDSHARK - High Performance Machine Learning Distribution"
"Please install the right version of AMDSHARK that matches the PyTorch version being used. "
"Refer to https://github.com/nod-ai/AMD-SHARK-Studio/ for details."
)
raise
return AMDSharkBackend(model, inputs, options)
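# Illustrative usage once this backend is registered; torch.compile selects a
# registered dynamo backend by the name of the decorated function:
#   compiled_model = torch.compile(model, backend="amdshark")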
def has_amdshark():
try:
importlib.import_module("amdshark")
return True
except ImportError:
return False


@@ -1,14 +0,0 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"bloom", frontend="torch"
)
amdshark_module = AMDSharkInference(
mlir_model, device="cpu", mlir_dialect="tm_tensor"
)
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)


@@ -1,31 +0,0 @@
from amdshark.amdshark_inference import AMDSharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
print("Running amdshark on cpu backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
# Generate the random inputs and feed into the graph.
x = amdshark_module.generate_random_inputs()
amdshark_module.compile()
print(amdshark_module.forward(x))
print("Running amdshark on cuda backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="cuda", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))
print("Running amdshark on vulkan backend")
amdshark_module = AMDSharkInference(mhlo_ir, device="vulkan", mlir_dialect="mhlo")
amdshark_module.compile()
print(amdshark_module.forward(x))


@@ -1,23 +0,0 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"microsoft/MiniLM-L12-H384-uncased",
frontend="torch",
)
amdshark_module = AMDSharkInference(mlir_model, device="cpu", mlir_dialect="linalg")
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)
# Let's generate random inputs, currently supported
# for static models.
rand_inputs = amdshark_module.generate_random_inputs()
rand_results = amdshark_module.forward(rand_inputs)
print("Running amdshark_module with random_inputs is: ", rand_results)


@@ -1,15 +0,0 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.amdshark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"v_diffusion", frontend="torch"
)
amdshark_module = AMDSharkInference(
mlir_model, device="vulkan", mlir_dialect="linalg"
)
amdshark_module.compile()
result = amdshark_module.forward(inputs)
print("The obtained result via amdshark is: ", result)
print("The golden result is:", golden_out)


@@ -1,505 +0,0 @@
import gc
import torch
import gradio as gr
import time
import os
import json
import numpy as np
import copy
import importlib.util
import sys
from tqdm.auto import tqdm
from pathlib import Path
from random import randint
from turbine_models.custom_models.sd_inference.sd_pipeline import AMDSharkSDPipeline
from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
AMDSharkSDXLPipeline,
)
from apps.amdshark_studio.api.controlnet import control_adapter_map
from apps.amdshark_studio.api.utils import parse_device
from apps.amdshark_studio.web.utils.state import status_label
from apps.amdshark_studio.web.utils.file_utils import (
safe_name,
get_resource_path,
get_checkpoints_path,
)
from apps.amdshark_studio.modules.img_processing import (
save_output_img,
)
from apps.amdshark_studio.modules.ckpt_processing import (
preprocessCKPT,
save_irpa,
)
EMPTY_SD_MAP = {
"clip": None,
"scheduler": None,
"unet": None,
"vae_decode": None,
}
EMPTY_SDXL_MAP = {
"prompt_encoder": None,
"scheduled_unet": None,
"vae_decode": None,
"pipeline": None,
"full_pipeline": None,
}
EMPTY_FLAGS = {
"clip": None,
"unet": None,
"vae": None,
"pipeline": None,
}
def load_script(source, module_name):
"""
reads file source and loads it as a module
:param source: file to load
:param module_name: name of module to register in sys.modules
:return: loaded module
"""
spec = importlib.util.spec_from_file_location(module_name, source)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
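# Illustrative usage (hypothetical path): load a user pipeline script and read
# the attributes the Studio expects such a script to define (see is_custom below):
#   mod = load_script("scripts/my_pipeline.py", "custom_pipeline")
#   pipe_cls, model_map = mod.StudioPipeline, mod.MODEL_MAP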
class StableDiffusion:
# This class is responsible for executing image generation and creating
# /managing a set of compiled modules to run Stable Diffusion. The init
# aims to be as general as possible, and the class will infer and compile
# a list of necessary modules or a combined "pipeline module" for a
# specified job based on the inference task.
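# Illustrative construction (values mirror the bundled sdxl-turbo config; the
# device string is hypothetical and normally comes from the UI via parse_device):
#   sd = StableDiffusion("stabilityai/sdxl-turbo", height=512, width=512,
#       batch_size=1, steps=2, scheduler="EulerAncestralDiscrete",
#       precision="fp16", device="rocm")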
def __init__(
self,
base_model_id,
height: int,
width: int,
batch_size: int,
steps: int,
scheduler: str,
precision: str,
device: str,
target_triple: str = None,
custom_vae: str = None,
num_loras: int = 0,
import_ir: bool = True,
is_controlled: bool = False,
external_weights: str = "safetensors",
):
self.precision = precision
self.compiled_pipeline = False
self.base_model_id = base_model_id
self.custom_vae = custom_vae
self.is_sdxl = "xl" in self.base_model_id.lower()
self.is_custom = ".py" in self.base_model_id.lower()
if self.is_custom:
custom_module = load_script(
os.path.join(get_checkpoints_path("scripts"), self.base_model_id),
"custom_pipeline",
)
self.turbine_pipe = custom_module.StudioPipeline
self.model_map = custom_module.MODEL_MAP
elif self.is_sdxl:
self.turbine_pipe = AMDSharkSDXLPipeline
self.model_map = EMPTY_SDXL_MAP
else:
self.turbine_pipe = AMDSharkSDPipeline
self.model_map = EMPTY_SD_MAP
max_length = 64
target_backend, self.rt_device, triple = parse_device(device, target_triple)
pipe_id_list = [
safe_name(base_model_id),
str(batch_size),
str(max_length),
f"{str(height)}x{str(width)}",
precision,
triple,
]
if num_loras > 0:
pipe_id_list.append(str(num_loras) + "lora")
if is_controlled:
pipe_id_list.append("controlled")
if custom_vae:
pipe_id_list.append(custom_vae)
self.pipe_id = "_".join(pipe_id_list)
self.pipeline_dir = Path(os.path.join(get_checkpoints_path(), self.pipe_id))
self.weights_path = Path(
os.path.join(
get_checkpoints_path(), safe_name(self.base_model_id + "_" + precision)
)
)
if not os.path.exists(self.weights_path):
os.mkdir(self.weights_path)
decomp_attn = True
attn_spec = None
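# Choose an attention lowering per GPU architecture: CDNA targets get an
# MFMA-based attention spec, RDNA3-class targets get WMMA; other GPU targets
# keep decomposed attention, and llvm-cpu disables decomposition entirely.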
if triple in ["gfx940", "gfx942", "gfx90a"]:
decomp_attn = False
attn_spec = "mfma"
elif triple in ["gfx1100", "gfx1103", "gfx1150"]:
decomp_attn = False
attn_spec = "wmma"
if triple in ["gfx1103", "gfx1150"]:
# external weights have issues on igpu
external_weights = None
elif target_backend == "llvm-cpu":
decomp_attn = False
self.sd_pipe = self.turbine_pipe(
hf_model_name=base_model_id,
scheduler_id=scheduler,
height=height,
width=width,
precision=precision,
max_length=max_length,
batch_size=batch_size,
num_inference_steps=steps,
device=target_backend,
iree_target_triple=triple,
ireec_flags=EMPTY_FLAGS,
attn_spec=attn_spec,
decomp_attn=decomp_attn,
pipeline_dir=self.pipeline_dir,
external_weights_dir=self.weights_path,
external_weights=external_weights,
custom_vae=custom_vae,
)
print(f"\n[LOG] Pipeline initialized with pipe_id: {self.pipe_id}.")
gc.collect()
def prepare_pipe(
self, custom_weights, adapters, embeddings, is_img2img, compiled_pipeline
):
print(f"\n[LOG] Preparing pipeline...")
self.is_img2img = False
mlirs = copy.deepcopy(self.model_map)
vmfbs = copy.deepcopy(self.model_map)
weights = copy.deepcopy(self.model_map)
if not self.is_sdxl:
compiled_pipeline = False
self.compiled_pipeline = compiled_pipeline
if custom_weights:
custom_weights = os.path.join(
get_checkpoints_path("checkpoints"),
safe_name(self.base_model_id.split("/")[-1]),
custom_weights,
)
diffusers_weights_path = preprocessCKPT(custom_weights, self.precision)
for key in weights:
if key in ["scheduled_unet", "unet"]:
unet_weights_path = os.path.join(
diffusers_weights_path,
"unet",
"diffusion_pytorch_model.safetensors",
)
weights[key] = save_irpa(unet_weights_path, "unet.")
elif key in ["clip", "prompt_encoder"]:
if not self.is_sdxl:
sd1_path = os.path.join(
diffusers_weights_path, "text_encoder", "model.safetensors"
)
weights[key] = save_irpa(sd1_path, "text_encoder_model.")
else:
clip_1_path = os.path.join(
diffusers_weights_path, "text_encoder", "model.safetensors"
)
clip_2_path = os.path.join(
diffusers_weights_path,
"text_encoder_2",
"model.safetensors",
)
weights[key] = [
save_irpa(clip_1_path, "text_encoder_model_1."),
save_irpa(clip_2_path, "text_encoder_model_2."),
]
elif key in ["vae_decode"] and weights[key] is None:
vae_weights_path = os.path.join(
diffusers_weights_path,
"vae",
"diffusion_pytorch_model.safetensors",
)
weights[key] = save_irpa(vae_weights_path, "vae.")
vmfbs, weights = self.sd_pipe.check_prepared(
mlirs, vmfbs, weights, interactive=False
)
print(f"\n[LOG] Loading pipeline to device {self.rt_device}.")
self.sd_pipe.load_pipeline(
vmfbs, weights, self.rt_device, self.compiled_pipeline
)
print(
"\n[LOG] Pipeline successfully prepared for runtime. Generating images..."
)
return
def generate_images(
self,
prompt,
negative_prompt,
image,
strength,
guidance_scale,
seed,
ondemand,
resample_type,
control_mode,
hints,
):
img = self.sd_pipe.generate_images(
prompt,
negative_prompt,
1,
guidance_scale,
seed,
return_imgs=True,
)
return img
def amdshark_sd_fn_dict_input(
sd_kwargs: dict,
):
print("\n[LOG] Submitting Request...")
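# Normalize values coming from the UI: empty selections become None, the
# literal string "None" becomes an empty string, and seed arrives as a string.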
for key in sd_kwargs:
if sd_kwargs[key] in [None, []]:
sd_kwargs[key] = None
if sd_kwargs[key] in ["None"]:
sd_kwargs[key] = ""
if key == "seed":
sd_kwargs[key] = int(sd_kwargs[key])
# TODO: move these checks into the UI code so we don't have gradio warnings in a generalized dict input function.
if not sd_kwargs["device"]:
gr.Warning("No device specified. Please specify a device.")
return None, ""
if sd_kwargs["height"] not in [512, 1024]:
gr.Warning("Height must be 512 or 1024. This is a temporary limitation.")
return None, ""
if sd_kwargs["height"] != sd_kwargs["width"]:
gr.Warning("Height and width must be the same. This is a temporary limitation.")
return None, ""
if sd_kwargs["base_model_id"] == "stabilityai/sdxl-turbo":
if sd_kwargs["steps"] > 10:
gr.Warning("Max steps for sdxl-turbo is 10. 1 to 4 steps are recommended.")
return None, ""
if sd_kwargs["guidance_scale"] > 3:
gr.Warning(
"sdxl-turbo CFG scale should be less than 2.0 if using negative prompt, 0 otherwise."
)
return None, ""
if sd_kwargs["target_triple"] == "":
if parse_device(sd_kwargs["device"], sd_kwargs["target_triple"])[2] == "":
gr.Warning(
"Target device architecture could not be inferred. Please specify a target triple, e.g. 'gfx1100' for a Radeon 7900xtx."
)
return None, ""
generated_imgs = yield from amdshark_sd_fn(**sd_kwargs)
return generated_imgs
def amdshark_sd_fn(
prompt,
negative_prompt,
sd_init_image: list,
height: int,
width: int,
steps: int,
strength: float,
guidance_scale: float,
seed: list,
batch_count: int,
batch_size: int,
scheduler: str,
base_model_id: str,
custom_weights: str,
custom_vae: str,
precision: str,
device: str,
target_triple: str,
ondemand: bool,
compiled_pipeline: bool,
resample_type: str,
controlnets: dict,
embeddings: dict,
):
sd_kwargs = locals()
if not isinstance(sd_init_image, list):
sd_init_image = [sd_init_image]
is_img2img = sd_init_image[0] is not None
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
import apps.amdshark_studio.web.utils.globals as global_obj
adapters = {}
is_controlled = False
control_mode = None
hints = []
num_loras = 0
import_ir = True
for i in embeddings:
num_loras += 1 if embeddings[i] else 0
if "model" in controlnets:
for i, model in enumerate(controlnets["model"]):
if "xl" not in base_model_id.lower():
adapters[f"control_adapter_{model}"] = {
"hf_id": control_adapter_map["runwayml/stable-diffusion-v1-5"][
model
],
"strength": controlnets["strength"][i],
}
else:
adapters[f"control_adapter_{model}"] = {
"hf_id": control_adapter_map["stabilityai/stable-diffusion-xl-1.0"][
model
],
"strength": controlnets["strength"][i],
}
if model is not None:
is_controlled = True
control_mode = controlnets["control_mode"]
for i in controlnets["hint"]:
hints.append(i)
submit_pipe_kwargs = {
"base_model_id": base_model_id,
"height": height,
"width": width,
"batch_size": batch_size,
"precision": precision,
"device": device,
"target_triple": target_triple,
"custom_vae": custom_vae,
"num_loras": num_loras,
"import_ir": import_ir,
"is_controlled": is_controlled,
"steps": steps,
"scheduler": scheduler,
}
submit_prep_kwargs = {
"custom_weights": custom_weights,
"adapters": adapters,
"embeddings": embeddings,
"is_img2img": is_img2img,
"compiled_pipeline": compiled_pipeline,
}
submit_run_kwargs = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"image": sd_init_image,
"strength": strength,
"guidance_scale": guidance_scale,
"seed": seed,
"ondemand": ondemand,
"resample_type": resample_type,
"control_mode": control_mode,
"hints": hints,
}
if (
not global_obj.get_sd_obj()
or global_obj.get_pipe_kwargs() != submit_pipe_kwargs
):
print("\n[LOG] Initializing new pipeline...")
global_obj.clear_cache()
gc.collect()
# Initializes the pipeline and retrieves IR based on all
# parameters that are static in the turbine output format,
# which is currently MLIR in the torch dialect.
sd_pipe = StableDiffusion(
**submit_pipe_kwargs,
)
global_obj.set_sd_obj(sd_pipe)
global_obj.set_pipe_kwargs(submit_pipe_kwargs)
if (
not global_obj.get_prep_kwargs()
or global_obj.get_prep_kwargs() != submit_prep_kwargs
):
global_obj.set_prep_kwargs(submit_prep_kwargs)
global_obj.get_sd_obj().prepare_pipe(**submit_prep_kwargs)
generated_imgs = []
for current_batch in range(batch_count):
start_time = time.time()
out_imgs = global_obj.get_sd_obj().generate_images(**submit_run_kwargs)
if not isinstance(out_imgs, list):
out_imgs = [out_imgs]
# total_time = time.time() - start_time
# text_output = f"Total image(s) generation time: {total_time:.4f}sec"
# print(f"\n[LOG] {text_output}")
# if global_obj.get_sd_status() == SD_STATE_CANCEL:
# break
# else:
for batch in range(batch_size):
save_output_img(
out_imgs[batch],
seed,
sd_kwargs,
)
generated_imgs.extend(out_imgs)
# TODO: make seed changes over batch counts more configurable.
submit_run_kwargs["seed"] = submit_run_kwargs["seed"] + 1
yield generated_imgs, status_label(
"Stable Diffusion", current_batch + 1, batch_count, batch_size
)
return (generated_imgs, "")
def unload_sd():
print("Unloading models.")
import apps.amdshark_studio.web.utils.globals as global_obj
global_obj.clear_cache()
gc.collect()
def cancel_sd():
print("Inject call to cancel longer API calls.")
return
def view_json_file(file_path):
content = ""
with open(file_path, "r") as fopen:
content = fopen.read()
return content
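# NOTE: this local safe_name shadows the one imported from
# apps.amdshark_studio.web.utils.file_utils at the top of this file.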
def safe_name(name):
return name.replace("/", "_").replace("\\", "_").replace(".", "_")
if __name__ == "__main__":
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
import apps.amdshark_studio.web.utils.globals as global_obj
global_obj._init()
sd_json = view_json_file(
get_resource_path(os.path.join(cmd_opts.config_dir, "default_sd_config.json"))
)
sd_kwargs = json.loads(sd_json)
for arg in vars(cmd_opts):
if arg in sd_kwargs:
sd_kwargs[arg] = getattr(cmd_opts, arg)
for i in amdshark_sd_fn_dict_input(sd_kwargs):
print(i)


@@ -1,118 +0,0 @@
# from amdshark_turbine.turbine_models.schedulers import export_scheduler_model
from diffusers import (
LCMScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
DDPMScheduler,
DDIMScheduler,
DPMSolverMultistepScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DEISMultistepScheduler,
DPMSolverSinglestepScheduler,
KDPM2AncestralDiscreteScheduler,
HeunDiscreteScheduler,
)
def get_schedulers(model_id):
# TODO: switch over to turbine and run all on GPU
print(f"\n[LOG] Initializing schedulers from model id: {model_id}")
schedulers = dict()
schedulers["PNDM"] = PNDMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
# schedulers["DDPM"] = DDPMScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["KDPM2Discrete"] = KDPM2DiscreteScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["DDIM"] = DDIMScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["LCMScheduler"] = LCMScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["DPMSolverMultistep"] = DPMSolverMultistepScheduler.from_pretrained(
# model_id, subfolder="scheduler", algorithm_type="dpmsolver"
# )
# schedulers["DPMSolverMultistep++"] = DPMSolverMultistepScheduler.from_pretrained(
# model_id, subfolder="scheduler", algorithm_type="dpmsolver++"
# )
# schedulers["DPMSolverMultistepKarras"] = (
# DPMSolverMultistepScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# use_karras_sigmas=True,
# )
# )
# schedulers["DPMSolverMultistepKarras++"] = (
# DPMSolverMultistepScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# algorithm_type="dpmsolver++",
# use_karras_sigmas=True,
# )
# )
schedulers["EulerDiscrete"] = EulerDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["EulerAncestralDiscrete"] = (
EulerAncestralDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
)
# schedulers["DEISMultistep"] = DEISMultistepScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["DPMSolverSinglestep"] = DPMSolverSinglestepScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# schedulers["KDPM2AncestralDiscrete"] = (
# KDPM2AncestralDiscreteScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
# )
# schedulers["HeunDiscrete"] = HeunDiscreteScheduler.from_pretrained(
# model_id,
# subfolder="scheduler",
# )
return schedulers
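# Illustrative usage (model id from the bundled default config):
#   schedulers = get_schedulers("stabilityai/stable-diffusion-2-1-base")
#   scheduler = schedulers["EulerDiscrete"]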
def export_scheduler_model(model):
return "None", "None"
scheduler_model_map = {
"PNDM": export_scheduler_model("PNDMScheduler"),
# "DPMSolverSDE": export_scheduler_model("DpmSolverSDEScheduler"),
"EulerDiscrete": export_scheduler_model("EulerDiscreteScheduler"),
"EulerAncestralDiscrete": export_scheduler_model("EulerAncestralDiscreteScheduler"),
# "LCM": export_scheduler_model("LCMScheduler"),
# "LMSDiscrete": export_scheduler_model("LMSDiscreteScheduler"),
# "DDPM": export_scheduler_model("DDPMScheduler"),
# "DDIM": export_scheduler_model("DDIMScheduler"),
# "DPMSolverMultistep": export_scheduler_model("DPMSolverMultistepScheduler"),
# "KDPM2Discrete": export_scheduler_model("KDPM2DiscreteScheduler"),
# "DEISMultistep": export_scheduler_model("DEISMultistepScheduler"),
# "DPMSolverSinglestep": export_scheduler_model("DPMSolverSingleStepScheduler"),
# "KDPM2AncestralDiscrete": export_scheduler_model("KDPM2AncestralDiscreteScheduler"),
# "HeunDiscrete": export_scheduler_model("HeunDiscreteScheduler"),
}

(Two binary image files removed: 7.1 KiB and 7.4 KiB; contents not shown.)

@@ -1,95 +0,0 @@
default_sd_config = r"""{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
sdxl_30steps = r"""{
"prompt": [
"a cat under the snow with blue eyes, covered by snow, cinematic style, medium shot, professional photo, animal"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 1024,
"width": 1024,
"steps": 30,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-xl-base-1.0",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": true,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
sdxl_turbo = r"""{
"prompt": [
"A cat wearing a hat that says 'TURBO' on it. The cat is sitting on a skateboard."
],
"negative_prompt": [
""
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 2,
"strength": 0.8,
"guidance_scale": 0,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerAncestralDiscrete",
"base_model_id": "stabilityai/sdxl-turbo",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": true,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
default_sd_configs = {
"default_sd_config.json": default_sd_config,
"sdxl-30steps.json": sdxl_30steps,
"sdxl-turbo.json": sdxl_turbo,
}


@@ -2,7 +2,7 @@
import os
import PIL
import numpy as np
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_generated_imgs_path,
)
from datetime import datetime


@@ -6,13 +6,13 @@ import warnings
import json
from threading import Thread
from apps.amdshark_studio.modules.timer import startup_timer
from apps.shark_studio.modules.timer import startup_timer
from apps.amdshark_studio.web.utils.tmp_configs import (
from apps.shark_studio.web.utils.tmp_configs import (
config_tmp,
clear_tmp_mlir,
clear_tmp_imgs,
amdshark_tmp,
shark_tmp,
)
@@ -30,12 +30,12 @@ def imports():
startup_timer.record("import gradio")
import apps.amdshark_studio.web.utils.globals as global_obj
import apps.shark_studio.web.utils.globals as global_obj
global_obj._init()
startup_timer.record("initialize globals")
from apps.amdshark_studio.modules import (
from apps.shark_studio.modules import (
img_processing,
) # noqa: F401
@@ -44,7 +44,7 @@ def imports():
def initialize():
configure_sigint_handler()
# Setup to use amdshark_tmp for gradio's temporary image files and clear any
# Setup to use shark_tmp for gradio's temporary image files and clear any
# existing temporary images there if they exist. Then we can import gradio.
# It has to be in this order or gradio ignores what we've set up.
@@ -52,7 +52,7 @@ def initialize():
# clear_tmp_mlir()
clear_tmp_imgs()
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
create_model_folders,
)
@@ -83,7 +83,7 @@ def dumpstacks():
code.append(f"""File: "{filename}", line {lineno}, in {name}""")
if line:
code.append(" " + line.strip())
with open(os.path.join(amdshark_tmp, "stack_dump.log"), "w") as f:
with open(os.path.join(shark_tmp, "stack_dump.log"), "w") as f:
f.write("\n".join(code))
@@ -100,7 +100,7 @@ def setup_middleware(app):
def configure_cors_middleware(app):
from starlette.middleware.cors import CORSMiddleware
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
cors_options = {
"allow_methods": ["*"],


@@ -2,14 +2,9 @@ from turbine_models.custom_models import stateless_llama
from turbine_models.model_runner import vmfbRunner
from turbine_models.gen_external_params.gen_external_params import gen_external_params
import time
from amdshark.iree_utils.compile_utils import compile_module_to_flatbuffer
from apps.amdshark_studio.web.utils.file_utils import (
get_resource_path,
get_checkpoints_path,
)
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.amdshark_studio.api.utils import parse_device
from urllib.request import urlopen
from shark.iree_utils.compile_utils import compile_module_to_flatbuffer
from apps.shark_studio.web.utils.file_utils import get_resource_path
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import iree.runtime as ireert
from itertools import chain
import gc
@@ -70,7 +65,6 @@ class LanguageModel:
use_system_prompt=True,
streaming_llm=False,
):
_, _, self.triple = parse_device(device)
self.hf_model_name = llm_model_map[model_name]["hf_model_name"]
self.device = device.split("=>")[-1].strip()
self.backend = self.device.split("://")[0]
@@ -161,9 +155,7 @@ class LanguageModel:
use_auth_token=hf_auth_token,
)
elif not os.path.exists(self.tempfile_name):
self.torch_ir, self.tokenizer = llm_model_map[self.hf_model_name][
"initializer"
](
self.torch_ir, self.tokenizer = llm_model_map[model_name]["initializer"](
self.hf_model_name,
hf_auth_token,
compile_to="torch",
@@ -171,7 +163,6 @@ class LanguageModel:
precision=self.precision,
quantization=self.quantization,
streaming_llm=self.streaming_llm,
decomp_attn=True,
)
with open(self.tempfile_name, "w+") as f:
f.write(self.torch_ir)
@@ -201,27 +192,11 @@ class LanguageModel:
)
elif self.backend == "vulkan":
flags.extend(["--iree-stream-resource-max-allocation-size=4294967296"])
elif self.backend == "rocm":
flags.extend(
[
"--iree-codegen-llvmgpu-enable-transform-dialect-jit=false",
"--iree-llvmgpu-enable-prefetch=true",
"--iree-opt-outer-dim-concat=true",
"--iree-flow-enable-aggressive-fusion",
]
)
if "gfx9" in self.triple:
flags.extend(
[
f"--iree-codegen-transform-dialect-library={get_mfma_spec_path(self.triple, get_checkpoints_path())}",
"--iree-codegen-llvmgpu-use-vector-distribution=true",
]
)
flags.extend(llm_model_map[self.hf_model_name]["compile_flags"])
flatbuffer_blob = compile_module_to_flatbuffer(
self.tempfile_name,
device=self.device,
frontend="auto",
frontend="torch",
model_config_path=None,
extra_args=flags,
write_to=self.vmfb_name,
@@ -283,7 +258,8 @@ class LanguageModel:
history.append(format_out(token))
while (
format_out(token) != llm_model_map[self.hf_model_name]["stop_token"]
format_out(token)
!= llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]
and len(history) < self.max_tokens
):
dec_time = time.time()
@@ -297,7 +273,10 @@ class LanguageModel:
self.prev_token_len = token_len + len(history)
if format_out(token) == llm_model_map[self.hf_model_name]["stop_token"]:
if (
format_out(token)
== llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]
):
break
for i in range(len(history)):
@@ -331,7 +310,7 @@ class LanguageModel:
self.first_input = False
history.append(int(token))
while token != llm_model_map[self.hf_model_name]["stop_token"]:
while token != llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]:
dec_time = time.time()
result = self.hf_mod(token.reshape([1, 1]), past_key_values=pkv)
history.append(int(token))
@@ -342,7 +321,7 @@ class LanguageModel:
self.prev_token_len = token_len + len(history)
if token == llm_model_map[self.hf_model_name]["stop_token"]:
if token == llm_model_map["meta-llama/Llama-2-7b-chat-hf"]["stop_token"]:
break
for i in range(len(history)):
if type(history[i]) != int:
@@ -352,21 +331,10 @@ class LanguageModel:
return result_output, total_time
def get_mfma_spec_path(target_chip, save_dir):
url = "https://raw.githubusercontent.com/iree-org/iree/main/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir"
attn_spec = urlopen(url).read().decode("utf-8")
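# NOTE: the spec is fetched before the cache check below, so the network
# request happens even when spec_path already exists on disk.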
spec_path = os.path.join(save_dir, "attention_and_matmul_spec_mfma.mlir")
if os.path.exists(spec_path):
return spec_path
with open(spec_path, "w") as f:
f.write(attn_spec)
return spec_path
def llm_chat_api(InputData: dict):
from datetime import datetime as dt
import apps.amdshark_studio.web.utils.globals as global_obj
import apps.shark_studio.web.utils.globals as global_obj
print(f"Input keys : {InputData.keys()}")

apps/shark_studio/api/sd.py (new file)

@@ -0,0 +1,613 @@
import gc
import torch
import time
import os
import json
import numpy as np
from tqdm.auto import tqdm
from pathlib import Path
from random import randint
from turbine_models.custom_models.sd_inference import clip, unet, vae
from apps.shark_studio.api.controlnet import control_adapter_map
from apps.shark_studio.web.utils.state import status_label
from apps.shark_studio.web.utils.file_utils import (
safe_name,
get_resource_path,
get_checkpoints_path,
)
from apps.shark_studio.modules.pipeline import SharkPipelineBase
from apps.shark_studio.modules.schedulers import get_schedulers
from apps.shark_studio.modules.prompt_encoding import (
get_weighted_text_embeddings,
)
from apps.shark_studio.modules.img_processing import (
resize_stencil,
save_output_img,
resamplers,
resampler_list,
)
from apps.shark_studio.modules.ckpt_processing import (
preprocessCKPT,
process_custom_pipe_weights,
)
from transformers import CLIPTokenizer
from diffusers.image_processor import VaeImageProcessor
sd_model_map = {
"clip": {
"initializer": clip.export_clip_model,
},
"unet": {
"initializer": unet.export_unet_model,
},
"vae_decode": {
"initializer": vae.export_vae_model,
},
}
class StableDiffusion(SharkPipelineBase):
# This class is responsible for executing image generation and creating
# /managing a set of compiled modules to run Stable Diffusion. The init
# aims to be as general as possible, and the class will infer and compile
# a list of necessary modules or a combined "pipeline module" for a
# specified job based on the inference task.
def __init__(
self,
base_model_id,
height: int,
width: int,
batch_size: int,
precision: str,
device: str,
custom_vae: str = None,
num_loras: int = 0,
import_ir: bool = True,
is_controlled: bool = False,
hf_auth_token=None,
):
self.model_max_length = 77
self.batch_size = batch_size
self.precision = precision
self.dtype = torch.float16 if precision == "fp16" else torch.float32
self.height = height
self.width = width
self.scheduler_obj = {}
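# Compile-time parameters for each submodel export; these fix the shapes,
# precision, and weight format the compiled modules are specialized to.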
static_kwargs = {
"pipe": {
"external_weights": "safetensors",
},
"clip": {"hf_model_name": base_model_id},
"unet": {
"hf_model_name": base_model_id,
"unet_model": unet.UnetModel(hf_model_name=base_model_id),
"batch_size": batch_size,
# "is_controlled": is_controlled,
# "num_loras": num_loras,
"height": height,
"width": width,
"precision": precision,
"max_length": self.model_max_length,
},
"vae_encode": {
"hf_model_name": base_model_id,
"vae_model": vae.VaeModel(
hf_model_name=custom_vae if custom_vae else base_model_id,
),
"batch_size": batch_size,
"height": height,
"width": width,
"precision": precision,
},
"vae_decode": {
"hf_model_name": base_model_id,
"vae_model": vae.VaeModel(
hf_model_name=custom_vae if custom_vae else base_model_id,
),
"batch_size": batch_size,
"height": height,
"width": width,
"precision": precision,
},
}
super().__init__(sd_model_map, base_model_id, static_kwargs, device, import_ir)
pipe_id_list = [
safe_name(base_model_id),
str(batch_size),
str(self.model_max_length),
f"{str(height)}x{str(width)}",
precision,
self.device,
]
if num_loras > 0:
pipe_id_list.append(str(num_loras) + "lora")
if is_controlled:
pipe_id_list.append("controlled")
if custom_vae:
pipe_id_list.append(custom_vae)
self.pipe_id = "_".join(pipe_id_list)
print(f"\n[LOG] Pipeline initialized with pipe_id: {self.pipe_id}.")
del static_kwargs
gc.collect()
def prepare_pipe(self, custom_weights, adapters, embeddings, is_img2img):
print(f"\n[LOG] Preparing pipeline...")
self.is_img2img = is_img2img
self.schedulers = get_schedulers(self.base_model_id)
self.weights_path = os.path.join(
get_checkpoints_path(), self.safe_name(self.base_model_id)
)
if not os.path.exists(self.weights_path):
os.mkdir(self.weights_path)
for model in adapters:
self.model_map[model] = adapters[model]
for submodel in self.static_kwargs:
if custom_weights:
custom_weights_params, _ = process_custom_pipe_weights(custom_weights)
if submodel not in ["clip", "clip2"]:
self.static_kwargs[submodel][
"external_weights"
] = custom_weights_params
else:
self.static_kwargs[submodel]["external_weight_path"] = os.path.join(
self.weights_path, submodel + ".safetensors"
)
else:
self.static_kwargs[submodel]["external_weight_path"] = os.path.join(
self.weights_path, submodel + ".safetensors"
)
self.get_compiled_map(pipe_id=self.pipe_id)
print("\n[LOG] Pipeline successfully prepared for runtime.")
return
def encode_prompts_weight(
self,
prompt,
negative_prompt,
do_classifier_free_guidance=True,
):
# Encodes the prompt into text encoder hidden states.
self.load_submodels(["clip"])
self.tokenizer = CLIPTokenizer.from_pretrained(
self.base_model_id,
subfolder="tokenizer",
)
clip_inf_start = time.time()
text_embeddings, uncond_embeddings = get_weighted_text_embeddings(
pipe=self,
prompt=prompt,
uncond_prompt=negative_prompt if do_classifier_free_guidance else None,
)
if do_classifier_free_guidance:
text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
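# Pad the sequence dimension so the embeddings match the static max_length
# the unet module was compiled with.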
pad = (0, 0) * (len(text_embeddings.shape) - 2)
pad = pad + (
0,
self.static_kwargs["unet"]["max_length"] - text_embeddings.shape[1],
)
text_embeddings = torch.nn.functional.pad(text_embeddings, pad)
# SHARK: Report clip inference time
clip_inf_time = (time.time() - clip_inf_start) * 1000
if self.ondemand:
self.unload_submodels(["clip"])
gc.collect()
print(f"\n[LOG] Clip Inference time (ms) = {clip_inf_time:.3f}")
return text_embeddings.numpy().astype(np.float16)
def prepare_latents(
self,
generator,
num_inference_steps,
image,
strength,
):
noise = torch.randn(
(
self.batch_size,
4,
self.height // 8,
self.width // 8,
),
generator=generator,
dtype=self.dtype,
).to("cpu")
self.scheduler.set_timesteps(num_inference_steps)
if self.is_img2img:
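            # `strength` controls how much of the schedule runs on the
            # init image: higher strength keeps more noise steps and
            # departs further from the input.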
init_timestep = min(
int(num_inference_steps * strength), num_inference_steps
)
t_start = max(num_inference_steps - init_timestep, 0)
timesteps = self.scheduler.timesteps[t_start:]
latents = self.encode_image(image)
latents = self.scheduler.add_noise(latents, noise, timesteps[0].repeat(1))
return latents, [timesteps]
else:
self.scheduler.is_scale_input_called = True
latents = noise * self.scheduler.init_noise_sigma
return latents, self.scheduler.timesteps
def encode_image(self, input_image):
self.load_submodels(["vae_encode"])
vae_encode_start = time.time()
latents = self.run("vae_encode", input_image)
vae_inf_time = (time.time() - vae_encode_start) * 1000
if self.ondemand:
self.unload_submodels(["vae_encode"])
print(f"\n[LOG] VAE Encode Inference time (ms): {vae_inf_time:.3f}")
return latents
def produce_img_latents(
self,
latents,
text_embeddings,
guidance_scale,
total_timesteps,
cpu_scheduling,
mask=None,
masked_image_latents=None,
return_all_latents=False,
):
# self.status = SD_STATE_IDLE
step_time_sum = 0
latent_history = [latents]
text_embeddings = torch.from_numpy(text_embeddings).to(self.dtype)
text_embeddings_numpy = text_embeddings.detach().numpy()
guidance_scale = torch.Tensor([guidance_scale]).to(self.dtype)
self.load_submodels(["unet"])
for i, t in tqdm(enumerate(total_timesteps)):
step_start_time = time.time()
timestep = torch.tensor([t]).to(self.dtype).detach().numpy()
latent_model_input = self.scheduler.scale_model_input(latents, t).to(
self.dtype
)
if mask is not None and masked_image_latents is not None:
latent_model_input = torch.cat(
[
torch.from_numpy(np.asarray(latent_model_input)).to(self.dtype),
mask,
masked_image_latents,
],
dim=1,
).to(self.dtype)
if cpu_scheduling:
latent_model_input = latent_model_input.detach().numpy()
# Profiling Unet.
# profile_device = start_profiling(file_path="unet.rdc")
noise_pred = self.run(
"unet",
[
latent_model_input,
timestep,
text_embeddings_numpy,
guidance_scale,
],
)
# end_profiling(profile_device)
if cpu_scheduling:
noise_pred = torch.from_numpy(noise_pred.to_host())
latents = self.scheduler.step(noise_pred, t, latents).prev_sample
else:
latents = self.run("scheduler_step", (noise_pred, t, latents))
latent_history.append(latents)
step_time = (time.time() - step_start_time) * 1000
# print(
# f"\n [LOG] step = {i} | timestep = {t} | time = {step_time:.2f}ms"
# )
step_time_sum += step_time
# if self.status == SD_STATE_CANCEL:
# break
if self.ondemand:
self.unload_submodels(["unet"])
gc.collect()
avg_step_time = step_time_sum / len(total_timesteps)
print(f"\n[LOG] Average step time: {avg_step_time}ms/it")
if not return_all_latents:
return latents
all_latents = torch.cat(latent_history, dim=0)
return all_latents
def decode_latents(self, latents, cpu_scheduling=True):
latents_numpy = latents.to(self.dtype)
if cpu_scheduling:
latents_numpy = latents.detach().numpy()
# profile_device = start_profiling(file_path="vae.rdc")
vae_start = time.time()
images = self.run("vae_decode", latents_numpy).to_host()
vae_inf_time = (time.time() - vae_start) * 1000
# end_profiling(profile_device)
print(f"\n[LOG] VAE Inference time (ms): {vae_inf_time:.3f}")
images = torch.from_numpy(images).permute(0, 2, 3, 1).float().numpy()
pil_images = self.image_processor.numpy_to_pil(images)
return pil_images
def generate_images(
self,
prompt,
negative_prompt,
image,
scheduler,
steps,
strength,
guidance_scale,
seed,
ondemand,
repeatable_seeds,
resample_type,
control_mode,
hints,
):
# TODO: Batched args
self.image_processor = VaeImageProcessor(do_convert_rgb=True)
self.scheduler = self.schedulers[scheduler]
self.ondemand = ondemand
if self.is_img2img:
image, _ = self.image_processor.preprocess(image, resample_type)
else:
image = None
print("\n[LOG] Generating images...")
batched_args = [
prompt,
negative_prompt,
image,
]
        # Normalize each batched arg in place; rebinding the loop variable
        # alone would not write back into the list.
        for idx, arg in enumerate(batched_args):
            if not isinstance(arg, list):
                arg = [arg] * self.batch_size
            if len(arg) < self.batch_size:
                arg = arg * self.batch_size
            else:
                arg = [arg[i] for i in range(self.batch_size)]
            batched_args[idx] = arg
text_embeddings = self.encode_prompts_weight(
prompt,
negative_prompt,
)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
init_latents, final_timesteps = self.prepare_latents(
generator=generator,
num_inference_steps=steps,
image=image,
strength=strength,
)
latents = self.produce_img_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=final_timesteps,
cpu_scheduling=True, # until we have schedulers through Turbine
)
# Img latents -> PIL images
all_imgs = []
self.load_submodels(["vae_decode"])
for i in tqdm(range(0, latents.shape[0], self.batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + self.batch_size],
cpu_scheduling=True,
)
all_imgs.extend(imgs)
if self.ondemand:
self.unload_submodels(["vae_decode"])
return all_imgs
def shark_sd_fn_dict_input(
sd_kwargs: dict,
):
print("[LOG] Submitting Request...")
for key in sd_kwargs:
if sd_kwargs[key] in [None, []]:
sd_kwargs[key] = None
if sd_kwargs[key] in ["None"]:
sd_kwargs[key] = ""
if key == "seed":
sd_kwargs[key] = int(sd_kwargs[key])
    generated_imgs = yield from shark_sd_fn(**sd_kwargs)
    yield generated_imgs
def shark_sd_fn(
prompt,
negative_prompt,
sd_init_image: list,
height: int,
width: int,
steps: int,
strength: float,
guidance_scale: float,
seed: list,
batch_count: int,
batch_size: int,
scheduler: str,
base_model_id: str,
custom_weights: str,
custom_vae: str,
precision: str,
device: str,
ondemand: bool,
repeatable_seeds: bool,
resample_type: str,
controlnets: dict,
embeddings: dict,
):
sd_kwargs = locals()
if not isinstance(sd_init_image, list):
sd_init_image = [sd_init_image]
    is_img2img = sd_init_image[0] is not None
print("\n[LOG] Performing Stable Diffusion Pipeline setup...")
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
adapters = {}
is_controlled = False
control_mode = None
hints = []
num_loras = 0
for i in embeddings:
num_loras += 1 if embeddings[i] else 0
if "model" in controlnets:
for i, model in enumerate(controlnets["model"]):
if "xl" not in base_model_id.lower():
adapters[f"control_adapter_{model}"] = {
"hf_id": control_adapter_map["runwayml/stable-diffusion-v1-5"][
model
],
"strength": controlnets["strength"][i],
}
else:
adapters[f"control_adapter_{model}"] = {
"hf_id": control_adapter_map["stabilityai/stable-diffusion-xl-1.0"][
model
],
"strength": controlnets["strength"][i],
}
if model is not None:
is_controlled = True
control_mode = controlnets["control_mode"]
for i in controlnets["hint"]:
            hints.append(i)
submit_pipe_kwargs = {
"base_model_id": base_model_id,
"height": height,
"width": width,
"batch_size": batch_size,
"precision": precision,
"device": device,
"custom_vae": custom_vae,
"num_loras": num_loras,
"import_ir": cmd_opts.import_mlir,
"is_controlled": is_controlled,
}
submit_prep_kwargs = {
"custom_weights": custom_weights,
"adapters": adapters,
"embeddings": embeddings,
"is_img2img": is_img2img,
}
submit_run_kwargs = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"image": sd_init_image,
"steps": steps,
"scheduler": scheduler,
"strength": strength,
"guidance_scale": guidance_scale,
"seed": seed,
"ondemand": ondemand,
"repeatable_seeds": repeatable_seeds,
"resample_type": resample_type,
"control_mode": control_mode,
"hints": hints,
}
if (
not global_obj.get_sd_obj()
or global_obj.get_pipe_kwargs() != submit_pipe_kwargs
):
print("\n[LOG] Initializing new pipeline...")
global_obj.clear_cache()
gc.collect()
# Initializes the pipeline and retrieves IR based on all
# parameters that are static in the turbine output format,
# which is currently MLIR in the torch dialect.
sd_pipe = StableDiffusion(
**submit_pipe_kwargs,
)
global_obj.set_sd_obj(sd_pipe)
global_obj.set_pipe_kwargs(submit_pipe_kwargs)
if (
not global_obj.get_prep_kwargs()
or global_obj.get_prep_kwargs() != submit_prep_kwargs
):
global_obj.set_prep_kwargs(submit_prep_kwargs)
global_obj.get_sd_obj().prepare_pipe(**submit_prep_kwargs)
generated_imgs = []
for current_batch in range(batch_count):
start_time = time.time()
out_imgs = global_obj.get_sd_obj().generate_images(**submit_run_kwargs)
total_time = time.time() - start_time
text_output = f"Total image(s) generation time: {total_time:.4f}sec"
print(f"\n[LOG] {text_output}")
# if global_obj.get_sd_status() == SD_STATE_CANCEL:
# break
# else:
save_output_img(
out_imgs[current_batch],
seed,
sd_kwargs,
)
generated_imgs.extend(out_imgs)
yield generated_imgs, status_label(
"Stable Diffusion", current_batch + 1, batch_count, batch_size
)
return generated_imgs, ""
def cancel_sd():
print("Inject call to cancel longer API calls.")
return
def view_json_file(file_path):
content = ""
with open(file_path, "r") as fopen:
content = fopen.read()
return content
if __name__ == "__main__":
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
global_obj._init()
sd_json = view_json_file(
get_resource_path(os.path.join(cmd_opts.config_dir, "default_sd_config.json"))
)
sd_kwargs = json.loads(sd_json)
for arg in vars(cmd_opts):
if arg in sd_kwargs:
sd_kwargs[arg] = getattr(cmd_opts, arg)
for i in shark_sd_fn_dict_input(sd_kwargs):
print(i)

View File

@@ -8,11 +8,11 @@ from random import (
)
from pathlib import Path
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from cpuinfo import get_cpu_info
# TODO: migrate these utils to studio
from amdshark.iree_utils.vulkan_utils import (
from shark.iree_utils.vulkan_utils import (
set_iree_vulkan_runtime_flags,
get_vulkan_target_triple,
get_iree_vulkan_runtime_flags,
@@ -21,7 +21,7 @@ from amdshark.iree_utils.vulkan_utils import (
def get_available_devices():
def get_devices_by_name(driver_name):
from amdshark.iree_utils._common import iree_device_map
from shark.iree_utils._common import iree_device_map
device_list = []
try:
@@ -52,14 +52,7 @@ def get_available_devices():
set_iree_runtime_flags()
available_devices = []
rocm_devices = get_devices_by_name("rocm")
available_devices.extend(rocm_devices)
cpu_device = get_devices_by_name("cpu-sync")
available_devices.extend(cpu_device)
cpu_device = get_devices_by_name("cpu-task")
available_devices.extend(cpu_device)
from amdshark.iree_utils.vulkan_utils import (
from shark.iree_utils.vulkan_utils import (
get_all_vulkan_devices,
)
@@ -71,28 +64,17 @@ def get_available_devices():
id += 1
if id != 0:
print(f"vulkan devices are available.")
available_devices.extend(vulkan_devices)
metal_devices = get_devices_by_name("metal")
available_devices.extend(metal_devices)
cuda_devices = get_devices_by_name("cuda")
available_devices.extend(cuda_devices)
hip_devices = get_devices_by_name("hip")
available_devices.extend(hip_devices)
for idx, device_str in enumerate(available_devices):
if "AMD Radeon(TM) Graphics =>" in device_str:
igpu_id_candidates = [
x.split("w/")[-1].split("=>")[0]
for x in available_devices
if "M Graphics" in x
]
for igpu_name in igpu_id_candidates:
if igpu_name:
available_devices[idx] = device_str.replace(
"AMD Radeon(TM) Graphics", igpu_name
)
break
rocm_devices = get_devices_by_name("rocm")
available_devices.extend(rocm_devices)
cpu_device = get_devices_by_name("cpu-sync")
available_devices.extend(cpu_device)
cpu_device = get_devices_by_name("cpu-task")
available_devices.extend(cpu_device)
return available_devices
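# Entries returned above take the form "<device name> => <driver>://<index>",
# e.g. "AMD Radeon RX 7900 XTX => vulkan://0".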
@@ -116,7 +98,7 @@ def set_init_device_flags():
elif "metal" in cmd_opts.device:
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
if not cmd_opts.iree_metal_target_platform:
from amdshark.iree_utils.metal_utils import get_metal_target_triple
from shark.iree_utils.metal_utils import get_metal_target_triple
triple = get_metal_target_triple(device_name)
if triple is not None:
@@ -145,57 +127,6 @@ def set_iree_runtime_flags():
set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
def parse_device(device_str, target_override=""):
from amdshark.iree_utils.compile_utils import (
clean_device_info,
get_iree_target_triple,
iree_target_map,
)
rt_driver, device_id = clean_device_info(device_str)
target_backend = iree_target_map(rt_driver)
if device_id:
rt_device = f"{rt_driver}://{device_id}"
else:
rt_device = rt_driver
if target_override:
return target_backend, rt_device, target_override
match target_backend:
case "vulkan-spirv":
triple = get_iree_target_triple(device_str)
return target_backend, rt_device, triple
case "rocm":
triple = get_rocm_target_chip(device_str)
return target_backend, rt_device, triple
case "llvm-cpu":
return "llvm-cpu", "local-task", "x86_64-linux-gnu"
def get_rocm_target_chip(device_str):
# TODO: Use a data file to map device_str to target chip.
rocm_chip_map = {
"6700": "gfx1031",
"6800": "gfx1030",
"6900": "gfx1030",
"7900": "gfx1100",
"MI300X": "gfx942",
"MI300A": "gfx940",
"MI210": "gfx90a",
"MI250": "gfx90a",
"MI100": "gfx908",
"MI50": "gfx906",
"MI60": "gfx906",
"780M": "gfx1103",
}
for key in rocm_chip_map:
if key in device_str:
return rocm_chip_map[key]
raise AssertionError(
f"Device {device_str} not recognized. Please file an issue at https://github.com/nod-ai/AMD-SHARK-Studio/issues."
)
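# Hypothetical example (device strings come from get_available_devices()):
# for "AMD Radeon RX 7900 XTX => rocm://0", parse_device() would return
# ("rocm", "rocm://0", "gfx1100") via the "7900" substring match above.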
def get_all_devices(driver_name):
"""
Inputs: driver_name
@@ -225,7 +156,7 @@ def get_device_mapping(driver, key_combination=3):
dict: map to possible device names user can input mapped to desired
combination of name/path.
"""
from amdshark.iree_utils._common import iree_device_map
from shark.iree_utils._common import iree_device_map
driver = iree_device_map(driver)
device_list = get_all_devices(driver)
@@ -256,7 +187,7 @@ def get_opt_flags(model, precision="fp16"):
f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
)
if "rocm" in cmd_opts.device:
from amdshark.iree_utils.gpu_utils import get_iree_rocm_args
from shark.iree_utils.gpu_utils import get_iree_rocm_args
rocm_args = get_iree_rocm_args()
iree_flags.extend(rocm_args)
@@ -301,7 +232,7 @@ def map_device_to_name_path(device, key_combination=3):
return device_mapping
def get_devices_by_name(driver_name):
from amdshark.iree_utils._common import iree_device_map
from shark.iree_utils._common import iree_device_map
device_list = []
try:
@@ -332,7 +263,7 @@ def map_device_to_name_path(device, key_combination=3):
set_iree_runtime_flags()
available_devices = []
from amdshark.iree_utils.vulkan_utils import (
from shark.iree_utils.vulkan_utils import (
get_all_vulkan_devices,
)

View File

@@ -2,17 +2,12 @@ import os
import json
import re
import requests
import torch
import safetensors
from iree.turbine.aot.params import (
ParameterArchiveBuilder,
)
from io import BytesIO
from pathlib import Path
from tqdm import tqdm
from omegaconf import OmegaConf
from diffusers import StableDiffusionPipeline
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
download_from_original_stable_diffusion_ckpt,
create_vae_diffusers_config,
@@ -20,21 +15,21 @@ from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
)
def get_path_to_diffusers_checkpoint(custom_weights, precision="fp16"):
def get_path_to_diffusers_checkpoint(custom_weights):
path = Path(custom_weights)
diffusers_path = path.parent.absolute()
diffusers_directory_name = os.path.join("diffusers", path.stem + f"_{precision}")
diffusers_directory_name = os.path.join("diffusers", path.stem)
complete_path_to_diffusers = diffusers_path / diffusers_directory_name
complete_path_to_diffusers.mkdir(parents=True, exist_ok=True)
path_to_diffusers = complete_path_to_diffusers.as_posix()
return path_to_diffusers
def preprocessCKPT(custom_weights, precision="fp16", is_inpaint=False):
path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights, precision)
def preprocessCKPT(custom_weights, is_inpaint=False):
path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights)
if next(Path(path_to_diffusers).iterdir(), None):
print("Checkpoint already loaded at : ", path_to_diffusers)
return path_to_diffusers
return
else:
print(
"Diffusers' checkpoint will be identified here : ",
@@ -56,24 +51,8 @@ def preprocessCKPT(custom_weights, precision="fp16", is_inpaint=False):
from_safetensors=from_safetensors,
num_in_channels=num_in_channels,
)
if precision == "fp16":
pipe.to(dtype=torch.float16)
pipe.save_pretrained(path_to_diffusers)
del pipe
print("Loading complete")
return path_to_diffusers
def save_irpa(weights_path, prepend_str):
weights = safetensors.torch.load_file(weights_path)
archive = ParameterArchiveBuilder()
for key in weights.keys():
new_key = prepend_str + key
archive.add_tensor(new_key, weights[key])
irpa_file = weights_path.replace(".safetensors", ".irpa")
archive.save(irpa_file)
return irpa_file
def convert_original_vae(vae_checkpoint):

View File

@@ -5,7 +5,7 @@ import json
import safetensors
from dataclasses import dataclass
from safetensors.torch import load_file
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_checkpoint_pathfile,
get_path_stem,
)

View File

@@ -25,11 +25,11 @@ resampler_list = resamplers.keys()
# save output images and the inputs corresponding to it.
def save_output_img(output_img, img_seed, extra_info=None):
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_generated_imgs_path,
get_generated_imgs_todays_subdir,
)
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
if extra_info is None:
extra_info = {}

View File

@@ -30,8 +30,8 @@ def logger_test(x):
def read_sd_logs():
sys.stdout.flush()
with open("amdshark_tmp/sd.log", "r") as f:
with open("shark_tmp/sd.log", "r") as f:
return f.read()
sys.stdout = Logger("amdshark_tmp/sd.log", filter="[LOG]")
sys.stdout = Logger("shark_tmp/sd.log", filter="[LOG]")

View File

@@ -1,14 +1,14 @@
from amdshark.iree_utils.compile_utils import (
from shark.iree_utils.compile_utils import (
get_iree_compiled_module,
load_vmfb_using_mmap,
clean_device_info,
get_iree_target_triple,
)
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_checkpoints_path,
get_resource_path,
)
from apps.amdshark_studio.modules.shared_cmd_opts import (
from apps.shark_studio.modules.shared_cmd_opts import (
cmd_opts,
)
from iree import runtime as ireert
@@ -17,7 +17,7 @@ import gc
import os
class AMDSharkPipelineBase:
class SharkPipelineBase:
# This class is a lightweight base for managing an
# inference API class. It should provide methods for:
# - compiling a set (model map) of torch IR modules

View File

@@ -224,7 +224,7 @@ def get_unweighted_text_embeddings(
text_embedding = text_embedding[:, 1:-1]
text_embeddings.append(text_embedding)
# AMDSHARK: Convert the result to tensor
# SHARK: Convert the result to tensor
# text_embeddings = torch.concat(text_embeddings, axis=1)
text_embeddings_np = np.concatenate(np.array(text_embeddings))
text_embeddings = torch.from_numpy(text_embeddings_np)

View File

@@ -0,0 +1,117 @@
# from shark_turbine.turbine_models.schedulers import export_scheduler_model
from diffusers import (
LCMScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
DDPMScheduler,
DDIMScheduler,
DPMSolverMultistepScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DEISMultistepScheduler,
DPMSolverSinglestepScheduler,
KDPM2AncestralDiscreteScheduler,
HeunDiscreteScheduler,
)
def get_schedulers(model_id):
# TODO: switch over to turbine and run all on GPU
print(f"\n[LOG] Initializing schedulers from model id: {model_id}")
schedulers = dict()
schedulers["PNDM"] = PNDMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DDPM"] = DDPMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["KDPM2Discrete"] = KDPM2DiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DDIM"] = DDIMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["LCMScheduler"] = LCMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DPMSolverMultistep"] = DPMSolverMultistepScheduler.from_pretrained(
model_id, subfolder="scheduler", algorithm_type="dpmsolver"
)
schedulers["DPMSolverMultistep++"] = DPMSolverMultistepScheduler.from_pretrained(
model_id, subfolder="scheduler", algorithm_type="dpmsolver++"
)
schedulers["DPMSolverMultistepKarras"] = (
DPMSolverMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
use_karras_sigmas=True,
)
)
schedulers["DPMSolverMultistepKarras++"] = (
DPMSolverMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
algorithm_type="dpmsolver++",
use_karras_sigmas=True,
)
)
schedulers["EulerDiscrete"] = EulerDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["EulerAncestralDiscrete"] = (
EulerAncestralDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
)
schedulers["DEISMultistep"] = DEISMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DPMSolverSinglestep"] = DPMSolverSinglestepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["KDPM2AncestralDiscrete"] = (
KDPM2AncestralDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
)
schedulers["HeunDiscrete"] = HeunDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
return schedulers
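# Scheduler export through turbine is not wired up yet (note the commented
# import at the top of this file); the stub below returns sentinel strings
# so that scheduler_model_map entries exist without compiled artifacts.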
def export_scheduler_model(model):
return "None", "None"
scheduler_model_map = {
"EulerDiscrete": export_scheduler_model("EulerDiscreteScheduler"),
"EulerAncestralDiscrete": export_scheduler_model("EulerAncestralDiscreteScheduler"),
"LCM": export_scheduler_model("LCMScheduler"),
"LMSDiscrete": export_scheduler_model("LMSDiscreteScheduler"),
"PNDM": export_scheduler_model("PNDMScheduler"),
"DDPM": export_scheduler_model("DDPMScheduler"),
"DDIM": export_scheduler_model("DDIMScheduler"),
"DPMSolverMultistep": export_scheduler_model("DPMSolverMultistepScheduler"),
"KDPM2Discrete": export_scheduler_model("KDPM2DiscreteScheduler"),
"DEISMultistep": export_scheduler_model("DEISMultistepScheduler"),
"DPMSolverSinglestep": export_scheduler_model("DPMSolverSingleStepScheduler"),
"KDPM2AncestralDiscrete": export_scheduler_model("KDPM2AncestralDiscreteScheduler"),
"HeunDiscrete": export_scheduler_model("HeunDiscreteScheduler"),
}
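# Minimal usage sketch (assumes the model id exposes a "scheduler"
# subfolder that diffusers can fetch):
#   schedulers = get_schedulers("stabilityai/stable-diffusion-2-1-base")
#   sched = schedulers["EulerDiscrete"]
#   sched.set_timesteps(50)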

View File

@@ -2,7 +2,7 @@ import argparse
import os
from pathlib import Path
from apps.amdshark_studio.modules.img_processing import resampler_list
from apps.shark_studio.modules.img_processing import resampler_list
def path_expand(s):
@@ -299,8 +299,8 @@ p.add_argument(
"--import_mlir",
default=True,
action=argparse.BooleanOptionalAction,
help="Imports the model from torch module to amdshark_module otherwise "
"downloads the model from amdshark_tank.",
help="Imports the model from torch module to shark_module otherwise "
"downloads the model from shark_tank.",
)
p.add_argument(
@@ -487,8 +487,8 @@ p.add_argument(
p.add_argument(
"--local_tank_cache",
default="",
help="Specify where to save downloaded amdshark_tank artifacts. "
"If this is not set, the default is ~/.local/amdshark_tank/.",
help="Specify where to save downloaded shark_tank artifacts. "
"If this is not set, the default is ~/.local/shark_tank/.",
)
p.add_argument(
@@ -562,7 +562,7 @@ p.add_argument(
default=False,
action=argparse.BooleanOptionalAction,
help="If import_mlir is True, saves mlir via the debug option "
"in amdshark importer. Does nothing if import_mlir is false (the default).",
"in shark importer. Does nothing if import_mlir is false (the default).",
)
p.add_argument(
@@ -615,7 +615,7 @@ p.add_argument(
p.add_argument(
"--tmp_dir",
type=str,
default=os.path.join(os.getcwd(), "amdshark_tmp"),
default=os.path.join(os.getcwd(), "shark_tmp"),
help="Path to tmp directory",
)

View File

@@ -1,5 +1,5 @@
# -*- mode: python ; coding: utf-8 -*-
from apps.amdshark_studio.studio_imports import pathex, datas, hiddenimports
from apps.shark_studio.studio_imports import pathex, datas, hiddenimports
binaries = []
@@ -32,7 +32,7 @@ exe = EXE(
a.zipfiles,
a.datas,
[],
name='nodai_amdshark_studio',
name='nodai_shark_studio',
debug=False,
bootloader_ignore_signals=False,
strip=False,

View File

@@ -38,7 +38,7 @@ datas += collect_data_files("transformers")
datas += collect_data_files("gradio")
datas += collect_data_files("gradio_client")
datas += collect_data_files("iree", include_py_files=True)
datas += collect_data_files("amdshark", include_py_files=True)
datas += collect_data_files("shark", include_py_files=True)
datas += collect_data_files("tqdm")
datas += collect_data_files("tkinter")
datas += collect_data_files("sentencepiece")
@@ -54,7 +54,7 @@ datas += [
# hidden imports for pyinstaller
hiddenimports = ["amdshark", "apps"]
hiddenimports = ["shark", "apps"]
hiddenimports += [x for x in collect_submodules("gradio") if "tests" not in x]
hiddenimports += [x for x in collect_submodules("diffusers") if "tests" not in x]
blacklist = ["tests", "convert"]

View File

@@ -8,14 +8,14 @@ import logging
import unittest
import json
import gc
from apps.amdshark_studio.api.llm import LanguageModel, llm_chat_api
from apps.amdshark_studio.api.sd import amdshark_sd_fn_dict_input, view_json_file
from apps.amdshark_studio.web.utils.file_utils import get_resource_path
from apps.shark_studio.api.llm import LanguageModel, llm_chat_api
from apps.shark_studio.api.sd import shark_sd_fn_dict_input, view_json_file
from apps.shark_studio.web.utils.file_utils import get_resource_path
# class SDAPITest(unittest.TestCase):
# def testSDSimple(self):
# from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
# import apps.amdshark_studio.web.utils.globals as global_obj
# from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
# import apps.shark_studio.web.utils.globals as global_obj
# global_obj._init()
@@ -24,7 +24,7 @@ from apps.amdshark_studio.web.utils.file_utils import get_resource_path
# for arg in vars(cmd_opts):
# if arg in sd_kwargs:
# sd_kwargs[arg] = getattr(cmd_opts, arg)
# for i in amdshark_sd_fn_dict_input(sd_kwargs):
# for i in shark_sd_fn_dict_input(sd_kwargs):
# print(i)
@@ -36,7 +36,6 @@ class LLMAPITest(unittest.TestCase):
device="cpu",
precision="fp32",
quantization="None",
streaming_llm=True,
)
count = 0
label = "Turkishoure Turkish"

View File

Binary image replaced (347 KiB before and after).

View File

@@ -38,8 +38,8 @@ def llm_chat_test(verbose=False):
if __name__ == "__main__":
# "Exercises the chatbot REST API of AMDShark. Make sure "
# "AMDShark is running in API mode on 127.0.0.1:8080 before running"
# "Exercises the chatbot REST API of Shark. Make sure "
# "Shark is running in API mode on 127.0.0.1:8080 before running"
# "this script."
llm_chat_test(verbose=True)

View File

@@ -18,10 +18,10 @@ from fastapi.exceptions import HTTPException
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
# from sdapi_v1 import amdshark_sd_api
from apps.amdshark_studio.api.llm import llm_chat_api
# from sdapi_v1 import shark_sd_api
from apps.shark_studio.api.llm import llm_chat_api
def decode_base64_to_image(encoding):
@@ -183,8 +183,8 @@ class ApiCompat:
self.app = app
self.queue_lock = queue_lock
api_middleware(self.app)
# self.add_api_route("/sdapi/v1/txt2img", amdshark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/img2img", amdshark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/txt2img", shark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/img2img", shark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/upscaler", self.upscaler_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=models.ExtrasSingleImageResponse)
# self.add_api_route("/sdapi/v1/extra-batch-images", self.extras_batch_images_api, methods=["POST"], response_model=models.ExtrasBatchImagesResponse)

View File

@@ -0,0 +1,28 @@
{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "AMD Radeon RX 7900 XTX => vulkan://0",
"ondemand": false,
"repeatable_seeds": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}

View File

@@ -7,10 +7,10 @@ import os
import time
import sys
import logging
import apps.amdshark_studio.api.initializers as initialize
import apps.shark_studio.api.initializers as initialize
from apps.amdshark_studio.modules import timer
from apps.shark_studio.modules import timer
startup_timer = timer.startup_timer
startup_timer.record("launcher")
@@ -24,7 +24,7 @@ if sys.platform == "darwin":
def create_api(app):
from apps.amdshark_studio.web.api.compat import ApiCompat, FIFOLock
from apps.shark_studio.web.api.compat import ApiCompat, FIFOLock
queue_lock = FIFOLock()
api = ApiCompat(app, queue_lock)
@@ -33,7 +33,7 @@ def create_api(app):
def api_only():
from fastapi import FastAPI
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
initialize.initialize()
@@ -64,7 +64,7 @@ def launch_webui(address):
width = int(window.winfo_screenwidth() * 0.81)
height = int(window.winfo_screenheight() * 0.91)
webview.create_window(
"AMDSHARK AI Studio",
"SHARK AI Studio",
url=address,
width=width,
height=height,
@@ -74,10 +74,10 @@ def launch_webui(address):
def webui():
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.amdshark_studio.web.ui.utils import (
amdicon_loc,
amdlogo_loc,
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.ui.utils import (
nodicon_loc,
nodlogo_loc,
)
launch_api = cmd_opts.api
@@ -91,10 +91,10 @@ def webui():
freeze_support()
# if args.api or "api" in args.ui.split(","):
# from apps.amdshark_studio.api.llm import (
# from apps.shark_studio.api.llm import (
# chat,
# )
# from apps.amdshark_studio.web.api import sdapi
# from apps.shark_studio.web.api import sdapi
#
# from fastapi import FastAPI, APIRouter
# from fastapi.middleware.cors import CORSMiddleware
@@ -144,7 +144,7 @@ def webui():
dark_theme = resource_path("ui/css/sd_dark_theme.css")
gradio_workarounds = resource_path("ui/js/sd_gradio_workarounds.js")
# from apps.amdshark_studio.web.ui import load_ui_from_script
# from apps.shark_studio.web.ui import load_ui_from_script
def register_button_click(button, selectedid, inputs, outputs):
button.click(
@@ -170,11 +170,11 @@ def webui():
css=dark_theme,
js=gradio_workarounds,
analytics_enabled=False,
title="AMDShark Studio 2.0 Beta",
title="Shark Studio 2.0 Beta",
) as studio_web:
amd_logo = Image.open(amdlogo_loc)
nod_logo = Image.open(nodlogo_loc)
gr.Image(
value=amd_logo,
value=nod_logo,
show_label=False,
interactive=False,
elem_id="tab_bar_logo",
@@ -209,12 +209,12 @@ def webui():
inbrowser=True,
server_name="0.0.0.0",
server_port=cmd_opts.server_port,
favicon_path=amdicon_loc,
favicon_path=nodicon_loc,
)
if __name__ == "__main__":
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
if cmd_opts.webui == False:
api_only()

View File

@@ -5,12 +5,12 @@ from pathlib import Path
from datetime import datetime as dt
import json
import sys
from apps.amdshark_studio.api.llm import (
from apps.shark_studio.api.llm import (
llm_model_map,
LanguageModel,
)
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
import apps.amdshark_studio.web.utils.globals as global_obj
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
B_SYS, E_SYS = "<s>", "</s>"
@@ -129,7 +129,7 @@ with gr.Blocks(title="Chat") as chat_element:
tokens_time = gr.Textbox(label="Tokens generated per second")
with gr.Column():
download_vmfb = gr.Checkbox(
label="Download vmfb from AMDShark tank if available",
label="Download vmfb from Shark tank if available",
value=False,
interactive=True,
visible=False,
@@ -137,8 +137,7 @@ with gr.Blocks(title="Chat") as chat_element:
streaming_llm = gr.Checkbox(
label="Run in streaming mode (requires recompilation)",
value=True,
interactive=False,
visible=False,
interactive=True,
)
prompt_prefix = gr.Checkbox(
label="Add System Prompt",

View File

@@ -1,8 +1,8 @@
from apps.amdshark_studio.web.ui.utils import (
from apps.shark_studio.web.ui.utils import (
HSLHue,
hsl_color,
)
from apps.amdshark_studio.modules.embeddings import get_lora_metadata
from apps.shark_studio.modules.embeddings import get_lora_metadata
# Answers HTML to show the most frequent tags used when a LoRA was trained,

View File

@@ -100,7 +100,7 @@ Procedure to upgrade the dark theme:
--input-border-width: 1px;
}
/* AMDSHARK theme */
/* SHARK theme */
body {
background-color: var(--background-fill-primary);
}
@@ -367,7 +367,7 @@ footer {
#tab_bar_logo .image-container {
object-fit: scale-down;
position: absolute !important;
top: 10px;
top: 14px;
right: 0px;
height: 36px;
}
}

Binary file not shown (new image, 16 KiB).

Binary file not shown (new image, 10 KiB).

View File

@@ -5,13 +5,13 @@ import subprocess
import sys
from PIL import Image
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.utils.file_utils import (
get_generated_imgs_path,
get_generated_imgs_todays_subdir,
)
from apps.amdshark_studio.web.ui.utils import amdlogo_loc
from apps.amdshark_studio.web.utils.metadata import displayable_metadata
from apps.shark_studio.web.ui.utils import nodlogo_loc
from apps.shark_studio.web.utils.metadata import displayable_metadata
# -- Functions for file, directory and image info querying
@@ -60,7 +60,7 @@ def output_subdirs() -> list[str]:
# --- Define UI layout for Gradio
with gr.Blocks() as outputgallery_element:
amd_logo = Image.open(amdlogo_loc)
nod_logo = Image.open(nodlogo_loc)
with gr.Row(elem_id="outputgallery_gallery"):
# needed to workaround gradio issue:
@@ -73,7 +73,7 @@ with gr.Blocks() as outputgallery_element:
with gr.Column(scale=6):
logo = gr.Image(
label="Getting subdirectories...",
value=amd_logo,
value=nod_logo,
interactive=False,
visible=True,
show_label=True,

View File

@@ -9,46 +9,47 @@ from datetime import datetime as dt
from gradio.components.image_editor import (
EditorValue,
)
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_generated_imgs_path,
get_checkpoints_path,
get_checkpoints,
get_configs_path,
write_default_sd_configs,
write_default_sd_config,
)
from apps.amdshark_studio.api.sd import (
amdshark_sd_fn_dict_input,
from apps.shark_studio.api.sd import (
sd_model_map,
shark_sd_fn_dict_input,
cancel_sd,
unload_sd,
)
from apps.amdshark_studio.api.controlnet import (
from apps.shark_studio.api.controlnet import (
cnet_preview,
)
from apps.amdshark_studio.modules.schedulers import (
from apps.shark_studio.modules.schedulers import (
scheduler_model_map,
)
from apps.amdshark_studio.modules.img_processing import (
from apps.shark_studio.modules.img_processing import (
resampler_list,
resize_stencil,
)
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.amdshark_studio.web.ui.utils import (
amdlogo_loc,
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.ui.utils import (
nodlogo_loc,
none_to_str_none,
str_none_to_none,
)
from apps.amdshark_studio.web.utils.state import (
from apps.shark_studio.web.utils.state import (
status_label,
)
from apps.amdshark_studio.web.ui.common_events import lora_changed
from apps.amdshark_studio.modules import logger
import apps.amdshark_studio.web.utils.globals as global_obj
from apps.shark_studio.web.ui.common_events import lora_changed
from apps.shark_studio.modules import logger
import apps.shark_studio.web.utils.globals as global_obj
sd_default_models = [
"CompVis/stable-diffusion-v1-4",
"runwayml/stable-diffusion-v1-5",
"stabilityai/stable-diffusion-2-1-base",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-base-1.0",
"stabilityai/stable-diffusion-xl-1.0",
"stabilityai/sdxl-turbo",
]
@@ -118,9 +119,8 @@ def pull_sd_configs(
custom_vae,
precision,
device,
target_triple,
ondemand,
compiled_pipeline,
repeatable_seeds,
resample_type,
controlnets,
embeddings,
@@ -177,9 +177,8 @@ def load_sd_cfg(sd_json: dict, load_sd_config: str):
sd_json["custom_vae"],
sd_json["precision"],
sd_json["device"],
sd_json["target_triple"],
sd_json["ondemand"],
sd_json["compiled_pipeline"],
sd_json["repeatable_seeds"],
sd_json["resample_type"],
sd_json["controlnets"],
sd_json["embeddings"],
@@ -232,9 +231,14 @@ def import_original(original_img, width, height):
def base_model_changed(base_model_id):
new_choices = get_checkpoints(
os.path.join("checkpoints", os.path.basename(str(base_model_id)))
) + get_checkpoints(model_type="checkpoints")
ckpt_path = Path(
os.path.join(
cmd_opts.model_dir, "checkpoints", os.path.basename(str(base_model_id))
)
)
ckpt_path.mkdir(parents=True, exist_ok=True)
new_choices = get_checkpoints(ckpt_path) + get_checkpoints(model_type="checkpoints")
return gr.Dropdown(
value=new_choices[0] if len(new_choices) > 0 else "None",
@@ -256,11 +260,6 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
choices=global_obj.get_device_list(),
allow_custom_value=False,
)
target_triple = gr.Textbox(
elem_id="target_triple",
label="Architecture",
value="",
)
with gr.Row():
ondemand = gr.Checkbox(
value=cmd_opts.lowvram,
@@ -283,19 +282,18 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
elem_id="custom_model",
value="stabilityai/stable-diffusion-2-1-base",
choices=sd_default_models,
allow_custom_value=True,
) # base_model_id
with gr.Row():
height = gr.Slider(
384,
1024,
768,
value=cmd_opts.height,
step=8,
label="\U00002195\U0000FE0F Height",
)
width = gr.Slider(
384,
1024,
768,
value=cmd_opts.width,
step=8,
label="\U00002194\U0000FE0F Width",
@@ -606,34 +604,35 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
interactive=True,
visible=True,
)
compiled_pipeline = gr.Checkbox(
False,
label="Faster txt2img (SDXL only)",
repeatable_seeds = gr.Checkbox(
cmd_opts.repeatable_seeds,
label="Use Repeatable Seeds for Batches",
)
with gr.Row():
stable_diffusion = gr.Button("Start")
unload = gr.Button("Unload Models")
unload.click(
fn=unload_sd,
random_seed = gr.Button("Randomize Seed")
random_seed.click(
lambda: -1,
inputs=[],
outputs=[seed],
queue=False,
show_progress=False,
)
stop_batch = gr.Button("Stop")
with gr.Tab(label="Config", id=102) as sd_tab_config:
with gr.Column(elem_classes=["sd-right-panel"]):
with gr.Row(elem_classes=["fill"]):
Path(get_configs_path()).mkdir(
parents=True, exist_ok=True
)
default_config_file = os.path.join(
get_configs_path(),
"default_sd_config.json",
)
write_default_sd_configs(get_configs_path())
sd_json = gr.JSON(
elem_classes=["fill"],
value=view_json_file(default_config_file),
)
Path(get_configs_path()).mkdir(parents=True, exist_ok=True)
default_config_file = os.path.join(
get_configs_path(),
"default_sd_config.json",
)
write_default_sd_config(default_config_file)
sd_json = gr.JSON(
label="SD Config",
elem_classes=["fill"],
value=view_json_file(default_config_file),
render=False,
)
with gr.Row():
with gr.Column(scale=3):
load_sd_config = gr.FileExplorer(
@@ -683,9 +682,8 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
custom_vae,
precision,
device,
target_triple,
ondemand,
compiled_pipeline,
repeatable_seeds,
resample_type,
cnet_config,
embeddings_config,
@@ -697,6 +695,8 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
inputs=[sd_json, sd_config_name],
outputs=[sd_config_name],
)
with gr.Row(elem_classes=["fill"]):
sd_json.render()
save_sd_config.click(
fn=save_sd_cfg,
inputs=[sd_json, sd_config_name],
@@ -708,7 +708,6 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
value=f"{sd_model_info}\n"
f"Images will be saved at "
f"{get_generated_imgs_path()}",
lines=2,
elem_id="std_output",
show_label=True,
label="Log",
@@ -718,6 +717,8 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
logger.read_sd_logs, None, std_output, every=1
)
sd_status = gr.Textbox(visible=False)
with gr.Tab(label="Automation", id=104) as sd_tab_automation:
pass
pull_kwargs = dict(
fn=pull_sd_configs,
@@ -739,9 +740,8 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
custom_vae,
precision,
device,
target_triple,
ondemand,
compiled_pipeline,
repeatable_seeds,
resample_type,
cnet_config,
embeddings_config,
@@ -758,7 +758,7 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
)
gen_kwargs = dict(
fn=amdshark_sd_fn_dict_input,
fn=shark_sd_fn_dict_input,
inputs=[sd_json],
outputs=[
sd_gallery,

View File

@@ -10,8 +10,8 @@ def resource_path(relative_path):
return os.path.join(base_path, relative_path)
amdlogo_loc = resource_path("logos/amd-logo.jpg")
amdicon_loc = resource_path("logos/amd-icon.jpg")
nodlogo_loc = resource_path("logos/nod-logo.png")
nodicon_loc = resource_path("logos/nod-icon.png")
class HSLHue(IntEnum):

View File

@@ -4,25 +4,50 @@ import glob
from datetime import datetime as dt
from pathlib import Path
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
checkpoints_filetypes = (
"*.ckpt",
"*.safetensors",
)
from apps.amdshark_studio.web.utils.default_configs import default_sd_configs
default_sd_config = r"""{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "AMD Radeon RX 7900 XTX => vulkan://0",
"ondemand": false,
"repeatable_seeds": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
def write_default_sd_configs(path):
for key in default_sd_configs.keys():
config_fpath = os.path.join(path, key)
with open(config_fpath, "w") as f:
f.write(default_sd_configs[key])
def write_default_sd_config(path):
with open(path, "w") as f:
f.write(default_sd_config)
def safe_name(name):
return name.split("/")[-1].replace("-", "_")
return name.replace("/", "_").replace("-", "_")
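# e.g. safe_name("stabilityai/stable-diffusion-2-1-base") now yields
# "stabilityai_stable_diffusion_2_1_base" rather than
# "stable_diffusion_2_1_base", presumably keeping the org prefix to avoid
# collisions between models sharing a tail name.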
def get_path_stem(path):

View File

@@ -1,4 +1,4 @@
# As AMDSHARK has evolved more columns have been added to images_details.csv. However, since
# As SHARK has evolved more columns have been added to images_details.csv. However, since
# no version of the CSV has any headers (yet), we don't actually have anything within the
# file that tells us which parameter each column is for. So this is a list of known patterns,
# indexed by length, which is what we have to use to guess which columns are the
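# A minimal sketch of that length-based guess (pattern contents here are
# hypothetical; the real lists live in this module):
#   KNOWN_PATTERNS = {10: [...], 12: [...]}  # column names keyed by row length
#   names = KNOWN_PATTERNS.get(len(row), [])
#   metadata = dict(zip(names, row))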

View File

@@ -1,11 +1,12 @@
import re
from pathlib import Path
from apps.amdshark_studio.web.utils.file_utils import (
from apps.shark_studio.web.utils.file_utils import (
get_checkpoint_pathfile,
)
from apps.amdshark_studio.api.sd import EMPTY_SD_MAP as sd_model_map
from apps.amdshark_studio.modules.schedulers import (
from apps.shark_studio.api.sd import (
sd_model_map,
)
from apps.shark_studio.modules.schedulers import (
scheduler_model_map,
)

View File

@@ -1,4 +1,4 @@
import apps.amdshark_studio.web.utils.globals as global_obj
import apps.shark_studio.web.utils.globals as global_obj
import gc

View File

@@ -2,9 +2,9 @@ import os
import shutil
from time import time
from apps.amdshark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
amdshark_tmp = cmd_opts.tmp_dir # os.path.join(os.getcwd(), "amdshark_tmp/")
shark_tmp = cmd_opts.tmp_dir # os.path.join(os.getcwd(), "shark_tmp/")
def clear_tmp_mlir():
@@ -12,20 +12,20 @@ def clear_tmp_mlir():
print("Clearing .mlir temporary files from a prior run. This may take some time...")
mlir_files = [
filename
for filename in os.listdir(amdshark_tmp)
if os.path.isfile(os.path.join(amdshark_tmp, filename))
for filename in os.listdir(shark_tmp)
if os.path.isfile(os.path.join(shark_tmp, filename))
and filename.endswith(".mlir")
]
for filename in mlir_files:
os.remove(os.path.join(amdshark_tmp, filename))
os.remove(shark_tmp + filename)
print(f"Clearing .mlir temporary files took {time() - cleanup_start:.4f} seconds.")
def clear_tmp_imgs():
# tell gradio to use a directory under amdshark_tmp for its temporary
# tell gradio to use a directory under shark_tmp for its temporary
# image files unless somewhere else has been set
if "GRADIO_TEMP_DIR" not in os.environ:
os.environ["GRADIO_TEMP_DIR"] = os.path.join(amdshark_tmp, "gradio")
os.environ["GRADIO_TEMP_DIR"] = os.path.join(shark_tmp, "gradio")
print(
f"gradio temporary image cache located at {os.environ['GRADIO_TEMP_DIR']}. "
@@ -43,22 +43,22 @@ def clear_tmp_imgs():
f"Clearing gradio UI temporary image files took {time() - cleanup_start:.4f} seconds."
)
# older AMDSHARK versions had to workaround gradio bugs and stored things differently
# older SHARK versions had to workaround gradio bugs and stored things differently
else:
image_files = [
filename
for filename in os.listdir(amdshark_tmp)
if os.path.isfile(os.path.join(amdshark_tmp, filename))
for filename in os.listdir(shark_tmp)
if os.path.isfile(os.path.join(shark_tmp, filename))
and filename.startswith("tmp")
and filename.endswith(".png")
]
if len(image_files) > 0:
print(
"Clearing temporary image files of a prior run of a previous AMDSHARK version. This may take some time..."
"Clearing temporary image files of a prior run of a previous SHARK version. This may take some time..."
)
cleanup_start = time()
for filename in image_files:
os.remove(amdshark_tmp + filename)
os.remove(shark_tmp + filename)
print(
f"Clearing temporary image files took {time() - cleanup_start:.4f} seconds."
)
@@ -67,9 +67,9 @@ def clear_tmp_imgs():
def config_tmp():
# create amdshark_tmp if it does not exist
if not os.path.exists(amdshark_tmp):
os.mkdir(amdshark_tmp)
# create shark_tmp if it does not exist
if not os.path.exists(shark_tmp):
os.mkdir(shark_tmp)
clear_tmp_mlir()
clear_tmp_imgs()

View File

@@ -1,6 +1,6 @@
import torch
from amdshark.parser import parser
from benchmarks.hf_transformer import AMDSharkHFBenchmarkRunner
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
parser.add_argument(
"--model_name",
@@ -13,10 +13,10 @@ load_args, unknown = parser.parse_known_args()
if __name__ == "__main__":
model_name = load_args.model_name
test_input = torch.randint(2, (1, 128))
amdshark_module = AMDSharkHFBenchmarkRunner(
shark_module = SharkHFBenchmarkRunner(
model_name, (test_input,), jit_trace=True
)
amdshark_module.benchmark_c()
amdshark_module.benchmark_python((test_input,))
amdshark_module.benchmark_torch(test_input)
amdshark_module.benchmark_onnx(test_input)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)

View File

@@ -1,6 +1,6 @@
import torch
from amdshark.amdshark_benchmark_runner import AMDSharkBenchmarkRunner
from amdshark.parser import amdshark_args
from shark.shark_benchmark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import (
run_pytorch,
@@ -42,8 +42,8 @@ class HuggingFaceLanguage(torch.nn.Module):
return self.model.forward(tokens)[0]
class AMDSharkHFBenchmarkRunner(AMDSharkBenchmarkRunner):
# AMDSharkRunner derived class with Benchmarking capabilities.
class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
model_name: str,
@@ -54,14 +54,14 @@ class AMDSharkHFBenchmarkRunner(AMDSharkBenchmarkRunner):
from_aot: bool = False,
frontend: str = "torch",
):
self.device = device if device is not None else amdshark_args.device
self.device = device if device is not None else shark_args.device
if self.device == "gpu":
raise ValueError(
"Currently GPU Benchmarking is not supported due to OOM from ORT."
)
self.model_name = model_name
model = HuggingFaceLanguage(model_name)
AMDSharkBenchmarkRunner.__init__(
SharkBenchmarkRunner.__init__(
self,
model,
input,
@@ -90,13 +90,13 @@ class AMDSharkHFBenchmarkRunner(AMDSharkBenchmarkRunner):
num_threads,
batch_sizes,
sequence_lengths,
amdshark_args.num_iterations,
shark_args.num_iterations,
False,
cache_dir,
verbose,
)
print(
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{amdshark_args.num_iterations}"
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
# TODO: Currently non-functional due to TF runtime error. There might be some issue with, initializing TF.
@@ -118,12 +118,12 @@ class AMDSharkHFBenchmarkRunner(AMDSharkBenchmarkRunner):
num_threads,
batch_sizes,
sequence_lengths,
amdshark_args.num_iterations,
shark_args.num_iterations,
cache_dir,
verbose,
)
print(
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{amdshark_args.num_iterations}"
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_onnx(self, inputs):
@@ -162,7 +162,7 @@ for currently supported models. Exiting benchmark ONNX."
num_threads,
batch_sizes,
sequence_lengths,
amdshark_args.num_iterations,
shark_args.num_iterations,
input_counts,
optimize_onnx,
validate_onnx,
@@ -177,5 +177,5 @@ for currently supported models. Exiting benchmark ONNX."
onnx_args,
)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{amdshark_args.num_iterations}"
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)

View File

@@ -1,5 +1,5 @@
from amdshark.amdshark_inference import AMDSharkInference
from amdshark.iree_utils._common import check_device_drivers
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers
import torch
import tensorflow as tf
@@ -164,7 +164,7 @@ def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased"
)
amdshark_module = AMDSharkInference(
shark_module = SharkInference(
model,
(test_input,),
device=device,
@@ -174,8 +174,8 @@ def test_bench_minilm_torch(dynamic, device):
)
try:
        # If benchmarking is successful, assert success/True.
amdshark_module.compile()
amdshark_module.benchmark_all((test_input,))
shark_module.compile()
shark_module.benchmark_all((test_input,))
assert True
except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
@@ -189,7 +189,7 @@ def test_bench_minilm_torch(dynamic, device):
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
amdshark_module = AMDSharkInference(
shark_module = SharkInference(
model,
test_input,
device=device,
@@ -199,9 +199,9 @@ def test_bench_distilbert(dynamic, device):
)
try:
        # If benchmarking is successful, assert success/True.
amdshark_module.set_frontend("tensorflow")
amdshark_module.compile()
amdshark_module.benchmark_all(test_input)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
@@ -212,7 +212,7 @@ def test_bench_distilbert(dynamic, device):
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
amdshark_module = AMDSharkInference(
shark_module = SharkInference(
model,
test_input,
device=device,
@@ -222,9 +222,9 @@ def test_bench_xlm_roberta(dynamic, device):
)
try:
        # If benchmarking is successful, assert success/True.
amdshark_module.set_frontend("tensorflow")
amdshark_module.compile()
amdshark_module.benchmark_all(test_input)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
        # If anything happens during benchmarking, assert False/failure.

View File

@@ -1,5 +1,5 @@
import torch
from benchmarks.hf_transformer import AMDSharkHFBenchmarkRunner
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest
@@ -27,17 +27,17 @@ def test_HFbench_minilm_torch(dynamic, device):
model_name = "bert-base-uncased"
test_input = torch.randint(2, (1, 128))
try:
amdshark_module = AMDSharkHFBenchmarkRunner(
shark_module = SharkHFBenchmarkRunner(
model_name,
(test_input,),
jit_trace=True,
dynamic=dynamic,
device=device,
)
amdshark_module.benchmark_c()
amdshark_module.benchmark_python((test_input,))
amdshark_module.benchmark_torch(test_input)
amdshark_module.benchmark_onnx(test_input)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)
        # If benchmarking is successful, assert success/True.
assert True
except Exception as e:

View File

@@ -39,7 +39,7 @@ FROM base-${TARGETARCH}
ARG TARGETARCH
LABEL maintainer "AMDSHARK<stdin@nod.com>"
LABEL maintainer "SHARK<stdin@nod.com>"
# Register the ROCM package repository, and install rocm-dev package
ARG ROCM_VERSION=5.6

View File

@@ -15,7 +15,7 @@ Install Nvidia [Container and register it](https://docs.nvidia.com/datacenter/cl
Build the docker image with:
```
docker build . -f Dockerfile-ubuntu-22.04 -t amdshark/dev-22.04:5.6 --build-arg=ROCM_VERSION=5.6 --build-arg=AMDGPU_VERSION=5.6 --build-arg=APT_PREF="Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" --build-arg=IMAGE_NAME=nvidia/cuda --build-arg=TARGETARCH=amd64
docker build . -f Dockerfile-ubuntu-22.04 -t shark/dev-22.04:5.6 --build-arg=ROCM_VERSION=5.6 --build-arg=AMDGPU_VERSION=5.6 --build-arg=APT_PREF="Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" --build-arg=IMAGE_NAME=nvidia/cuda --build-arg=TARGETARCH=amd64
```
Run with:
@@ -23,19 +23,19 @@ Run with:
*CPU*
```
docker run -it docker.io/amdshark/dev-22.04:5.6
docker run -it docker.io/shark/dev-22.04:5.6
```
*Nvidia GPU*
```
docker run --rm -it --gpus all docker.io/amdshark/dev-22.04:5.6
docker run --rm -it --gpus all docker.io/shark/dev-22.04:5.6
```
*AMD GPUs*
```
docker run --device /dev/kfd --device /dev/dri docker.io/amdshark/dev-22.04:5.6
docker run --device /dev/kfd --device /dev/dri docker.io/shark/dev-22.04:5.6
```
More AMD instructions are [here](https://docs.amd.com/en/latest/deploy/docker.html)

View File

@@ -13,7 +13,7 @@ parser.add_argument("-n", "--newfile")
parser.add_argument(
"-g",
"--golden_url",
default="https://storage.googleapis.com/amdshark_tank/testdata/cyberpunk_fores_42_0_230119_021148.png",
default="https://storage.googleapis.com/shark_tank/testdata/cyberpunk_fores_42_0_230119_021148.png",
)
@@ -36,7 +36,7 @@ def compare_images(new_filename, golden_filename, upload=False):
"gsutil",
"cp",
new_filename,
"gs://amdshark_tank/testdata/builder/",
"gs://shark_tank/testdata/builder/",
]
)
raise AssertionError("new and golden not close")

View File

@@ -1,6 +1,6 @@
#!/bin/bash
IMPORTER=1 BENCHMARK=1 NO_BREVITAS=1 ./setup_venv.sh
source $GITHUB_WORKSPACE/amdshark.venv/bin/activate
source $GITHUB_WORKSPACE/shark.venv/bin/activate
python build_tools/stable_diffusion_testing.py --gen
python tank/generate_amdsharktank.py
python tank/generate_sharktank.py

View File

@@ -5,7 +5,7 @@ from apps.stable_diffusion.src.utils.resources import (
get_json_file,
)
from datetime import datetime as dt
from amdshark.amdshark_downloader import download_public_file
from shark.shark_downloader import download_public_file
from image_comparison import compare_images
import argparse
from glob import glob
@@ -178,7 +178,7 @@ def test_loop(
print("Successfully generated image")
os.makedirs("./test_images/golden/" + model_name, exist_ok=True)
download_public_file(
"gs://amdshark_tank/testdata/golden/" + model_name,
"gs://shark_tank/testdata/golden/" + model_name,
"./test_images/golden/" + model_name,
)
test_file_path = os.path.join(
@@ -239,7 +239,7 @@ def test_loop(
def prepare_artifacts():
gen_path = os.path.join(os.getcwd(), "gen_amdshark_tank")
gen_path = os.path.join(os.getcwd(), "gen_shark_tank")
if not os.path.isdir(gen_path):
os.mkdir(gen_path)
for dirname in os.listdir(os.getcwd()):

View File

@@ -1,5 +1,5 @@
def pytest_addoption(parser):
# Attaches AMDSHARK command-line arguments to the pytest machinery.
# Attaches SHARK command-line arguments to the pytest machinery.
parser.addoption(
"--benchmark",
action="store",
@@ -24,7 +24,7 @@ def pytest_addoption(parser):
"--save_repro",
action="store_true",
default="False",
help="Pass option to save reproduction artifacts to AMDSHARK/amdshark_tmp/test_case/",
help="Pass option to save reproduction artifacts to SHARK/shark_tmp/test_case/",
)
parser.addoption(
"--save_fails",
@@ -42,13 +42,13 @@ def pytest_addoption(parser):
"--update_tank",
action="store_true",
default="False",
help="Update local amdshark tank with latest artifacts if model artifact hash mismatched.",
help="Update local shark tank with latest artifacts if model artifact hash mismatched.",
)
parser.addoption(
"--force_update_tank",
action="store_true",
default="False",
help="Force-update local amdshark tank with artifacts from specified amdshark_tank URL (defaults to nightly).",
help="Force-update local shark tank with artifacts from specified shark_tank URL (defaults to nightly).",
)
parser.addoption(
"--ci_sha",
@@ -60,19 +60,19 @@ def pytest_addoption(parser):
"--local_tank_cache",
action="store",
default=None,
help="Specify the directory in which all downloaded amdshark_tank artifacts will be cached.",
help="Specify the directory in which all downloaded shark_tank artifacts will be cached.",
)
parser.addoption(
"--tank_url",
type=str,
default="gs://amdshark_tank/nightly",
help="URL to bucket from which to download AMDSHARK tank artifacts. Default is gs://amdshark_tank/latest",
default="gs://shark_tank/nightly",
help="URL to bucket from which to download SHARK tank artifacts. Default is gs://shark_tank/latest",
)
parser.addoption(
"--tank_prefix",
type=str,
default=None,
help="Prefix to gs://amdshark_tank/ model directories from which to download AMDSHARK tank artifacts. Default is nightly.",
help="Prefix to gs://shark_tank/ model directories from which to download SHARK tank artifacts. Default is nightly.",
)
parser.addoption(
"--benchmark_dispatches",

View File

@@ -28,7 +28,7 @@ include(FetchContent)
FetchContent_Declare(
iree
GIT_REPOSITORY https://github.com/nod-ai/srt.git
GIT_TAG amdshark
GIT_TAG shark
GIT_SUBMODULES_RECURSE OFF
GIT_SHALLOW OFF
GIT_PROGRESS ON

View File

@@ -1,7 +1,7 @@
# AMDSHARK C/C++ Samples
# SHARK C/C++ Samples
These C/C++ samples can be built using CMake. The samples depend on the main
AMDSHARK-Runtime project's C/C++ sources, including both the runtime and the compiler.
SHARK-Runtime project's C/C++ sources, including both the runtime and the compiler.
Individual samples may require additional dependencies. Watch CMake's output
for information about which ones you are missing.
@@ -9,10 +9,10 @@ for information about which you are missing for individual samples.
On Windows we recommend using https://github.com/microsoft/vcpkg to download packages for
your system. The general setup flow looks like
*Install and activate AMDSHARK*
*Install and activate SHARK*
```bash
source amdshark.venv/bin/activate #follow main repo instructions to set up your venv
source shark.venv/bin/activate #follow main repo instructions to set up your venv
```
*Install Dependencies*
@@ -39,7 +39,7 @@ cmake --build build/
*Prepare the model*
```bash
wget https://storage.googleapis.com/amdshark_tank/latest/resnet50_tf/resnet50_tf.mlir
wget https://storage.googleapis.com/shark_tank/latest/resnet50_tf/resnet50_tf.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvmcpu-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux resnet50_tf.mlir -o resnet50_tf.vmfb
```
*Prepare the input*
@@ -64,19 +64,19 @@ A tool for benchmarking other models is built and can be invoked with a command
```
see `./build/vulkan_gui/iree-vulkan-gui --help` for an explanation of the function input. For example, the Stable Diffusion UNet can be tested with the following commands:
```bash
wget https://storage.googleapis.com/amdshark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux stable_diff_tf.mlir -o stable_diff_tf.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=2x4x64x64xf32 --function_input=1xf32 --function_input=2x77x768xf32
```
VAE and Autoencoder are also available
```bash
# VAE
wget https://storage.googleapis.com/amdshark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux vae.mlir -o vae.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x4x64x64xf32
# CLIP Autoencoder
wget https://storage.googleapis.com/amdshark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux clip_autoencoder.mlir -o clip_autoencoder.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x77xi32 --function_input=1x77xi32
```

View File

@@ -1,6 +1,6 @@
import numpy as np
import tensorflow as tf
from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_inference import SharkInference
def load_and_preprocess_image(fname: str):

View File

@@ -1,12 +1,12 @@
# Dataset annotation tool
AMDSHARK annotator for adding or modifying prompts of dataset images
SHARK annotator for adding or modifying prompts of dataset images
## Set up
Activate the AMDSHARK Python virtual environment and install additional packages
Activate the SHARK Python virtual environment and install additional packages
```shell
source ../amdshark.venv/bin/activate
source ../shark.venv/bin/activate
pip install -r requirements.txt
```

View File

@@ -8,12 +8,12 @@ from PIL import Image
from utils import get_datasets
amdshark_root = Path(__file__).parent.parent
demo_css = amdshark_root.joinpath("web/demo.css").resolve()
nodlogo_loc = amdshark_root.joinpath("web/models/stable_diffusion/logos/amd-logo.jpg")
shark_root = Path(__file__).parent.parent
demo_css = shark_root.joinpath("web/demo.css").resolve()
nodlogo_loc = shark_root.joinpath("web/models/stable_diffusion/logos/nod-logo.png")
with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as shark_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Column(scale=1, elem_id="demo_title_outer"):
@@ -62,7 +62,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
return gr.Dropdown.update(value=None, choices=[])
# create the dataset dir if it doesn't exist and download the prompt file
dataset_path = str(amdshark_root) + "/dataset/" + dataset
dataset_path = str(shark_root) + "/dataset/" + dataset
if not os.path.exists(dataset_path):
os.mkdir(dataset_path)
@@ -89,7 +89,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
img_gs_path = args.gs_url + "/" + dataset + "/" + image_name
img_sub_path = "/".join(image_name.split("/")[:-1])
img_dst_path = (
str(amdshark_root) + "/dataset/" + dataset + "/" + img_sub_path + "/"
str(shark_root) + "/dataset/" + dataset + "/" + img_sub_path + "/"
)
if not os.path.exists(img_dst_path):
os.mkdir(img_dst_path)
@@ -126,7 +126,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
idx = prompt_data[image_name].index(prompts)
prompt_data[image_name][idx] = prompt
prompt_path = str(amdshark_root) + "/dataset/" + dataset + "/metadata.jsonl"
prompt_path = str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
# write prompt jsonlines file
with open(prompt_path, "w") as f:
for key, value in prompt_data.items():
@@ -153,7 +153,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
return
prompt_data[image_name].remove(prompts)
prompt_path = str(amdshark_root) + "/dataset/" + dataset + "/metadata.jsonl"
prompt_path = str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
# write prompt jsonlines file
with open(prompt_path, "w") as f:
for key, value in prompt_data.items():
@@ -178,7 +178,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
return
# remove local image
img_path = str(amdshark_root) + "/dataset/" + dataset + "/" + image_name
img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
os.system(f'rm "{img_path}"')
# get the index for the back image
idx = images[dataset].index(image_name)
@@ -196,7 +196,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
return
# remove local image
img_path = str(amdshark_root) + "/dataset/" + dataset + "/" + image_name
img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
os.system(f'rm "{img_path}"')
# get the index for the next image
idx = images[dataset].index(image_name)
@@ -214,7 +214,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
return
# upload prompt and remove local data
dataset_path = str(amdshark_root) + "/dataset/" + dataset
dataset_path = str(shark_root) + "/dataset/" + dataset
dataset_gs_path = args.gs_url + "/" + dataset + "/"
os.system(f'gsutil cp "{dataset_path}/metadata.jsonl" "{dataset_gs_path}"')
os.system(f'rm -rf "{dataset_path}"')
@@ -225,7 +225,7 @@ with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as amdshark_web:
if __name__ == "__main__":
amdshark_web.launch(
shark_web.launch(
share=args.share,
inbrowser=True,
server_name="0.0.0.0",

View File

@@ -1,3 +1,3 @@
# AMDSHARK Annotator
# SHARK Annotator
gradio==4.19.2
jsonlines

View File

@@ -1,23 +1,23 @@
# Overview
This document is intended to provide a starting point for profiling with AMDSHARK/IREE. At its core
[AMDSHARK](https://github.com/nod-ai/AMD-SHARK-Studio/tree/main/tank) is a Python API that links the MLIR lowerings from various
This document is intended to provide a starting point for profiling with SHARK/IREE. At its core
[SHARK](https://github.com/nod-ai/SHARK/tree/main/tank) is a Python API that links the MLIR lowerings from various
frameworks + frontends (e.g. PyTorch -> Torch-MLIR) with the compiler + runtime offered by IREE. More information
on model coverage and framework support can be found [here](https://github.com/nod-ai/AMD-SHARK-Studio/tree/main/tank). The intended
use case for AMDSHARK is the compilation and deployment of performant, state-of-the-art AI models.
on model coverage and framework support can be found [here](https://github.com/nod-ai/SHARK/tree/main/tank). The intended
use case for SHARK is the compilation and deployment of performant, state-of-the-art AI models.
![image](https://user-images.githubusercontent.com/22101546/217151219-9bb184a3-cfb9-4788-bb7e-5b502953525c.png)
## Benchmarking with AMDSHARK
## Benchmarking with SHARK
TODO: Expand this section.
AMDSHARK offers native benchmarking support, although because it is model focused, fine-grained profiling is
less exposed than in the common "model benchmarking suite" use case that AMDSHARK is good at.
SHARK offers native benchmarking support, although because it is model focused, fine-grained profiling is
less exposed than in the common "model benchmarking suite" use case that SHARK is good at.
### AMDSharkBenchmarkRunner
### SharkBenchmarkRunner
AMDSharkBenchmarkRunner is a class designed for benchmarking models against other runtimes.
SharkBenchmarkRunner is a class designed for benchmarking models against other runtimes.
TODO: List supported runtimes for comparison + example on how to benchmark with it.
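
Until that TODO is filled in, the HF benchmark tests earlier in this diff already show the shape of the API; the sketch below is distilled from them (the model name, input shape, and runner methods are taken verbatim from that test):

```python
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner

# Benchmark a HuggingFace model against the C, Python, Torch, and ONNX runners.
test_input = torch.randint(2, (1, 128))
runner = SharkHFBenchmarkRunner(
    "bert-base-uncased",
    (test_input,),
    jit_trace=True,
    dynamic=False,
    device="cpu",
)
runner.benchmark_c()
runner.benchmark_python((test_input,))
runner.benchmark_torch(test_input)
runner.benchmark_onnx(test_input)
```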
## Directly profiling IREE
@@ -30,11 +30,11 @@ focus on bridging the gap between the two.
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling_vulkan_gpu.md
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling_cpu_events.md
Internally, AMDSHARK builds a pair of IREE commands to compile + run a model. At a high level the flow starts with the
Internally, SHARK builds a pair of IREE commands to compile + run a model. At a high level the flow starts with the
model represented with a high level dialect (commonly Linalg) and is compiled to a flatbuffer (.vmfb) that
the runtime is capable of ingesting. At this point (with potentially a few runtime flags) the compiled model is then run
through the IREE runtime. This is all facilitated with the IREE Python bindings, which offer a convenient method
to capture the compile command AMDSHARK comes up with. This is done by setting the environment variable
to capture the compile command SHARK comes up with. This is done by setting the environment variable
`IREE_SAVE_TEMPS` to point to a directory of choice, e.g. for stable diffusion
```
# Linux
@@ -63,7 +63,7 @@ Where `${NUM}` is the dispatch number that you want to benchmark/profile in isol
### Enabling Tracy for Vulkan profiling
To begin profiling with Tracy, a build of IREE runtime with tracing enabled is needed. AMDSHARK-Runtime (SRT) builds an
To begin profiling with Tracy, a build of IREE runtime with tracing enabled is needed. SHARK-Runtime (SRT) builds an
instrumented version alongside the normal version nightly (.whls typically found [here](https://github.com/nod-ai/SRT/releases)); however, this is only available for Linux. For Windows, tracing can be turned on with a CMake flag.
```
$env:IREE_ENABLE_RUNTIME_TRACING="ON"
@@ -92,7 +92,7 @@ The trace can then be viewed with
iree-tracy-profiler /path/to/trace.tracy
```
Capturing a runtime trace will work with any IREE tooling that uses the runtime. For example, `iree-benchmark-module`
can be used for benchmarking an individual module. Importantly, this means that any AMDSHARK script can be profiled with Tracy.
can be used for benchmarking an individual module. Importantly, this means that any SHARK script can be profiled with Tracy.
NOTE: Not all backends have the same Tracy support. This writeup focuses on the CPU/Vulkan backends, but support for tracing on CUDA was recently added (it requires the `--cuda_tracing` flag).
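
As a concrete illustration of the `IREE_SAVE_TEMPS` workflow described above, a minimal sketch (the directory is an arbitrary writable path, not one these docs mandate):

```python
import os

# Set IREE_SAVE_TEMPS before compiling anything with SHARK so the generated
# iree-compile command lines and intermediate artifacts are written there.
os.environ["IREE_SAVE_TEMPS"] = "/tmp/iree_temps"
```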

View File

@@ -1,21 +1,21 @@
# Overview
This document is intended to provide a starting point for using AMDSHARK Stable Diffusion with Blender.
This document is intended to provide a starting point for using SHARK Stable Diffusion with Blender.
We currently make use of the [AI-Render Plugin](https://github.com/benrugg/AI-Render) to integrate with Blender.
## Setup AMDSHARK and prerequisites:
## Setup SHARK and prerequisites:
* Download the latest AMDSHARK SD webui .exe from [here](https://github.com/nod-ai/AMD-SHARK-Studio/releases) or follow instructions on the [README](https://github.com/nod-ai/AMD-SHARK-Studio#readme)
* Once you have the .exe where you would like AMDSHARK to install, run the .exe from terminal/PowerShell with the `--api` flag:
* Download the latest SHARK SD webui .exe from [here](https://github.com/nod-ai/SHARK/releases) or follow instructions on the [README](https://github.com/nod-ai/SHARK#readme)
* Once you have the .exe where you would like SHARK to install, run the .exe from terminal/PowerShell with the `--api` flag:
```
## Run the .exe in API mode:
.\amdshark_sd_<date>_<ver>.exe --api
.\shark_sd_<date>_<ver>.exe --api
## For example:
.\amdshark_sd_20230411_671.exe --api --server_port=8082
.\shark_sd_20230411_671.exe --api --server_port=8082
## From the base directory of a source clone of AMDSHARK:
## From the base directory of a source clone of SHARK:
./setup_venv.ps1
python apps\stable_diffusion\web\index.py --api
@@ -38,7 +38,7 @@ Your AI-Render preferences should be configured as shown; the highlighted part s
The [AI-Render README](https://github.com/benrugg/AI-Render/blob/main/README.md) has more details on installation and usage, as well as video tutorials.
## Using AI-Render + AMDSHARK in your Blender project
## Using AI-Render + SHARK in your Blender project
- In the Render Properties tab, in the AI-Render dropdown, enable AI-Render.
@@ -48,7 +48,7 @@ The [AI-Render README](https://github.com/benrugg/AI-Render/blob/main/README.md)
![image](https://user-images.githubusercontent.com/87458719/231394288-0c4ab8c5-dc30-4dbe-8bc1-7520ded5efe8.png)
- From here, you can enter a prompt and configure img2img Stable Diffusion parameters, and AI-Render will run AMDSHARK SD img2img on the rendered scene.
- From here, you can enter a prompt and configure img2img Stable Diffusion parameters, and AI-Render will run SHARK SD img2img on the rendered scene.
- AI-Render has useful presets for aesthetic styles, so you should be able to keep your subject prompt simple and focus on creating a decent Blender scene to start from.
![image](https://user-images.githubusercontent.com/87458719/231440729-2fe69586-41cb-4274-9ce7-f6c08def600b.png)

View File

@@ -1,6 +1,6 @@
# Overview
In [1.47.2](https://github.com/LostRuins/koboldcpp/releases/tag/v1.47.2) [Koboldcpp](https://github.com/LostRuins/koboldcpp) added AUTOMATIC1111 integration for image generation. Since AMDSHARK implements a small subset of the A1111 REST API, you can also use AMDSHARK for this. This document gives a starting point for how to get this working.
In [1.47.2](https://github.com/LostRuins/koboldcpp/releases/tag/v1.47.2) [Koboldcpp](https://github.com/LostRuins/koboldcpp) added AUTOMATIC1111 integration for image generation. Since SHARK implements a small subset of the A1111 REST API, you can also use SHARK for this. This document gives a starting point for how to get this working.
## In Action
@@ -8,52 +8,52 @@ In [1.47.2](https://github.com/LostRuins/koboldcpp/releases/tag/v1.47.2) [Kobold
## Memory considerations
Since both Koboldcpp and AMDSHARK will use VRAM on your graphics card(s), running both at the same time on the same card will impose extra limitations on the model size you can fully offload to the video card in Koboldcpp. For me, on an RX 7900 XTX on Windows with 24 GiB of VRAM, the limit was about a 13 billion parameter model with Q5_K_M quantisation.
Since both Koboldcpp and SHARK will use VRAM on your graphics card(s), running both at the same time on the same card will impose extra limitations on the model size you can fully offload to the video card in Koboldcpp. For me, on an RX 7900 XTX on Windows with 24 GiB of VRAM, the limit was about a 13 billion parameter model with Q5_K_M quantisation.
## Performance Considerations
When using AMDSHARK for image generation, especially with Koboldcpp, you need to be aware that it is currently designed to pay a large upfront time cost compiling and tuning the model you select in order to get an optimal per-image generation time. You need to be the judge as to whether this trade-off is going to be worth it for your OS and hardware combination.
When using SHARK for image generation, especially with Koboldcpp, you need to be aware that it is currently designed to pay a large upfront time cost compiling and tuning the model you select in order to get an optimal per-image generation time. You need to be the judge as to whether this trade-off is going to be worth it for your OS and hardware combination.
It means that the first time you run a particular Stable Diffusion model for a particular combination of image size, LoRA, and VAE, AMDSHARK will spend *many minutes* - even on a beefy machine with a very fast graphics card and lots of memory - building that model combination just so it can save it to disk. It may even have to go away and download the model if it doesn't already have it locally. Once it has done its build of a model combination for your hardware once, it shouldn't need to do it again until you upgrade to a newer AMDSHARK version, install different drivers or change your graphics hardware. It will just upload the files it generated the first time to your graphics card and proceed from there.
It means that the first time you run a particular Stable Diffusion model for a particular combination of image size, LoRA, and VAE, SHARK will spend *many minutes* - even on a beefy machine with a very fast graphics card and lots of memory - building that model combination just so it can save it to disk. It may even have to go away and download the model if it doesn't already have it locally. Once it has done its build of a model combination for your hardware once, it shouldn't need to do it again until you upgrade to a newer SHARK version, install different drivers or change your graphics hardware. It will just upload the files it generated the first time to your graphics card and proceed from there.
This does mean, however, that on a brand new install of AMDSHARK, or for a model you haven't selected before, the first image Koboldcpp requests may look like it is *never* going to finish and that the whole process has broken. Be forewarned, make yourself a cup of coffee, and expect a lot of messages about compilation and tuning from AMDSHARK in the terminal you ran it from.
This does mean, however, that on a brand new install of SHARK, or for a model you haven't selected before, the first image Koboldcpp requests may look like it is *never* going to finish and that the whole process has broken. Be forewarned, make yourself a cup of coffee, and expect a lot of messages about compilation and tuning from SHARK in the terminal you ran it from.
## Setup AMDSHARK and prerequisites:
## Setup SHARK and prerequisites:
* Make sure you have suitable drivers for your graphics card installed. See the prerequisites section of the [README](https://github.com/nod-ai/AMD-SHARK-Studio#readme).
* Download the latest AMDSHARK Studio .exe from [here](https://github.com/nod-ai/AMD-SHARK-Studio/releases) or follow the instructions in the [README](https://github.com/nod-ai/AMD-SHARK-Studio#readme) for an advanced, Linux or Mac install.
* Run AMDSHARK from terminal/PowerShell with the `--api` flag. Since Koboldcpp also expects both CORS support and the image generator to be running on port `7860` rather than AMDSHARK's default of `8080`, also include both the `--api_accept_origin` flag with a suitable origin (use `="*"` to enable all origins) and `--server_port=7860` on the command line. (See [below](#connecting-to-amdshark-on-a-different-address-or-port) if you want to run AMDSHARK on a different port.)
* Make sure you have suitable drivers for your graphics card installed. See the prerequisites section of the [README](https://github.com/nod-ai/SHARK#readme).
* Download the latest SHARK Studio .exe from [here](https://github.com/nod-ai/SHARK/releases) or follow the instructions in the [README](https://github.com/nod-ai/SHARK#readme) for an advanced, Linux or Mac install.
* Run SHARK from terminal/PowerShell with the `--api` flag. Since Koboldcpp also expects both CORS support and the image generator to be running on port `7860` rather than SHARK's default of `8080`, also include both the `--api_accept_origin` flag with a suitable origin (use `="*"` to enable all origins) and `--server_port=7860` on the command line. (See [below](#connecting-to-shark-on-a-different-address-or-port) if you want to run SHARK on a different port.)
```powershell
## Run the .exe in API mode, with CORS support, on the A1111 endpoint port:
.\node_ai_amdshark_studio_<date>_<ver>.exe --api --api_accept_origin="*" --server_port=7860
.\node_ai_shark_studio_<date>_<ver>.exe --api --api_accept_origin="*" --server_port=7860
## Run from the base directory of a source clone of AMDSHARK on Windows:
## Run from the base directory of a source clone of SHARK on Windows:
.\setup_venv.ps1
python .\apps\stable_diffusion\web\index.py --api --api_accept_origin="*" --server_port=7860
## Run from the base directory of a source clone of AMDSHARK on Linux:
## Run from the base directory of a source clone of SHARK on Linux:
./setup_venv.sh
source amdshark.venv/bin/activate
source shark.venv/bin/activate
python ./apps/stable_diffusion/web/index.py --api --api_accept_origin="*" --server_port=7860
## An example giving improved performance on AMD cards using vulkan, that runs on the same port as A1111
.\node_ai_amdshark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860
.\node_ai_shark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860
## Since the API respects most applicable AMDSHARK command-line arguments for options not specified,
## Since the API respects most applicable SHARK command-line arguments for options not specified,
## or currently unimplemented by API, there might be some you want to set, as listed in `--help`
.\node_ai_amdshark_studio_20320901_2525.exe --help
.\node_ai_shark_studio_20320901_2525.exe --help
## For instance, the example above, but with a custom VAE specified
.\node_ai_amdshark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860 --custom_vae="clearvae_v23.safetensors"
.\node_ai_shark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860 --custom_vae="clearvae_v23.safetensors"
## An example with multiple specific CORS origins
python apps/stable_diffusion/web/index.py --api --api_accept_origin="koboldcpp.example.com:7001" --api_accept_origin="koboldcpp.example.com:7002" --server_port=7860
```
AMDSHARK should start in server mode, and you should see something like this:
SHARK should start in server mode, and you should see something like this:
![AMDSHARK API startup](https://user-images.githubusercontent.com/121311569/280556294-c3f7fc1a-c8e2-467d-afe6-365638d6823a.png)
![SHARK API startup](https://user-images.githubusercontent.com/121311569/280556294-c3f7fc1a-c8e2-467d-afe6-365638d6823a.png)
* Note: When running in API mode with `--api`, the .exe will not function as a webUI. Thus, the address or port shown in the terminal output will only be useful for API requests.
@@ -68,9 +68,9 @@ AMDSHARK should start in server mode, and you should see something like this:
![Advanced Settings with 'Local A1111' location](https://user-images.githubusercontent.com/121311569/280556234-6ebc8ba7-1469-442a-93a7-5626a094ddf1.png)
*if you get an error here, see the next section [below](#connecting-to-amdshark-on-a-different-address-or-port)*
*if you get an error here, see the next section [below](#connecting-to-shark-on-a-different-address-or-port)*
* The Stable Diffusion models available to your AMDSHARK instance should now be listed in the box below *generate images*. The default value will usually be set to `stabilityai/stable-diffusion-2-1-base`. Choose the model you want to use for image generation from the list (but see [performance considerations](#performance-considerations)).
* The Stable Diffusion models available to your SHARK instance should now be listed in the box below *generate images*. The default value will usually be set to `stabilityai/stable-diffusion-2-1-base`. Choose the model you want to use for image generation from the list (but see [performance considerations](#performance-considerations)).
* You should now be ready to generate images, either by clicking the 'Add Img' button above the text entry box:
![Add Image Button](https://user-images.githubusercontent.com/121311569/280556161-846c7883-4a83-4458-a56a-bd9f93ca354c.png)
@@ -85,18 +85,18 @@ AMDSHARK should start in server mode, and you should see something like this:
![Selecting the 'styles' button](https://user-images.githubusercontent.com/121311569/280556694-55cd1c55-a059-4b54-9293-63d66a32368e.png)
This will bring up a dialog box where you can enter a short text that will be sent as a prefix to the prompt sent to AMDSHARK:
This will bring up a dialog box where you can enter a short text that will be sent as a prefix to the prompt sent to SHARK:
![Entering extra image styles](https://user-images.githubusercontent.com/121311569/280556172-4aab9794-7a77-46d7-bdda-43df570ad19a.png)
## Connecting to AMDSHARK on a different address or port
## Connecting to SHARK on a different address or port
If you didn't set the port to `--server_port=7860` when starting AMDSHARK, or you are running it on a different machine on your network than Koboldcpp, or than where you are running Koboldcpp's kdlite client frontend, then you very likely got the following error:
If you didn't set the port to `--server_port=7860` when starting SHARK, or you are running it on a different machine on your network than Koboldcpp, or than where you are running Koboldcpp's kdlite client frontend, then you very likely got the following error:
![Can't find the A1111 endpoint error](https://user-images.githubusercontent.com/121311569/280555857-601f53dc-35e9-4027-9180-baa61d2393ba.png)
As long as AMDSHARK is running correctly, this means you need to set the URL and port to the correct values in Koboldcpp. For instance, to set the port on which Koboldcpp looks for an image generator to AMDSHARK's default of 8080:
As long as SHARK is running correctly, this means you need to set the URL and port to the correct values in Koboldcpp. For instance, to set the port on which Koboldcpp looks for an image generator to SHARK's default of 8080:
* Select the cog icon in the Generate Images section of Advanced settings:
@@ -106,7 +106,7 @@ As long as AMDSHARK is running correctly, this means you need to set the url and
![Changing the endpoint port](https://user-images.githubusercontent.com/121311569/280556170-f8848b7b-6fc9-4cf7-80eb-5c312f332fd9.png)
* When running AMDSHARK on a different machine, you will similarly need to change the host part of the endpoint URL to the hostname or IP address where AMDSHARK is running:
* When running SHARK on a different machine, you will similarly need to change the host part of the endpoint URL to the hostname or IP address where SHARK is running:
![Changing the endpoint hostname](https://user-images.githubusercontent.com/121311569/280556167-c6541dea-0f85-417a-b661-fdf4dc40d05f.png)
@@ -125,16 +125,16 @@ And the same image when clicked on:
![A selected image](https://user-images.githubusercontent.com/121311569/280556216-2ca4c0a4-3889-4ef5-8a09-30084fb34081.jpg)
## Where to find the images in AMDSHARK
## Where to find the images in SHARK
Even though Koboldcpp requests images at a size of 512x512, it resizes them to 256x256, converts them to `.jpeg`, and only shows them at 200x200 in the main text window. It does this so it can save them compactly embedded in your story as a `data://` URI.
However, the images at the original size are saved by AMDSHARK in its `output_dir`, which is usually a folder named for the current date inside the `generated_imgs` folder in the AMDSHARK installation directory.
However, the images at the original size are saved by SHARK in its `output_dir`, which is usually a folder named for the current date inside the `generated_imgs` folder in the SHARK installation directory.
You can browse these either using the Output Gallery tab within the AMDSHARK web UI:
You can browse these either using the Output Gallery tab within the SHARK web UI:
![AMDSHARK web ui output gallery tab](https://user-images.githubusercontent.com/121311569/280556582-9303ca85-2594-4a8c-97a2-fbd72337980b.jpg)
![SHARK web ui output gallery tab](https://user-images.githubusercontent.com/121311569/280556582-9303ca85-2594-4a8c-97a2-fbd72337980b.jpg)
...or by browsing to the `output_dir` in your operating system's file manager:
![AMDSHARK output directory subfolder in Windows File Explorer](https://user-images.githubusercontent.com/121311569/280556297-66173030-2324-415c-a236-ef3fcd73e6ed.jpg)
![SHARK output directory subfolder in Windows File Explorer](https://user-images.githubusercontent.com/121311569/280556297-66173030-2324-415c-a236-ef3fcd73e6ed.jpg)

View File

@@ -1,6 +1,6 @@
# This script toggles commenting/uncommenting to deal with the
# __file__ AttributeError that arises for a few modules in
# `torch/_dynamo/skipfiles.py` (within amdshark.venv)
# `torch/_dynamo/skipfiles.py` (within shark.venv)
from distutils.sysconfig import get_python_lib
import fileinput

View File

@@ -17,12 +17,12 @@ exclude = '''
/(
| apps/stable_diffusion
| apps/language_models
| amdshark
| shark
| benchmarks
| tank
| build
| generated_imgs
| amdshark.venv
| shark.venv
)/
| setup.py
)

View File

@@ -1,3 +1,3 @@
[pytest]
addopts = --verbose -s -p no:warnings
norecursedirs = inference tank/tflite examples benchmarks amdshark apps/amdshark_studio
norecursedirs = inference tank/tflite examples benchmarks shark apps/shark_studio

View File

@@ -1,25 +1,24 @@
-f https://download.pytorch.org/whl/nightly/cpu
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-f https://iree.dev/pip-release-links.html
--pre
setuptools
wheel
torch==2.3.0
iree-turbine @ git+https://github.com/iree-org/iree-turbine.git@main
turbine-models @ git+https://github.com/nod-ai/AMD-SHARK-ModelDev.git@main#subdirectory=models
diffusers @ git+https://github.com/nod-ai/diffusers@0.29.0.dev0-amdshark
brevitas @ git+https://github.com/Xilinx/brevitas.git@6695e8df7f6a2c7715b9ed69c4b78157376bb60b
shark-turbine @ git+https://github.com/iree-org/iree-turbine.git@main
turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine.git@main#subdirectory=models
# AMDSHARK Runner
# SHARK Runner
tqdm
# AMDSHARK Downloader
# SHARK Downloader
google-cloud-storage
# Testing
pytest
pytest-xdist
pytest-forked
Pillow
parameterized
@@ -27,10 +26,8 @@ parameterized
#accelerate is now required for diffusers import from ckpt.
accelerate
scipy
transformers==4.37.1
torchsde # Required for Stable Diffusion SDE schedulers.
ftfy
gradio==4.29.0
gradio==4.19.2
altair
omegaconf
# 0.3.2 doesn't have binaries for arm64

View File

@@ -321,8 +321,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=(
"Exercises the Stable Diffusion REST API of AMDShark. Make sure "
"AMDShark is running in API mode on 127.0.0.1:8080 before running"
"Exercises the Stable Diffusion REST API of Shark. Make sure "
"Shark is running in API mode on 127.0.0.1:8080 before running"
"this script."
),
)
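
To poke the same API by hand, a minimal sketch with `requests` (the `/sdapi/v1/txt2img` route and payload fields follow the A1111 convention this server mimics; exactly which fields SHARK's subset honours is an assumption here):

```python
import requests

# POST a txt2img request to a SHARK instance running in API mode.
resp = requests.post(
    "http://127.0.0.1:8080/sdapi/v1/txt2img",
    json={"prompt": "cyberpunk forest", "steps": 20, "width": 512, "height": 512},
)
resp.raise_for_status()
images = resp.json().get("images", [])  # base64-encoded images, per the A1111 convention
print(f"received {len(images)} image(s)")
```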

View File

@@ -7,21 +7,21 @@ import glob
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
PACKAGE_VERSION = os.environ.get("AMDSHARK_PACKAGE_VERSION") or "2.0.0"
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "2.0.0"
backend_deps = []
setup(
name="nodai-AMDSHARK",
name="nodai-SHARK",
version=f"{PACKAGE_VERSION}",
description="AMDSHARK provides a High Performance Machine Learning Framework",
description="SHARK provides a High Performance Machine Learning Framework",
author="nod.ai",
author_email="stdin@nod.ai",
url="https://nod.ai",
long_description=long_description,
long_description_content_type="text/markdown",
project_urls={
"Code": "https://github.com/nod-ai/AMD-SHARK-Studio",
"Bug Tracker": "https://github.com/nod-ai/AMD-SHARK-Studio/issues",
"Code": "https://github.com/nod-ai/SHARK",
"Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
},
classifiers=[
"Programming Language :: Python :: 3",

View File

@@ -1,9 +1,9 @@
<#
.SYNOPSIS
A script to update and install the AMDSHARK runtime and its dependencies.
A script to update and install the SHARK runtime and its dependencies.
.DESCRIPTION
This script updates and installs the AMDSHARK runtime and its dependencies.
This script updates and installs the SHARK runtime and its dependencies.
It checks the Python version installed and installs any required build
dependencies into a Python virtual environment.
If that environment does not exist, it creates it.
@@ -40,11 +40,11 @@ if ($arguments -eq "--force"){
Deactivate
}
if (Test-Path .\amdshark.venv\) {
if (Test-Path .\shark.venv\) {
Write-Host "removing and recreating venv..."
Remove-Item .\amdshark.venv -Force -Recurse
if (Test-Path .\amdshark.venv\) {
Write-Host 'could not remove .\amdshark.venv - please try running ".\setup_venv.ps1 --force" again!'
Remove-Item .\shark.venv -Force -Recurse
if (Test-Path .\shark.venv\) {
Write-Host 'could not remove .\shark.venv - please try running ".\setup_venv.ps1 --force" again!'
exit 1
}
}
@@ -83,13 +83,10 @@ if (!($PyVer -like "*3.11*") -and !($p -like "*3.11*")) # if 3.11 is not in any
Write-Host "Installing Build Dependencies"
# make sure we really use 3.11 from list, even if it's not the default.
if ($NULL -ne $PyVer) {py -3.11 -m venv .\amdshark.venv\}
else {python -m venv .\amdshark.venv\}
.\amdshark.venv\Scripts\activate
if ($NULL -ne $PyVer) {py -3.11 -m venv .\shark.venv\}
else {python -m venv .\shark.venv\}
.\shark.venv\Scripts\activate
python -m pip install --upgrade pip
pip install wheel
pip install --pre -r requirements.txt
pip install --force-reinstall https://github.com/nod-ai/SRT/releases/download/candidate-20240528.279/iree_compiler-20240528.279-cp311-cp311-win_amd64.whl https://github.com/nod-ai/SRT/releases/download/candidate-20240528.279/iree_runtime-20240528.279-cp311-cp311-win_amd64.whl
pip install -e .
Write-Host "Source your venv with ./amdshark.venv/Scripts/activate"
pip install -r requirements.txt
Write-Host "Source your venv with ./shark.venv/Scripts/activate"

View File

@@ -1,15 +1,15 @@
#!/bin/bash
# Sets up a venv suitable for running samples.
# e.g:
# ./setup_venv.sh #setup a default $PYTHON3 amdshark.venv
# ./setup_venv.sh #setup a default $PYTHON3 shark.venv
# Environment variables used by the script.
# PYTHON=$PYTHON3.10 ./setup_venv.sh #pass a version of $PYTHON to use
# VENV_DIR=myamdshark.venv #create a venv called myamdshark.venv
# VENV_DIR=myshark.venv #create a venv called myshark.venv
# SKIP_VENV=1 #Don't create and activate a Python venv. Use the current environment.
# USE_IREE=1 #use stock IREE instead of Nod.ai's AMDSHARK build
# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
# IMPORTER=1 #Install importer deps
# BENCHMARK=1 #Install benchmark deps
# NO_BACKEND=1 #Don't install iree or amdshark backend
# NO_BACKEND=1 #Don't install iree or shark backend
# if you run the script from a conda env it will install in your conda env
TD="$(cd $(dirname $0) && pwd)"
@@ -35,7 +35,7 @@ fi
if [[ "$SKIP_VENV" != "1" ]]; then
if [[ -z "${CONDA_PREFIX}" ]]; then
# Not a conda env. So create a new VENV dir
VENV_DIR=${VENV_DIR:-amdshark.venv}
VENV_DIR=${VENV_DIR:-shark.venv}
echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
$PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
source "$VENV_DIR/bin/activate" || die "Could not activate venv"
@@ -84,7 +84,21 @@ else
PYTORCH_URL=https://download.pytorch.org/whl/nightly/cpu/
fi
$PYTHON -m pip install --no-warn-conflicts -e . -f ${RUNTIME} -f ${PYTORCH_URL}
$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f ${PYTORCH_URL}
if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
T_VER=$($PYTHON -m pip show torch | grep Version)
T_VER_MIN=${T_VER:14:12}
TV_VER=$($PYTHON -m pip show torchvision | grep Version)
TV_VER_MAJ=${TV_VER:9:6}
$PYTHON -m pip uninstall -y torchvision
$PYTHON -m pip install torchvision==${TV_VER_MAJ}${T_VER_MIN} --no-deps -f https://download.pytorch.org/whl/nightly/cpu/torchvision/
if [ $? -eq 0 ];then
echo "Successfully Installed torch + cu118."
else
echo "Could not install torch + cu118." >&2
fi
fi
if [[ -z "${NO_BREVITAS}" ]]; then
$PYTHON -m pip install git+https://github.com/Xilinx/brevitas.git@dev

28
shark/__init__.py Normal file
View File

@@ -0,0 +1,28 @@
import importlib
import logging
from torch._dynamo import register_backend
log = logging.getLogger(__name__)
@register_backend
def shark(model, inputs, *, options):
try:
from shark.dynamo_backend.utils import SharkBackend
except ImportError:
log.exception(
"Unable to import SHARK - High Performance Machine Learning Distribution"
"Please install the right version of SHARK that matches the PyTorch version being used. "
"Refer to https://github.com/nod-ai/SHARK/ for details."
)
raise
return SharkBackend(model, inputs, options)
def has_shark():
try:
importlib.import_module("shark")
return True
except ImportError:
return False
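
For context, usage mirrors the example script later in this diff: importing `shark` runs the `@register_backend` hook above, after which `torch.compile` can select the backend by name (the `device` option key is the one read by `SharkBackend`):

```python
import torch
import shark  # importing registers the "shark" dynamo backend

def foo(x):
    return x + 3

compiled = torch.compile(foo, backend="shark", options={"device": "cpu"})
print(compiled(torch.ones(4)))
```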

View File

@@ -71,7 +71,7 @@ class MakeFxModule:
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile(
suffix="_amdshark_ts", prefix="temp_ts_"
suffix="_shark_ts", prefix="temp_ts_"
)
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)

View File

@@ -3,7 +3,7 @@ from typing import List, Optional
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._functorch.compile_utils import strip_overloads
from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_inference import SharkInference
from torch._decomp import get_decompositions
from torch.func import functionalize
import io
@@ -93,13 +93,13 @@ def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule) -> bool:
return unwrapped_tuple
class AMDSharkBackend:
class SharkBackend:
def __init__(
self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
):
self.fx_g = fx_g
self.inputs = inputs
self.amdshark_module = None
self.shark_module = None
self.device: str = options.get("device", "cpu")
self.was_unwrapped: bool = False
self.none_indices: list = []
@@ -125,19 +125,19 @@ class AMDSharkBackend:
bytecode_stream = io.BytesIO()
mlir_module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
from amdshark.amdshark_inference import AMDSharkInference
from shark.shark_inference import SharkInference
amdshark_module = AMDSharkInference(
shark_module = SharkInference(
mlir_module=bytecode,
device=self.device,
mlir_dialect="tm_tensor",
)
amdshark_module.compile(extra_args=[])
self.amdshark_module = amdshark_module
shark_module.compile(extra_args=[])
self.shark_module = shark_module
def __call__(self, *inputs):
np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
np_outs = self.amdshark_module("forward", np_inputs)
np_outs = self.shark_module("forward", np_inputs)
if self.was_unwrapped:
np_outs = [
np_outs,

View File

@@ -1,5 +1,5 @@
import torch
import amdshark
import shark
def foo(x, a):
@@ -9,8 +9,8 @@ def foo(x, a):
return x + 3
amdshark_options = {"device": "cpu"}
compiled = torch.compile(foo, backend="amdshark", options=amdshark_options)
shark_options = {"device": "cpu"}
compiled = torch.compile(foo, backend="shark", options=shark_options)
input = torch.ones(4)

View File

@@ -22,7 +22,7 @@
"source": [
"# standard imports\n",
"import torch\n",
"from amdshark.iree_utils import get_iree_compiled_module"
"from shark.iree_utils import get_iree_compiled_module"
]
},
{

View File

@@ -1,7 +1,7 @@
import torch
from torch_mlir import compile, OutputType
from amdshark.iree_utils import get_iree_compiled_module
from shark.iree_utils import get_iree_compiled_module
try:
import torchdynamo

View File

@@ -32,7 +32,7 @@
"source": [
"# eager mode imports\n",
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
"from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
],
"metadata": {
"collapsed": false,
@@ -440,7 +440,7 @@
{
"cell_type": "markdown",
"source": [
"There is a convenience class `AMDSharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
@@ -684,9 +684,9 @@
],
"source": [
"# eager mode RAII\n",
"from amdshark.amdshark_runner import AMDSharkEagerMode\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"amdshark_eager_mode = AMDSharkEagerMode(\"cpu\")\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
@@ -712,7 +712,7 @@
{
"cell_type": "markdown",
"source": [
"The `AMDSharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `AMDSharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `AMDSharkEagerMode`, or switch backends, you need to `del` the instance:"
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
],
"metadata": {
"collapsed": false,
@@ -757,8 +757,8 @@
}
],
"source": [
"del amdshark_eager_mode\n",
"amdshark_eager_mode = AMDSharkEagerMode(\"cuda\")\n",
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",

View File

@@ -17,8 +17,8 @@ from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from amdshark.amdshark_runner import AMDSharkEagerMode
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode
def test_cpu():
@@ -85,7 +85,7 @@ def test_gpu():
def test_python_mode_ref_backend():
# hide this wherever you want?
_ = AMDSharkEagerMode("refbackend")
_ = SharkEagerMode("refbackend")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
@@ -103,7 +103,7 @@ def test_python_mode_ref_backend():
def test_python_mode_iree_cpu():
# hide this wherever you want?
_ = AMDSharkEagerMode("cpu")
_ = SharkEagerMode("cpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
@@ -121,7 +121,7 @@ def test_python_mode_iree_cpu():
def test_python_mode_iree_gpu():
_ = AMDSharkEagerMode("gpu")
_ = SharkEagerMode("gpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")

View File

@@ -47,7 +47,7 @@ golden_probabilities = torch.nn.functional.softmax(
golden_confidences = golden_confidences.numpy()
from amdshark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
from shark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
input_detached_clone = input_batch.clone()
eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
@@ -62,7 +62,7 @@ probabilities = torch.nn.functional.softmax(
torch.from_numpy(confidences), dim=0
).numpy()
print("The obtained result via amdshark is: ", confidences)
print("The obtained result via shark is: ", confidences)
print("The golden result is:", golden_confidences)
np.testing.assert_allclose(

Some files were not shown because too many files have changed in this diff.