Compare commits


5 Commits

Gaurav Shukla
4b1a0b43ff [WEB] Remove long prompts support
Removes support for long prompts due to the high lag when loading them.

Signed-off-by: Gaurav Shukla <gaurav@nod-labs>
2022-11-03 18:57:58 +05:30

Gaurav Shukla
099f2160c3 [WEB] fix background color
Signed-off-by: Gaurav Shukla
2022-11-03 17:36:24 +05:30

Gaurav Shukla
9d2d62dedf [WEB] Add support for long prompts (#467) 2022-11-03 03:27:36 -07:00

Gaurav Shukla
15ed05b221 [WEB] Update the title (#466) 2022-11-02 14:30:03 -07:00

Gaurav Shukla
7c825fc288 [WEB] CSS changes to the web-ui (#465)
This commit updates the UI styling.

Signed-off-by: Gaurav Shukla <gaurav@nod-labs.com>
2022-11-02 12:36:11 -07:00
192 changed files with 4209 additions and 17711 deletions


@@ -1,5 +0,0 @@
[flake8]
count = 1
show-source = 1
select = E9,F63,F7,F82
exclude = lit.cfg.py


@@ -9,93 +9,13 @@ on:
workflow_dispatch:
jobs:
windows-build:
runs-on: 7950X
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Compute version
shell: powershell
run: |
$package_version = $(Get-Date -UFormat "%Y%m%d")+"."+${{ github.run_number }}
$package_version_ = $(Get-Date -UFormat "%Y%m%d")+"_"+${{ github.run_number }}
$tag_name=$package_version
echo "package_version=$package_version" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
echo "package_version_=$package_version_" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
echo "tag_name=$tag_name" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
tag_name: ${{ env.tag_name }}
release_name: nod.ai SHARK ${{ env.tag_name }}
body: |
Automatic snapshot release of nod.ai SHARK.
draft: true
prerelease: true
- name: Build Package
shell: powershell
run: |
./setup_venv.ps1
python process_skipfiles.py
pyinstaller .\apps\stable_diffusion\shark_sd.spec
mv ./dist/shark_sd.exe ./dist/shark_sd_${{ env.package_version_ }}.exe
signtool sign /f c:\g\shark_02152023.cer /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/shark_sd_${{ env.package_version_ }}.exe
pyinstaller .\apps\stable_diffusion\shark_sd_cli.spec
python process_skipfiles.py
mv ./dist/shark_sd_cli.exe ./dist/shark_sd_cli_${{ env.package_version_ }}.exe
signtool sign /f c:\g\shark_02152023.cer /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/shark_sd_cli_${{ env.package_version_ }}.exe
# GHA windows VM OOMs so disable for now
#- name: Build and validate the SHARK Runtime package
# shell: powershell
# run: |
# $env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
# pip wheel -v -w dist . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
#- uses: actions/upload-artifact@v2
# with:
# path: dist/*
- name: Upload Release Assets
id: upload-release-assets
uses: dwenegar/upload-release-assets@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}
assets_path: ./dist/*
#asset_content_type: application/vnd.microsoft.portable-executable
- name: Publish Release
id: publish_release
uses: eregon/publish-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}
linux-build:
build:
runs-on: a100
strategy:
fail-fast: false
matrix:
python-version: ["3.11"]
python-version: ["3.10"]
backend: [IREE, SHARK]
steps:
@@ -112,10 +32,31 @@ jobs:
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Compute version
run: |
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
tag_name="${package_version}"
echo "package_version=${package_version}" >> $GITHUB_ENV
echo "tag_name=${tag_name}" >> $GITHUB_ENV
- name: Set Environment Variables
run: |
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Create Release
id: create_release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
tag_name: ${{ env.tag_name }}
release_name: nod.ai SHARK ${{ env.tag_name }}
body: |
Automatic snapshot release of nod.ai SHARK.
draft: true
prerelease: false
- name: Install dependencies
run: |
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
python -m pip install --upgrade pip
python -m pip install flake8 pytest toml
if [ -f requirements.txt ]; then pip install -r requirements.txt -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html; fi
@@ -127,26 +68,25 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
- name: Build and validate the IREE package
if: ${{ matrix.backend == 'IREE' }}
continue-on-error: true
run: |
cd $GITHUB_WORKSPACE
USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
source iree.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://openxla.github.io/iree/pip-release-links.html
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://iree-org.github.io/iree/pip-release-links.html
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
/bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" -k "not metal" |
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
tail -n 1 |
tee -a pytest_results.txt
if !(grep -Fxq " failed" pytest_results.txt)
then
export SHA=$(git log -1 --format='%h')
gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/${DATE}_$SHA
gsutil -m cp -r gs://shark_tank/${DATE}_$SHA/* gs://shark_tank/nightly/
gsutil -m cp -r gs://shark_tank/${DATE}_$SHA/* gs://shark_tank/latest/
fi
rm -rf ./wheelhouse/nodai*
@@ -162,6 +102,25 @@ jobs:
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models
pytest --ci --ci_sha=${SHORT_SHA} -k "not metal" |
pytest --ci --ci_sha=${SHORT_SHA} tank/test_models.py |
tail -n 1 |
tee -a pytest_results.txt
- name: Upload Release Assets
if: ${{ matrix.backend == 'SHARK' }}
id: upload-release-assets
uses: dwenegar/upload-release-assets@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}
assets_path: ./wheelhouse/nodai_*.whl
- name: Publish Release
if: ${{ matrix.backend == 'SHARK' }}
id: publish_release
uses: eregon/publish-release@v1
env:
GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
with:
release_id: ${{ steps.create_release.outputs.id }}


@@ -6,14 +6,8 @@ name: Validate Models on Shark Runtime
on:
push:
branches: [ main ]
paths-ignore:
- '**.md'
- 'shark/examples/**'
pull_request:
branches: [ main ]
paths-ignore:
- '**.md'
- 'shark/examples/**'
workflow_dispatch:
# Ensure that only a single job or workflow using the same
@@ -29,9 +23,9 @@ jobs:
strategy:
fail-fast: true
matrix:
os: [7950x, icelake, a100, MacStudio, ubuntu-latest]
os: [icelake, a100, MacStudio, ubuntu-latest]
suite: [cpu,cuda,vulkan]
python-version: ["3.11"]
python-version: ["3.10"]
include:
- os: ubuntu-latest
suite: lint
@@ -42,6 +36,8 @@ jobs:
suite: cuda
- os: ubuntu-latest
suite: cpu
- os: MacStudio
suite: vulkan
- os: MacStudio
suite: cuda
- os: MacStudio
@@ -52,19 +48,13 @@ jobs:
suite: cuda
- os: a100
suite: cpu
- os: 7950x
suite: cpu
- os: 7950x
suite: cuda
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
if: matrix.os != '7950x'
- name: Set Environment Variables
if: matrix.os != '7950x'
run: |
echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
@@ -84,9 +74,6 @@ jobs:
#cache-dependency-path: |
# **/requirements-importer.txt
# **/requirements.txt
- uses: actions/checkout@v2
if: matrix.os == '7950x'
- name: Install dependencies
if: matrix.suite == 'lint'
@@ -99,20 +86,19 @@ jobs:
run: |
# black format check
black --version
black --check .
black --line-length 79 --check .
# stop the build if there are Python syntax errors or undefined names
flake8 . --statistics
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --isolated --count --exit-zero --max-complexity=10 --max-line-length=127 \
--statistics --exclude lit.cfg.py
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
- name: Validate Models on CPU
if: matrix.suite == 'cpu'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cpu
pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cpu --update_tank
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
@@ -122,40 +108,29 @@ jobs:
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cuda
pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cuda --update_tank
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
# Disabled due to black image bug
# python build_tools/stable_diffusion_testing.py --device=cuda
- name: Validate Vulkan Models (MacOS)
if: matrix.suite == 'vulkan' && matrix.os == 'MacStudio'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
export DYLD_LIBRARY_PATH=/usr/local/lib/
echo "VULKAN SDK PATH wo setup: $VULKAN_SDK"
cd /Users/anush/VulkanSDK/1.3.224.1/
source setup-env.sh
cd $GITHUB_WORKSPACE
echo "VULKAN SDK PATH with setup: $VULKAN_SDK"
echo $PATH
pip list | grep -E "torch|iree"
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" --tank_url="gs://shark_tank/nightly/" -k vulkan --update_tank
pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
- name: Validate Vulkan Models (a100)
if: matrix.suite == 'vulkan' && matrix.os == 'a100'
if: matrix.suite == 'vulkan' && matrix.os != 'MacStudio'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k vulkan
python build_tools/stable_diffusion_testing.py --device=vulkan
- name: Validate Vulkan Models (Windows)
if: matrix.suite == 'vulkan' && matrix.os == '7950x'
run: |
./setup_venv.ps1
pytest -k vulkan -s
- name: Validate Stable Diffusion Models (Windows)
if: matrix.suite == 'vulkan' && matrix.os == '7950x'
run: |
./setup_venv.ps1
python build_tools/stable_diffusion_testing.py --device=vulkan
pytest --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k vulkan --update_tank

.gitignore

@@ -31,6 +31,7 @@ MANIFEST
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
@@ -159,31 +160,14 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# vscode related
.vscode
# Shark related artefacts
*venv/
shark_tmp/
*.vmfb
.use-iree
tank/dict_configs.py
*.csv
reproducers/
# ORT related artefacts
cache_models/
onnx_models/
# Generated images
generated_imgs/
# Custom model related artefacts
variants.json
models/
# models folder
apps/stable_diffusion/web/models/
# Stencil annotators.
stencil_annotator/
#web logging
web/logs/
web/stored_results/stable_diffusion/

.style.yapf (new file)

@@ -0,0 +1,3 @@
[style]
based_on_style = google
column_limit = 80

README.md

@@ -1,159 +1,27 @@
# SHARK
High Performance Machine Learning Distribution
High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerators and Heterogeneous Clusters
[![Nightly Release](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[![Validate torch-models on Shark Runtime](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)
## Communication Channels
* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs etc
## Installation
<details>
<summary>Prerequisites - Drivers </summary>
#### Install your Windows hardware drivers
* [AMD RDNA Users] Download the latest driver [here](https://www.amd.com/en/support/kb/release-notes/rn-rad-win-23-2-1).
* [macOS Users] Download and install the 1.3.216 Vulkan SDK from [here](https://sdk.lunarg.com/sdk/download/1.3.216.0/mac/vulkansdk-macos-1.3.216.0.dmg). Newer versions of the SDK will not work.
* [Nvidia Users] Download and install the latest CUDA / Vulkan drivers from [here](https://developer.nvidia.com/cuda-downloads)
#### Linux Drivers
* MESA / RADV drivers won't work with FP16. Please use the latest AMDGPU-PRO drivers (non-pro OSS drivers also won't work) or the latest NVIDIA Linux drivers.
Other users: please ensure you have the latest vendor drivers and the Vulkan SDK from [here](https://vulkan.lunarg.com/sdk/home), and if you are using Vulkan, check that `vulkaninfo` works in a terminal window
</details>
### Quick Start for SHARK Stable Diffusion for Windows 10/11 Users
Install the driver from [Prerequisites](https://github.com/nod-ai/SHARK#install-your-hardware-drivers) above.
Download the [stable release](https://github.com/nod-ai/shark/releases/latest).
Double-click the .exe, and the [UI](http://localhost:8080/) should open in your browser.
If you have custom models, put them in a `models/` directory next to the .exe.
Enjoy.
<details>
<summary>More installation notes</summary>
* We recommend downloading each new EXE version into a new folder. If you download it into the same folder as a previous install, you must delete the old `*.vmfb` files with `rm *.vmfb`. You can also use the `--clear_all` flag once to clean all the old files.
* If you recently updated the driver or this binary (EXE file), we recommend clearing all the local artifacts with `--clear_all` (see the sketch below).
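A minimal sketch of that cleanup step (the EXE filename is illustrative; it varies by release date and build number):

```shell
# Illustrative: run the downloaded EXE once with --clear_all to purge old
# compiled artifacts after a driver or binary update.
./shark_sd_<version>.exe --clear_all
```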
## Running
* Open a Command Prompt or PowerShell terminal and change folder (`cd`) to the .exe folder. Then run the EXE from the command prompt. That way, if an error occurs, you'll be able to cut-and-paste it to ask for help. (If it always works for you without error, you may simply double-click the EXE.)
* The first run may take a few minutes while the models are downloaded and compiled. Your patience is appreciated. The download could be about 5GB.
* You will likely see a Windows Defender message asking you to give permission to open a web server port. Accept it.
* Open a browser to access the Stable Diffusion web server. By default, the port is 8080, so you can go to http://localhost:8080/.
## Stopping
* Select the command prompt that's running the EXE. Press CTRL-C and wait a moment or close the terminal.
</details>
<details>
<summary>Advanced Installation (Only for developers)</summary>
## Advanced Installation (Windows, Linux and macOS) for developers
## Check out the code
```shell
git clone https://github.com/nod-ai/SHARK.git
cd SHARK
```
## Setup your Python Virtual Environment and Dependencies
### Windows 10/11 Users
* Install the latest Python 3.11.x version from [here](https://www.python.org/downloads/windows/)
* Install Git for Windows from [here](https://git-scm.com/download/win)
#### Allow the install script to run in Powershell
```powershell
set-executionpolicy remotesigned
```
#### Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...)
```powershell
./setup_venv.ps1 #You can re-run this script to get the latest version
```
### Linux / macOS Users
```shell
./setup_venv.sh
source shark.venv/bin/activate
```
### Run Stable Diffusion on your device - WebUI
#### Windows 10/11 Users
```powershell
(shark.venv) PS C:\g\shark> cd .\apps\stable_diffusion\web\
(shark.venv) PS C:\g\shark\apps\stable_diffusion\web> python .\index.py
```
#### Linux / macOS Users
```shell
(shark.venv) > cd apps/stable_diffusion/web
(shark.venv) > python index.py
```
#### Access Stable Diffusion on http://localhost:8080/?__theme=dark
<img width="1607" alt="webui" src="https://user-images.githubusercontent.com/74956/204939260-b8308bc2-8dc4-47f6-9ac0-f60b66edab99.png">
### Run Stable Diffusion on your device - Commandline
#### Windows 10/11 Users
```powershell
(shark.venv) PS C:\g\shark> python .\apps\stable_diffusion\scripts\txt2img.py --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
```
#### Linux / macOS Users
```shell
python3.11 apps/stable_diffusion/scripts/txt2img.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd"
```
You can replace `vulkan` with `cpu` to run on your CPU, or with `cuda` to run on CUDA devices. If you have multiple Vulkan devices, you can address them with `--device=vulkan://1` etc., as shown below.
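For example, illustrative invocations of the same script with different device flags:

```shell
# Same txt2img script as above, selecting different backends.
python3.11 apps/stable_diffusion/scripts/txt2img.py --precision=fp16 --device=cpu --prompt="tajmahal, oil on canvas"
python3.11 apps/stable_diffusion/scripts/txt2img.py --precision=fp16 --device=cuda --prompt="tajmahal, oil on canvas"
# Address the second Vulkan device on a multi-GPU system:
python3.11 apps/stable_diffusion/scripts/txt2img.py --precision=fp16 --device=vulkan://1 --prompt="tajmahal, oil on canvas"
```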
</details>
The output on an AMD 7900XTX would look something like:
```shell
Average step time: 47.19188690185547ms/it
Clip Inference time (ms) = 109.531
VAE Inference time (ms): 78.590
Total image generation time: 2.5788655281066895sec
```
Here are some samples generated:
![tajmahal, snow, sunflowers, oil on canvas_0](https://user-images.githubusercontent.com/74956/204934186-141f7e43-6eb2-4e89-a99c-4704d20444b3.jpg)
![a photo of a crab playing a trumpet](https://user-images.githubusercontent.com/74956/204933258-252e7240-8548-45f7-8253-97647d38313d.jpg)
Find us on [SHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware.
<details>
<summary>Binary Installation</summary>
<summary>Installation (Linux, macOS and Windows)</summary>
### Setup a new pip Virtual Environment
This step sets up a new VirtualEnv for Python
```shell
python --version #Check you have 3.11 on Linux, macOS or Windows Powershell
python --version #Check you have 3.10 on Linux, macOS or Windows Powershell
python -m venv shark_venv
source shark_venv/bin/activate # Use shark_venv/Scripts/activate on Windows
@@ -167,7 +35,7 @@ python -m pip install --upgrade pip
### Install SHARK
This step pip installs SHARK and related packages on Linux (Python 3.8, 3.10, and 3.11) and macOS / Windows (Python 3.11)
This step pip installs SHARK and related packages on Linux (Python 3.7, 3.8, 3.9, and 3.10) and macOS (Python 3.10)
```shell
pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
@@ -198,31 +66,60 @@ python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
</details>
<details>
<summary>Development, Testing and Benchmarks</summary>
<summary>Source Installation</summary>
If you want to use Python 3.11 with the TF import tools, you can set environment variables like:
Set `USE_IREE=1` to use upstream IREE
```
# PYTHON=python3.11 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh
## Check out the code
```shell
git clone https://github.com/nod-ai/SHARK.git
```
### Run any of the hundreds of SHARK tank models via the test framework
## Setup your Python Virtual Environment and Dependencies
### Windows Users
```shell
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
# Requires Python 3.10 and Powershell
./setup_venv.ps1
shark.venv/Scripts/activate
```
### Linux / macOS Users
```shell
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
source shark.venv/bin/activate
```
### Run a demo script
```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
# Or a pytest
pytest tank/test_models.py -k "MiniLM"
```
### How to use your locally built IREE / Torch-MLIR with SHARK
</details>
<details>
<summary>Development, Testing and Benchmarks</summary>
If you want to use Python 3.10 with the TF import tools, you can set environment variables like:
Set `USE_IREE=1` to use upstream IREE
```
# PYTHON=python3.10 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh
```
If you are a *Torch-MLIR developer or an IREE developer* and want to test local changes, you can uninstall
the provided packages with `pip uninstall torch-mlir` and / or `pip uninstall iree-compiler iree-runtime` and build locally
with Python bindings and set your PYTHONPATH as mentioned [here](https://github.com/iree-org/iree/tree/main/docs/api_docs/python#install-iree-binaries)
with Python bindings and set your PYTHONPATH as mentioned [here](https://google.github.io/iree/bindings/python/)
for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
for Torch-MLIR.
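A rough sketch of that flow for IREE (the build paths are illustrative assumptions, not prescribed by this repo):

```shell
# Sketch: swap in a locally built IREE; adjust paths to your build directory.
pip uninstall iree-compiler iree-runtime
export PYTHONPATH=/path/to/iree-build/compiler/bindings/python:/path/to/iree-build/runtime/bindings/python:$PYTHONPATH
```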
How to use your locally built Torch-MLIR with SHARK:
### How to use your locally built Torch-MLIR with SHARK
```shell
1.) Run `./setup_venv.sh` in SHARK and activate the `shark.venv` virtual env.
2.) Run `pip uninstall torch-mlir`.
@@ -240,15 +137,9 @@ Now SHARK will use your locally built Torch-MLIR repo.
## Benchmarking Dispatches
To produce benchmarks of individual dispatches, you can add `--dispatch_benchmarks=All --dispatch_benchmarks_dir=<output_dir>` to your pytest command line argument.
To produce benchmarks of individual dispatches, you can add `--dispatch_benchmarks=All --dispatch_benchmarks_dir=<output_dir>` to your command line argument.
If you only want to compile specific dispatches, you can specify them with a space-separated string instead of `"All"`, e.g. `--dispatch_benchmarks="0 1 2 10"`.
For example, to generate and run dispatch benchmarks for MiniLM on CUDA:
```
pytest -k "MiniLM and torch and static and cuda" --benchmark_dispatches=All -s --dispatch_benchmarks_dir=./my_dispatch_benchmarks
```
The given command will populate `<dispatch_benchmarks_dir>/<model_name>/` with an `ordered_dispatches.txt` that lists and orders the dispatches and their latencies, as well as a folder for each dispatch containing the .mlir, the .vmfb, and the benchmark results for that dispatch.
If you want to instead incorporate this into a Python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` arguments when initializing `SharkInference`, and the benchmarks will be generated when the module is compiled, e.g.:
```
@@ -263,7 +154,6 @@ shark_module = SharkInference(
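# NOTE: the constructor call above is truncated by the diff hunk. A
# hypothetical sketch of the keyword arguments described in the surrounding
# text (only these two names come from the prose; the rest is illustrative):
#
#   shark_module = SharkInference(
#       mlir_module,
#       device="cuda",
#       dispatch_benchmarks="All",
#       dispatch_benchmarks_dir="./my_dispatch_benchmarks",
#   )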
```
Output will include:
- An ordered list, `ordered-dispatches.txt`, of all the dispatches with their runtimes
- Inside the specified directory, there will be a directory for each dispatch (there will be mlir files for all dispatches, but only compiled binaries and benchmark data for the specified dispatches)
- An .mlir file containing the dispatch benchmark
- A compiled .vmfb file containing the dispatch benchmark
@@ -272,7 +162,7 @@ Output will include:
- A .txt file containing benchmark output
See tank/README.md for further instructions on how to run model tests and benchmarks from the SHARK tank.
See tank/README.md for instructions on how to run model tests and benchmarks from the SHARK tank.
</details>
@@ -342,11 +232,6 @@ SHARK is maintained to support the latest innovations in ML Models:
For a complete list of the models supported in SHARK, please refer to [tank/README.md](https://github.com/nod-ai/SHARK/blob/main/tank/README.md).
## Communication Channels
* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs etc
## Related Projects
<details>


@@ -1,87 +0,0 @@
Compile / Run Instructions:
To compile a .vmfb for SD (VAE, UNet, CLIP), run the following commands with the .mlir files in your local shark_tank cache (the default location for Linux users is `~/.local/shark_tank`). These will be available after the script from [this README](https://github.com/nod-ai/SHARK/blob/main/shark/examples/shark_inference/stable_diffusion/README.md) has been run once.
Running the script mentioned above with the `--save_vmfb` flag will also save the .vmfb in your SHARK base directory if you want to skip straight to benchmarks.
Compile Commands FP32/FP16:
```shell
Vulkan AMD:
iree-compile --iree-input-type=none --iree-hal-target-backends=vulkan --iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
# add --mlir-print-debuginfo --mlir-print-op-on-diagnostic=true for debug
# use iree-input-type=mhlo for tf models
CUDA NVIDIA:
iree-compile --iree-input-type=none --iree-hal-target-backends=cuda --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
CPU:
iree-compile --iree-input-type=none --iree-hal-target-backends=llvm-cpu --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
```
Run / Benchmark Command (FP32 - NCHW):
(Note: a batch size of 2 is needed because classifier-free guidance performs two forward passes through the UNet.)
```shell
## Vulkan AMD:
iree-benchmark-module --module=/path/to/output/vmfb --function=forward --device=vulkan --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
## CUDA:
iree-benchmark-module --module=/path/to/vmfb --function=forward --device=cuda --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
## CPU:
iree-benchmark-module --module=/path/to/vmfb --function=forward --device=local-task --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
```
Run via vulkan_gui for RGP Profiling:
To build the Vulkan app for profiling UNet, follow the instructions [here](https://github.com/nod-ai/SHARK/tree/main/cpp), then run the following command from the cpp directory with your compiled stable_diff.vmfb
```shell
./build/vulkan_gui/iree-vulkan-gui --module=/path/to/unet.vmfb --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
```
</details>
<details>
<summary>Debug Commands</summary>
## Debug commands and other advanced usage follow.
```shell
python txt2img.py --precision="fp32"|"fp16" --device="cpu"|"cuda"|"vulkan" --import_mlir|--no-import_mlir --prompt "enter the text"
```
## Dump all dispatch .spv and ISA using amdllpc
```shell
python txt2img.py --precision="fp16" --device="vulkan" --iree-vulkan-target-triple=rdna3-unknown-linux --no-load_vmfb --dispatch_benchmarks="all" --dispatch_benchmarks_dir="SD_dispatches" --dump_isa
```
## Compile and save the .vmfb (using vulkan fp16 as an example):
```shell
python txt2img.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb
```
## Capture an RGP trace
```shell
python txt2img.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb --enable_rgp
```
## Run the vae module with iree-benchmark-module (NCHW, fp16, vulkan, for example):
```shell
iree-benchmark-module --module=/path/to/output/vmfb --function=forward --device=vulkan --input=1x4x64x64xf16
```
## Run the unet module with iree-benchmark-module (same config as above):
```shell
##if you want to use .npz inputs:
unzip ~/.local/shark_tank/<your unet>/inputs.npz
iree-benchmark-module --module=/path/to/output/vmfb --function=forward --input=@arr_0.npy --input=1xf16 --input=@arr_2.npy --input=@arr_3.npy --input=@arr_4.npy
```
</details>


@@ -1,5 +0,0 @@
from apps.stable_diffusion.scripts.txt2img import txt2img_inf
from apps.stable_diffusion.scripts.img2img import img2img_inf
from apps.stable_diffusion.scripts.inpaint import inpaint_inf
from apps.stable_diffusion.scripts.outpaint import outpaint_inf
from apps.stable_diffusion.scripts.upscaler import upscaler_inf


@@ -1,364 +0,0 @@
import sys
import torch
import time
from PIL import Image
from apps.stable_diffusion.src import (
args,
Image2ImagePipeline,
StencilPipeline,
get_schedulers,
set_init_device_flags,
utils,
clear_all,
save_output_img,
)
schedulers = None
# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
init_use_tuned = args.use_tuned
init_import_mlir = args.import_mlir
# For stencil, the input image can be of any size, but we need to ensure that
# it conforms to our model constraints:
# both width and height should be at least 384 and a multiple of 8.
# This utility function performs the transformation on the input image while
# maintaining the aspect ratio before sending it to the stencil pipeline.
def resize_stencil(image: Image.Image):
width, height = image.size
aspect_ratio = width / height
min_size = min(width, height)
if min_size < 384:
n_size = 384
if width == min_size:
width = n_size
height = n_size / aspect_ratio
else:
height = n_size
width = n_size * aspect_ratio
width = int(width)
height = int(height)
n_width = width // 8
n_height = height // 8
n_width *= 8
n_height *= 8
new_image = image.resize((n_width, n_height))
return new_image, n_width, n_height
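# Worked example (illustrative): a 300x600 input has min_size = 300 < 384, so
# the smaller side (width) is scaled up to 384 and height becomes
# 384 / (300/600) = 768. Both values are already multiples of 8, so the image
# is resized to 384x768, preserving the original 1:2 aspect ratio.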
# Exposed to UI.
def img2img_inf(
prompt: str,
negative_prompt: str,
init_image,
height: int,
width: int,
steps: int,
strength: float,
guidance_scale: float,
seed: int,
batch_count: int,
batch_size: int,
scheduler: str,
custom_model: str,
hf_model_id: str,
precision: str,
device: str,
max_length: int,
use_stencil: str,
save_metadata_to_json: bool,
save_metadata_to_png: bool,
):
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
Config,
)
import apps.stable_diffusion.web.utils.global_obj as global_obj
global schedulers
args.prompts = [prompt]
args.negative_prompts = [negative_prompt]
args.guidance_scale = guidance_scale
args.seed = seed
args.steps = steps
args.strength = strength
args.scheduler = scheduler
args.img_path = "not none"
if init_image is None:
return None, "An Initial Image is required"
image = init_image.convert("RGB")
# set ckpt_loc and hf_model_id.
types = (
".ckpt",
".safetensors",
) # the tuple of file types
args.ckpt_loc = ""
args.hf_model_id = ""
if custom_model == "None":
if not hf_model_id:
return (
None,
"Please provide either custom model or huggingface model ID, both must not be empty",
)
args.hf_model_id = hf_model_id
elif ".ckpt" in custom_model or ".safetensors" in custom_model:
args.ckpt_loc = get_custom_model_pathfile(custom_model)
else:
args.hf_model_id = custom_model
args.save_metadata_to_json = save_metadata_to_json
args.write_metadata_to_png = save_metadata_to_png
use_stencil = None if use_stencil == "None" else use_stencil
args.use_stencil = use_stencil
if use_stencil is not None:
args.scheduler = "DDIM"
args.hf_model_id = "runwayml/stable-diffusion-v1-5"
image, width, height = resize_stencil(image)
elif args.scheduler != "PNDM":
if "Shark" in args.scheduler:
print(
f"SharkEulerDiscrete scheduler not supported. Switching to PNDM scheduler"
)
args.scheduler = "PNDM"
else:
sys.exit(
"Img2Img works best with PNDM scheduler. Other schedulers are not supported yet."
)
cpu_scheduling = not args.scheduler.startswith("Shark")
args.precision = precision
dtype = torch.float32 if precision == "fp32" else torch.half
new_config_obj = Config(
"img2img",
args.hf_model_id,
args.ckpt_loc,
precision,
batch_size,
max_length,
height,
width,
device,
use_lora=None,
use_stencil=use_stencil,
)
if (
not global_obj.get_sd_obj()
or global_obj.get_cfg_obj() != new_config_obj
):
global_obj.clear_cache()
global_obj.set_cfg_obj(new_config_obj)
args.batch_size = batch_size
args.max_length = max_length
args.height = height
args.width = width
args.device = device.split("=>", 1)[1].strip()
args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
args.use_tuned = init_use_tuned
args.import_mlir = init_import_mlir
set_init_device_flags()
model_id = (
args.hf_model_id
if args.hf_model_id
else "stabilityai/stable-diffusion-2-1-base"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[scheduler]
if use_stencil is not None:
args.use_tuned = False
global_obj.set_sd_obj(
StencilPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
use_stencil=use_stencil,
debug=args.import_debug if args.import_mlir else False,
)
)
else:
global_obj.set_sd_obj(
Image2ImagePipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
)
)
global_obj.set_schedulers(schedulers[scheduler])
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
img_seed = utils.sanitize_seed(seed)
extra_info = {"STRENGTH": strength}
for current_batch in range(batch_count):
if current_batch > 0:
img_seed = utils.sanitize_seed(-1)
out_imgs = global_obj.get_sd_obj().generate_images(
prompt,
negative_prompt,
image,
batch_size,
height,
width,
steps,
strength,
guidance_scale,
img_seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
use_stencil=use_stencil,
)
save_output_img(out_imgs[0], img_seed, extra_info)
generated_imgs.extend(out_imgs)
seeds.append(img_seed)
global_obj.get_sd_obj().log += "\n"
yield generated_imgs, global_obj.get_sd_obj().log
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={device}"
text_output += f"\nsteps={steps}, strength={args.strength}, guidance_scale={guidance_scale}, seed={seeds}"
text_output += f"\nsize={height}x{width}, batch_count={batch_count}, batch_size={batch_size}, max_length={args.max_length}"
text_output += global_obj.get_sd_obj().log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
yield generated_imgs, text_output
if __name__ == "__main__":
if args.clear_all:
clear_all()
if args.img_path is None:
print("Flag --img_path is required.")
exit()
image = Image.open(args.img_path).convert("RGB")
# When the models get uploaded, this should default to False.
args.import_mlir = True
use_stencil = args.use_stencil
if use_stencil:
args.scheduler = "DDIM"
args.hf_model_id = "runwayml/stable-diffusion-v1-5"
image, args.width, args.height = resize_stencil(image)
elif args.scheduler != "PNDM":
if "Shark" in args.scheduler:
print(
f"SharkEulerDiscrete scheduler not supported. Switching to PNDM scheduler"
)
args.scheduler = "PNDM"
else:
sys.exit(
"Img2Img works best with PNDM scheduler. Other schedulers are not supported yet."
)
cpu_scheduling = not args.scheduler.startswith("Shark")
dtype = torch.float32 if args.precision == "fp32" else torch.half
set_init_device_flags()
schedulers = get_schedulers(args.hf_model_id)
scheduler_obj = schedulers[args.scheduler]
seed = utils.sanitize_seed(args.seed)
# Adjust for height and width based on model
if use_stencil:
img2img_obj = StencilPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
use_stencil=use_stencil,
debug=args.import_debug if args.import_mlir else False,
)
else:
img2img_obj = Image2ImagePipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
)
start_time = time.time()
generated_imgs = img2img_obj.generate_images(
args.prompts,
args.negative_prompts,
image,
args.batch_size,
args.height,
args.width,
args.steps,
args.strength,
args.guidance_scale,
seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
use_stencil=use_stencil,
)
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={args.device}"
text_output += f"\nsteps={args.steps}, strength={args.strength}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
text_output += (
f", batch size={args.batch_size}, max_length={args.max_length}"
)
text_output += img2img_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
extra_info = {"STRENGTH": args.strength}
save_output_img(generated_imgs[0], seed, extra_info)
print(text_output)


@@ -1,271 +0,0 @@
import torch
import time
from PIL import Image
from apps.stable_diffusion.src import (
args,
InpaintPipeline,
get_schedulers,
set_init_device_flags,
utils,
clear_all,
save_output_img,
)
schedulers = None
# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
init_use_tuned = args.use_tuned
init_import_mlir = args.import_mlir
# Exposed to UI.
def inpaint_inf(
prompt: str,
negative_prompt: str,
image_dict,
height: int,
width: int,
inpaint_full_res: bool,
inpaint_full_res_padding: int,
steps: int,
guidance_scale: float,
seed: int,
batch_count: int,
batch_size: int,
scheduler: str,
custom_model: str,
hf_model_id: str,
precision: str,
device: str,
max_length: int,
save_metadata_to_json: bool,
save_metadata_to_png: bool,
):
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
Config,
)
import apps.stable_diffusion.web.utils.global_obj as global_obj
global schedulers
args.prompts = [prompt]
args.negative_prompts = [negative_prompt]
args.guidance_scale = guidance_scale
args.steps = steps
args.scheduler = scheduler
args.img_path = "not none"
args.mask_path = "not none"
# set ckpt_loc and hf_model_id.
types = (
".ckpt",
".safetensors",
) # the tuple of file types
args.ckpt_loc = ""
args.hf_model_id = ""
if custom_model == "None":
if not hf_model_id:
return (
None,
"Please provide either custom model or huggingface model ID, both must not be empty",
)
args.hf_model_id = hf_model_id
elif ".ckpt" in custom_model or ".safetensors" in custom_model:
args.ckpt_loc = get_custom_model_pathfile(custom_model)
else:
args.hf_model_id = custom_model
args.save_metadata_to_json = save_metadata_to_json
args.write_metadata_to_png = save_metadata_to_png
dtype = torch.float32 if precision == "fp32" else torch.half
cpu_scheduling = not scheduler.startswith("Shark")
new_config_obj = Config(
"inpaint",
args.hf_model_id,
args.ckpt_loc,
precision,
batch_size,
max_length,
height,
width,
device,
use_lora=None,
use_stencil=None,
)
if (
not global_obj.get_sd_obj()
or global_obj.get_cfg_obj() != new_config_obj
):
global_obj.clear_cache()
global_obj.set_cfg_obj(new_config_obj)
args.precision = precision
args.batch_size = batch_size
args.max_length = max_length
args.height = height
args.width = width
args.device = device.split("=>", 1)[1].strip()
args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
args.use_tuned = init_use_tuned
args.import_mlir = init_import_mlir
set_init_device_flags()
model_id = (
args.hf_model_id
if args.hf_model_id
else "stabilityai/stable-diffusion-2-inpainting"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[scheduler]
global_obj.set_sd_obj(
InpaintPipeline.from_pretrained(
scheduler=scheduler_obj,
import_mlir=args.import_mlir,
model_id=args.hf_model_id,
ckpt_loc=args.ckpt_loc,
precision=args.precision,
max_length=args.max_length,
batch_size=args.batch_size,
height=args.height,
width=args.width,
use_base_vae=args.use_base_vae,
use_tuned=args.use_tuned,
custom_vae=args.custom_vae,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
)
)
global_obj.set_schedulers(schedulers[scheduler])
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
img_seed = utils.sanitize_seed(seed)
image = image_dict["image"]
mask_image = image_dict["mask"]
for i in range(batch_count):
if i > 0:
img_seed = utils.sanitize_seed(-1)
out_imgs = global_obj.get_sd_obj().generate_images(
prompt,
negative_prompt,
image,
mask_image,
batch_size,
height,
width,
inpaint_full_res,
inpaint_full_res_padding,
steps,
guidance_scale,
img_seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
save_output_img(out_imgs[0], img_seed)
generated_imgs.extend(out_imgs)
seeds.append(img_seed)
global_obj.get_sd_obj().log += "\n"
yield generated_imgs, global_obj.get_sd_obj().log
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={device}"
text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seeds}"
text_output += f"\nsize={args.height}x{args.width}, batch-count={batch_count}, batch-size={args.batch_size}, max_length={args.max_length}"
text_output += global_obj.get_sd_obj().log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
yield generated_imgs, text_output
if __name__ == "__main__":
if args.clear_all:
clear_all()
if args.img_path is None:
print("Flag --img_path is required.")
exit()
if args.mask_path is None:
print("Flag --mask_path is required.")
exit()
dtype = torch.float32 if args.precision == "fp32" else torch.half
cpu_scheduling = not args.scheduler.startswith("Shark")
set_init_device_flags()
model_id = (
args.hf_model_id
if "inpaint" in args.hf_model_id
else "stabilityai/stable-diffusion-2-inpainting"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[args.scheduler]
seed = args.seed
image = Image.open(args.img_path)
mask_image = Image.open(args.mask_path)
inpaint_obj = InpaintPipeline.from_pretrained(
scheduler=scheduler_obj,
import_mlir=args.import_mlir,
model_id=args.hf_model_id,
ckpt_loc=args.ckpt_loc,
precision=args.precision,
max_length=args.max_length,
batch_size=args.batch_size,
height=args.height,
width=args.width,
use_base_vae=args.use_base_vae,
use_tuned=args.use_tuned,
custom_vae=args.custom_vae,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
)
for current_batch in range(args.batch_count):
if current_batch > 0:
seed = -1
seed = utils.sanitize_seed(seed)
start_time = time.time()
generated_imgs = inpaint_obj.generate_images(
args.prompts,
args.negative_prompts,
image,
mask_image,
args.batch_size,
args.height,
args.width,
args.inpaint_full_res,
args.inpaint_full_res_padding,
args.steps,
args.guidance_scale,
seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += (
f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
)
text_output += f"\nscheduler={args.scheduler}, device={args.device}"
text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
text_output += (
f", batch size={args.batch_size}, max_length={args.max_length}"
)
text_output += inpaint_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
save_output_img(generated_imgs[0], seed)
print(text_output)


@@ -1,296 +0,0 @@
import torch
import time
from PIL import Image
from apps.stable_diffusion.src import (
args,
OutpaintPipeline,
get_schedulers,
set_init_device_flags,
utils,
clear_all,
save_output_img,
)
schedulers = None
# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
init_use_tuned = args.use_tuned
init_import_mlir = args.import_mlir
# Exposed to UI.
def outpaint_inf(
prompt: str,
negative_prompt: str,
init_image,
pixels: int,
mask_blur: int,
directions: list,
noise_q: float,
color_variation: float,
height: int,
width: int,
steps: int,
guidance_scale: float,
seed: int,
batch_count: int,
batch_size: int,
scheduler: str,
custom_model: str,
hf_model_id: str,
precision: str,
device: str,
max_length: int,
save_metadata_to_json: bool,
save_metadata_to_png: bool,
):
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
Config,
)
import apps.stable_diffusion.web.utils.global_obj as global_obj
global schedulers
args.prompts = [prompt]
args.negative_prompts = [negative_prompt]
args.guidance_scale = guidance_scale
args.steps = steps
args.scheduler = scheduler
args.img_path = "not none"
# set ckpt_loc and hf_model_id.
types = (
".ckpt",
".safetensors",
) # the tuple of file types
args.ckpt_loc = ""
args.hf_model_id = ""
if custom_model == "None":
if not hf_model_id:
return (
None,
"Please provide either custom model or huggingface model ID, both must not be empty",
)
args.hf_model_id = hf_model_id
elif ".ckpt" in custom_model or ".safetensors" in custom_model:
args.ckpt_loc = get_custom_model_pathfile(custom_model)
else:
args.hf_model_id = custom_model
args.save_metadata_to_json = save_metadata_to_json
args.write_metadata_to_png = save_metadata_to_png
dtype = torch.float32 if precision == "fp32" else torch.half
cpu_scheduling = not scheduler.startswith("Shark")
new_config_obj = Config(
"outpaint",
args.hf_model_id,
args.ckpt_loc,
precision,
batch_size,
max_length,
height,
width,
device,
use_lora=None,
use_stencil=None,
)
if (
not global_obj.get_sd_obj()
or global_obj.get_cfg_obj() != new_config_obj
):
global_obj.clear_cache()
global_obj.set_cfg_obj(new_config_obj)
args.precision = precision
args.batch_size = batch_size
args.max_length = max_length
args.height = height
args.width = width
args.device = device.split("=>", 1)[1].strip()
args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
args.use_tuned = init_use_tuned
args.import_mlir = init_import_mlir
set_init_device_flags()
model_id = (
args.hf_model_id
if args.hf_model_id
else "stabilityai/stable-diffusion-2-inpainting"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[scheduler]
global_obj.set_sd_obj(
OutpaintPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
)
)
global_obj.set_schedulers(schedulers[scheduler])
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
img_seed = utils.sanitize_seed(seed)
left = "left" in directions
right = "right" in directions
top = "up" in directions
bottom = "down" in directions
for i in range(batch_count):
if i > 0:
img_seed = utils.sanitize_seed(-1)
out_imgs = global_obj.get_sd_obj().generate_images(
prompt,
negative_prompt,
init_image,
pixels,
mask_blur,
left,
right,
top,
bottom,
noise_q,
color_variation,
batch_size,
height,
width,
steps,
guidance_scale,
img_seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
save_output_img(out_imgs[0], img_seed)
generated_imgs.extend(out_imgs)
seeds.append(img_seed)
global_obj.get_sd_obj().log += "\n"
yield generated_imgs, global_obj.get_sd_obj().log
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={device}"
text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seeds}"
text_output += f"\nsize={args.height}x{args.width}, batch-count={batch_count}, batch-size={args.batch_size}, max_length={args.max_length}"
text_output += global_obj.get_sd_obj().log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
yield generated_imgs, text_output
if __name__ == "__main__":
if args.clear_all:
clear_all()
if args.img_path is None:
print("Flag --img_path is required.")
exit()
dtype = torch.float32 if args.precision == "fp32" else torch.half
cpu_scheduling = not args.scheduler.startswith("Shark")
set_init_device_flags()
model_id = (
args.hf_model_id
if "inpaint" in args.hf_model_id
else "stabilityai/stable-diffusion-2-inpainting"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[args.scheduler]
seed = args.seed
image = Image.open(args.img_path)
outpaint_obj = OutpaintPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
)
for current_batch in range(args.batch_count):
if current_batch > 0:
seed = -1
seed = utils.sanitize_seed(seed)
start_time = time.time()
generated_imgs = outpaint_obj.generate_images(
args.prompts,
args.negative_prompts,
image,
args.pixels,
args.mask_blur,
args.left,
args.right,
args.top,
args.bottom,
args.noise_q,
args.color_variation,
args.batch_size,
args.height,
args.width,
args.steps,
args.guidance_scale,
seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += (
f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
)
text_output += f"\nscheduler={args.scheduler}, device={args.device}"
text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
text_output += (
f", batch size={args.batch_size}, max_length={args.max_length}"
)
text_output += outpaint_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
# Save this information as metadata of the generated output image.
directions = []
if args.left:
directions.append("left")
if args.right:
directions.append("right")
if args.top:
directions.append("up")
if args.bottom:
directions.append("down")
extra_info = {
"PIXELS": args.pixels,
"MASK_BLUR": args.mask_blur,
"DIRECTIONS": directions,
"NOISE_Q": args.noise_q,
"COLOR_VARIATION": args.color_variation,
}
save_output_img(generated_imgs[0], seed, extra_info)
print(text_output)


@@ -1,240 +0,0 @@
import logging
import os
from models.stable_diffusion.main import stable_diff_inf
from models.stable_diffusion.utils import get_available_devices
from dotenv import load_dotenv
from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
from telegram import BotCommand
from telegram.ext import Application, ApplicationBuilder, CallbackQueryHandler
from telegram.ext import ContextTypes, MessageHandler, CommandHandler, filters
from io import BytesIO
import random
log = logging.getLogger("TG.Bot")
logging.basicConfig()
log.warning("Start")
load_dotenv()
os.environ["AMD_ENABLE_LLPC"] = "0"
TG_TOKEN = os.getenv("TG_TOKEN")
SELECTED_MODEL = "stablediffusion"
SELECTED_SCHEDULER = "EulerAncestralDiscrete"
STEPS = 30
NEGATIVE_PROMPT = (
"Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra"
" limbs,Gross proportions,Missing arms,Mutated hands,Long"
" neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad"
" anatomy,Cloned face,Malformed limbs,Missing legs,Too many"
" fingers,blurry, lowres, text, error, cropped, worst quality, low"
" quality, jpeg artifacts, out of frame, extra fingers, mutated hands,"
" poorly drawn hands, poorly drawn face, bad anatomy, extra limbs, cloned"
" face, malformed limbs, missing arms, missing legs, extra arms, extra"
" legs, fused fingers, too many fingers"
)
GUIDANCE_SCALE = 6
available_devices = get_available_devices()
models_list = [
"stablediffusion",
"anythingv3",
"analogdiffusion",
"openjourney",
"dreamlike",
]
sheds_list = [
"DDIM",
"PNDM",
"LMSDiscrete",
"DPMSolverMultistep",
"EulerDiscrete",
"EulerAncestralDiscrete",
"SharkEulerDiscrete",
]
def image_to_bytes(image):
bio = BytesIO()
bio.name = "image.jpeg"
image.save(bio, "JPEG")
bio.seek(0)
return bio
def get_try_again_markup():
keyboard = [[InlineKeyboardButton("Try again", callback_data="TRYAGAIN")]]
reply_markup = InlineKeyboardMarkup(keyboard)
return reply_markup
def generate_image(prompt):
seed = random.randint(1, 10000)
log.warning(SELECTED_MODEL)
log.warning(STEPS)
image, text = stable_diff_inf(
prompt=prompt,
negative_prompt=NEGATIVE_PROMPT,
steps=STEPS,
guidance_scale=GUIDANCE_SCALE,
seed=seed,
scheduler_key=SELECTED_SCHEDULER,
variant=SELECTED_MODEL,
device_key=available_devices[0],
)
return image, seed
async def generate_and_send_photo(
update: Update, context: ContextTypes.DEFAULT_TYPE
) -> None:
progress_msg = await update.message.reply_text(
"Generating image...", reply_to_message_id=update.message.message_id
)
im, seed = generate_image(prompt=update.message.text)
await context.bot.delete_message(
chat_id=progress_msg.chat_id, message_id=progress_msg.message_id
)
await context.bot.send_photo(
update.effective_user.id,
image_to_bytes(im),
caption=f'"{update.message.text}" (Seed: {seed})',
reply_markup=get_try_again_markup(),
reply_to_message_id=update.message.message_id,
)
async def button(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
query = update.callback_query
if query.data in models_list:
global SELECTED_MODEL
SELECTED_MODEL = query.data
await query.answer()
await query.edit_message_text(text=f"Selected model: {query.data}")
return
if query.data in sheds_list:
global SELECTED_SCHEDULER
SELECTED_SCHEDULER = query.data
await query.answer()
await query.edit_message_text(text=f"Selected scheduler: {query.data}")
return
replied_message = query.message.reply_to_message
await query.answer()
progress_msg = await query.message.reply_text(
"Generating image...", reply_to_message_id=replied_message.message_id
)
if query.data == "TRYAGAIN":
prompt = replied_message.text
im, seed = generate_image(prompt)
await context.bot.delete_message(
chat_id=progress_msg.chat_id, message_id=progress_msg.message_id
)
await context.bot.send_photo(
update.effective_user.id,
image_to_bytes(im),
caption=f'"{prompt}" (Seed: {seed})',
reply_markup=get_try_again_markup(),
reply_to_message_id=replied_message.message_id,
)
async def select_model_handler(update, context):
text = "Select model"
keyboard = []
for model in models_list:
keyboard.append(
[
InlineKeyboardButton(text=model, callback_data=model),
]
)
markup = InlineKeyboardMarkup(keyboard)
await update.message.reply_text(text=text, reply_markup=markup)
async def select_scheduler_handler(update, context):
text = "Select schedule"
keyboard = []
    for scheduler in schedulers_list:
        keyboard.append(
            [
                InlineKeyboardButton(text=scheduler, callback_data=scheduler),
            ]
)
markup = InlineKeyboardMarkup(keyboard)
await update.message.reply_text(text=text, reply_markup=markup)
async def set_steps_handler(update, context):
    input_msg = update.message.text
    log.warning(input_msg)
    try:
        input_args = input_msg.split("/set_steps ")[1]
        global STEPS
        STEPS = int(input_args)
    except Exception:
        input_args = (
            "Invalid parameter for the command. A correct command looks like:\n"
            " /set_steps 30"
        )
    await update.message.reply_text(input_args)
async def set_negative_prompt_handler(update, context):
    input_msg = update.message.text
    log.warning(input_msg)
    try:
        input_args = input_msg.split("/set_negative_prompt ")[1]
        global NEGATIVE_PROMPT
        NEGATIVE_PROMPT = input_args
    except Exception:
        input_args = (
            "Invalid parameter for the command. A correct command looks like:\n"
            " /set_negative_prompt ugly, bad art, mutated"
        )
    await update.message.reply_text(input_args)
async def set_guidance_scale_handler(update, context):
    input_msg = update.message.text
    log.warning(input_msg)
    try:
        input_args = input_msg.split("/set_guidance_scale ")[1]
        global GUIDANCE_SCALE
        GUIDANCE_SCALE = int(input_args)
    except Exception:
        input_args = (
            "Invalid parameter for the command. A correct command looks like:\n"
            " /set_guidance_scale 7"
        )
    await update.message.reply_text(input_args)
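# The three set_* handlers above share the same parse -> validate -> assign
# pattern. A minimal sketch of collapsing them into one factory, assuming
# python-telegram-bot v20-style handlers where `context.args` holds the words
# following the command; the name `make_setter` is hypothetical:
def make_setter(var_name, cast, usage):
    async def handler(update, context):
        try:
            # Cast the raw argument text; int("") and int("abc") raise ValueError.
            value = cast(" ".join(context.args))
            globals()[var_name] = value
            reply = f"{var_name} set to {value}"
        except (ValueError, IndexError):
            reply = (
                "Invalid parameter for the command. A correct command looks like:\n"
                f" {usage}"
            )
        await update.message.reply_text(reply)
    return handler
# e.g.: app.add_handler(CommandHandler("set_steps", make_setter("STEPS", int, "/set_steps 30")))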
async def setup_bot_commands(application: Application) -> None:
await application.bot.set_my_commands(
[
BotCommand("select_model", "to select model"),
BotCommand("select_scheduler", "to select scheduler"),
BotCommand("set_steps", "to set steps"),
BotCommand("set_guidance_scale", "to set guidance scale"),
BotCommand("set_negative_prompt", "to set negative prompt"),
]
)
app = (
ApplicationBuilder().token(TG_TOKEN).post_init(setup_bot_commands).build()
)
app.add_handler(CommandHandler("select_model", select_model_handler))
app.add_handler(CommandHandler("select_scheduler", select_scheduler_handler))
app.add_handler(CommandHandler("set_steps", set_steps_handler))
app.add_handler(
CommandHandler("set_guidance_scale", set_guidance_scale_handler)
)
app.add_handler(
CommandHandler("set_negative_prompt", set_negative_prompt_handler)
)
app.add_handler(
MessageHandler(filters.TEXT & ~filters.COMMAND, generate_and_send_photo)
)
app.add_handler(CallbackQueryHandler(button))
log.warning("Start bot")
app.run_polling()
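# Note: running this module starts long polling immediately; it assumes that
# TG_TOKEN (defined earlier in this file) holds a valid bot token issued by
# Telegram's @BotFather.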

View File

@@ -1,258 +0,0 @@
import torch
import time
from apps.stable_diffusion.src import (
args,
Text2ImagePipeline,
get_schedulers,
set_init_device_flags,
utils,
clear_all,
save_output_img,
)
schedulers = None
# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
init_use_tuned = args.use_tuned
init_import_mlir = args.import_mlir
# Exposed to UI.
def txt2img_inf(
prompt: str,
negative_prompt: str,
height: int,
width: int,
steps: int,
guidance_scale: float,
seed: int,
batch_count: int,
batch_size: int,
scheduler: str,
custom_model: str,
hf_model_id: str,
precision: str,
device: str,
max_length: int,
save_metadata_to_json: bool,
save_metadata_to_png: bool,
lora_weights: str,
lora_hf_id: str,
):
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
Config,
)
import apps.stable_diffusion.web.utils.global_obj as global_obj
global schedulers
args.prompts = [prompt]
args.negative_prompts = [negative_prompt]
args.guidance_scale = guidance_scale
args.steps = steps
args.scheduler = scheduler
# set ckpt_loc and hf_model_id.
    types = (
        ".ckpt",
        ".safetensors",
    )  # supported checkpoint file types
args.ckpt_loc = ""
args.hf_model_id = ""
if custom_model == "None":
if not hf_model_id:
return (
None,
"Please provide either custom model or huggingface model ID, both must not be empty",
)
args.hf_model_id = hf_model_id
elif ".ckpt" in custom_model or ".safetensors" in custom_model:
args.ckpt_loc = get_custom_model_pathfile(custom_model)
else:
args.hf_model_id = custom_model
args.save_metadata_to_json = save_metadata_to_json
args.write_metadata_to_png = save_metadata_to_png
use_lora = ""
if lora_weights == "None" and not lora_hf_id:
use_lora = ""
elif not lora_hf_id:
use_lora = lora_weights
else:
use_lora = lora_hf_id
args.use_lora = use_lora
dtype = torch.float32 if precision == "fp32" else torch.half
cpu_scheduling = not scheduler.startswith("Shark")
new_config_obj = Config(
"txt2img",
args.hf_model_id,
args.ckpt_loc,
precision,
batch_size,
max_length,
height,
width,
device,
use_lora=use_lora,
use_stencil=None,
)
if (
not global_obj.get_sd_obj()
or global_obj.get_cfg_obj() != new_config_obj
):
global_obj.clear_cache()
global_obj.set_cfg_obj(new_config_obj)
args.precision = precision
args.batch_size = batch_size
args.max_length = max_length
args.height = height
args.width = width
args.device = device.split("=>", 1)[1].strip()
args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
args.use_tuned = init_use_tuned
args.import_mlir = init_import_mlir
args.img_path = None
set_init_device_flags()
model_id = (
args.hf_model_id
if args.hf_model_id
else "stabilityai/stable-diffusion-2-1-base"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[scheduler]
global_obj.set_sd_obj(
Text2ImagePipeline.from_pretrained(
scheduler=scheduler_obj,
import_mlir=args.import_mlir,
model_id=args.hf_model_id,
ckpt_loc=args.ckpt_loc,
precision=args.precision,
max_length=args.max_length,
batch_size=args.batch_size,
height=args.height,
width=args.width,
use_base_vae=args.use_base_vae,
use_tuned=args.use_tuned,
custom_vae=args.custom_vae,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
use_lora=use_lora,
)
)
global_obj.set_schedulers(schedulers[scheduler])
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
img_seed = utils.sanitize_seed(seed)
for i in range(batch_count):
if i > 0:
img_seed = utils.sanitize_seed(-1)
out_imgs = global_obj.get_sd_obj().generate_images(
prompt,
negative_prompt,
batch_size,
height,
width,
steps,
guidance_scale,
img_seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
save_output_img(out_imgs[0], img_seed)
generated_imgs.extend(out_imgs)
seeds.append(img_seed)
global_obj.get_sd_obj().log += "\n"
yield generated_imgs, global_obj.get_sd_obj().log
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={device}"
text_output += (
f"\nsteps={steps}, guidance_scale={guidance_scale}, seed={seeds}"
)
text_output += f"\nsize={height}x{width}, batch_count={batch_count}, batch_size={batch_size}, max_length={args.max_length}"
# text_output += txt2img_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
yield generated_imgs, text_output
if __name__ == "__main__":
if args.clear_all:
clear_all()
dtype = torch.float32 if args.precision == "fp32" else torch.half
cpu_scheduling = not args.scheduler.startswith("Shark")
set_init_device_flags()
schedulers = get_schedulers(args.hf_model_id)
scheduler_obj = schedulers[args.scheduler]
seed = args.seed
use_lora = args.use_lora
txt2img_obj = Text2ImagePipeline.from_pretrained(
scheduler=scheduler_obj,
import_mlir=args.import_mlir,
model_id=args.hf_model_id,
ckpt_loc=args.ckpt_loc,
precision=args.precision,
max_length=args.max_length,
batch_size=args.batch_size,
height=args.height,
width=args.width,
use_base_vae=args.use_base_vae,
use_tuned=args.use_tuned,
custom_vae=args.custom_vae,
low_cpu_mem_usage=args.low_cpu_mem_usage,
debug=args.import_debug if args.import_mlir else False,
use_lora=use_lora,
)
for current_batch in range(args.batch_count):
if current_batch > 0:
seed = -1
seed = utils.sanitize_seed(seed)
start_time = time.time()
generated_imgs = txt2img_obj.generate_images(
args.prompts,
args.negative_prompts,
args.batch_size,
args.height,
args.width,
args.steps,
args.guidance_scale,
seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += (
f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
)
text_output += f"\nscheduler={args.scheduler}, device={args.device}"
text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
text_output += (
f", batch size={args.batch_size}, max_length={args.max_length}"
)
        # TODO: with --batch_count=x, txt2img_obj.log re-prints info from the start on every iteration's display
text_output += txt2img_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
save_output_img(generated_imgs[0], seed)
print(text_output)

View File

@@ -1,261 +0,0 @@
import torch
import time
from PIL import Image
from apps.stable_diffusion.src import (
args,
UpscalerPipeline,
get_schedulers,
set_init_device_flags,
utils,
clear_all,
save_output_img,
)
schedulers = None
# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
init_use_tuned = args.use_tuned
init_import_mlir = args.import_mlir
# Exposed to UI.
def upscaler_inf(
prompt: str,
negative_prompt: str,
init_image,
height: int,
width: int,
steps: int,
noise_level: int,
guidance_scale: float,
seed: int,
batch_count: int,
batch_size: int,
scheduler: str,
custom_model: str,
hf_model_id: str,
precision: str,
device: str,
max_length: int,
save_metadata_to_json: bool,
save_metadata_to_png: bool,
):
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
Config,
)
import apps.stable_diffusion.web.utils.global_obj as global_obj
global schedulers
args.prompts = [prompt]
args.negative_prompts = [negative_prompt]
args.guidance_scale = guidance_scale
args.seed = seed
args.steps = steps
args.scheduler = scheduler
args.height = height
args.width = width
    if init_image is None:
        return None, "An initial image is required"
    # PIL's Image.resize takes (width, height).
    image = init_image.convert("RGB").resize((args.width, args.height))
# set ckpt_loc and hf_model_id.
    types = (
        ".ckpt",
        ".safetensors",
    )  # supported checkpoint file types
args.ckpt_loc = ""
args.hf_model_id = ""
if custom_model == "None":
if not hf_model_id:
return (
None,
"Please provide either custom model or huggingface model ID, both must not be empty",
)
args.hf_model_id = hf_model_id
elif ".ckpt" in custom_model or ".safetensors" in custom_model:
args.ckpt_loc = get_custom_model_pathfile(custom_model)
else:
args.hf_model_id = custom_model
args.save_metadata_to_json = save_metadata_to_json
args.write_metadata_to_png = save_metadata_to_png
dtype = torch.float32 if precision == "fp32" else torch.half
cpu_scheduling = not scheduler.startswith("Shark")
new_config_obj = Config(
"upscaler",
args.hf_model_id,
args.ckpt_loc,
precision,
batch_size,
max_length,
height,
width,
device,
use_lora=None,
use_stencil=None,
)
if (
not global_obj.get_sd_obj()
or global_obj.get_cfg_obj() != new_config_obj
):
global_obj.clear_cache()
global_obj.set_cfg_obj(new_config_obj)
args.batch_size = batch_size
args.max_length = max_length
args.device = device.split("=>", 1)[1].strip()
args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
args.use_tuned = init_use_tuned
args.import_mlir = init_import_mlir
set_init_device_flags()
model_id = (
args.hf_model_id
if args.hf_model_id
else "stabilityai/stable-diffusion-2-1-base"
)
schedulers = get_schedulers(model_id)
scheduler_obj = schedulers[scheduler]
global_obj.set_sd_obj(
UpscalerPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
)
)
global_obj.set_schedulers(schedulers[scheduler])
global_obj.get_sd_obj().low_res_scheduler = schedulers["DDPM"]
start_time = time.time()
global_obj.get_sd_obj().log = ""
generated_imgs = []
seeds = []
img_seed = utils.sanitize_seed(seed)
extra_info = {"NOISE LEVEL": noise_level}
for current_batch in range(batch_count):
if current_batch > 0:
img_seed = utils.sanitize_seed(-1)
out_imgs = global_obj.get_sd_obj().generate_images(
prompt,
negative_prompt,
image,
batch_size,
height,
width,
steps,
noise_level,
guidance_scale,
img_seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
save_output_img(out_imgs[0], img_seed, extra_info)
generated_imgs.extend(out_imgs)
seeds.append(img_seed)
global_obj.get_sd_obj().log += "\n"
yield generated_imgs, global_obj.get_sd_obj().log
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={device}"
text_output += f"\nsteps={steps}, noise_level={noise_level}, guidance_scale={guidance_scale}, seed={seeds}"
text_output += f"\nsize={height}x{width}, batch_count={batch_count}, batch_size={batch_size}, max_length={args.max_length}"
text_output += global_obj.get_sd_obj().log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
yield generated_imgs, text_output
if __name__ == "__main__":
if args.clear_all:
clear_all()
if args.img_path is None:
print("Flag --img_path is required.")
exit()
    # When the models get uploaded, this should default to False.
args.import_mlir = True
cpu_scheduling = not args.scheduler.startswith("Shark")
dtype = torch.float32 if args.precision == "fp32" else torch.half
set_init_device_flags()
schedulers = get_schedulers(args.hf_model_id)
scheduler_obj = schedulers[args.scheduler]
    image = (
        Image.open(args.img_path)
        .convert("RGB")
        # PIL's Image.resize takes (width, height).
        .resize((args.width, args.height))
    )
seed = utils.sanitize_seed(args.seed)
# Adjust for height and width based on model
upscaler_obj = UpscalerPipeline.from_pretrained(
scheduler_obj,
args.import_mlir,
args.hf_model_id,
args.ckpt_loc,
args.custom_vae,
args.precision,
args.max_length,
args.batch_size,
args.height,
args.width,
args.use_base_vae,
args.use_tuned,
low_cpu_mem_usage=args.low_cpu_mem_usage,
ddpm_scheduler=schedulers["DDPM"],
)
start_time = time.time()
generated_imgs = upscaler_obj.generate_images(
args.prompts,
args.negative_prompts,
image,
args.batch_size,
args.height,
args.width,
args.steps,
args.noise_level,
args.guidance_scale,
seed,
args.max_length,
dtype,
args.use_base_vae,
cpu_scheduling,
)
total_time = time.time() - start_time
text_output = f"prompt={args.prompts}"
text_output += f"\nnegative prompt={args.negative_prompts}"
text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
text_output += f"\nscheduler={args.scheduler}, device={args.device}"
text_output += f"\nsteps={args.steps}, noise_level={args.noise_level}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
text_output += (
f", batch size={args.batch_size}, max_length={args.max_length}"
)
text_output += upscaler_obj.log
text_output += f"\nTotal image generation time: {total_time:.4f}sec"
extra_info = {"NOISE LEVEL": args.noise_level}
save_output_img(generated_imgs[0], seed, extra_info)
print(text_output)

View File

@@ -1,83 +0,0 @@
# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_data_files
from PyInstaller.utils.hooks import copy_metadata
from PyInstaller.utils.hooks import collect_submodules
import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
datas = []
datas += collect_data_files('torch')
datas += copy_metadata('torch')
datas += copy_metadata('tqdm')
datas += copy_metadata('regex')
datas += copy_metadata('requests')
datas += copy_metadata('packaging')
datas += copy_metadata('filelock')
datas += copy_metadata('numpy')
datas += copy_metadata('tokenizers')
datas += copy_metadata('importlib_metadata')
datas += copy_metadata('torch-mlir')
datas += copy_metadata('omegaconf')
datas += copy_metadata('safetensors')
datas += collect_data_files('diffusers')
datas += collect_data_files('transformers')
datas += collect_data_files('opencv-python')
datas += collect_data_files('skimage')
datas += collect_data_files('gradio')
datas += collect_data_files('iree')
datas += collect_data_files('google-cloud-storage')
datas += collect_data_files('shark')
datas += [
( 'src/utils/resources/prompts.json', 'resources' ),
( 'src/utils/resources/model_db.json', 'resources' ),
( 'src/utils/resources/opt_flags.json', 'resources' ),
( 'src/utils/resources/base_model.json', 'resources' ),
( 'web/ui/css/*', 'ui/css' ),
( 'web/ui/logos/*', 'logos' )
]
binaries = []
block_cipher = None
hiddenimports = ['shark', 'shark.shark_inference', 'apps']
hiddenimports += [x for x in collect_submodules("skimage") if "tests" not in x]
a = Analysis(
['web/index.py'],
pathex=['.'],
binaries=binaries,
datas=datas,
hiddenimports=hiddenimports,
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='shark_sd',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)

View File

@@ -1,81 +0,0 @@
# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_data_files
from PyInstaller.utils.hooks import collect_submodules
from PyInstaller.utils.hooks import copy_metadata
import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
datas = []
datas += collect_data_files('torch')
datas += copy_metadata('torch')
datas += copy_metadata('tqdm')
datas += copy_metadata('regex')
datas += copy_metadata('requests')
datas += copy_metadata('packaging')
datas += copy_metadata('filelock')
datas += copy_metadata('numpy')
datas += copy_metadata('tokenizers')
datas += copy_metadata('importlib_metadata')
datas += copy_metadata('torch-mlir')
datas += copy_metadata('omegaconf')
datas += copy_metadata('safetensors')
datas += collect_data_files('diffusers')
datas += collect_data_files('transformers')
datas += collect_data_files('opencv-python')
datas += collect_data_files('skimage')
datas += collect_data_files('gradio')
datas += collect_data_files('iree')
datas += collect_data_files('google-cloud-storage')
datas += collect_data_files('shark')
datas += [
( 'src/utils/resources/prompts.json', 'resources' ),
( 'src/utils/resources/model_db.json', 'resources' ),
( 'src/utils/resources/opt_flags.json', 'resources' ),
( 'src/utils/resources/base_model.json', 'resources' ),
]
binaries = []
block_cipher = None
hiddenimports = ['shark', 'shark.shark_inference', 'apps']
hiddenimports += [x for x in collect_submodules("skimage") if "tests" not in x]
a = Analysis(
['scripts/txt2img.py'],
pathex=['.'],
binaries=binaries,
datas=datas,
hiddenimports=hiddenimports,
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='shark_sd_cli',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
upx_exclude=[],
runtime_tmpdir=None,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)

View File

@@ -1,17 +0,0 @@
from apps.stable_diffusion.src.utils import (
args,
set_init_device_flags,
prompt_examples,
get_available_devices,
clear_all,
save_output_img,
)
from apps.stable_diffusion.src.pipelines import (
Text2ImagePipeline,
Image2ImagePipeline,
InpaintPipeline,
OutpaintPipeline,
StencilPipeline,
UpscalerPipeline,
)
from apps.stable_diffusion.src.schedulers import get_schedulers

View File

@@ -1,12 +0,0 @@
from apps.stable_diffusion.src.models.model_wrappers import (
SharkifyStableDiffusionModel,
)
from apps.stable_diffusion.src.models.opt_params import (
get_vae_encode,
get_vae,
get_unet,
get_clip,
get_tokenizer,
get_params,
get_variant_version,
)

View File

@@ -1,665 +0,0 @@
from diffusers import AutoencoderKL, UNet2DConditionModel, ControlNetModel
from transformers import CLIPTextModel
from collections import defaultdict
import torch
import safetensors.torch
import traceback
import sys
import os
from apps.stable_diffusion.src.utils import (
compile_through_fx,
get_opt_flags,
base_models,
args,
fetch_or_delete_vmfbs,
preprocessCKPT,
get_path_to_diffusers_checkpoint,
fetch_and_update_base_model_id,
get_path_stem,
get_extended_name,
get_stencil_model_id,
)
# These shapes are parameter dependent.
def replace_shape_str(shape, max_len, width, height, batch_size):
new_shape = []
for i in range(len(shape)):
if shape[i] == "max_len":
new_shape.append(max_len)
elif shape[i] == "height":
new_shape.append(height)
elif shape[i] == "width":
new_shape.append(width)
elif isinstance(shape[i], str):
if "*" in shape[i]:
mul_val = int(shape[i].split("*")[0])
if "batch_size" in shape[i]:
new_shape.append(batch_size * mul_val)
elif "height" in shape[i]:
new_shape.append(height * mul_val)
elif "width" in shape[i]:
new_shape.append(width * mul_val)
elif "/" in shape[i]:
import math
div_val = int(shape[i].split("/")[1])
if "batch_size" in shape[i]:
new_shape.append(math.ceil(batch_size / div_val))
elif "height" in shape[i]:
new_shape.append(math.ceil(height / div_val))
elif "width" in shape[i]:
new_shape.append(math.ceil(width / div_val))
else:
new_shape.append(shape[i])
return new_shape
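# A minimal sanity check of the substitution rules above; the values are
# arbitrary and the helper name `_shape_example` is hypothetical, used only
# for illustration:
def _shape_example():
    resolved = replace_shape_str(
        ["2*batch_size", 4, "height", "width"],
        max_len=64,
        width=96,
        height=96,
        batch_size=1,
    )
    # "2*batch_size" -> 2, plain ints pass through, "height"/"width" resolve.
    assert resolved == [2, 4, 96, 96]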
# Get the input info for various models i.e. "unet", "clip", "vae", "vae_encode".
def get_input_info(model_info, max_len, width, height, batch_size):
dtype_config = {"f32": torch.float32, "i64": torch.int64}
input_map = defaultdict(list)
for k in model_info:
for inp in model_info[k]:
shape = model_info[k][inp]["shape"]
dtype = dtype_config[model_info[k][inp]["dtype"]]
tensor = None
if isinstance(shape, list):
clean_shape = replace_shape_str(
shape, max_len, width, height, batch_size
)
if dtype == torch.int64:
tensor = torch.randint(1, 3, tuple(clean_shape))
else:
tensor = torch.randn(*clean_shape).to(dtype)
elif isinstance(shape, int):
tensor = torch.tensor(shape).to(dtype)
else:
sys.exit("shape isn't specified correctly.")
input_map[k].append(tensor)
return input_map
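# The `model_info` dict consumed above is expected to look like this
# (a hypothetical excerpt, not the actual base_model.json contents):
#   {"unet": {"latent": {"shape": ["2*batch_size", 4, "height", "width"],
#                        "dtype": "f32"}}}
# i.e. one entry per sub-model, each mapping input names to a shape spec and
# a dtype key from dtype_config.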
class SharkifyStableDiffusionModel:
def __init__(
self,
model_id: str,
custom_weights: str,
custom_vae: str,
precision: str,
max_len: int = 64,
width: int = 512,
height: int = 512,
batch_size: int = 1,
use_base_vae: bool = False,
use_tuned: bool = False,
low_cpu_mem_usage: bool = False,
debug: bool = False,
sharktank_dir: str = "",
generate_vmfb: bool = True,
is_inpaint: bool = False,
is_upscaler: bool = False,
use_stencil: str = None,
use_lora: str = ""
):
self.check_params(max_len, width, height)
self.max_len = max_len
self.height = height // 8
self.width = width // 8
self.batch_size = batch_size
self.custom_weights = custom_weights
if custom_weights != "":
assert custom_weights.lower().endswith(
(".ckpt", ".safetensors")
), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
custom_weights = get_path_to_diffusers_checkpoint(custom_weights)
self.model_id = model_id if custom_weights == "" else custom_weights
# TODO: remove the following line when stable-diffusion-2-1 works
if self.model_id == "stabilityai/stable-diffusion-2-1":
self.model_id = "stabilityai/stable-diffusion-2-1-base"
self.custom_vae = custom_vae
self.precision = precision
self.base_vae = use_base_vae
        self.model_name = f"_{batch_size}_{max_len}_{height}_{width}_{precision}"
        print(f"use_tuned? sharkify: {use_tuned}")
self.use_tuned = use_tuned
if use_tuned:
self.model_name = self.model_name + "_tuned"
self.model_name = self.model_name + "_" + get_path_stem(self.model_id)
self.low_cpu_mem_usage = low_cpu_mem_usage
self.is_inpaint = is_inpaint
self.is_upscaler = is_upscaler
self.use_stencil = get_stencil_model_id(use_stencil)
if use_lora != "":
self.model_name = self.model_name + "_" + get_path_stem(use_lora)
self.use_lora = use_lora
print(self.model_name)
self.debug = debug
self.sharktank_dir = sharktank_dir
self.generate_vmfb = generate_vmfb
    def get_extended_name_for_all_model(self, mask_to_fetch):
        model_name = {}
        sub_model_list = [
            "clip",
            "unet",
            "stencil_unet",
            "vae",
            "vae_encode",
            "stencil_adaptor",
        ]
        for model, should_fetch in zip(sub_model_list, mask_to_fetch):
            if not should_fetch:
                continue
            sub_model = model
            model_config = self.model_name
            if model == "vae":
                if self.custom_vae != "":
                    model_config = model_config + get_path_stem(self.custom_vae)
                if self.base_vae:
                    sub_model = "base_vae"
            model_name[model] = get_extended_name(sub_model + model_config)
        return model_name
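    # For illustration: mask_to_fetch=[True, True, False, True, False, False]
    # selects "clip", "unet" and "vae" from sub_model_list above, the default
    # combination used by __call__ when neither vae_encode nor a stencil is needed.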
    def check_params(self, max_len, width, height):
        if not (32 <= max_len <= 77):
            sys.exit("please specify max_len in the range [32, 77].")
        if not (width % 8 == 0 and width >= 128):
            sys.exit("width should be at least 128 and a multiple of 8")
        if not (height % 8 == 0 and height >= 128):
            sys.exit("height should be at least 128 and a multiple of 8")
def get_vae_encode(self):
class VaeEncodeModel(torch.nn.Module):
def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
super().__init__()
self.vae = AutoencoderKL.from_pretrained(
model_id,
subfolder="vae",
low_cpu_mem_usage=low_cpu_mem_usage,
)
def forward(self, input):
latents = self.vae.encode(input).latent_dist.sample()
return 0.18215 * latents
vae_encode = VaeEncodeModel()
inputs = tuple(self.inputs["vae_encode"])
        is_f16 = self.precision == "fp16"
shark_vae_encode = compile_through_fx(
vae_encode,
inputs,
is_f16=is_f16,
use_tuned=self.use_tuned,
model_name=self.model_name["vae_encode"],
extra_args=get_opt_flags("vae", precision=self.precision),
)
return shark_vae_encode
def get_vae(self):
class VaeModel(torch.nn.Module):
def __init__(self, model_id=self.model_id, base_vae=self.base_vae, custom_vae=self.custom_vae, low_cpu_mem_usage=False):
super().__init__()
self.vae = None
if custom_vae == "":
self.vae = AutoencoderKL.from_pretrained(
model_id,
subfolder="vae",
low_cpu_mem_usage=low_cpu_mem_usage,
)
elif not isinstance(custom_vae, dict):
self.vae = AutoencoderKL.from_pretrained(
custom_vae,
subfolder="vae",
low_cpu_mem_usage=low_cpu_mem_usage,
)
else:
self.vae = AutoencoderKL.from_pretrained(
model_id,
subfolder="vae",
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.vae.load_state_dict(custom_vae)
self.base_vae = base_vae
def forward(self, input):
if not self.base_vae:
input = 1 / 0.18215 * input
x = self.vae.decode(input, return_dict=False)[0]
x = (x / 2 + 0.5).clamp(0, 1)
if self.base_vae:
return x
x = x * 255.0
return x.round()
vae = VaeModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
inputs = tuple(self.inputs["vae"])
        is_f16 = self.precision == "fp16"
save_dir = os.path.join(self.sharktank_dir, self.model_name["vae"])
if self.debug:
os.makedirs(save_dir, exist_ok=True)
shark_vae = compile_through_fx(
vae,
inputs,
is_f16=is_f16,
use_tuned=self.use_tuned,
model_name=self.model_name["vae"],
debug=self.debug,
generate_vmfb=self.generate_vmfb,
save_dir=save_dir,
extra_args=get_opt_flags("vae", precision=self.precision),
)
return shark_vae
def get_vae_upscaler(self):
class VaeModel(torch.nn.Module):
def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
super().__init__()
self.vae = AutoencoderKL.from_pretrained(
model_id,
subfolder="vae",
low_cpu_mem_usage=low_cpu_mem_usage,
)
def forward(self, input):
x = self.vae.decode(input, return_dict=False)[0]
x = (x / 2 + 0.5).clamp(0, 1)
return x
vae = VaeModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
inputs = tuple(self.inputs["vae"])
shark_vae = compile_through_fx(
vae,
inputs,
use_tuned=self.use_tuned,
model_name=self.model_name["vae"],
extra_args=get_opt_flags("vae", precision="fp32"),
)
return shark_vae
def get_controlled_unet(self):
class ControlledUnetModel(torch.nn.Module):
def __init__(
self, model_id=self.model_id, low_cpu_mem_usage=False
):
super().__init__()
self.unet = UNet2DConditionModel.from_pretrained(
model_id,
subfolder="unet",
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.in_channels = self.unet.in_channels
self.train(False)
            def forward(
                self, latent, timestep, text_embedding, guidance_scale,
                control1, control2, control3, control4, control5, control6,
                control7, control8, control9, control10, control11, control12,
                control13,
            ):
                # Expand the latents for classifier-free guidance to avoid two forward passes.
                db_res_samples = tuple(
                    [
                        control1, control2, control3, control4, control5,
                        control6, control7, control8, control9, control10,
                        control11, control12,
                    ]
                )
                mb_res_samples = control13
latents = torch.cat([latent] * 2)
unet_out = self.unet.forward(
latents,
timestep,
encoder_hidden_states=text_embedding,
down_block_additional_residuals=db_res_samples,
mid_block_additional_residual=mb_res_samples,
return_dict=False,
)[0]
noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)
return noise_pred
unet = ControlledUnetModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
        is_f16 = self.precision == "fp16"
inputs = tuple(self.inputs["stencil_unet"])
        input_mask = [
            True, True, True, False, True, True, True, True, True, True,
            True, True, True, True, True, True, True,
        ]
shark_controlled_unet = compile_through_fx(
unet,
inputs,
model_name=self.model_name["stencil_unet"],
is_f16=is_f16,
f16_input_mask=input_mask,
use_tuned=self.use_tuned,
extra_args=get_opt_flags("unet", precision=self.precision),
)
return shark_controlled_unet
def get_control_net(self):
class StencilControlNetModel(torch.nn.Module):
def __init__(
self, model_id=self.use_stencil, low_cpu_mem_usage=False
):
super().__init__()
self.cnet = ControlNetModel.from_pretrained(
model_id,
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.in_channels = self.cnet.in_channels
self.train(False)
def forward(
self,
latent,
timestep,
text_embedding,
stencil_image_input,
):
                # Expand the latents for classifier-free guidance to avoid two forward passes.
                # TODO: guidance is not needed here; change `get_input_info` accordingly later.
                latents = torch.cat(
                    [latent] * 2
                )  # needs to be same as controlledUNET latents
stencil_image = torch.cat(
[stencil_image_input] * 2
) # needs to be same as controlledUNET latents
down_block_res_samples, mid_block_res_sample = self.cnet.forward(
latents,
timestep,
encoder_hidden_states=text_embedding,
controlnet_cond=stencil_image,
return_dict=False,
)
return tuple(list(down_block_res_samples) + [mid_block_res_sample])
scnet = StencilControlNetModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
        is_f16 = self.precision == "fp16"
inputs = tuple(self.inputs["stencil_adaptor"])
input_mask = [True, True, True, True]
shark_cnet = compile_through_fx(
scnet,
inputs,
model_name=self.model_name["stencil_adaptor"],
is_f16=is_f16,
f16_input_mask=input_mask,
use_tuned=self.use_tuned,
extra_args=get_opt_flags("unet", precision=self.precision),
)
return shark_cnet
def get_unet(self):
class UnetModel(torch.nn.Module):
def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False, use_lora=self.use_lora):
super().__init__()
self.unet = UNet2DConditionModel.from_pretrained(
model_id,
subfolder="unet",
low_cpu_mem_usage=low_cpu_mem_usage,
)
if use_lora != "":
self.unet.load_attn_procs(use_lora)
self.in_channels = self.unet.in_channels
self.train(False)
                if args.attention_slicing is not None and args.attention_slicing != "none":
                    if args.attention_slicing.isdigit():
                        self.unet.set_attention_slice(int(args.attention_slicing))
                    else:
                        self.unet.set_attention_slice(args.attention_slicing)
            # TODO: instead of flattening `control`, try using the list directly.
def forward(
self, latent, timestep, text_embedding, guidance_scale,
):
# expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
latents = torch.cat([latent] * 2)
unet_out = self.unet.forward(
latents, timestep, text_embedding, return_dict=False
)[0]
noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)
return noise_pred
unet = UnetModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
        is_f16 = self.precision == "fp16"
inputs = tuple(self.inputs["unet"])
input_mask = [True, True, True, False]
save_dir = os.path.join(self.sharktank_dir, self.model_name["unet"])
if self.debug:
os.makedirs(
save_dir,
exist_ok=True,
)
shark_unet = compile_through_fx(
unet,
inputs,
model_name=self.model_name["unet"],
is_f16=is_f16,
f16_input_mask=input_mask,
use_tuned=self.use_tuned,
debug=self.debug,
generate_vmfb=self.generate_vmfb,
save_dir=save_dir,
extra_args=get_opt_flags("unet", precision=self.precision),
)
return shark_unet
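    # The classifier-free guidance step in UnetModel.forward above is a linear
    # extrapolation from the unconditional toward the text-conditioned
    # prediction. A numeric sketch with dummy tensors (not real UNet outputs):
    #   uncond, text = torch.zeros(1), torch.ones(1)
    #   uncond + 7.5 * (text - uncond)  # -> tensor([7.5])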
def get_unet_upscaler(self):
class UnetModel(torch.nn.Module):
def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
super().__init__()
self.unet = UNet2DConditionModel.from_pretrained(
model_id,
subfolder="unet",
low_cpu_mem_usage=low_cpu_mem_usage,
)
self.in_channels = self.unet.in_channels
self.train(False)
def forward(self, latent, timestep, text_embedding, noise_level):
unet_out = self.unet.forward(
latent,
timestep,
text_embedding,
noise_level,
return_dict=False,
)[0]
return unet_out
unet = UnetModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
        is_f16 = self.precision == "fp16"
inputs = tuple(self.inputs["unet"])
input_mask = [True, True, True, False]
shark_unet = compile_through_fx(
unet,
inputs,
model_name=self.model_name["unet"],
is_f16=is_f16,
f16_input_mask=input_mask,
use_tuned=self.use_tuned,
extra_args=get_opt_flags("unet", precision=self.precision),
)
return shark_unet
def get_clip(self):
class CLIPText(torch.nn.Module):
def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
super().__init__()
self.text_encoder = CLIPTextModel.from_pretrained(
model_id,
subfolder="text_encoder",
low_cpu_mem_usage=low_cpu_mem_usage,
)
def forward(self, input):
return self.text_encoder(input)[0]
clip_model = CLIPText(low_cpu_mem_usage=self.low_cpu_mem_usage)
save_dir = os.path.join(self.sharktank_dir, self.model_name["clip"])
if self.debug:
os.makedirs(
save_dir,
exist_ok=True,
)
shark_clip = compile_through_fx(
clip_model,
tuple(self.inputs["clip"]),
model_name=self.model_name["clip"],
debug=self.debug,
generate_vmfb=self.generate_vmfb,
save_dir=save_dir,
extra_args=get_opt_flags("clip", precision="fp32"),
)
return shark_clip
def process_custom_vae(self):
custom_vae = self.custom_vae.lower()
if not custom_vae.endswith((".ckpt", ".safetensors")):
return self.custom_vae
try:
preprocessCKPT(self.custom_vae)
return get_path_to_diffusers_checkpoint(self.custom_vae)
        except Exception:
print("Processing standalone Vae checkpoint")
vae_checkpoint = None
vae_ignore_keys = {"model_ema.decay", "model_ema.num_updates"}
if custom_vae.endswith(".ckpt"):
vae_checkpoint = torch.load(self.custom_vae, map_location="cpu")
else:
                vae_checkpoint = safetensors.torch.load_file(
                    self.custom_vae, device="cpu"
                )
if "state_dict" in vae_checkpoint:
vae_checkpoint = vae_checkpoint["state_dict"]
            vae_dict = {
                k: v
                for k, v in vae_checkpoint.items()
                if k[0:4] != "loss" and k not in vae_ignore_keys
            }
return vae_dict
    # Compiles Clip, Unet and Vae with `base_model_id` defining their input
    # configuration.
def compile_all(self, base_model_id, need_vae_encode, need_stencil):
self.inputs = get_input_info(
base_models[base_model_id],
self.max_len,
self.width,
self.height,
self.batch_size,
)
if self.is_upscaler:
return self.get_clip(), self.get_unet_upscaler(), self.get_vae_upscaler()
compiled_controlnet = None
compiled_controlled_unet = None
compiled_unet = None
if need_stencil:
compiled_controlnet = self.get_control_net()
compiled_controlled_unet = self.get_controlled_unet()
else:
compiled_unet = self.get_unet()
if self.custom_vae != "":
print("Plugging in custom Vae")
compiled_vae = self.get_vae()
compiled_clip = self.get_clip()
if need_stencil:
return compiled_clip, compiled_controlled_unet, compiled_vae, compiled_controlnet
if need_vae_encode:
compiled_vae_encode = self.get_vae_encode()
return compiled_clip, compiled_unet, compiled_vae, compiled_vae_encode
return compiled_clip, compiled_unet, compiled_vae
def __call__(self):
# Step 1:
# -- Fetch all vmfbs for the model, if present, else delete the lot.
need_vae_encode, need_stencil = False, False
if not self.is_upscaler and args.img_path is not None:
if self.use_stencil is not None:
need_stencil = True
else:
need_vae_encode = True
        # `mask_to_fetch` prepares a mask to pick a combination out of:
        #   ["clip", "unet", "stencil_unet", "vae", "vae_encode", "stencil_adaptor"]
mask_to_fetch = [True, True, False, True, False, False]
if need_vae_encode:
mask_to_fetch = [True, True, False, True, True, False]
elif need_stencil:
mask_to_fetch = [True, False, True, True, False, True]
self.model_name = self.get_extended_name_for_all_model(mask_to_fetch)
vmfbs = fetch_or_delete_vmfbs(self.model_name, self.precision)
if vmfbs[0]:
# -- If all vmfbs are indeed present, we also try and fetch the base
# model configuration for running SD with custom checkpoints.
if self.custom_weights != "":
args.hf_model_id = fetch_and_update_base_model_id(self.custom_weights)
if args.hf_model_id == "":
sys.exit("Base model configuration for the custom model is missing. Use `--clear_all` and re-run.")
print("Loaded vmfbs from cache and successfully fetched base model configuration.")
return vmfbs
# Step 2:
# -- If vmfbs weren't found, we try to see if the base model configuration
# for the required SD run is known to us and bypass the retry mechanism.
model_to_run = ""
if self.custom_weights != "":
model_to_run = self.custom_weights
assert self.custom_weights.lower().endswith(
(".ckpt", ".safetensors")
), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
preprocessCKPT(self.custom_weights, self.is_inpaint)
else:
model_to_run = args.hf_model_id
# For custom Vae user can provide either the repo-id or a checkpoint file,
# and for a checkpoint file we'd need to process it via Diffusers' script.
self.custom_vae = self.process_custom_vae()
base_model_fetched = fetch_and_update_base_model_id(model_to_run)
if base_model_fetched != "":
print("Compiling all the models with the fetched base model configuration.")
if args.ckpt_loc != "":
args.hf_model_id = base_model_fetched
return self.compile_all(base_model_fetched, need_vae_encode, need_stencil)
        # Step 3:
        # -- This is the retry mechanism: the base model's configuration is not
        #    known to us, so we figure it out by trial and error.
        print("Inferring base model configuration.")
for model_id in base_models:
try:
                if need_vae_encode:
                    (
                        compiled_clip,
                        compiled_unet,
                        compiled_vae,
                        compiled_vae_encode,
                    ) = self.compile_all(model_id, need_vae_encode, need_stencil)
                elif need_stencil:
                    (
                        compiled_clip,
                        compiled_unet,
                        compiled_vae,
                        compiled_controlnet,
                    ) = self.compile_all(model_id, need_vae_encode, need_stencil)
                else:
                    compiled_clip, compiled_unet, compiled_vae = self.compile_all(
                        model_id, need_vae_encode, need_stencil
                    )
except Exception as e:
print(e)
print("Retrying with a different base model configuration")
continue
# -- Once a successful compilation has taken place we'd want to store
# the base model's configuration inferred.
fetch_and_update_base_model_id(model_to_run, model_id)
            # This is done because main.py bases the choice of tokenizer and
            # scheduler on `args.hf_model_id`. Since we no longer maintain a 1:1
            # mapping of variants to base models and rely on the retry mechanism
            # to find the input configuration, we also record the inferred base
            # model id in `args.hf_model_id`.
if args.ckpt_loc != "":
args.hf_model_id = model_id
if need_vae_encode:
return (
compiled_clip,
compiled_unet,
compiled_vae,
compiled_vae_encode,
)
if need_stencil:
return (
compiled_clip,
compiled_unet,
compiled_vae,
compiled_controlnet,
)
return compiled_clip, compiled_unet, compiled_vae
sys.exit(
"Cannot compile the model. Please create an issue with the detailed log at https://github.com/nod-ai/SHARK/issues"
)

View File

@@ -1,108 +0,0 @@
import sys
from transformers import CLIPTokenizer
from apps.stable_diffusion.src.utils import (
models_db,
args,
get_shark_model,
get_opt_flags,
)
hf_model_variant_map = {
"Linaqruf/anything-v3.0": ["anythingv3", "v1_4"],
"dreamlike-art/dreamlike-diffusion-1.0": ["dreamlike", "v1_4"],
"prompthero/openjourney": ["openjourney", "v1_4"],
"wavymulder/Analog-Diffusion": ["analogdiffusion", "v1_4"],
"stabilityai/stable-diffusion-2-1": ["stablediffusion", "v2_1base"],
"stabilityai/stable-diffusion-2-1-base": ["stablediffusion", "v2_1base"],
"CompVis/stable-diffusion-v1-4": ["stablediffusion", "v1_4"],
"runwayml/stable-diffusion-inpainting": ["stablediffusion", "inpaint_v1"],
"stabilityai/stable-diffusion-2-inpainting": ["stablediffusion", "inpaint_v2"],
}
def get_variant_version(hf_model_id):
return hf_model_variant_map[hf_model_id]
def get_params(bucket_key, model_key, model, is_tuned, precision):
try:
bucket = models_db[0][bucket_key]
model_name = models_db[1][model_key]
except KeyError:
raise Exception(
f"{bucket_key}/{model_key} is not present in the models database"
)
iree_flags = get_opt_flags(model, precision="fp16")
return bucket, model_name, iree_flags
def get_unet():
variant, version = get_variant_version(args.hf_model_id)
# Tuned model is present only for `fp16` precision.
is_tuned = "tuned" if args.use_tuned else "untuned"
if "vulkan" not in args.device and args.use_tuned:
bucket_key = f"{variant}/{is_tuned}/{args.device}"
model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}/{args.device}"
else:
bucket_key = f"{variant}/{is_tuned}"
model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}"
bucket, model_name, iree_flags = get_params(
bucket_key, model_key, "unet", is_tuned, args.precision
)
return get_shark_model(bucket, model_name, iree_flags)
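# For illustration, the untuned key construction in get_unet above, assuming
# args.precision="fp16" and args.max_length=64; the helper name
# `_unet_keys_example` is hypothetical:
def _unet_keys_example():
    variant, version = hf_model_variant_map["prompthero/openjourney"]
    # -> ("openjourney/untuned", "openjourney/v1_4/unet/fp16/length_64/untuned")
    return f"{variant}/untuned", f"{variant}/{version}/unet/fp16/length_64/untuned"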
def get_vae_encode():
variant, version = get_variant_version(args.hf_model_id)
# Tuned model is present only for `fp16` precision.
is_tuned = "tuned" if args.use_tuned else "untuned"
if "vulkan" not in args.device and args.use_tuned:
bucket_key = f"{variant}/{is_tuned}/{args.device}"
model_key = f"{variant}/{version}/vae_encode/{args.precision}/length_77/{is_tuned}/{args.device}"
else:
bucket_key = f"{variant}/{is_tuned}"
model_key = f"{variant}/{version}/vae_encode/{args.precision}/length_77/{is_tuned}"
bucket, model_name, iree_flags = get_params(
bucket_key, model_key, "vae", is_tuned, args.precision
)
return get_shark_model(bucket, model_name, iree_flags)
def get_vae():
variant, version = get_variant_version(args.hf_model_id)
# Tuned model is present only for `fp16` precision.
is_tuned = "tuned" if args.use_tuned else "untuned"
is_base = "/base" if args.use_base_vae else ""
if "vulkan" not in args.device and args.use_tuned:
bucket_key = f"{variant}/{is_tuned}/{args.device}"
model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}/{args.device}"
else:
bucket_key = f"{variant}/{is_tuned}"
model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}"
bucket, model_name, iree_flags = get_params(
bucket_key, model_key, "vae", is_tuned, args.precision
)
return get_shark_model(bucket, model_name, iree_flags)
def get_clip():
variant, version = get_variant_version(args.hf_model_id)
bucket_key = f"{variant}/untuned"
model_key = (
f"{variant}/{version}/clip/fp32/length_{args.max_length}/untuned"
)
bucket, model_name, iree_flags = get_params(
bucket_key, model_key, "clip", "untuned", "fp32"
)
return get_shark_model(bucket, model_name, iree_flags)
def get_tokenizer():
tokenizer = CLIPTokenizer.from_pretrained(
args.hf_model_id, subfolder="tokenizer"
)
return tokenizer

View File

@@ -1,18 +0,0 @@
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_txt2img import (
Text2ImagePipeline,
)
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_img2img import (
Image2ImagePipeline,
)
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_inpaint import (
InpaintPipeline,
)
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_outpaint import (
OutpaintPipeline,
)
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_stencil import (
StencilPipeline,
)
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_upscaler import (
UpscalerPipeline,
)

View File

@@ -1,172 +0,0 @@
import torch
import time
import numpy as np
from tqdm.auto import tqdm
from random import randint
from PIL import Image
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
class Image2ImagePipeline(StableDiffusionPipeline):
def __init__(
self,
vae_encode: SharkInference,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
self.vae_encode = vae_encode
def prepare_image_latents(
self,
image,
batch_size,
height,
width,
generator,
num_inference_steps,
strength,
dtype,
):
# Pre process image -> get image encoded -> process latents
# TODO: process with variable HxW combos
# Pre process image
image = image.resize((width, height))
image_arr = np.stack([np.array(i) for i in (image,)], axis=0)
image_arr = image_arr / 255.0
image_arr = torch.from_numpy(image_arr).permute(0, 3, 1, 2).to(dtype)
image_arr = 2 * (image_arr - 0.5)
# set scheduler steps
self.scheduler.set_timesteps(num_inference_steps)
init_timestep = min(
int(num_inference_steps * strength), num_inference_steps
)
t_start = max(num_inference_steps - init_timestep, 0)
# timesteps reduced as per strength
timesteps = self.scheduler.timesteps[t_start:]
# new number of steps to be used as per strength will be
# num_inference_steps = num_inference_steps - t_start
# image encode
latents = self.encode_image((image_arr,))
latents = torch.from_numpy(latents).to(dtype)
# add noise to data
noise = torch.randn(latents.shape, generator=generator, dtype=dtype)
latents = self.scheduler.add_noise(
latents, noise, timesteps[0].repeat(1)
)
return latents, timesteps
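    # Worked example of the strength-to-timesteps mapping above: with
    # num_inference_steps=50 and strength=0.6, init_timestep = min(30, 50) = 30
    # and t_start = 20, so only the last 30 scheduler timesteps are denoised,
    # and the noise added corresponds to timesteps[0], the first of those 30.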
def encode_image(self, input_image):
vae_encode_start = time.time()
latents = self.vae_encode("forward", input_image)
vae_inf_time = (time.time() - vae_encode_start) * 1000
self.log += f"\nVAE Encode Inference time (ms): {vae_inf_time:.3f}"
return latents
def generate_images(
self,
prompts,
neg_prompts,
image,
batch_size,
height,
width,
num_inference_steps,
strength,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
use_stencil,
):
# prompts and negative prompts must be a list.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
        # seed generator to create the initial latent noise; also handle out-of-range seeds.
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# guidance scale as a float32 tensor.
guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
# Prepare input image latent
image_latents, final_timesteps = self.prepare_image_latents(
image=image,
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
strength=strength,
dtype=dtype,
)
# Get Image latents
latents = self.produce_img_latents(
latents=image_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=final_timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
return all_imgs

View File

@@ -1,445 +0,0 @@
import torch
from tqdm.auto import tqdm
import numpy as np
from random import randint
from PIL import Image, ImageOps
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
class InpaintPipeline(StableDiffusionPipeline):
def __init__(
self,
vae_encode: SharkInference,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
self.vae_encode = vae_encode
def prepare_latents(
self,
batch_size,
height,
width,
generator,
num_inference_steps,
dtype,
):
latents = torch.randn(
(
batch_size,
4,
height // 8,
width // 8,
),
generator=generator,
dtype=torch.float32,
).to(dtype)
self.scheduler.set_timesteps(num_inference_steps)
latents = latents * self.scheduler.init_noise_sigma
return latents
def get_crop_region(self, mask, pad=0):
h, w = mask.shape
crop_left = 0
for i in range(w):
if not (mask[:, i] == 0).all():
break
crop_left += 1
crop_right = 0
for i in reversed(range(w)):
if not (mask[:, i] == 0).all():
break
crop_right += 1
crop_top = 0
for i in range(h):
if not (mask[i] == 0).all():
break
crop_top += 1
crop_bottom = 0
for i in reversed(range(h)):
if not (mask[i] == 0).all():
break
crop_bottom += 1
return (
int(max(crop_left - pad, 0)),
int(max(crop_top - pad, 0)),
int(min(w - crop_right + pad, w)),
int(min(h - crop_bottom + pad, h)),
)
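    # A small numeric check of get_crop_region (hypothetical 4x4 mask, pad=0):
    #   mask = [[0, 0, 0, 0],
    #           [0, 1, 1, 0],
    #           [0, 1, 0, 0],
    #           [0, 0, 0, 0]]
    #   -> (left, top, right, bottom) = (1, 1, 3, 3)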
def expand_crop_region(
self,
crop_region,
processing_width,
processing_height,
image_width,
image_height,
):
x1, y1, x2, y2 = crop_region
ratio_crop_region = (x2 - x1) / (y2 - y1)
ratio_processing = processing_width / processing_height
if ratio_crop_region > ratio_processing:
desired_height = (x2 - x1) / ratio_processing
desired_height_diff = int(desired_height - (y2 - y1))
y1 -= desired_height_diff // 2
y2 += desired_height_diff - desired_height_diff // 2
if y2 >= image_height:
diff = y2 - image_height
y2 -= diff
y1 -= diff
if y1 < 0:
y2 -= y1
y1 -= y1
if y2 >= image_height:
y2 = image_height
else:
desired_width = (y2 - y1) * ratio_processing
desired_width_diff = int(desired_width - (x2 - x1))
x1 -= desired_width_diff // 2
x2 += desired_width_diff - desired_width_diff // 2
if x2 >= image_width:
diff = x2 - image_width
x2 -= diff
x1 -= diff
if x1 < 0:
x2 -= x1
x1 -= x1
if x2 >= image_width:
x2 = image_width
return x1, y1, x2, y2
def resize_image(self, resize_mode, im, width, height):
"""
resize_mode:
0: Resize the image to fill the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, cropping the excess.
1: Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, filling empty with data from image.
"""
if resize_mode == 0:
ratio = width / height
src_ratio = im.width / im.height
src_w = (
width if ratio > src_ratio else im.width * height // im.height
)
src_h = (
height if ratio <= src_ratio else im.height * width // im.width
)
resized = im.resize((src_w, src_h), resample=Image.LANCZOS)
res = Image.new("RGB", (width, height))
res.paste(
resized,
box=(width // 2 - src_w // 2, height // 2 - src_h // 2),
)
else:
ratio = width / height
src_ratio = im.width / im.height
src_w = (
width if ratio < src_ratio else im.width * height // im.height
)
src_h = (
height if ratio >= src_ratio else im.height * width // im.width
)
resized = im.resize((src_w, src_h), resample=Image.LANCZOS)
res = Image.new("RGB", (width, height))
res.paste(
resized,
box=(width // 2 - src_w // 2, height // 2 - src_h // 2),
)
if ratio < src_ratio:
fill_height = height // 2 - src_h // 2
res.paste(
resized.resize((width, fill_height), box=(0, 0, width, 0)),
box=(0, 0),
)
res.paste(
resized.resize(
(width, fill_height),
box=(0, resized.height, width, resized.height),
),
box=(0, fill_height + src_h),
)
elif ratio > src_ratio:
fill_width = width // 2 - src_w // 2
res.paste(
resized.resize(
(fill_width, height), box=(0, 0, 0, height)
),
box=(0, 0),
)
res.paste(
resized.resize(
(fill_width, height),
box=(resized.width, 0, resized.width, height),
),
box=(fill_width + src_w, 0),
)
return res
def prepare_mask_and_masked_image(
self,
image,
mask,
height,
width,
inpaint_full_res,
inpaint_full_res_padding,
):
# preprocess image
image = image.resize((width, height))
mask = mask.resize((width, height))
paste_to = ()
overlay_image = None
if inpaint_full_res:
# prepare overlay image
overlay_image = Image.new("RGB", (image.width, image.height))
overlay_image.paste(
image.convert("RGB"),
mask=ImageOps.invert(mask.convert("L")),
)
# prepare mask
mask = mask.convert("L")
crop_region = self.get_crop_region(
np.array(mask), inpaint_full_res_padding
)
crop_region = self.expand_crop_region(
crop_region, width, height, mask.width, mask.height
)
x1, y1, x2, y2 = crop_region
mask = mask.crop(crop_region)
mask = self.resize_image(1, mask, width, height)
paste_to = (x1, y1, x2 - x1, y2 - y1)
# prepare image
image = image.crop(crop_region)
image = self.resize_image(1, image, width, height)
if isinstance(image, (Image.Image, np.ndarray)):
image = [image]
if isinstance(image, list) and isinstance(image[0], Image.Image):
image = [np.array(i.convert("RGB"))[None, :] for i in image]
image = np.concatenate(image, axis=0)
elif isinstance(image, list) and isinstance(image[0], np.ndarray):
image = np.concatenate([i[None, :] for i in image], axis=0)
image = image.transpose(0, 3, 1, 2)
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
# preprocess mask
if isinstance(mask, (Image.Image, np.ndarray)):
mask = [mask]
if isinstance(mask, list) and isinstance(mask[0], Image.Image):
mask = np.concatenate(
[np.array(m.convert("L"))[None, None, :] for m in mask], axis=0
)
mask = mask.astype(np.float32) / 255.0
elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
mask[mask < 0.5] = 0
mask[mask >= 0.5] = 1
mask = torch.from_numpy(mask)
masked_image = image * (mask < 0.5)
return mask, masked_image, paste_to, overlay_image
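    # Note the binarization above: mask values below 0.5 become 0 (keep the
    # pixel) and values >= 0.5 become 1 (regenerate the pixel), so
    # `image * (mask < 0.5)` zeroes out exactly the region to be inpainted.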
def prepare_mask_latents(
self,
mask,
masked_image,
batch_size,
height,
width,
dtype,
):
mask = torch.nn.functional.interpolate(
mask, size=(height // 8, width // 8)
)
mask = mask.to(dtype)
masked_image = masked_image.to(dtype)
masked_image_latents = self.vae_encode("forward", (masked_image,))
masked_image_latents = torch.from_numpy(masked_image_latents)
# duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
if mask.shape[0] < batch_size:
            if batch_size % mask.shape[0] != 0:
raise ValueError(
"The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
" of masks that you pass is divisible by the total requested batch size."
)
mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
if masked_image_latents.shape[0] < batch_size:
            if batch_size % masked_image_latents.shape[0] != 0:
raise ValueError(
"The passed images and the required batch size don't match. Images are supposed to be duplicated"
f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
" Make sure the number of images that you pass is divisible by the total requested batch size."
)
masked_image_latents = masked_image_latents.repeat(
batch_size // masked_image_latents.shape[0], 1, 1, 1
)
return mask, masked_image_latents
def apply_overlay(self, image, paste_loc, overlay):
x, y, w, h = paste_loc
image = self.resize_image(0, image, w, h)
overlay.paste(image, (x, y))
return overlay
def generate_images(
self,
prompts,
neg_prompts,
image,
mask_image,
batch_size,
height,
width,
inpaint_full_res,
inpaint_full_res_padding,
num_inference_steps,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
):
# prompts and negative prompts must be lists.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
# seed the generator to create the initial latent noise; also handle out-of-range seeds.
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get initial latents
init_latents = self.prepare_latents(
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
dtype=dtype,
)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# guidance scale as a float32 tensor.
guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
# Preprocess mask and image
(
mask,
masked_image,
paste_to,
overlay_image,
) = self.prepare_mask_and_masked_image(
image,
mask_image,
height,
width,
inpaint_full_res,
inpaint_full_res_padding,
)
# Prepare mask latent variables
mask, masked_image_latents = self.prepare_mask_latents(
mask=mask,
masked_image=masked_image,
batch_size=batch_size,
height=height,
width=width,
dtype=dtype,
)
# Get Image latents
latents = self.produce_img_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=self.scheduler.timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
mask=mask,
masked_image_latents=masked_image_latents,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
if inpaint_full_res:
output_image = self.apply_overlay(
all_imgs[0], paste_to, overlay_image
)
return [output_image]
return all_imgs
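# Hypothetical standalone sketch: how the 9-channel inpaint UNet input is
# assembled each denoising step. It mirrors the torch.cat performed in
# produce_img_latents when mask and masked_image_latents are provided; all
# tensor values below are stand-ins, not real model outputs.
import torch

latents = torch.randn(1, 4, 64, 64)  # noisy image latents (512x512 image -> 64x64)
mask = torch.rand(1, 1, 64, 64).round()  # downsampled binary mask
masked_image_latents = torch.randn(1, 4, 64, 64)  # VAE-encoded masked image
unet_input = torch.cat([latents, mask, masked_image_latents], dim=1)
assert unet_input.shape == (1, 9, 64, 64)  # 9 channels, as in the inpaint configs later in this diff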

View File

@@ -1,541 +0,0 @@
import torch
from tqdm.auto import tqdm
import numpy as np
from random import randint
from PIL import Image, ImageDraw, ImageFilter
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
import math
class OutpaintPipeline(StableDiffusionPipeline):
def __init__(
self,
vae_encode: SharkInference,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
self.vae_encode = vae_encode
def prepare_latents(
self,
batch_size,
height,
width,
generator,
num_inference_steps,
dtype,
):
latents = torch.randn(
(
batch_size,
4,
height // 8,
width // 8,
),
generator=generator,
dtype=torch.float32,
).to(dtype)
self.scheduler.set_timesteps(num_inference_steps)
latents = latents * self.scheduler.init_noise_sigma
return latents
def prepare_mask_and_masked_image(
self, image, mask, mask_blur, width, height
):
if mask_blur > 0:
mask = mask.filter(ImageFilter.GaussianBlur(mask_blur))
image = image.resize((width, height))
mask = mask.resize((width, height))
# preprocess image
if isinstance(image, (Image.Image, np.ndarray)):
image = [image]
if isinstance(image, list) and isinstance(image[0], Image.Image):
image = [np.array(i.convert("RGB"))[None, :] for i in image]
image = np.concatenate(image, axis=0)
elif isinstance(image, list) and isinstance(image[0], np.ndarray):
image = np.concatenate([i[None, :] for i in image], axis=0)
image = image.transpose(0, 3, 1, 2)
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
# preprocess mask
if isinstance(mask, (Image.Image, np.ndarray)):
mask = [mask]
if isinstance(mask, list) and isinstance(mask[0], Image.Image):
mask = np.concatenate(
[np.array(m.convert("L"))[None, None, :] for m in mask], axis=0
)
mask = mask.astype(np.float32) / 255.0
elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
mask[mask < 0.5] = 0
mask[mask >= 0.5] = 1
mask = torch.from_numpy(mask)
masked_image = image * (mask < 0.5)
return mask, masked_image
def prepare_mask_latents(
self,
mask,
masked_image,
batch_size,
height,
width,
dtype,
):
mask = torch.nn.functional.interpolate(
mask, size=(height // 8, width // 8)
)
mask = mask.to(dtype)
masked_image = masked_image.to(dtype)
masked_image_latents = self.vae_encode("forward", (masked_image,))
masked_image_latents = torch.from_numpy(masked_image_latents)
# duplicate mask and masked_image_latents for each generation per prompt, using an mps-friendly method
if mask.shape[0] < batch_size:
if batch_size % mask.shape[0] != 0:
raise ValueError(
"The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
" of masks that you pass is divisible by the total requested batch size."
)
mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
if masked_image_latents.shape[0] < batch_size:
if batch_size % masked_image_latents.shape[0] != 0:
raise ValueError(
"The passed images and the required batch size don't match. Images are supposed to be duplicated"
f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
" Make sure the number of images that you pass is divisible by the total requested batch size."
)
masked_image_latents = masked_image_latents.repeat(
batch_size // masked_image_latents.shape[0], 1, 1, 1
)
return mask, masked_image_latents
def get_matched_noise(
self, _np_src_image, np_mask_rgb, noise_q=1, color_variation=0.05
):
# helper FFT routines that keep ortho normalization and auto-shift before and after the FFT
def _fft2(data):
if data.ndim > 2: # has channels
out_fft = np.zeros(
(data.shape[0], data.shape[1], data.shape[2]),
dtype=np.complex128,
)
for c in range(data.shape[2]):
c_data = data[:, :, c]
out_fft[:, :, c] = np.fft.fft2(
np.fft.fftshift(c_data), norm="ortho"
)
out_fft[:, :, c] = np.fft.ifftshift(out_fft[:, :, c])
else: # one channel
out_fft = np.zeros(
(data.shape[0], data.shape[1]), dtype=np.complex128
)
out_fft[:, :] = np.fft.fft2(
np.fft.fftshift(data), norm="ortho"
)
out_fft[:, :] = np.fft.ifftshift(out_fft[:, :])
return out_fft
def _ifft2(data):
if data.ndim > 2: # has channels
out_ifft = np.zeros(
(data.shape[0], data.shape[1], data.shape[2]),
dtype=np.complex128,
)
for c in range(data.shape[2]):
c_data = data[:, :, c]
out_ifft[:, :, c] = np.fft.ifft2(
np.fft.fftshift(c_data), norm="ortho"
)
out_ifft[:, :, c] = np.fft.ifftshift(out_ifft[:, :, c])
else: # one channel
out_ifft = np.zeros(
(data.shape[0], data.shape[1]), dtype=np.complex128
)
out_ifft[:, :] = np.fft.ifft2(
np.fft.fftshift(data), norm="ortho"
)
out_ifft[:, :] = np.fft.ifftshift(out_ifft[:, :])
return out_ifft
def _get_gaussian_window(width, height, std=3.14, mode=0):
window_scale_x = float(width / min(width, height))
window_scale_y = float(height / min(width, height))
window = np.zeros((width, height))
x = (np.arange(width) / width * 2.0 - 1.0) * window_scale_x
for y in range(height):
fy = (y / height * 2.0 - 1.0) * window_scale_y
if mode == 0:
window[:, y] = np.exp(-(x**2 + fy**2) * std)
else:
window[:, y] = (
1 / ((x**2 + 1.0) * (fy**2 + 1.0))
) ** (std / 3.14)
return window
def _get_masked_window_rgb(np_mask_grey, hardness=1.0):
np_mask_rgb = np.zeros(
(np_mask_grey.shape[0], np_mask_grey.shape[1], 3)
)
if hardness != 1.0:
hardened = np_mask_grey[:] ** hardness
else:
hardened = np_mask_grey[:]
for c in range(3):
np_mask_rgb[:, :, c] = hardened[:]
return np_mask_rgb
def _match_cumulative_cdf(source, template):
src_values, src_unique_indices, src_counts = np.unique(
source.ravel(), return_inverse=True, return_counts=True
)
tmpl_values, tmpl_counts = np.unique(
template.ravel(), return_counts=True
)
# calculate normalized quantiles for each array
src_quantiles = np.cumsum(src_counts) / source.size
tmpl_quantiles = np.cumsum(tmpl_counts) / template.size
interp_a_values = np.interp(
src_quantiles, tmpl_quantiles, tmpl_values
)
return interp_a_values[src_unique_indices].reshape(source.shape)
def _match_histograms(image, reference):
if image.ndim != reference.ndim:
raise ValueError(
"Image and reference must have the same number of dimensions."
)
if image.shape[-1] != reference.shape[-1]:
raise ValueError(
"Number of channels in the input image and reference image must match!"
)
matched = np.empty(image.shape, dtype=image.dtype)
for channel in range(image.shape[-1]):
matched_channel = _match_cumulative_cdf(
image[..., channel], reference[..., channel]
)
matched[..., channel] = matched_channel
matched = matched.astype(np.float64, copy=False)
return matched
width = _np_src_image.shape[0]
height = _np_src_image.shape[1]
num_channels = _np_src_image.shape[2]
np_src_image = _np_src_image[:] * (1.0 - np_mask_rgb)
np_mask_grey = np.sum(np_mask_rgb, axis=2) / 3.0
img_mask = np_mask_grey > 1e-6
ref_mask = np_mask_grey < 1e-3
# rather than leaving the masked area black, we get better results from the FFT by filling it with the average unmasked color
windowed_image = _np_src_image * (
1.0 - _get_masked_window_rgb(np_mask_grey)
)
windowed_image /= np.max(windowed_image)
windowed_image += np.average(_np_src_image) * np_mask_rgb
src_fft = _fft2(
windowed_image
) # get feature statistics from masked src img
src_dist = np.absolute(src_fft)
src_phase = src_fft / src_dist
# create a generator with a static seed so outpainting is deterministic and depends only on the global seed
rng = np.random.default_rng(0)
noise_window = _get_gaussian_window(
width, height, mode=1
) # start with simple gaussian noise
noise_rgb = rng.random((width, height, num_channels))
noise_grey = np.sum(noise_rgb, axis=2) / 3.0
# the colorfulness of the starting noise is blended toward greyscale, controlled by color_variation
noise_rgb *= color_variation
for c in range(num_channels):
noise_rgb[:, :, c] += (1.0 - color_variation) * noise_grey
noise_fft = _fft2(noise_rgb)
for c in range(num_channels):
noise_fft[:, :, c] *= noise_window
noise_rgb = np.real(_ifft2(noise_fft))
shaped_noise_fft = _fft2(noise_rgb)
shaped_noise_fft[:, :, :] = (
np.absolute(shaped_noise_fft[:, :, :]) ** 2
* (src_dist**noise_q)
* src_phase
) # perform the actual shaping
brightness_variation = 0.0  # fixed at zero here; could be exposed as a parameter like color_variation
contrast_adjusted_np_src = (
_np_src_image[:] * (brightness_variation + 1.0)
- brightness_variation * 2.0
)
shaped_noise = np.real(_ifft2(shaped_noise_fft))
shaped_noise -= np.min(shaped_noise)
shaped_noise /= np.max(shaped_noise)
shaped_noise[img_mask, :] = _match_histograms(
shaped_noise[img_mask, :] ** 1.0,
contrast_adjusted_np_src[ref_mask, :],
)
shaped_noise = (
_np_src_image[:] * (1.0 - np_mask_rgb) + shaped_noise * np_mask_rgb
)
matched_noise = shaped_noise[:]
return np.clip(matched_noise, 0.0, 1.0)
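# For reference, a self-contained sketch of the cumulative-CDF matching that
# the nested _match_cumulative_cdf above performs per channel; the input
# arrays here are made-up examples.
def _match_cdf_demo():
    import numpy as np

    source = np.random.rand(8, 8)  # e.g. shaped noise values
    template = np.random.rand(8, 8) * 0.5 + 0.25  # e.g. reference pixel values
    src_values, src_idx, src_counts = np.unique(
        source.ravel(), return_inverse=True, return_counts=True
    )
    tmpl_values, tmpl_counts = np.unique(template.ravel(), return_counts=True)
    # map each source value to the template value at the same quantile
    src_q = np.cumsum(src_counts) / source.size
    tmpl_q = np.cumsum(tmpl_counts) / template.size
    return np.interp(src_q, tmpl_q, tmpl_values)[src_idx].reshape(source.shape)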
def generate_images(
self,
prompts,
neg_prompts,
image,
pixels,
mask_blur,
is_left,
is_right,
is_top,
is_bottom,
noise_q,
color_variation,
batch_size,
height,
width,
num_inference_steps,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
):
# prompts and negative prompts must be lists.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
# seed the generator to create the initial latent noise; also handle out-of-range seeds.
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get initial latents
init_latents = self.prepare_latents(
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
dtype=dtype,
)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# guidance scale as a float32 tensor.
guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
process_width = width
process_height = height
left = pixels if is_left else 0
right = pixels if is_right else 0
up = pixels if is_top else 0
down = pixels if is_bottom else 0
target_w = math.ceil((image.width + left + right) / 64) * 64
target_h = math.ceil((image.height + up + down) / 64) * 64
if left > 0:
left = left * (target_w - image.width) // (left + right)
if right > 0:
right = target_w - image.width - left
if up > 0:
up = up * (target_h - image.height) // (up + down)
if down > 0:
down = target_h - image.height - up
def expand(
init_img,
expand_pixels,
is_left=False,
is_right=False,
is_top=False,
is_bottom=False,
):
is_horiz = is_left or is_right
is_vert = is_top or is_bottom
pixels_horiz = expand_pixels if is_horiz else 0
pixels_vert = expand_pixels if is_vert else 0
res_w = init_img.width + pixels_horiz
res_h = init_img.height + pixels_vert
process_res_w = math.ceil(res_w / 64) * 64
process_res_h = math.ceil(res_h / 64) * 64
img = Image.new("RGB", (process_res_w, process_res_h))
img.paste(
init_img,
(pixels_horiz if is_left else 0, pixels_vert if is_top else 0),
)
msk = Image.new("RGB", (process_res_w, process_res_h), "white")
draw = ImageDraw.Draw(msk)
draw.rectangle(
(
expand_pixels + mask_blur if is_left else 0,
expand_pixels + mask_blur if is_top else 0,
msk.width - expand_pixels - mask_blur
if is_right
else res_w,
msk.height - expand_pixels - mask_blur
if is_bottom
else res_h,
),
fill="black",
)
np_image = (np.asarray(img) / 255.0).astype(np.float64)
np_mask = (np.asarray(msk) / 255.0).astype(np.float64)
noised = self.get_matched_noise(
np_image, np_mask, noise_q, color_variation
)
output_image = Image.fromarray(
np.clip(noised * 255.0, 0.0, 255.0).astype(np.uint8),
mode="RGB",
)
target_width = (
min(width, init_img.width + pixels_horiz)
if is_horiz
else img.width
)
target_height = (
min(height, init_img.height + pixels_vert)
if is_vert
else img.height
)
crop_region = (
0 if is_left else output_image.width - target_width,
0 if is_top else output_image.height - target_height,
target_width if is_left else output_image.width,
target_height if is_top else output_image.height,
)
mask_to_process = msk.crop(crop_region)
image_to_process = output_image.crop(crop_region)
# Preprocess mask and image
mask, masked_image = self.prepare_mask_and_masked_image(
image_to_process, mask_to_process, mask_blur, width, height
)
# Prepare mask latent variables
mask, masked_image_latents = self.prepare_mask_latents(
mask=mask,
masked_image=masked_image,
batch_size=batch_size,
height=height,
width=width,
dtype=dtype,
)
# Get Image latents
latents = self.produce_img_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=self.scheduler.timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
mask=mask,
masked_image_latents=masked_image_latents,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
res_img = all_imgs[0].resize(
(image_to_process.width, image_to_process.height)
)
output_image.paste(
res_img,
(
0 if is_left else output_image.width - res_img.width,
0 if is_top else output_image.height - res_img.height,
),
)
output_image = output_image.crop((0, 0, res_w, res_h))
return output_image
img = image.resize((width, height))
if left > 0:
img = expand(img, left, is_left=True)
if right > 0:
img = expand(img, right, is_right=True)
if up > 0:
img = expand(img, up, is_top=True)
if down > 0:
img = expand(img, down, is_bottom=True)
return [img]
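# A worked example of the 64-pixel alignment arithmetic above, with made-up
# sizes: a 500x300 source expanded by 128px on the left and right.
import math

w, h, left, right, up, down = 500, 300, 128, 128, 0, 0
target_w = math.ceil((w + left + right) / 64) * 64  # 756 rounded up -> 768
target_h = math.ceil((h + up + down) / 64) * 64  # 300 rounded up -> 320
left = left * (target_w - w) // (left + right)  # 134: rounding slack is shared proportionally
right = target_w - w - left  # 134
assert (target_w, target_h, left, right) == (768, 320, 134, 134)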

View File

@@ -1,150 +0,0 @@
import torch
import time
import numpy as np
from tqdm.auto import tqdm
from random import randint
from PIL import Image
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
from apps.stable_diffusion.src.utils import controlnet_hint_conversion
class StencilPipeline(StableDiffusionPipeline):
def __init__(
self,
controlnet: SharkInference,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
self.controlnet = controlnet
def prepare_latents(
self,
batch_size,
height,
width,
generator,
num_inference_steps,
dtype,
):
latents = torch.randn(
(
batch_size,
4,
height // 8,
width // 8,
),
generator=generator,
dtype=torch.float32,
).to(dtype)
self.scheduler.set_timesteps(num_inference_steps)
self.scheduler.is_scale_input_called = True
latents = latents * self.scheduler.init_noise_sigma
return latents
def generate_images(
self,
prompts,
neg_prompts,
image,
batch_size,
height,
width,
num_inference_steps,
strength,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
use_stencil,
):
# Control Embedding check & conversion
# TODO: 1. Change `num_images_per_prompt`.
controlnet_hint = controlnet_hint_conversion(
image, use_stencil, height, width, dtype, num_images_per_prompt=1
)
# prompts and negative prompts must be lists.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
# seed the generator to create the initial latent noise; also handle out-of-range seeds.
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# guidance scale as a float32 tensor.
guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
# Prepare initial latent.
init_latents = self.prepare_latents(
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
dtype=dtype,
)
final_timesteps = self.scheduler.timesteps
# Get Image latents
latents = self.produce_stencil_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=final_timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
controlnet_hint=controlnet_hint,
controlnet=self.controlnet,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
return all_imgs

View File

@@ -1,139 +0,0 @@
import torch
from tqdm.auto import tqdm
import numpy as np
from random import randint
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
class Text2ImagePipeline(StableDiffusionPipeline):
def __init__(
self,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
def prepare_latents(
self,
batch_size,
height,
width,
generator,
num_inference_steps,
dtype,
):
latents = torch.randn(
(
batch_size,
4,
height // 8,
width // 8,
),
generator=generator,
dtype=torch.float32,
).to(dtype)
self.scheduler.set_timesteps(num_inference_steps)
self.scheduler.is_scale_input_called = True
latents = latents * self.scheduler.init_noise_sigma
return latents
def generate_images(
self,
prompts,
neg_prompts,
batch_size,
height,
width,
num_inference_steps,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
):
# prompts and negative prompts must be lists.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
# seed the generator to create the initial latent noise; also handle out-of-range seeds.
# TODO: Wouldn't it be preferable to just report an error instead of modifying the seed on the fly?
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get initial latents
init_latents = self.prepare_latents(
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
dtype=dtype,
)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# guidance scale as a float32 tensor.
guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
# Get Image latents
latents = self.produce_img_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=self.scheduler.timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
return all_imgs
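# Hypothetical end-to-end use of this pipeline via the base class's
# from_pretrained (defined later in this diff). Every argument value below is
# an illustrative assumption, not a tested configuration.
import torch

from apps.stable_diffusion.src.schedulers import get_schedulers

schedulers = get_schedulers("stabilityai/stable-diffusion-2-1-base")
pipe = Text2ImagePipeline.from_pretrained(
    scheduler=schedulers["EulerDiscrete"],
    import_mlir=True,
    model_id="stabilityai/stable-diffusion-2-1-base",
    ckpt_loc="",
    custom_vae="",
    precision="fp16",
    max_length=64,
    batch_size=1,
    height=512,
    width=512,
    use_base_vae=False,
    use_tuned=False,
)
images = pipe.generate_images(
    prompts="a photo of an astronaut riding a horse",
    neg_prompts="blurry, low quality",
    batch_size=1,
    height=512,
    width=512,
    num_inference_steps=30,
    guidance_scale=7.5,
    seed=42,
    max_length=64,
    dtype=torch.half,
    use_base_vae=False,
    cpu_scheduling=True,
)
images[0].save("txt2img_demo.png")  # hypothetical output path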

View File

@@ -1,310 +0,0 @@
import inspect
import torch
import time
from tqdm.auto import tqdm
import numpy as np
from random import randint
from transformers import CLIPTokenizer
from typing import Union
from shark.shark_inference import SharkInference
from diffusers import (
DDIMScheduler,
DDPMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
StableDiffusionPipeline,
)
from apps.stable_diffusion.src.utils import (
start_profiling,
end_profiling,
)
from PIL import Image
def preprocess(image):
if isinstance(image, torch.Tensor):
return image
elif isinstance(image, Image.Image):
image = [image]
if isinstance(image[0], Image.Image):
w, h = image[0].size
w, h = map(
lambda x: x - x % 64, (w, h)
) # resize to integer multiple of 64
image = [np.array(i.resize((w, h)))[None, :] for i in image]
image = np.concatenate(image, axis=0)
image = np.array(image).astype(np.float32) / 255.0
image = image.transpose(0, 3, 1, 2)
image = 2.0 * image - 1.0
image = torch.from_numpy(image)
elif isinstance(image[0], torch.Tensor):
image = torch.cat(image, dim=0)
return image
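# For instance, the 64-multiple rounding above clamps arbitrary sizes down to
# UNet-friendly ones:
assert tuple(map(lambda x: x - x % 64, (513, 391))) == (512, 384)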
class UpscalerPipeline(StableDiffusionPipeline):
def __init__(
self,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
low_res_scheduler: Union[
DDIMScheduler,
DDPMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
self.low_res_scheduler = low_res_scheduler
def prepare_extra_step_kwargs(self, generator, eta):
accepts_eta = "eta" in set(
inspect.signature(self.scheduler.step).parameters.keys()
)
extra_step_kwargs = {}
if accepts_eta:
extra_step_kwargs["eta"] = eta
# check if the scheduler accepts generator
accepts_generator = "generator" in set(
inspect.signature(self.scheduler.step).parameters.keys()
)
if accepts_generator:
extra_step_kwargs["generator"] = generator
return extra_step_kwargs
def decode_latents(self, latents, use_base_vae, cpu_scheduling):
latents = 1 / 0.08333 * (latents.float())
latents_numpy = latents
if cpu_scheduling:
latents_numpy = latents.detach().numpy()
profile_device = start_profiling(file_path="vae.rdc")
vae_start = time.time()
images = self.vae("forward", (latents_numpy,))
vae_inf_time = (time.time() - vae_start) * 1000
end_profiling(profile_device)
self.log += f"\nVAE Inference time (ms): {vae_inf_time:.3f}"
images = torch.from_numpy(images)
images = (images.detach().cpu() * 255.0).numpy()
images = images.round()
images = torch.from_numpy(images).to(torch.uint8).permute(0, 2, 3, 1)
pil_images = [Image.fromarray(image) for image in images.numpy()]
return pil_images
def prepare_latents(
self,
batch_size,
height,
width,
generator,
num_inference_steps,
dtype,
):
latents = torch.randn(
(
batch_size,
4,
height,
width,
),
generator=generator,
dtype=torch.float32,
).to(dtype)
self.scheduler.set_timesteps(num_inference_steps)
self.scheduler.is_scale_input_called = True
latents = latents * self.scheduler.init_noise_sigma
return latents
def produce_img_latents(
self,
latents,
image,
text_embeddings,
guidance_scale,
noise_level,
total_timesteps,
dtype,
cpu_scheduling,
extra_step_kwargs,
return_all_latents=False,
):
step_time_sum = 0
latent_history = [latents]
text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
text_embeddings_numpy = text_embeddings.detach().numpy()
for i, t in tqdm(enumerate(total_timesteps)):
step_start_time = time.time()
latent_model_input = torch.cat([latents] * 2)
latent_model_input = self.scheduler.scale_model_input(
latent_model_input, t
)
latent_model_input = torch.cat([latent_model_input, image], dim=1)
timestep = torch.tensor([t]).to(dtype).detach().numpy()
if cpu_scheduling:
latent_model_input = latent_model_input.detach().numpy()
# Profiling Unet.
profile_device = start_profiling(file_path="unet.rdc")
noise_pred = self.unet(
"forward",
(
latent_model_input,
timestep,
text_embeddings_numpy,
noise_level,
),
)
end_profiling(profile_device)
noise_pred = torch.from_numpy(noise_pred)
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)
if cpu_scheduling:
latents = self.scheduler.step(
noise_pred, t, latents, **extra_step_kwargs
).prev_sample
else:
latents = self.scheduler.step(
noise_pred, t, latents, **extra_step_kwargs
)
latent_history.append(latents)
step_time = (time.time() - step_start_time) * 1000
# self.log += (
# f"\nstep = {i} | timestep = {t} | time = {step_time:.2f}ms"
# )
step_time_sum += step_time
avg_step_time = step_time_sum / len(total_timesteps)
self.log += f"\nAverage step time: {avg_step_time:.3f}ms/it"
if not return_all_latents:
return latents
all_latents = torch.cat(latent_history, dim=0)
return all_latents
def generate_images(
self,
prompts,
neg_prompts,
image,
batch_size,
height,
width,
num_inference_steps,
noise_level,
guidance_scale,
seed,
max_length,
dtype,
use_base_vae,
cpu_scheduling,
):
# prompts and negative prompts must be lists.
if isinstance(prompts, str):
prompts = [prompts]
if isinstance(neg_prompts, str):
neg_prompts = [neg_prompts]
prompts = prompts * batch_size
neg_prompts = neg_prompts * batch_size
# seed the generator to create the initial latent noise; also handle out-of-range seeds.
# TODO: Wouldn't it be preferable to just report an error instead of modifying the seed on the fly?
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
# Get text embeddings from prompts
text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
# 4. Preprocess image
image = preprocess(image).to(dtype)
# 5. Add noise to image
noise_level = torch.tensor([noise_level], dtype=torch.long)
noise = torch.randn(
image.shape,
generator=generator,
).to(dtype)
image = self.low_res_scheduler.add_noise(image, noise, noise_level)
image = torch.cat([image] * 2)
noise_level = torch.cat([noise_level] * image.shape[0])
height, width = image.shape[2:]
# Get initial latents
init_latents = self.prepare_latents(
batch_size=batch_size,
height=height,
width=width,
generator=generator,
num_inference_steps=num_inference_steps,
dtype=dtype,
)
eta = 0.0
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
# guidance scale as a float32 tensor.
# guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
# Get Image latents
latents = self.produce_img_latents(
latents=init_latents,
image=image,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
noise_level=noise_level,
total_timesteps=self.scheduler.timesteps,
dtype=dtype,
cpu_scheduling=cpu_scheduling,
extra_step_kwargs=extra_step_kwargs,
)
# Img latents -> PIL images
all_imgs = []
for i in tqdm(range(0, latents.shape[0], batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + batch_size],
use_base_vae=use_base_vae,
cpu_scheduling=cpu_scheduling,
)
all_imgs.extend(imgs)
return all_imgs
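# Hypothetical standalone sketch of the low-res conditioning above: noise is
# added to the preprocessed image at a chosen noise_level, and the image is
# then concatenated channel-wise with the latents so the UNet sees 4 + 3 = 7
# input channels (matching the x4-upscaler config later in this diff). The
# scheduler settings here are illustrative.
import torch
from diffusers import DDPMScheduler

low_res_scheduler = DDPMScheduler(num_train_timesteps=1000)
image = torch.randn(1, 3, 128, 128)  # preprocessed low-res input in [-1, 1]
noise_level = torch.tensor([20], dtype=torch.long)
noisy = low_res_scheduler.add_noise(image, torch.randn_like(image), noise_level)
latents = torch.randn(1, 4, 128, 128)  # upscaler latents match the image H x W
unet_input = torch.cat([latents, noisy], dim=1)
assert unet_input.shape == (1, 7, 128, 128)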

View File

@@ -1,418 +0,0 @@
import torch
import numpy as np
from transformers import CLIPTokenizer
from PIL import Image
from tqdm.auto import tqdm
import time
from typing import Union
from diffusers import (
DDIMScheduler,
DDPMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
DEISMultistepScheduler,
)
from shark.shark_inference import SharkInference
from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
from apps.stable_diffusion.src.models import (
SharkifyStableDiffusionModel,
get_vae_encode,
get_vae,
get_clip,
get_unet,
get_tokenizer,
)
from apps.stable_diffusion.src.utils import (
start_profiling,
end_profiling,
)
class StableDiffusionPipeline:
def __init__(
self,
vae: SharkInference,
text_encoder: SharkInference,
tokenizer: CLIPTokenizer,
unet: SharkInference,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
):
self.vae = vae
self.text_encoder = text_encoder
self.tokenizer = tokenizer
self.unet = unet
self.scheduler = scheduler
# TODO: Implement using the Python logging utility.
self.log = ""
def encode_prompts(self, prompts, neg_prompts, max_length):
# Tokenize text and get embeddings
text_input = self.tokenizer(
prompts,
padding="max_length",
max_length=max_length,
truncation=True,
return_tensors="pt",
)
# Get unconditional embeddings as well
uncond_input = self.tokenizer(
neg_prompts,
padding="max_length",
max_length=max_length,
truncation=True,
return_tensors="pt",
)
text_input = torch.cat([uncond_input.input_ids, text_input.input_ids])
clip_inf_start = time.time()
text_embeddings = self.text_encoder("forward", (text_input,))
clip_inf_time = (time.time() - clip_inf_start) * 1000
self.log += f"\nClip Inference time (ms) = {clip_inf_time:.3f}"
return text_embeddings
def decode_latents(self, latents, use_base_vae, cpu_scheduling):
if use_base_vae:
latents = 1 / 0.18215 * latents
latents_numpy = latents
if cpu_scheduling:
latents_numpy = latents.detach().numpy()
profile_device = start_profiling(file_path="vae.rdc")
vae_start = time.time()
images = self.vae("forward", (latents_numpy,))
vae_inf_time = (time.time() - vae_start) * 1000
end_profiling(profile_device)
self.log += f"\nVAE Inference time (ms): {vae_inf_time:.3f}"
if use_base_vae:
images = torch.from_numpy(images)
images = (images.detach().cpu() * 255.0).numpy()
images = images.round()
images = torch.from_numpy(images).to(torch.uint8).permute(0, 2, 3, 1)
pil_images = [Image.fromarray(image) for image in images.numpy()]
return pil_images
def produce_stencil_latents(
self,
latents,
text_embeddings,
guidance_scale,
total_timesteps,
dtype,
cpu_scheduling,
controlnet_hint=None,
controlnet=None,
controlnet_conditioning_scale: float = 1.0,
mask=None,
masked_image_latents=None,
return_all_latents=False,
):
step_time_sum = 0
latent_history = [latents]
text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
text_embeddings_numpy = text_embeddings.detach().numpy()
for i, t in tqdm(enumerate(total_timesteps)):
step_start_time = time.time()
timestep = torch.tensor([t]).to(dtype)
latent_model_input = self.scheduler.scale_model_input(latents, t)
if mask is not None and masked_image_latents is not None:
latent_model_input = torch.cat(
[
torch.from_numpy(np.asarray(latent_model_input)),
mask,
masked_image_latents,
],
dim=1,
).to(dtype)
if cpu_scheduling:
latent_model_input = latent_model_input.detach().numpy()
if not torch.is_tensor(latent_model_input):
latent_model_input_1 = torch.from_numpy(
np.asarray(latent_model_input)
).to(dtype)
else:
latent_model_input_1 = latent_model_input
control = controlnet(
"forward",
(
latent_model_input_1,
timestep,
text_embeddings,
controlnet_hint,
),
send_to_host=False,
)
timestep = timestep.detach().numpy()
# Profiling Unet.
profile_device = start_profiling(file_path="unet.rdc")
# TODO: Pass `control` as it is to Unet. Same as TODO mentioned in model_wrappers.py.
noise_pred = self.unet(
"forward",
(
latent_model_input,
timestep,
text_embeddings_numpy,
guidance_scale,
control[0],
control[1],
control[2],
control[3],
control[4],
control[5],
control[6],
control[7],
control[8],
control[9],
control[10],
control[11],
control[12],
),
send_to_host=False,
)
end_profiling(profile_device)
if cpu_scheduling:
noise_pred = torch.from_numpy(noise_pred.to_host())
latents = self.scheduler.step(
noise_pred, t, latents
).prev_sample
else:
latents = self.scheduler.step(noise_pred, t, latents)
latent_history.append(latents)
step_time = (time.time() - step_start_time) * 1000
# self.log += (
# f"\nstep = {i} | timestep = {t} | time = {step_time:.2f}ms"
# )
step_time_sum += step_time
avg_step_time = step_time_sum / len(total_timesteps)
self.log += f"\nAverage step time: {avg_step_time:.3f}ms/it"
if not return_all_latents:
return latents
all_latents = torch.cat(latent_history, dim=0)
return all_latents
def produce_img_latents(
self,
latents,
text_embeddings,
guidance_scale,
total_timesteps,
dtype,
cpu_scheduling,
mask=None,
masked_image_latents=None,
return_all_latents=False,
):
step_time_sum = 0
latent_history = [latents]
text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
text_embeddings_numpy = text_embeddings.detach().numpy()
for i, t in tqdm(enumerate(total_timesteps)):
step_start_time = time.time()
timestep = torch.tensor([t]).to(dtype).detach().numpy()
latent_model_input = self.scheduler.scale_model_input(latents, t)
if mask is not None and masked_image_latents is not None:
latent_model_input = torch.cat(
[
torch.from_numpy(np.asarray(latent_model_input)),
mask,
masked_image_latents,
],
dim=1,
).to(dtype)
if cpu_scheduling:
latent_model_input = latent_model_input.detach().numpy()
# Profiling Unet.
profile_device = start_profiling(file_path="unet.rdc")
noise_pred = self.unet(
"forward",
(
latent_model_input,
timestep,
text_embeddings_numpy,
guidance_scale,
),
send_to_host=False,
)
end_profiling(profile_device)
if cpu_scheduling:
noise_pred = torch.from_numpy(noise_pred.to_host())
latents = self.scheduler.step(
noise_pred, t, latents
).prev_sample
else:
latents = self.scheduler.step(noise_pred, t, latents)
latent_history.append(latents)
step_time = (time.time() - step_start_time) * 1000
# self.log += (
# f"\nstep = {i} | timestep = {t} | time = {step_time:.2f}ms"
# )
step_time_sum += step_time
avg_step_time = step_time_sum / len(total_timesteps)
self.log += f"\nAverage step time: {avg_step_time:.3f}ms/it"
if not return_all_latents:
return latents
all_latents = torch.cat(latent_history, dim=0)
return all_latents
@classmethod
def from_pretrained(
cls,
scheduler: Union[
DDIMScheduler,
PNDMScheduler,
LMSDiscreteScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler,
SharkEulerDiscreteScheduler,
DEISMultistepScheduler,
],
import_mlir: bool,
model_id: str,
ckpt_loc: str,
custom_vae: str,
precision: str,
max_length: int,
batch_size: int,
height: int,
width: int,
use_base_vae: bool,
use_tuned: bool,
low_cpu_mem_usage: bool = False,
debug: bool = False,
use_stencil: Union[str, None] = None,
use_lora: str = "",
ddpm_scheduler: DDPMScheduler = None,
):
is_inpaint = cls.__name__ in [
"InpaintPipeline",
"OutpaintPipeline",
]
is_upscaler = cls.__name__ in ["UpscalerPipeline"]
if import_mlir:
mlir_import = SharkifyStableDiffusionModel(
model_id,
ckpt_loc,
custom_vae,
precision,
max_len=max_length,
batch_size=batch_size,
height=height,
width=width,
use_base_vae=use_base_vae,
use_tuned=use_tuned,
low_cpu_mem_usage=low_cpu_mem_usage,
debug=debug,
is_inpaint=is_inpaint,
is_upscaler=is_upscaler,
use_stencil=use_stencil,
use_lora=use_lora,
)
if cls.__name__ in [
"Image2ImagePipeline",
"InpaintPipeline",
"OutpaintPipeline",
]:
clip, unet, vae, vae_encode = mlir_import()
return cls(
vae_encode, vae, clip, get_tokenizer(), unet, scheduler
)
if cls.__name__ in ["StencilPipeline"]:
clip, unet, vae, controlnet = mlir_import()
return cls(
controlnet, vae, clip, get_tokenizer(), unet, scheduler
)
if cls.__name__ in ["UpscalerPipeline"]:
clip, unet, vae = mlir_import()
return cls(
vae, clip, get_tokenizer(), unet, scheduler, ddpm_scheduler
)
clip, unet, vae = mlir_import()
return cls(vae, clip, get_tokenizer(), unet, scheduler)
try:
if cls.__name__ in [
"Image2ImagePipeline",
"InpaintPipeline",
"OutpaintPipeline",
]:
return cls(
get_vae_encode(),
get_vae(),
get_clip(),
get_tokenizer(),
get_unet(),
scheduler,
)
if cls.__name__ == "StencilPipeline":
import sys
sys.exit(
"StencilPipeline not supported with SharkTank currently."
)
return cls(
get_vae(), get_clip(), get_tokenizer(), get_unet(), scheduler
)
except Exception:
print("downloading the pipeline failed, falling back to import_mlir")
mlir_import = SharkifyStableDiffusionModel(
model_id,
ckpt_loc,
custom_vae,
precision,
max_len=max_length,
batch_size=batch_size,
height=height,
width=width,
use_base_vae=use_base_vae,
use_tuned=use_tuned,
low_cpu_mem_usage=low_cpu_mem_usage,
is_inpaint=is_inpaint,
is_upscaler=is_upscaler,
)
if cls.__name__ in [
"Image2ImagePipeline",
"InpaintPipeline",
"OutpaintPipeline",
]:
clip, unet, vae, vae_encode = mlir_import()
return cls(
vae_encode, vae, clip, get_tokenizer(), unet, scheduler
)
if cls.__name__ == "StencilPipeline":
clip, unet, vae, controlnet = mlir_import()
return cls(
controlnet, vae, clip, get_tokenizer(), unet, scheduler
)
clip, unet, vae = mlir_import()
return cls(vae, clip, get_tokenizer(), unet, scheduler)

View File

@@ -1,4 +0,0 @@
from apps.stable_diffusion.src.schedulers.sd_schedulers import get_schedulers
from apps.stable_diffusion.src.schedulers.shark_eulerdiscrete import (
SharkEulerDiscreteScheduler,
)

View File

@@ -1,66 +0,0 @@
from diffusers import (
LMSDiscreteScheduler,
PNDMScheduler,
DDPMScheduler,
DDIMScheduler,
DPMSolverMultistepScheduler,
KDPM2DiscreteScheduler,
EulerDiscreteScheduler,
EulerAncestralDiscreteScheduler,
DEISMultistepScheduler,
)
from apps.stable_diffusion.src.schedulers.shark_eulerdiscrete import (
SharkEulerDiscreteScheduler,
)
def get_schedulers(model_id):
schedulers = dict()
schedulers["PNDM"] = PNDMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DDPM"] = DDPMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["KDPM2Discrete"] = KDPM2DiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DDIM"] = DDIMScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers[
"DPMSolverMultistep"
] = DPMSolverMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["EulerDiscrete"] = EulerDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers[
"EulerAncestralDiscrete"
] = EulerAncestralDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["DEISMultistep"] = DEISMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers[
"SharkEulerDiscrete"
] = SharkEulerDiscreteScheduler.from_pretrained(
model_id,
subfolder="scheduler",
)
schedulers["SharkEulerDiscrete"].compile()
return schedulers

View File

@@ -1,156 +0,0 @@
import sys
import numpy as np
from typing import List, Optional, Tuple, Union
from diffusers import (
LMSDiscreteScheduler,
PNDMScheduler,
DDIMScheduler,
DPMSolverMultistepScheduler,
EulerDiscreteScheduler,
)
from diffusers.configuration_utils import register_to_config
from apps.stable_diffusion.src.utils import (
compile_through_fx,
get_shark_model,
args,
)
import torch
class SharkEulerDiscreteScheduler(EulerDiscreteScheduler):
@register_to_config
def __init__(
self,
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
beta_schedule: str = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
prediction_type: str = "epsilon",
):
super().__init__(
num_train_timesteps,
beta_start,
beta_end,
beta_schedule,
trained_betas,
prediction_type,
)
def compile(self):
SCHEDULER_BUCKET = "gs://shark_tank/stable_diffusion/schedulers"
BATCH_SIZE = args.batch_size
model_input = {
"euler": {
"latent": torch.randn(
BATCH_SIZE, 4, args.height // 8, args.width // 8
),
"output": torch.randn(
BATCH_SIZE, 4, args.height // 8, args.width // 8
),
"sigma": torch.tensor(1).to(torch.float32),
"dt": torch.tensor(1).to(torch.float32),
},
}
example_latent = model_input["euler"]["latent"]
example_output = model_input["euler"]["output"]
if args.precision == "fp16":
example_latent = example_latent.half()
example_output = example_output.half()
example_sigma = model_input["euler"]["sigma"]
example_dt = model_input["euler"]["dt"]
class ScalingModel(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, latent, sigma):
return latent / ((sigma**2 + 1) ** 0.5)
class SchedulerStepModel(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, noise_pred, sigma, latent, dt):
pred_original_sample = latent - sigma * noise_pred
derivative = (latent - pred_original_sample) / sigma
return latent + derivative * dt
iree_flags = []
if len(args.iree_vulkan_target_triple) > 0:
iree_flags.append(
f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
)
# Disable bindings fusion to work with moltenVK.
if sys.platform == "darwin":
iree_flags.append("-iree-stream-fuse-binding=false")
def _import(self):
scaling_model = ScalingModel()
self.scaling_model = compile_through_fx(
model=scaling_model,
inputs=(example_latent, example_sigma),
model_name=f"euler_scale_model_input_{BATCH_SIZE}_{args.height}_{args.width}"
+ args.precision,
extra_args=iree_flags,
)
step_model = SchedulerStepModel()
self.step_model = compile_through_fx(
step_model,
(example_output, example_sigma, example_latent, example_dt),
model_name=f"euler_step_{BATCH_SIZE}_{args.height}_{args.width}"
+ args.precision,
extra_args=iree_flags,
)
if args.import_mlir:
_import(self)
else:
try:
self.scaling_model = get_shark_model(
SCHEDULER_BUCKET,
"euler_scale_model_input_" + args.precision,
iree_flags,
)
self.step_model = get_shark_model(
SCHEDULER_BUCKET,
"euler_step_" + args.precision,
iree_flags,
)
except Exception:
print(
"failed to download the model, falling back to import_mlir"
)
args.import_mlir = True
_import(self)
def scale_model_input(self, sample, timestep):
step_index = (self.timesteps == timestep).nonzero().item()
sigma = self.sigmas[step_index]
return self.scaling_model(
"forward",
(
sample,
sigma,
),
send_to_host=False,
)
def step(self, noise_pred, timestep, latent):
step_index = (self.timesteps == timestep).nonzero().item()
sigma = self.sigmas[step_index]
dt = self.sigmas[step_index + 1] - sigma
return self.step_model(
"forward",
(
noise_pred,
sigma,
latent,
dt,
),
send_to_host=False,
)
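# The compiled SchedulerStepModel above is a plain Euler step: since
# derivative = (latent - (latent - sigma * eps)) / sigma = eps, the update
# reduces to latent + eps * dt. A quick numeric check with made-up values:
import torch

latent = torch.randn(1, 4, 64, 64)
noise_pred = torch.randn_like(latent)
sigma, dt = torch.tensor(14.6), torch.tensor(-3.2)  # illustrative schedule values
pred_original = latent - sigma * noise_pred
derivative = (latent - pred_original) / sigma
assert torch.allclose(latent + derivative * dt, latent + noise_pred * dt)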

View File

@@ -1,35 +0,0 @@
from apps.stable_diffusion.src.utils.profiler import (
start_profiling,
end_profiling,
)
from apps.stable_diffusion.src.utils.resources import (
prompt_examples,
models_db,
base_models,
opt_flags,
resource_path,
)
from apps.stable_diffusion.src.utils.sd_annotation import sd_model_annotation
from apps.stable_diffusion.src.utils.stable_args import args
from apps.stable_diffusion.src.utils.stencils.stencil_utils import (
controlnet_hint_conversion,
get_stencil_model_id,
)
from apps.stable_diffusion.src.utils.utils import (
get_shark_model,
compile_through_fx,
set_iree_runtime_flags,
map_device_to_name_path,
set_init_device_flags,
get_available_devices,
get_opt_flags,
preprocessCKPT,
fetch_or_delete_vmfbs,
fetch_and_update_base_model_id,
get_path_to_diffusers_checkpoint,
sanitize_seed,
get_path_stem,
get_extended_name,
clear_all,
save_output_img,
)

View File

@@ -1,18 +0,0 @@
from apps.stable_diffusion.src.utils.stable_args import args
# Helper functions to profile the Vulkan device.
def start_profiling(file_path="foo.rdc", profiling_mode="queue"):
if args.vulkan_debug_utils and "vulkan" in args.device:
import iree.runtime
print(f"Profiling and saving to {file_path}.")
vulkan_device = iree.runtime.get_device(args.device)
vulkan_device.begin_profiling(mode=profiling_mode, file_path=file_path)
return vulkan_device
return None
def end_profiling(device):
if device:
return device.end_profiling()

View File

@@ -1,37 +0,0 @@
import os
import json
import sys
def resource_path(relative_path):
"""Get absolute path to resource, works for dev and for PyInstaller"""
base_path = getattr(
sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
)
return os.path.join(base_path, relative_path)
def get_json_file(path):
json_var = []
loc_json = resource_path(path)
if os.path.exists(loc_json):
with open(loc_json, encoding="utf-8") as fopen:
json_var = json.load(fopen)
if not json_var:
print(f"Unable to fetch {path}")
return json_var
# TODO: This shouldn't run at module import time; loading these JSON files
# executes as a side effect of importing this module.
prompt_examples = get_json_file("resources/prompts.json")
models_db = get_json_file("resources/model_db.json")
# base_models contains the input configuration for the different
# models and also provides information about their variants.
base_models = get_json_file("resources/base_model.json")
# Contains optimization flags for different models.
opt_flags = get_json_file("resources/opt_flags.json")

View File

@@ -1,384 +0,0 @@
{
"stabilityai/stable-diffusion-x4-upscaler": {
"unet": {
"latents": {
"shape": [
"2*batch_size",
7,
"8*height",
"8*width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
1024
],
"dtype": "f32"
},
"noise_level": {
"shape": [2],
"dtype": "i64"
}
},
"vae": {
"latents" : {
"shape" : [
"1*batch_size",4,"8*height","8*width"
],
"dtype":"f32"
}
},
"clip": {
"token" : {
"shape" : [
"2*batch_size",
"max_len"
],
"dtype":"i64"
}
}
},
"stabilityai/stable-diffusion-2-1": {
"unet": {
"latents": {
"shape": [
"1*batch_size",
4,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
1024
],
"dtype": "f32"
},
"guidance_scale": {
"shape": 2,
"dtype": "f32"
}
},
"vae_encode": {
"image" : {
"shape" : [
"1*batch_size",3,"8*height","8*width"
],
"dtype":"f32"
}
},
"vae": {
"latents" : {
"shape" : [
"1*batch_size",4,"height","width"
],
"dtype":"f32"
}
},
"clip": {
"token" : {
"shape" : [
"2*batch_size",
"max_len"
],
"dtype":"i64"
}
}
},
"CompVis/stable-diffusion-v1-4": {
"unet": {
"latents": {
"shape": [
"1*batch_size",
4,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
768
],
"dtype": "f32"
},
"guidance_scale": {
"shape": 2,
"dtype": "f32"
}
},
"stencil_adaptor": {
"latents": {
"shape": [
"1*batch_size",
4,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
768
],
"dtype": "f32"
},
"controlnet_hint": {
"shape": [1, 3, "8*height", "8*width"],
"dtype": "f32"
}
},
"stencil_unet": {
"latents": {
"shape": [
"1*batch_size",
4,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
768
],
"dtype": "f32"
},
"guidance_scale": {
"shape": 2,
"dtype": "f32"
},
"control1": {
"shape": [2, 320, "height", "width"],
"dtype": "f32"
},
"control2": {
"shape": [2, 320, "height", "width"],
"dtype": "f32"
},
"control3": {
"shape": [2, 320, "height", "width"],
"dtype": "f32"
},
"control4": {
"shape": [2, 320, "height/2", "width/2"],
"dtype": "f32"
},
"control5": {
"shape": [2, 640, "height/2", "width/2"],
"dtype": "f32"
},
"control6": {
"shape": [2, 640, "height/2", "width/2"],
"dtype": "f32"
},
"control7": {
"shape": [2, 640, "height/4", "width/4"],
"dtype": "f32"
},
"control8": {
"shape": [2, 1280, "height/4", "width/4"],
"dtype": "f32"
},
"control9": {
"shape": [2, 1280, "height/4", "width/4"],
"dtype": "f32"
},
"control10": {
"shape": [2, 1280, "height/8", "width/8"],
"dtype": "f32"
},
"control11": {
"shape": [2, 1280, "height/8", "width/8"],
"dtype": "f32"
},
"control12": {
"shape": [2, 1280, "height/8", "width/8"],
"dtype": "f32"
},
"control13": {
"shape": [2, 1280, "height/8", "width/8"],
"dtype": "f32"
}
},
"vae_encode": {
"image" : {
"shape" : [
"1*batch_size",3,"8*height","8*width"
],
"dtype":"f32"
}
},
"vae": {
"latents" : {
"shape" : [
"1*batch_size",4,"height","width"
],
"dtype":"f32"
}
},
"clip": {
"token" : {
"shape" : [
"2*batch_size",
"max_len"
],
"dtype":"i64"
}
}
},
"stabilityai/stable-diffusion-2-inpainting": {
"unet": {
"latents": {
"shape": [
"1*batch_size",
9,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
1024
],
"dtype": "f32"
},
"guidance_scale": {
"shape": 2,
"dtype": "f32"
}
},
"vae_encode": {
"image" : {
"shape" : [
"1*batch_size",3,"8*height","8*width"
],
"dtype":"f32"
}
},
"vae": {
"latents" : {
"shape" : [
"1*batch_size",4,"height","width"
],
"dtype":"f32"
}
},
"clip": {
"token" : {
"shape" : [
"2*batch_size",
"max_len"
],
"dtype":"i64"
}
}
},
"runwayml/stable-diffusion-inpainting": {
"unet": {
"latents": {
"shape": [
"1*batch_size",
9,
"height",
"width"
],
"dtype": "f32"
},
"timesteps": {
"shape": [
1
],
"dtype": "f32"
},
"embedding": {
"shape": [
"2*batch_size",
"max_len",
768
],
"dtype": "f32"
},
"guidance_scale": {
"shape": 2,
"dtype": "f32"
}
},
"vae_encode": {
"image" : {
"shape" : [
"1*batch_size",3,"8*height","8*width"
],
"dtype":"f32"
}
},
"vae": {
"latents" : {
"shape" : [
"1*batch_size",4,"height","width"
],
"dtype":"f32"
}
},
"clip": {
"token" : {
"shape" : [
"2*batch_size",
"max_len"
],
"dtype":"i64"
}
}
}
}
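The shape entries above are symbolic. Below is a hedged sketch of how strings like "2*batch_size" or "8*height" could be resolved for a concrete run; the real resolver lives elsewhere in the repo, and this helper is hypothetical:

def resolve_shape(shape, batch_size=1, height=64, width=64, max_len=77):
    # evaluate symbolic dims against concrete values; plain ints pass through
    env = {"batch_size": batch_size, "height": height, "width": width, "max_len": max_len}
    dims = shape if isinstance(shape, list) else [shape]
    return [
        eval(str(d), {"__builtins__": {}}, env) if isinstance(d, str) else d
        for d in dims
    ]

# unet latents for stable-diffusion-2-1 at 512x512 (latent height = width = 64):
assert resolve_shape(["1*batch_size", 4, "height", "width"]) == [1, 4, 64, 64]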

View File

@@ -1,23 +0,0 @@
[
{
"stablediffusion/v1_4":"CompVis/stable-diffusion-v1-4",
"stablediffusion/v2_1base":"stabilityai/stable-diffusion-2-1-base",
"stablediffusion/v2_1":"stabilityai/stable-diffusion-2-1",
"stablediffusion/inpaint_v1":"runwayml/stable-diffusion-inpainting",
"stablediffusion/inpaint_v2":"stabilityai/stable-diffusion-2-inpainting",
"anythingv3/v1_4":"Linaqruf/anything-v3.0",
"analogdiffusion/v1_4":"wavymulder/Analog-Diffusion",
"openjourney/v1_4":"prompthero/openjourney",
"dreamlike/v1_4":"dreamlike-art/dreamlike-diffusion-1.0"
},
{
"stablediffusion/fp16":"fp16",
"stablediffusion/fp32":"main",
"anythingv3/fp16":"diffusers",
"anythingv3/fp32":"diffusers",
"analogdiffusion/fp16":"main",
"analogdiffusion/fp32":"main",
"openjourney/fp16":"main",
"openjourney/fp32":"main"
}
]

View File

@@ -1,85 +0,0 @@
[
{
"stablediffusion/untuned":"gs://shark_tank/sd_untuned",
"stablediffusion/tuned":"gs://shark_tank/sd_tuned",
"stablediffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
"anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
"anythingv3/tuned":"gs://shark_tank/sd_tuned",
"anythingv3/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
"analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
"analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
"analogdiffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
"openjourney/untuned":"gs://shark_tank/sd_openjourney",
"openjourney/tuned":"gs://shark_tank/sd_tuned",
"dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
},
{
"stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
"stablediffusion/v1_4/unet/fp16/length_77/tuned":"unet_8dec_fp16_tuned",
"stablediffusion/v1_4/unet/fp16/length_77/tuned/cuda":"unet_8dec_fp16_cuda_tuned",
"stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
"stablediffusion/v1_4/unet/fp32/length_64/untuned":"unet_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
"stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
"stablediffusion/v1_4/vae/fp16/length_77/tuned":"vae_19dec_fp16_tuned",
"stablediffusion/v1_4/vae/fp16/length_77/tuned/cuda":"vae_19dec_fp16_cuda_tuned",
"stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
"stablediffusion/v1_4/vae/fp32/length_77/untuned":"vae_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
"stablediffusion/v1_4/vae/fp32/length_64/untuned":"vae_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
"stablediffusion/v1_4/clip/fp32/length_77/untuned":"clip_18dec_fp32",
"stablediffusion/v1_4/clip/fp32/length_64/untuned":"clip_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
"stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
"stablediffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"unet2base_8dec_fp16_cuda_tuned",
"stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
"stablediffusion/v2_1base/unet/fp16/length_64/tuned/cuda":"unet_19dec_v2p1base_fp16_64_cuda_tuned",
"stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
"stablediffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"vae2base_19dec_fp16_cuda_tuned",
"stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae2base_8dec_fp16",
"stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
"stablediffusion/v2_1base/vae/fp16/length_77/tuned/base/cuda":"vae2base_8dec_fp16_cuda_tuned",
"stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"vae2_8dec_fp16",
"stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
"anythingv3/v1_4/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
"anythingv3/v1_4/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
"anythingv3/v1_4/unet/fp16/length_77/tuned/cuda":"av3_unet_19dec_fp16_cuda_tuned",
"anythingv3/v1_4/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
"anythingv3/v1_4/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
"anythingv3/v1_4/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
"anythingv3/v1_4/vae/fp16/length_77/tuned/cuda":"av3_vae_19dec_fp16_cuda_tuned",
"anythingv3/v1_4/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
"anythingv3/v1_4/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
"anythingv3/v1_4/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
"anythingv3/v1_4/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
"analogdiffusion/v1_4/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
"analogdiffusion/v1_4/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
"analogdiffusion/v1_4/unet/fp16/length_77/tuned/cuda":"ad_unet_19dec_fp16_cuda_tuned",
"analogdiffusion/v1_4/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
"analogdiffusion/v1_4/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
"analogdiffusion/v1_4/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
"analogdiffusion/v1_4/vae/fp16/length_77/tuned/cuda":"ad_vae_19dec_fp16_cuda_tuned",
"analogdiffusion/v1_4/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
"analogdiffusion/v1_4/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
"analogdiffusion/v1_4/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
"analogdiffusion/v1_4/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
"openjourney/v1_4/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
"openjourney/v1_4/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
"openjourney/v1_4/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
"openjourney/v1_4/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
"openjourney/v1_4/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
"openjourney/v1_4/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
"openjourney/v1_4/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
"dreamlike/v1_4/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
"dreamlike/v1_4/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
"dreamlike/v1_4/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
"dreamlike/v1_4/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
"dreamlike/v1_4/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
"dreamlike/v1_4/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
"dreamlike/v1_4/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
}
]
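
A minimal sketch of how the two tables above could be resolved together (the file name and helper function are illustrative, not the repo's actual API):

import json

def resolve_artifact(tables_path, variant="stablediffusion", version="v1_4",
                     submodel="unet", precision="fp16", max_length=77):
    # The file holds two dicts: gs:// bucket URLs keyed by variant/tuning,
    # and artifact names keyed by the full model descriptor.
    with open(tables_path) as f:
        buckets, artifacts = json.load(f)
    bucket = buckets[f"{variant}/untuned"]
    name = artifacts[
        f"{variant}/{version}/{submodel}/{precision}/length_{max_length}/untuned"
    ]
    return bucket, name

# e.g. resolve_artifact("model_db.json")
# -> ("gs://shark_tank/sd_untuned", "unet_8dec_fp16")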

View File

@@ -1,84 +0,0 @@
{
"unet": {
"tuned": {
"fp16": {
"default_compilation_flags": []
},
"fp32": {
"default_compilation_flags": []
}
},
"untuned": {
"fp16": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
]
},
"fp32": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
}
}
},
"vae": {
"tuned": {
"fp16": {
"default_compilation_flags": [],
"specified_compilation_flags": {
"cuda": [],
"default_device": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32},iree-linalg-ext-convert-conv2d-to-winograd))"
]
}
},
"fp32": {
"default_compilation_flags": [],
"specified_compilation_flags": {
"cuda": [],
"default_device": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16},iree-linalg-ext-convert-conv2d-to-winograd))"
]
}
}
},
"untuned": {
"fp16": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
]
},
"fp32": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
}
}
},
"clip": {
"tuned": {
"fp16": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
},
"fp32": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
}
},
"untuned": {
"fp16": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
},
"fp32": {
"default_compilation_flags": [
"--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
]
}
}
}
}
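
A rough sketch of consuming a flag table shaped like the one above (the loader shown is illustrative; the repo's real one may differ):

import json

def lookup_flags(config, model="unet", tuning="untuned",
                 precision="fp16", device="cuda"):
    entry = config[model][tuning][precision]
    # Start from the default flags, then append device-specific ones,
    # falling back to the "default_device" bucket when present.
    flags = list(entry.get("default_compilation_flags", []))
    specified = entry.get("specified_compilation_flags", {})
    flags += specified.get(device, specified.get("default_device", []))
    return flags

with open("opt_flags.json") as f:  # hypothetical filename
    print(lookup_flags(json.load(f), model="vae", tuning="tuned"))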

View File

@@ -1,244 +0,0 @@
import os
import io
from shark.model_annotation import model_annotation, create_context
from shark.iree_utils._common import iree_target_map, run_cmd
from shark.shark_downloader import (
download_model,
download_public_file,
WORKDIR,
)
from shark.parser import shark_args
from apps.stable_diffusion.src.utils.stable_args import args
def get_device():
device = (
args.device
if "://" not in args.device
else args.device.split("://")[0]
)
return device
def get_device_args():
device = get_device()
device_spec_args = []
if device == "cuda":
from shark.iree_utils.gpu_utils import get_iree_gpu_args
gpu_flags = get_iree_gpu_args()
for flag in gpu_flags:
device_spec_args.append(flag)
elif device == "vulkan":
device_spec_args.append(
f"--iree-vulkan-target-triple={args.iree_vulkan_target_triple} "
)
return device, device_spec_args
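# Example (values hypothetical): with args.device == "vulkan://0" and
# args.iree_vulkan_target_triple == "rdna2-unknown-windows", this returns
# ("vulkan", ["--iree-vulkan-target-triple=rdna2-unknown-windows "])
# (the trailing space comes from the format string above).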
# Download the model (Unet or VAE fp16) from shark_tank
def load_model_from_tank():
from apps.stable_diffusion.src.models import (
get_params,
get_variant_version,
)
variant, version = get_variant_version(args.hf_model_id)
shark_args.local_tank_cache = args.local_tank_cache
bucket_key = f"{variant}/untuned"
if args.annotation_model == "unet":
model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/untuned"
elif args.annotation_model == "vae":
is_base = "/base" if args.use_base_vae else ""
model_key = f"{variant}/{version}/vae/{args.precision}/length_77/untuned{is_base}"
bucket, model_name, iree_flags = get_params(
bucket_key, model_key, args.annotation_model, "untuned", args.precision
)
mlir_model, func_name, inputs, golden_out = download_model(
model_name,
tank_url=bucket,
frontend="torch",
)
return mlir_model, model_name
# Download the tuned config files from shark_tank
def load_winograd_configs():
device = get_device()
config_bucket = "gs://shark_tank/sd_tuned/configs/"
config_name = f"{args.annotation_model}_winograd_{device}.json"
full_gs_url = config_bucket + config_name
winograd_config_dir = os.path.join(WORKDIR, "configs", config_name)
print("Loading Winograd config file from ", winograd_config_dir)
download_public_file(full_gs_url, winograd_config_dir, True)
return winograd_config_dir
def load_lower_configs():
from apps.stable_diffusion.src.models import get_variant_version
from apps.stable_diffusion.src.utils.utils import (
fetch_and_update_base_model_id,
)
if args.ckpt_loc != "":
base_model_id = fetch_and_update_base_model_id(args.ckpt_loc)
else:
base_model_id = fetch_and_update_base_model_id(args.hf_model_id)
if base_model_id == "":
base_model_id = args.hf_model_id
variant, version = get_variant_version(base_model_id)
if version == "inpaint_v1":
version = "v1_4"
elif version == "inpaint_v2":
version = "v2_1base"
config_bucket = "gs://shark_tank/sd_tuned_configs/"
device, device_spec_args = get_device_args()
spec = ""
if device_spec_args:
spec = device_spec_args[-1].split("=")[-1].strip()
if device == "vulkan":
spec = spec.split("-")[0]
if args.annotation_model == "vae":
if not spec or spec in ["rdna3", "sm_80"]:
config_name = (
f"{args.annotation_model}_{args.precision}_{device}.json"
)
else:
config_name = f"{args.annotation_model}_{args.precision}_{device}_{spec}.json"
else:
if not spec or spec in ["rdna3", "sm_80"]:
config_name = f"{args.annotation_model}_{version}_{args.precision}_{device}.json"
else:
config_name = f"{args.annotation_model}_{version}_{args.precision}_{device}_{spec}.json"
full_gs_url = config_bucket + config_name
lowering_config_dir = os.path.join(WORKDIR, "configs", config_name)
print("Loading lowering config file from ", lowering_config_dir)
download_public_file(full_gs_url, lowering_config_dir, True)
return lowering_config_dir
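# Illustrative outputs of the naming scheme above (specs hypothetical):
#   unet, "v2_1base", fp16, vulkan, spec "rdna3" -> "unet_v2_1base_fp16_vulkan.json"
#   unet, "v2_1base", fp16, vulkan, spec "rdna2" -> "unet_v2_1base_fp16_vulkan_rdna2.json"
#   vae, fp16, cuda, spec "sm_80" -> "vae_fp16_cuda.json"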
# Annotate the model with Winograd attribute on selected conv ops
def annotate_with_winograd(input_mlir, winograd_config_dir, model_name):
with create_context() as ctx:
winograd_model = model_annotation(
ctx,
input_contents=input_mlir,
config_path=winograd_config_dir,
search_op="conv",
winograd=True,
)
bytecode_stream = io.BytesIO()
winograd_model.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
if args.save_annotation:
if model_name.split("_")[-1] != "tuned":
out_file_path = os.path.join(
args.annotation_output, model_name + "_tuned_torch.mlir"
)
else:
out_file_path = os.path.join(
args.annotation_output, model_name + "_torch.mlir"
)
        with open(out_file_path, "w") as f:
            f.write(str(winograd_model))
return bytecode
def dump_after_mlir(input_mlir, use_winograd):
import iree.compiler as ireec
device, device_spec_args = get_device_args()
if use_winograd:
preprocess_flag = "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32},iree-linalg-ext-convert-conv2d-to-winograd))"
else:
preprocess_flag = "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
dump_module = ireec.compile_str(
input_mlir,
target_backends=[iree_target_map(device)],
extra_args=device_spec_args
+ [
preprocess_flag,
"--compile-to=preprocessing",
],
)
return dump_module
# For Unet annotate the model with tuned lowering configs
def annotate_with_lower_configs(
input_mlir, lowering_config_dir, model_name, use_winograd
):
# Dump IR after padding/img2col/winograd passes
dump_module = dump_after_mlir(input_mlir, use_winograd)
print("Applying tuned configs on", model_name)
# Annotate the model with lowering configs in the config file
with create_context() as ctx:
tuned_model = model_annotation(
ctx,
input_contents=dump_module,
config_path=lowering_config_dir,
search_op="all",
)
bytecode_stream = io.BytesIO()
tuned_model.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
if args.save_annotation:
if model_name.split("_")[-1] != "tuned":
out_file_path = (
f"{args.annotation_output}/{model_name}_tuned_torch.mlir"
)
else:
out_file_path = f"{args.annotation_output}/{model_name}_torch.mlir"
with open(out_file_path, "w") as f:
f.write(str(tuned_model))
f.close()
return bytecode
def sd_model_annotation(mlir_model, model_name):
device = get_device()
if args.annotation_model == "unet" and device == "vulkan":
use_winograd = True
winograd_config_dir = load_winograd_configs()
winograd_model = annotate_with_winograd(
mlir_model, winograd_config_dir, model_name
)
lowering_config_dir = load_lower_configs()
tuned_model = annotate_with_lower_configs(
winograd_model, lowering_config_dir, model_name, use_winograd
)
elif args.annotation_model == "vae" and device == "vulkan":
use_winograd = True
winograd_config_dir = load_winograd_configs()
tuned_model = annotate_with_winograd(
mlir_model, winograd_config_dir, model_name
)
else:
use_winograd = False
lowering_config_dir = load_lower_configs()
tuned_model = annotate_with_lower_configs(
mlir_model, lowering_config_dir, model_name, use_winograd
)
return tuned_model
if __name__ == "__main__":
mlir_model, model_name = load_model_from_tank()
sd_model_annotation(mlir_model, model_name)
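# Example invocation of this annotation script (flag values illustrative):
#   python -m apps.stable_diffusion.src.utils.sd_annotation \
#       --annotation_model unet --precision fp16 --device vulkan \
#       --save_annotation --annotation_output ./annotated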

View File

@@ -1,495 +0,0 @@
import argparse
import os
from pathlib import Path
def path_expand(s):
return Path(s).expanduser().resolve()
def is_valid_file(arg):
if not os.path.exists(arg):
return None
else:
return arg
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
##############################################################################
### Stable Diffusion Params
##############################################################################
p.add_argument(
"-p",
"--prompts",
nargs="+",
default=["cyberpunk forest by Salvador Dali"],
help="text of which images to be generated.",
)
p.add_argument(
"--negative_prompts",
nargs="+",
default=["trees, green"],
help="text you don't want to see in the generated image.",
)
p.add_argument(
"--img_path",
type=str,
help="Path to the image input for img2img/inpainting",
)
p.add_argument(
"--steps",
type=int,
default=50,
help="the no. of steps to do the sampling.",
)
p.add_argument(
"--seed",
type=int,
default=-1,
help="the seed to use. -1 for a random one.",
)
p.add_argument(
"--batch_size",
type=int,
default=1,
choices=range(1, 4),
help="the number of inferences to be made in a single `batch_count`.",
)
p.add_argument(
"--height",
type=int,
default=512,
choices=range(128, 769, 8),
help="the height of the output image.",
)
p.add_argument(
"--width",
type=int,
default=512,
choices=range(128, 769, 8),
help="the width of the output image.",
)
p.add_argument(
"--guidance_scale",
type=float,
default=7.5,
help="the value to be used for guidance scaling.",
)
p.add_argument(
"--noise_level",
type=int,
default=20,
help="the value to be used for noise level of upscaler.",
)
p.add_argument(
"--max_length",
type=int,
default=64,
help="max length of the tokenizer output, options are 64 and 77.",
)
p.add_argument(
"--strength",
type=float,
default=0.8,
help="the strength of change applied on the given input image for img2img",
)
##############################################################################
### Inpainting and Outpainting Params
##############################################################################
p.add_argument(
"--mask_path",
type=str,
help="Path to the mask image input for inpainting",
)
p.add_argument(
"--inpaint_full_res",
default=False,
action=argparse.BooleanOptionalAction,
help="If inpaint only masked area or whole picture",
)
p.add_argument(
"--inpaint_full_res_padding",
type=int,
default=32,
choices=range(0, 257, 4),
help="Number of pixels for only masked padding",
)
p.add_argument(
"--pixels",
type=int,
default=128,
choices=range(8, 257, 8),
help="Number of expended pixels for one direction for outpainting",
)
p.add_argument(
"--mask_blur",
type=int,
default=8,
choices=range(0, 65),
help="Number of blur pixels for outpainting",
)
p.add_argument(
"--left",
default=False,
action=argparse.BooleanOptionalAction,
help="If expend left for outpainting",
)
p.add_argument(
"--right",
default=False,
action=argparse.BooleanOptionalAction,
help="If expend right for outpainting",
)
p.add_argument(
"--top",
default=False,
action=argparse.BooleanOptionalAction,
help="If expend top for outpainting",
)
p.add_argument(
"--bottom",
default=False,
action=argparse.BooleanOptionalAction,
help="If expend bottom for outpainting",
)
p.add_argument(
"--noise_q",
type=float,
default=1.0,
help="Fall-off exponent for outpainting (lower=higher detail) (min=0.0, max=4.0)",
)
p.add_argument(
"--color_variation",
type=float,
default=0.05,
help="Color variation for outpainting (min=0.0, max=1.0)",
)
##############################################################################
### Model Config and Usage Params
##############################################################################
p.add_argument(
"--device", type=str, default="vulkan", help="device to run the model."
)
p.add_argument(
"--precision", type=str, default="fp16", help="precision to run the model."
)
p.add_argument(
"--import_mlir",
default=False,
action=argparse.BooleanOptionalAction,
help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.",
)
p.add_argument(
"--load_vmfb",
default=True,
action=argparse.BooleanOptionalAction,
help="attempts to load the model from a precompiled flatbuffer and compiles + saves it if not found.",
)
p.add_argument(
"--save_vmfb",
default=False,
action=argparse.BooleanOptionalAction,
help="saves the compiled flatbuffer to the local directory",
)
p.add_argument(
"--use_tuned",
default=True,
action=argparse.BooleanOptionalAction,
help="Download and use the tuned version of the model if available",
)
p.add_argument(
"--use_base_vae",
default=False,
action=argparse.BooleanOptionalAction,
help="Do conversion from the VAE output to pixel space on cpu.",
)
p.add_argument(
"--scheduler",
type=str,
default="SharkEulerDiscrete",
help="other supported schedulers are [PNDM, DDIM, LMSDiscrete, EulerDiscrete, DPMSolverMultistep]",
)
p.add_argument(
"--output_img_format",
type=str,
default="png",
help="specify the format in which output image is save. Supported options: jpg / png",
)
p.add_argument(
"--output_dir",
type=str,
default=None,
help="Directory path to save the output images and json",
)
p.add_argument(
"--batch_count",
type=int,
default=1,
help="number of batch to be generated with random seeds in single execution",
)
p.add_argument(
"--ckpt_loc",
type=str,
default="",
help="Path to SD's .ckpt file.",
)
p.add_argument(
"--custom_vae",
type=str,
default="",
help="HuggingFace repo-id or path to SD model's checkpoint whose Vae needs to be plugged in.",
)
p.add_argument(
"--hf_model_id",
type=str,
default="stabilityai/stable-diffusion-2-1-base",
help="The repo-id of hugging face.",
)
p.add_argument(
"--low_cpu_mem_usage",
default=False,
action=argparse.BooleanOptionalAction,
help="Use the accelerate package to reduce cpu memory consumption",
)
p.add_argument(
"--attention_slicing",
type=str,
default="none",
help="Amount of attention slicing to use (one of 'max', 'auto', 'none', or an integer)",
)
p.add_argument(
"--use_stencil",
choices=["canny", "openpose", "scribble"],
help="Enable the stencil feature.",
)
p.add_argument(
"--use_lora",
type=str,
default="",
help="Use standalone LoRA weight using a HF ID or a checkpoint file (~3 MB)",
)
##############################################################################
### IREE - Vulkan supported flags
##############################################################################
p.add_argument(
"--iree_vulkan_target_triple",
type=str,
default="",
help="Specify target triple for vulkan",
)
p.add_argument(
"--vulkan_debug_utils",
default=False,
action=argparse.BooleanOptionalAction,
help="Profiles vulkan device and collects the .rdc info",
)
p.add_argument(
"--vulkan_large_heap_block_size",
default="4147483648",
help="flag for setting VMA preferredLargeHeapBlockSize for vulkan device, default is 4G",
)
p.add_argument(
"--vulkan_validation_layers",
default=False,
action=argparse.BooleanOptionalAction,
help="flag for disabling vulkan validation layers when benchmarking",
)
##############################################################################
### Misc. Debug and Optimization flags
##############################################################################
p.add_argument(
"--use_compiled_scheduler",
default=True,
action=argparse.BooleanOptionalAction,
help="use the default scheduler precompiled into the model if available",
)
p.add_argument(
"--local_tank_cache",
default="",
help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
)
p.add_argument(
"--dump_isa",
default=False,
action="store_true",
help="When enabled call amdllpc to get ISA dumps. use with dispatch benchmarks.",
)
p.add_argument(
"--dispatch_benchmarks",
default=None,
    help='dispatches to return benchmark data on. Use "All" for all, and None for none.',
)
p.add_argument(
"--dispatch_benchmarks_dir",
default="temp_dispatch_benchmarks",
help='directory where you want to store dispatch data generated with "--dispatch_benchmarks"',
)
p.add_argument(
"--enable_rgp",
default=False,
action=argparse.BooleanOptionalAction,
help="flag for inserting debug frames between iterations for use with rgp.",
)
p.add_argument(
"--hide_steps",
default=True,
action=argparse.BooleanOptionalAction,
help="flag for hiding the details of iteration/sec for each step.",
)
p.add_argument(
"--warmup_count",
type=int,
default=0,
help="flag setting warmup count for clip and vae [>= 0].",
)
p.add_argument(
"--clear_all",
default=False,
action=argparse.BooleanOptionalAction,
help="flag to clear all mlir and vmfb from common locations. Recompiling will take several minutes",
)
p.add_argument(
"--save_metadata_to_json",
default=False,
action=argparse.BooleanOptionalAction,
help="flag for whether or not to save a generation information json file with the image.",
)
p.add_argument(
"--write_metadata_to_png",
default=True,
action=argparse.BooleanOptionalAction,
help="flag for whether or not to save generation information in PNG chunk text to generated images.",
)
p.add_argument(
"--import_debug",
default=False,
action=argparse.BooleanOptionalAction,
help="if import_mlir is True, saves mlir via the debug option in shark importer. Does nothing if import_mlir is false (the default)",
)
##############################################################################
### Web UI flags
##############################################################################
p.add_argument(
"--progress_bar",
default=True,
action=argparse.BooleanOptionalAction,
help="flag for removing the progress bar animation during image generation",
)
p.add_argument(
"--ckpt_dir",
type=str,
default="",
help="Path to directory where all .ckpts are stored in order to populate them in the web UI",
)
p.add_argument(
"--share",
default=False,
action=argparse.BooleanOptionalAction,
help="flag for generating a public URL",
)
p.add_argument(
"--server_port",
type=int,
default=8080,
help="flag for setting server port",
)
##############################################################################
### SD model auto-annotation flags
##############################################################################
p.add_argument(
"--annotation_output",
type=path_expand,
default="./",
help="Directory to save the annotated mlir file",
)
p.add_argument(
"--annotation_model",
type=str,
default="unet",
help="Options are unet and vae.",
)
p.add_argument(
"--save_annotation",
default=False,
action=argparse.BooleanOptionalAction,
help="Save annotated mlir file",
)
args, unknown = p.parse_known_args()
if args.import_debug:
os.environ["IREE_SAVE_TEMPS"] = os.path.join(
os.getcwd(), args.hf_model_id.replace("/", "_")
)
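# These flags are shared by the SD entry points; a typical command line
# (entry point and values illustrative) might be:
#   python <entry_point>.py --prompts "a watercolor fox" \
#       --negative_prompts "blurry" --steps 30 --device vulkan \
#       --precision fp16 --save_vmfb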

View File

@@ -1,2 +0,0 @@
from apps.stable_diffusion.src.utils.stencils.canny import CannyDetector
from apps.stable_diffusion.src.utils.stencils.openpose import OpenposeDetector

View File

@@ -1,6 +0,0 @@
import cv2
class CannyDetector:
def __call__(self, img, low_threshold, high_threshold):
return cv2.Canny(img, low_threshold, high_threshold)
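# Minimal usage sketch (image path hypothetical):
#   import cv2
#   img = cv2.imread("input.png")
#   edges = CannyDetector()(img, low_threshold=100, high_threshold=200)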

View File

@@ -1,62 +0,0 @@
import requests
from pathlib import Path
import torch
import numpy as np
# from annotator.util import annotator_ckpts_path
from apps.stable_diffusion.src.utils.stencils.openpose.body import Body
from apps.stable_diffusion.src.utils.stencils.openpose.hand import Hand
from apps.stable_diffusion.src.utils.stencils.openpose.openpose_util import (
draw_bodypose,
draw_handpose,
handDetect,
)
body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"
class OpenposeDetector:
def __init__(self):
cwd = Path.cwd()
ckpt_path = Path(cwd, "stencil_annotator")
ckpt_path.mkdir(parents=True, exist_ok=True)
body_modelpath = ckpt_path / "body_pose_model.pth"
hand_modelpath = ckpt_path / "hand_pose_model.pth"
        if not body_modelpath.is_file():
            r = requests.get(body_model_path, allow_redirects=True)
            with open(body_modelpath, "wb") as f:
                f.write(r.content)
        if not hand_modelpath.is_file():
            r = requests.get(hand_model_path, allow_redirects=True)
            with open(hand_modelpath, "wb") as f:
                f.write(r.content)
self.body_estimation = Body(body_modelpath)
self.hand_estimation = Hand(hand_modelpath)
def __call__(self, oriImg, hand=False):
        oriImg = oriImg[:, :, ::-1].copy()  # flip channel order (RGB <-> BGR)
with torch.no_grad():
candidate, subset = self.body_estimation(oriImg)
canvas = np.zeros_like(oriImg)
canvas = draw_bodypose(canvas, candidate, subset)
if hand:
hands_list = handDetect(candidate, subset, oriImg)
all_hand_peaks = []
for x, y, w, is_left in hands_list:
peaks = self.hand_estimation(
oriImg[y : y + w, x : x + w, :]
)
peaks[:, 0] = np.where(
peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x
)
peaks[:, 1] = np.where(
peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y
)
all_hand_peaks.append(peaks)
canvas = draw_handpose(canvas, all_hand_peaks)
return canvas, dict(
candidate=candidate.tolist(), subset=subset.tolist()
)

View File

@@ -1,499 +0,0 @@
import cv2
import numpy as np
import math
from scipy.ndimage.filters import gaussian_filter
import torch
import torch.nn as nn
from collections import OrderedDict
from apps.stable_diffusion.src.utils.stencils.openpose.openpose_util import (
make_layers,
transfer,
padRightDownCorner,
)
class BodyPoseModel(nn.Module):
def __init__(self):
super(BodyPoseModel, self).__init__()
# these layers have no relu layer
no_relu_layers = [
"conv5_5_CPM_L1",
"conv5_5_CPM_L2",
"Mconv7_stage2_L1",
"Mconv7_stage2_L2",
"Mconv7_stage3_L1",
"Mconv7_stage3_L2",
"Mconv7_stage4_L1",
"Mconv7_stage4_L2",
"Mconv7_stage5_L1",
"Mconv7_stage5_L2",
"Mconv7_stage6_L1",
"Mconv7_stage6_L1",
]
blocks = {}
block0 = OrderedDict(
[
("conv1_1", [3, 64, 3, 1, 1]),
("conv1_2", [64, 64, 3, 1, 1]),
("pool1_stage1", [2, 2, 0]),
("conv2_1", [64, 128, 3, 1, 1]),
("conv2_2", [128, 128, 3, 1, 1]),
("pool2_stage1", [2, 2, 0]),
("conv3_1", [128, 256, 3, 1, 1]),
("conv3_2", [256, 256, 3, 1, 1]),
("conv3_3", [256, 256, 3, 1, 1]),
("conv3_4", [256, 256, 3, 1, 1]),
("pool3_stage1", [2, 2, 0]),
("conv4_1", [256, 512, 3, 1, 1]),
("conv4_2", [512, 512, 3, 1, 1]),
("conv4_3_CPM", [512, 256, 3, 1, 1]),
("conv4_4_CPM", [256, 128, 3, 1, 1]),
]
)
# Stage 1
block1_1 = OrderedDict(
[
("conv5_1_CPM_L1", [128, 128, 3, 1, 1]),
("conv5_2_CPM_L1", [128, 128, 3, 1, 1]),
("conv5_3_CPM_L1", [128, 128, 3, 1, 1]),
("conv5_4_CPM_L1", [128, 512, 1, 1, 0]),
("conv5_5_CPM_L1", [512, 38, 1, 1, 0]),
]
)
block1_2 = OrderedDict(
[
("conv5_1_CPM_L2", [128, 128, 3, 1, 1]),
("conv5_2_CPM_L2", [128, 128, 3, 1, 1]),
("conv5_3_CPM_L2", [128, 128, 3, 1, 1]),
("conv5_4_CPM_L2", [128, 512, 1, 1, 0]),
("conv5_5_CPM_L2", [512, 19, 1, 1, 0]),
]
)
blocks["block1_1"] = block1_1
blocks["block1_2"] = block1_2
self.model0 = make_layers(block0, no_relu_layers)
# Stages 2 - 6
for i in range(2, 7):
blocks["block%d_1" % i] = OrderedDict(
[
("Mconv1_stage%d_L1" % i, [185, 128, 7, 1, 3]),
("Mconv2_stage%d_L1" % i, [128, 128, 7, 1, 3]),
("Mconv3_stage%d_L1" % i, [128, 128, 7, 1, 3]),
("Mconv4_stage%d_L1" % i, [128, 128, 7, 1, 3]),
("Mconv5_stage%d_L1" % i, [128, 128, 7, 1, 3]),
("Mconv6_stage%d_L1" % i, [128, 128, 1, 1, 0]),
("Mconv7_stage%d_L1" % i, [128, 38, 1, 1, 0]),
]
)
blocks["block%d_2" % i] = OrderedDict(
[
("Mconv1_stage%d_L2" % i, [185, 128, 7, 1, 3]),
("Mconv2_stage%d_L2" % i, [128, 128, 7, 1, 3]),
("Mconv3_stage%d_L2" % i, [128, 128, 7, 1, 3]),
("Mconv4_stage%d_L2" % i, [128, 128, 7, 1, 3]),
("Mconv5_stage%d_L2" % i, [128, 128, 7, 1, 3]),
("Mconv6_stage%d_L2" % i, [128, 128, 1, 1, 0]),
("Mconv7_stage%d_L2" % i, [128, 19, 1, 1, 0]),
]
)
for k in blocks.keys():
blocks[k] = make_layers(blocks[k], no_relu_layers)
self.model1_1 = blocks["block1_1"]
self.model2_1 = blocks["block2_1"]
self.model3_1 = blocks["block3_1"]
self.model4_1 = blocks["block4_1"]
self.model5_1 = blocks["block5_1"]
self.model6_1 = blocks["block6_1"]
self.model1_2 = blocks["block1_2"]
self.model2_2 = blocks["block2_2"]
self.model3_2 = blocks["block3_2"]
self.model4_2 = blocks["block4_2"]
self.model5_2 = blocks["block5_2"]
self.model6_2 = blocks["block6_2"]
def forward(self, x):
out1 = self.model0(x)
out1_1 = self.model1_1(out1)
out1_2 = self.model1_2(out1)
out2 = torch.cat([out1_1, out1_2, out1], 1)
out2_1 = self.model2_1(out2)
out2_2 = self.model2_2(out2)
out3 = torch.cat([out2_1, out2_2, out1], 1)
out3_1 = self.model3_1(out3)
out3_2 = self.model3_2(out3)
out4 = torch.cat([out3_1, out3_2, out1], 1)
out4_1 = self.model4_1(out4)
out4_2 = self.model4_2(out4)
out5 = torch.cat([out4_1, out4_2, out1], 1)
out5_1 = self.model5_1(out5)
out5_2 = self.model5_2(out5)
out6 = torch.cat([out5_1, out5_2, out1], 1)
out6_1 = self.model6_1(out6)
out6_2 = self.model6_2(out6)
return out6_1, out6_2
class Body(object):
def __init__(self, model_path):
self.model = BodyPoseModel()
if torch.cuda.is_available():
self.model = self.model.cuda()
model_dict = transfer(self.model, torch.load(model_path))
self.model.load_state_dict(model_dict)
self.model.eval()
def __call__(self, oriImg):
scale_search = [0.5]
boxsize = 368
stride = 8
padValue = 128
thre1 = 0.1
thre2 = 0.05
multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
for m in range(len(multiplier)):
scale = multiplier[m]
imageToTest = cv2.resize(
oriImg,
(0, 0),
fx=scale,
fy=scale,
interpolation=cv2.INTER_CUBIC,
)
imageToTest_padded, pad = padRightDownCorner(
imageToTest, stride, padValue
)
im = (
np.transpose(
np.float32(imageToTest_padded[:, :, :, np.newaxis]),
(3, 2, 0, 1),
)
/ 256
- 0.5
)
im = np.ascontiguousarray(im)
data = torch.from_numpy(im).float()
if torch.cuda.is_available():
data = data.cuda()
with torch.no_grad():
Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
# extract outputs, resize, and remove padding
heatmap = np.transpose(
np.squeeze(Mconv7_stage6_L2), (1, 2, 0)
) # output 1 is heatmaps
heatmap = cv2.resize(
heatmap,
(0, 0),
fx=stride,
fy=stride,
interpolation=cv2.INTER_CUBIC,
)
heatmap = heatmap[
: imageToTest_padded.shape[0] - pad[2],
: imageToTest_padded.shape[1] - pad[3],
:,
]
heatmap = cv2.resize(
heatmap,
(oriImg.shape[1], oriImg.shape[0]),
interpolation=cv2.INTER_CUBIC,
)
# paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs
paf = np.transpose(
np.squeeze(Mconv7_stage6_L1), (1, 2, 0)
) # output 0 is PAFs
paf = cv2.resize(
paf,
(0, 0),
fx=stride,
fy=stride,
interpolation=cv2.INTER_CUBIC,
)
paf = paf[
: imageToTest_padded.shape[0] - pad[2],
: imageToTest_padded.shape[1] - pad[3],
:,
]
paf = cv2.resize(
paf,
(oriImg.shape[1], oriImg.shape[0]),
interpolation=cv2.INTER_CUBIC,
)
            heatmap_avg += heatmap / len(multiplier)
            paf_avg += paf / len(multiplier)
all_peaks = []
peak_counter = 0
for part in range(18):
map_ori = heatmap_avg[:, :, part]
one_heatmap = gaussian_filter(map_ori, sigma=3)
map_left = np.zeros(one_heatmap.shape)
map_left[1:, :] = one_heatmap[:-1, :]
map_right = np.zeros(one_heatmap.shape)
map_right[:-1, :] = one_heatmap[1:, :]
map_up = np.zeros(one_heatmap.shape)
map_up[:, 1:] = one_heatmap[:, :-1]
map_down = np.zeros(one_heatmap.shape)
map_down[:, :-1] = one_heatmap[:, 1:]
peaks_binary = np.logical_and.reduce(
(
one_heatmap >= map_left,
one_heatmap >= map_right,
one_heatmap >= map_up,
one_heatmap >= map_down,
one_heatmap > thre1,
)
)
peaks = list(
zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])
) # note reverse
peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
peak_id = range(peak_counter, peak_counter + len(peaks))
peaks_with_score_and_id = [
peaks_with_score[i] + (peak_id[i],)
for i in range(len(peak_id))
]
all_peaks.append(peaks_with_score_and_id)
peak_counter += len(peaks)
# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [
[2, 3],
[2, 6],
[3, 4],
[4, 5],
[6, 7],
[7, 8],
[2, 9],
[9, 10],
[10, 11],
[2, 12],
[12, 13],
[13, 14],
[2, 1],
[1, 15],
[15, 17],
[1, 16],
[16, 18],
[3, 17],
[6, 18],
]
        # the middle joints heatmap correspondence
mapIdx = [
[31, 32],
[39, 40],
[33, 34],
[35, 36],
[41, 42],
[43, 44],
[19, 20],
[21, 22],
[23, 24],
[25, 26],
[27, 28],
[29, 30],
[47, 48],
[49, 50],
[53, 54],
[51, 52],
[55, 56],
[37, 38],
[45, 46],
]
connection_all = []
special_k = []
mid_num = 10
for k in range(len(mapIdx)):
score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
candA = all_peaks[limbSeq[k][0] - 1]
candB = all_peaks[limbSeq[k][1] - 1]
nA = len(candA)
nB = len(candB)
indexA, indexB = limbSeq[k]
if nA != 0 and nB != 0:
connection_candidate = []
for i in range(nA):
for j in range(nB):
vec = np.subtract(candB[j][:2], candA[i][:2])
norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
norm = max(0.001, norm)
vec = np.divide(vec, norm)
startend = list(
zip(
np.linspace(
candA[i][0], candB[j][0], num=mid_num
),
np.linspace(
candA[i][1], candB[j][1], num=mid_num
),
)
)
vec_x = np.array(
[
score_mid[
int(round(startend[I][1])),
int(round(startend[I][0])),
0,
]
for I in range(len(startend))
]
)
vec_y = np.array(
[
score_mid[
int(round(startend[I][1])),
int(round(startend[I][0])),
1,
]
for I in range(len(startend))
]
)
score_midpts = np.multiply(
vec_x, vec[0]
) + np.multiply(vec_y, vec[1])
score_with_dist_prior = sum(score_midpts) / len(
score_midpts
) + min(0.5 * oriImg.shape[0] / norm - 1, 0)
criterion1 = len(
np.nonzero(score_midpts > thre2)[0]
) > 0.8 * len(score_midpts)
criterion2 = score_with_dist_prior > 0
if criterion1 and criterion2:
connection_candidate.append(
[
i,
j,
score_with_dist_prior,
score_with_dist_prior
+ candA[i][2]
+ candB[j][2],
]
)
connection_candidate = sorted(
connection_candidate, key=lambda x: x[2], reverse=True
)
connection = np.zeros((0, 5))
for c in range(len(connection_candidate)):
i, j, s = connection_candidate[c][0:3]
if i not in connection[:, 3] and j not in connection[:, 4]:
connection = np.vstack(
[connection, [candA[i][3], candB[j][3], s, i, j]]
)
if len(connection) >= min(nA, nB):
break
connection_all.append(connection)
else:
special_k.append(k)
connection_all.append([])
# last number in each row is the total parts number of that person
# the second last number in each row is the score of the overall configuration
subset = -1 * np.ones((0, 20))
candidate = np.array(
[item for sublist in all_peaks for item in sublist]
)
for k in range(len(mapIdx)):
if k not in special_k:
partAs = connection_all[k][:, 0]
partBs = connection_all[k][:, 1]
indexA, indexB = np.array(limbSeq[k]) - 1
for i in range(len(connection_all[k])): # = 1:size(temp,1)
found = 0
subset_idx = [-1, -1]
for j in range(len(subset)): # 1:size(subset,1):
if (
subset[j][indexA] == partAs[i]
or subset[j][indexB] == partBs[i]
):
subset_idx[found] = j
found += 1
if found == 1:
j = subset_idx[0]
if subset[j][indexB] != partBs[i]:
subset[j][indexB] = partBs[i]
subset[j][-1] += 1
subset[j][-2] += (
candidate[partBs[i].astype(int), 2]
+ connection_all[k][i][2]
)
elif found == 2: # if found 2 and disjoint, merge them
j1, j2 = subset_idx
membership = (
(subset[j1] >= 0).astype(int)
+ (subset[j2] >= 0).astype(int)
)[:-2]
if len(np.nonzero(membership == 2)[0]) == 0: # merge
subset[j1][:-2] += subset[j2][:-2] + 1
subset[j1][-2:] += subset[j2][-2:]
subset[j1][-2] += connection_all[k][i][2]
subset = np.delete(subset, j2, 0)
                        else:  # same as the found == 1 case
subset[j1][indexB] = partBs[i]
subset[j1][-1] += 1
subset[j1][-2] += (
candidate[partBs[i].astype(int), 2]
+ connection_all[k][i][2]
)
                    # if no partA is found in the subset, create a new one
elif not found and k < 17:
row = -1 * np.ones(20)
row[indexA] = partAs[i]
row[indexB] = partBs[i]
row[-1] = 2
row[-2] = (
sum(
candidate[
connection_all[k][i, :2].astype(int), 2
]
)
+ connection_all[k][i][2]
)
subset = np.vstack([subset, row])
        # delete rows of subset which have too few detected parts
deleteIdx = []
for i in range(len(subset)):
if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
deleteIdx.append(i)
subset = np.delete(subset, deleteIdx, axis=0)
# candidate: x, y, score, id
return candidate, subset

View File

@@ -1,205 +0,0 @@
import cv2
import numpy as np
from scipy.ndimage.filters import gaussian_filter
import torch
import torch.nn as nn
from skimage.measure import label
from collections import OrderedDict
from apps.stable_diffusion.src.utils.stencils.openpose.openpose_util import (
make_layers,
transfer,
padRightDownCorner,
npmax,
)
class HandPoseModel(nn.Module):
def __init__(self):
super(HandPoseModel, self).__init__()
# these layers have no relu layer
no_relu_layers = [
"conv6_2_CPM",
"Mconv7_stage2",
"Mconv7_stage3",
"Mconv7_stage4",
"Mconv7_stage5",
"Mconv7_stage6",
]
# stage 1
block1_0 = OrderedDict(
[
("conv1_1", [3, 64, 3, 1, 1]),
("conv1_2", [64, 64, 3, 1, 1]),
("pool1_stage1", [2, 2, 0]),
("conv2_1", [64, 128, 3, 1, 1]),
("conv2_2", [128, 128, 3, 1, 1]),
("pool2_stage1", [2, 2, 0]),
("conv3_1", [128, 256, 3, 1, 1]),
("conv3_2", [256, 256, 3, 1, 1]),
("conv3_3", [256, 256, 3, 1, 1]),
("conv3_4", [256, 256, 3, 1, 1]),
("pool3_stage1", [2, 2, 0]),
("conv4_1", [256, 512, 3, 1, 1]),
("conv4_2", [512, 512, 3, 1, 1]),
("conv4_3", [512, 512, 3, 1, 1]),
("conv4_4", [512, 512, 3, 1, 1]),
("conv5_1", [512, 512, 3, 1, 1]),
("conv5_2", [512, 512, 3, 1, 1]),
("conv5_3_CPM", [512, 128, 3, 1, 1]),
]
)
block1_1 = OrderedDict(
[
("conv6_1_CPM", [128, 512, 1, 1, 0]),
("conv6_2_CPM", [512, 22, 1, 1, 0]),
]
)
blocks = {}
blocks["block1_0"] = block1_0
blocks["block1_1"] = block1_1
# stage 2-6
for i in range(2, 7):
blocks["block%d" % i] = OrderedDict(
[
("Mconv1_stage%d" % i, [150, 128, 7, 1, 3]),
("Mconv2_stage%d" % i, [128, 128, 7, 1, 3]),
("Mconv3_stage%d" % i, [128, 128, 7, 1, 3]),
("Mconv4_stage%d" % i, [128, 128, 7, 1, 3]),
("Mconv5_stage%d" % i, [128, 128, 7, 1, 3]),
("Mconv6_stage%d" % i, [128, 128, 1, 1, 0]),
("Mconv7_stage%d" % i, [128, 22, 1, 1, 0]),
]
)
for k in blocks.keys():
blocks[k] = make_layers(blocks[k], no_relu_layers)
self.model1_0 = blocks["block1_0"]
self.model1_1 = blocks["block1_1"]
self.model2 = blocks["block2"]
self.model3 = blocks["block3"]
self.model4 = blocks["block4"]
self.model5 = blocks["block5"]
self.model6 = blocks["block6"]
def forward(self, x):
out1_0 = self.model1_0(x)
out1_1 = self.model1_1(out1_0)
concat_stage2 = torch.cat([out1_1, out1_0], 1)
out_stage2 = self.model2(concat_stage2)
concat_stage3 = torch.cat([out_stage2, out1_0], 1)
out_stage3 = self.model3(concat_stage3)
concat_stage4 = torch.cat([out_stage3, out1_0], 1)
out_stage4 = self.model4(concat_stage4)
concat_stage5 = torch.cat([out_stage4, out1_0], 1)
out_stage5 = self.model5(concat_stage5)
concat_stage6 = torch.cat([out_stage5, out1_0], 1)
out_stage6 = self.model6(concat_stage6)
return out_stage6
class Hand(object):
def __init__(self, model_path):
self.model = HandPoseModel()
if torch.cuda.is_available():
self.model = self.model.cuda()
model_dict = transfer(self.model, torch.load(model_path))
self.model.load_state_dict(model_dict)
self.model.eval()
def __call__(self, oriImg):
scale_search = [0.5, 1.0, 1.5, 2.0]
# scale_search = [0.5]
boxsize = 368
stride = 8
padValue = 128
thre = 0.05
multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
# paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
for m in range(len(multiplier)):
scale = multiplier[m]
imageToTest = cv2.resize(
oriImg,
(0, 0),
fx=scale,
fy=scale,
interpolation=cv2.INTER_CUBIC,
)
imageToTest_padded, pad = padRightDownCorner(
imageToTest, stride, padValue
)
im = (
np.transpose(
np.float32(imageToTest_padded[:, :, :, np.newaxis]),
(3, 2, 0, 1),
)
/ 256
- 0.5
)
im = np.ascontiguousarray(im)
data = torch.from_numpy(im).float()
if torch.cuda.is_available():
data = data.cuda()
# data = data.permute([2, 0, 1]).unsqueeze(0).float()
with torch.no_grad():
output = self.model(data).cpu().numpy()
                # output = self.model(data).numpy()
# extract outputs, resize, and remove padding
heatmap = np.transpose(
np.squeeze(output), (1, 2, 0)
) # output 1 is heatmaps
heatmap = cv2.resize(
heatmap,
(0, 0),
fx=stride,
fy=stride,
interpolation=cv2.INTER_CUBIC,
)
heatmap = heatmap[
: imageToTest_padded.shape[0] - pad[2],
: imageToTest_padded.shape[1] - pad[3],
:,
]
heatmap = cv2.resize(
heatmap,
(oriImg.shape[1], oriImg.shape[0]),
interpolation=cv2.INTER_CUBIC,
)
heatmap_avg += heatmap / len(multiplier)
all_peaks = []
for part in range(21):
map_ori = heatmap_avg[:, :, part]
one_heatmap = gaussian_filter(map_ori, sigma=3)
binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
            # all values are below the threshold
if np.sum(binary) == 0:
all_peaks.append([0, 0])
continue
label_img, label_numbers = label(
binary, return_num=True, connectivity=binary.ndim
)
max_index = (
np.argmax(
[
np.sum(map_ori[label_img == i])
for i in range(1, label_numbers + 1)
]
)
+ 1
)
label_img[label_img != max_index] = 0
map_ori[label_img == 0] = 0
y, x = npmax(map_ori)
all_peaks.append([x, y])
return np.array(all_peaks)

View File

@@ -1,272 +0,0 @@
import math
import numpy as np
import matplotlib
import cv2
from collections import OrderedDict
import torch.nn as nn
def make_layers(block, no_relu_layers):
layers = []
for layer_name, v in block.items():
if "pool" in layer_name:
layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])
layers.append((layer_name, layer))
else:
conv2d = nn.Conv2d(
in_channels=v[0],
out_channels=v[1],
kernel_size=v[2],
stride=v[3],
padding=v[4],
)
layers.append((layer_name, conv2d))
if layer_name not in no_relu_layers:
layers.append(("relu_" + layer_name, nn.ReLU(inplace=True)))
return nn.Sequential(OrderedDict(layers))
def padRightDownCorner(img, stride, padValue):
h = img.shape[0]
w = img.shape[1]
pad = 4 * [None]
pad[0] = 0 # up
pad[1] = 0 # left
pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
img_padded = img
pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
img_padded = np.concatenate((pad_up, img_padded), axis=0)
pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
img_padded = np.concatenate((pad_left, img_padded), axis=1)
pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
img_padded = np.concatenate((img_padded, pad_down), axis=0)
pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
img_padded = np.concatenate((img_padded, pad_right), axis=1)
return img_padded, pad
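# Worked example: h=37, w=46, stride=8, padValue=128 gives
#   pad = [0, 0, 8 - 37 % 8, 8 - 46 % 8] = [0, 0, 3, 2]
# so the image is padded to 40 x 48, and `pad` lets callers crop the
# network output back to the original extent.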
# transfer caffe model weights to pytorch, matching layer names
def transfer(model, model_weights):
transfered_model_weights = {}
for weights_name in model.state_dict().keys():
transfered_model_weights[weights_name] = model_weights[
".".join(weights_name.split(".")[1:])
]
return transfered_model_weights
# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset):
stickwidth = 4
limbSeq = [
[2, 3],
[2, 6],
[3, 4],
[4, 5],
[6, 7],
[7, 8],
[2, 9],
[9, 10],
[10, 11],
[2, 12],
[12, 13],
[13, 14],
[2, 1],
[1, 15],
[15, 17],
[1, 16],
[16, 18],
[3, 17],
[6, 18],
]
colors = [
[255, 0, 0],
[255, 85, 0],
[255, 170, 0],
[255, 255, 0],
[170, 255, 0],
[85, 255, 0],
[0, 255, 0],
[0, 255, 85],
[0, 255, 170],
[0, 255, 255],
[0, 170, 255],
[0, 85, 255],
[0, 0, 255],
[85, 0, 255],
[170, 0, 255],
[255, 0, 255],
[255, 0, 170],
[255, 0, 85],
]
for i in range(18):
for n in range(len(subset)):
index = int(subset[n][i])
if index == -1:
continue
x, y = candidate[index][0:2]
cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
for i in range(17):
for n in range(len(subset)):
index = subset[n][np.array(limbSeq[i]) - 1]
if -1 in index:
continue
cur_canvas = canvas.copy()
Y = candidate[index.astype(int), 0]
X = candidate[index.astype(int), 1]
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly(
(int(mY), int(mX)),
(int(length / 2), stickwidth),
int(angle),
0,
360,
1,
)
cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
return canvas
# images drawn with opencv alone do not look good
def draw_handpose(canvas, all_hand_peaks, show_number=False):
edges = [
[0, 1],
[1, 2],
[2, 3],
[3, 4],
[0, 5],
[5, 6],
[6, 7],
[7, 8],
[0, 9],
[9, 10],
[10, 11],
[11, 12],
[0, 13],
[13, 14],
[14, 15],
[15, 16],
[0, 17],
[17, 18],
[18, 19],
[19, 20],
]
for peaks in all_hand_peaks:
for ie, e in enumerate(edges):
if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
x1, y1 = peaks[e[0]]
x2, y2 = peaks[e[1]]
cv2.line(
canvas,
(x1, y1),
(x2, y2),
matplotlib.colors.hsv_to_rgb(
[ie / float(len(edges)), 1.0, 1.0]
)
* 255,
thickness=2,
)
        for i, keypoint in enumerate(peaks):
            x, y = keypoint
cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
if show_number:
cv2.putText(
canvas,
str(i),
(x, y),
cv2.FONT_HERSHEY_SIMPLEX,
0.3,
(0, 0, 0),
lineType=cv2.LINE_AA,
)
return canvas
# detect hand according to body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
# right hand: wrist 4, elbow 3, shoulder 2
# left hand: wrist 7, elbow 6, shoulder 5
ratioWristElbow = 0.33
detect_result = []
image_height, image_width = oriImg.shape[0:2]
for person in subset.astype(int):
        # a hand is usable only if all three arm keypoints were detected
has_left = np.sum(person[[5, 6, 7]] == -1) == 0
has_right = np.sum(person[[2, 3, 4]] == -1) == 0
if not (has_left or has_right):
continue
hands = []
# left hand
if has_left:
left_shoulder_index, left_elbow_index, left_wrist_index = person[
[5, 6, 7]
]
x1, y1 = candidate[left_shoulder_index][:2]
x2, y2 = candidate[left_elbow_index][:2]
x3, y3 = candidate[left_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, True])
# right hand
if has_right:
(
right_shoulder_index,
right_elbow_index,
right_wrist_index,
) = person[[2, 3, 4]]
x1, y1 = candidate[right_shoulder_index][:2]
x2, y2 = candidate[right_elbow_index][:2]
x3, y3 = candidate[right_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, False])
for x1, y1, x2, y2, x3, y3, is_left in hands:
x = x3 + ratioWristElbow * (x3 - x2)
y = y3 + ratioWristElbow * (y3 - y2)
distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
# x-y refers to the center --> offset to topLeft point
x -= width / 2
y -= width / 2 # width = height
            # clamp the box so it does not overflow the image
if x < 0:
x = 0
if y < 0:
y = 0
width1 = width
width2 = width
if x + width > image_width:
width1 = image_width - x
if y + width > image_height:
width2 = image_height - y
width = min(width1, width2)
            # discard hand boxes smaller than 20 pixels
if width >= 20:
detect_result.append([int(x), int(y), int(width), is_left])
"""
return value: [[x, y, w, True if left hand else False]].
width=height since the network require squared input.
x, y is the coordinate of top left
"""
return detect_result
# get max index of 2d array
def npmax(array):
arrayindex = array.argmax(1)
arrayvalue = array.max(1)
i = arrayvalue.argmax()
j = arrayindex[i]
    return i, j

View File

@@ -1,186 +0,0 @@
import numpy as np
from PIL import Image
import torch
from apps.stable_diffusion.src.utils.stencils import (
CannyDetector,
OpenposeDetector,
)
stencil = {}
def HWC3(x):
assert x.dtype == np.uint8
if x.ndim == 2:
x = x[:, :, None]
assert x.ndim == 3
H, W, C = x.shape
assert C == 1 or C == 3 or C == 4
if C == 3:
return x
if C == 1:
return np.concatenate([x, x, x], axis=2)
if C == 4:
color = x[:, :, 0:3].astype(np.float32)
alpha = x[:, :, 3:4].astype(np.float32) / 255.0
y = color * alpha + 255.0 * (1.0 - alpha)
y = y.clip(0, 255).astype(np.uint8)
return y
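# Shape behavior of HWC3, for reference:
#   (H, W)    -> (H, W, 3)  grayscale replicated across channels
#   (H, W, 3) -> unchanged
#   (H, W, 4) -> (H, W, 3)  RGBA composited over a white background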
def controlnet_hint_shaping(
controlnet_hint, height, width, dtype, num_images_per_prompt=1
):
channels = 3
if isinstance(controlnet_hint, torch.Tensor):
        # torch.Tensor: acceptable shapes are any of chw, bchw (b==1) or bchw (b==num_images_per_prompt)
shape_chw = (channels, height, width)
shape_bchw = (1, channels, height, width)
shape_nchw = (num_images_per_prompt, channels, height, width)
if controlnet_hint.shape in [shape_chw, shape_bchw, shape_nchw]:
controlnet_hint = controlnet_hint.to(
dtype=dtype, device=torch.device("cpu")
)
if controlnet_hint.shape != shape_nchw:
controlnet_hint = controlnet_hint.repeat(
num_images_per_prompt, 1, 1, 1
)
return controlnet_hint
else:
            raise ValueError(
                f"Acceptable shapes of `stencil` are any of ({channels}, {height}, {width}),"
                + f" (1, {channels}, {height}, {width}) or ({num_images_per_prompt}, "
                + f"{channels}, {height}, {width}) but is {controlnet_hint.shape}"
            )
elif isinstance(controlnet_hint, np.ndarray):
        # np.ndarray: acceptable shapes are any of hw, hwc, bhwc (b==1) or bhwc (b==num_images_per_prompt)
        # hwc is the opencv-compatible image format; color channels must be in BGR order.
if controlnet_hint.shape == (height, width):
controlnet_hint = np.repeat(
controlnet_hint[:, :, np.newaxis], channels, axis=2
) # hw -> hwc(c==3)
shape_hwc = (height, width, channels)
shape_bhwc = (1, height, width, channels)
shape_nhwc = (num_images_per_prompt, height, width, channels)
if controlnet_hint.shape in [shape_hwc, shape_bhwc, shape_nhwc]:
controlnet_hint = torch.from_numpy(controlnet_hint.copy())
controlnet_hint = controlnet_hint.to(
dtype=dtype, device=torch.device("cpu")
)
controlnet_hint /= 255.0
if controlnet_hint.shape != shape_nhwc:
controlnet_hint = controlnet_hint.repeat(
num_images_per_prompt, 1, 1, 1
)
controlnet_hint = controlnet_hint.permute(
0, 3, 1, 2
) # b h w c -> b c h w
return controlnet_hint
else:
            raise ValueError(
                f"Acceptable shapes of `stencil` are any of ({height}, {width}), "
                + f"({height}, {width}, {channels}), "
                + f"(1, {height}, {width}, {channels}) or "
                + f"({num_images_per_prompt}, {height}, {width}, {channels}) but is {controlnet_hint.shape}"
            )
elif isinstance(controlnet_hint, Image.Image):
if controlnet_hint.size == (width, height):
controlnet_hint = controlnet_hint.convert(
"RGB"
) # make sure 3 channel RGB format
controlnet_hint = np.array(controlnet_hint) # to numpy
controlnet_hint = controlnet_hint[:, :, ::-1] # RGB -> BGR
            return controlnet_hint_shaping(
                controlnet_hint, height, width, dtype, num_images_per_prompt
            )
else:
raise ValueError(
f"Acceptable image size of `stencil` is ({width}, {height}) but is {controlnet_hint.size}"
)
else:
raise ValueError(
f"Acceptable type of `stencil` are any of torch.Tensor, np.ndarray, PIL.Image.Image but is {type(controlnet_hint)}"
)
def controlnet_hint_conversion(
image, use_stencil, height, width, dtype, num_images_per_prompt=1
):
controlnet_hint = None
match use_stencil:
case "canny":
print("Detecting edge with canny")
controlnet_hint = hint_canny(image)
case "openpose":
print("Detecting human pose")
controlnet_hint = hint_openpose(image)
case "scribble":
print("Working with scribble")
controlnet_hint = hint_scribble(image)
case _:
return None
controlnet_hint = controlnet_hint_shaping(
controlnet_hint, height, width, dtype, num_images_per_prompt
)
return controlnet_hint
stencil_to_model_id_map = {
"canny": "lllyasviel/sd-controlnet-canny",
"depth": "lllyasviel/sd-controlnet-depth",
"hed": "lllyasviel/sd-controlnet-hed",
"mlsd": "lllyasviel/sd-controlnet-mlsd",
"normal": "lllyasviel/sd-controlnet-normal",
"openpose": "lllyasviel/sd-controlnet-openpose",
"scribble": "lllyasviel/sd-controlnet-scribble",
"seg": "lllyasviel/sd-controlnet-seg",
}
def get_stencil_model_id(use_stencil):
if use_stencil in stencil_to_model_id_map:
return stencil_to_model_id_map[use_stencil]
return None
# Stencil 1. Canny
def hint_canny(
image: Image.Image,
low_threshold=100,
high_threshold=200,
):
with torch.no_grad():
input_image = np.array(image)
if not "canny" in stencil:
stencil["canny"] = CannyDetector()
detected_map = stencil["canny"](
input_image, low_threshold, high_threshold
)
detected_map = HWC3(detected_map)
return detected_map
# Stencil 2. OpenPose.
def hint_openpose(
image: Image.Image,
):
with torch.no_grad():
input_image = np.array(image)
if not "openpose" in stencil:
stencil["openpose"] = OpenposeDetector()
detected_map, _ = stencil["openpose"](input_image)
detected_map = HWC3(detected_map)
return detected_map
# Stencil 3. Scribble.
def hint_scribble(image: Image.Image):
with torch.no_grad():
input_image = np.array(image)
detected_map = np.zeros_like(input_image, dtype=np.uint8)
detected_map[np.min(input_image, axis=2) < 127] = 255
return detected_map
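# End-to-end sketch (image path hypothetical; fp32 dtype assumed):
#   from PIL import Image
#   import torch
#   img = Image.open("pose.png").resize((512, 512))
#   hint = controlnet_hint_conversion(img, "openpose", 512, 512, torch.float32)
#   # hint has shape (1, 3, 512, 512), ready to condition the ControlNet UNet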

View File

@@ -1,631 +0,0 @@
import os
import gc
import json
import re
from PIL import PngImagePlugin
from datetime import datetime as dt
from csv import DictWriter
from pathlib import Path
import numpy as np
from random import randint
import tempfile
from shark.shark_inference import SharkInference
from shark.shark_importer import import_with_fx
from shark.iree_utils.vulkan_utils import (
set_iree_vulkan_runtime_flags,
get_vulkan_target_triple,
)
from shark.iree_utils.gpu_utils import get_cuda_sm_cc
from apps.stable_diffusion.src.utils.stable_args import args
from apps.stable_diffusion.src.utils.resources import opt_flags
from apps.stable_diffusion.src.utils.sd_annotation import sd_model_annotation
import sys
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
load_pipeline_from_original_stable_diffusion_ckpt,
)
def get_extended_name(model_name):
device = args.device.split("://", 1)[0]
extended_name = "{}_{}".format(model_name, device)
return extended_name
def get_vmfb_path_name(model_name):
vmfb_path = os.path.join(os.getcwd(), model_name + ".vmfb")
return vmfb_path
def _compile_module(shark_module, model_name, extra_args=[]):
if args.load_vmfb or args.save_vmfb:
vmfb_path = get_vmfb_path_name(model_name)
if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
print(f"loading existing vmfb from: {vmfb_path}")
shark_module.load_module(vmfb_path, extra_args=extra_args)
else:
if args.save_vmfb:
print("Saving to {}".format(vmfb_path))
else:
print(
"No vmfb found. Compiling and saving to {}".format(
vmfb_path
)
)
path = shark_module.save_module(
os.getcwd(), model_name, extra_args
)
shark_module.load_module(path, extra_args=extra_args)
else:
shark_module.compile(extra_args)
return shark_module
# Downloads the model from shark_tank and returns the shark_module.
def get_shark_model(tank_url, model_name, extra_args=[]):
from shark.parser import shark_args
# Set local shark_tank cache directory.
shark_args.local_tank_cache = args.local_tank_cache
from shark.shark_downloader import download_model
if "cuda" in args.device:
shark_args.enable_tf32 = True
mlir_model, func_name, inputs, golden_out = download_model(
model_name,
tank_url=tank_url,
frontend="torch",
)
shark_module = SharkInference(
mlir_model, device=args.device, mlir_dialect="linalg"
)
return _compile_module(shark_module, model_name, extra_args)
# Converts the torch-module into a shark_module.
def compile_through_fx(
model,
inputs,
model_name,
is_f16=False,
f16_input_mask=None,
use_tuned=False,
save_dir=tempfile.gettempdir(),
debug=False,
generate_vmfb=True,
extra_args=[],
):
from shark.parser import shark_args
if "cuda" in args.device:
shark_args.enable_tf32 = True
(
mlir_module,
func_name,
) = import_with_fx(
model=model,
inputs=inputs,
is_f16=is_f16,
f16_input_mask=f16_input_mask,
debug=debug,
model_name=model_name,
save_dir=save_dir,
)
if use_tuned:
if "vae" in model_name.split("_")[0]:
args.annotation_model = "vae"
mlir_module = sd_model_annotation(mlir_module, model_name)
    shark_module = SharkInference(
        mlir_module,
        device=args.device,
        mlir_dialect="linalg",
    )
    if generate_vmfb:
        del mlir_module
        gc.collect()
        return _compile_module(shark_module, model_name, extra_args)
    del mlir_module
    gc.collect()
def set_iree_runtime_flags():
vulkan_runtime_flags = [
f"--vulkan_large_heap_block_size={args.vulkan_large_heap_block_size}",
f"--vulkan_validation_layers={'true' if args.vulkan_validation_layers else 'false'}",
]
if args.enable_rgp:
        vulkan_runtime_flags += [
            "--enable_rgp=true",
            "--vulkan_debug_utils=true",
        ]
set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
def get_all_devices(driver_name):
"""
Input: driver_name
Returns a list of all available devices for the given driver, sorted by
the devices' IREE path names (as with IREE's --list_devices option).
"""
from iree.runtime import get_driver
driver = get_driver(driver_name)
device_list_src = driver.query_available_devices()
device_list_src.sort(key=lambda d: d["path"])
return device_list_src
def get_device_mapping(driver, key_combination=3):
"""This method ensures consistent device ordering when choosing
specific devices for execution
Args:
driver (str): execution driver (vulkan, cuda, rocm, etc)
key_combination (int, optional): choice for mapping value for device name.
1 : path
2 : name
3 : (name, path)
Defaults to 3.
Returns:
dict: map to possible device names user can input mapped to desired combination of name/path.
"""
from shark.iree_utils._common import iree_device_map
driver = iree_device_map(driver)
device_list = get_all_devices(driver)
device_map = dict()
def get_output_value(dev_dict):
if key_combination == 1:
return f"{driver}://{dev_dict['path']}"
if key_combination == 2:
return dev_dict["name"]
if key_combination == 3:
return (dev_dict["name"], f"{driver}://{dev_dict['path']}")
# mapping driver name to default device (driver://0)
device_map[f"{driver}"] = get_output_value(device_list[0])
for i, device in enumerate(device_list):
# mapping with index
device_map[f"{driver}://{i}"] = get_output_value(device)
# mapping with full path
device_map[f"{driver}://{device['path']}"] = get_output_value(device)
return device_map
def map_device_to_name_path(device, key_combination=3):
"""Gives the appropriate device data (supported name/path) for user selected execution device
Args:
device (str): user-selected execution device
key_combination (int, optional): choice for mapping value for device name.
1 : path
2 : name
3 : (name, path)
Defaults to 3.
Raises:
ValueError: if the device is not recognized for the given driver.
Returns:
str / tuple: returns the mapping str or tuple of mapping str for the device depending on key_combination value
"""
driver = device.split("://")[0]
device_map = get_device_mapping(driver, key_combination)
try:
device_mapping = device_map[device]
except KeyError:
raise ValueError(f"Device '{device}' is not a valid device.")
return device_mapping
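# Illustrative: map_device_to_name_path("vulkan://0") would return a tuple
# such as ("AMD Radeon RX 7900 XTX", "vulkan://0") (device name made up).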
def set_init_device_flags():
if "vulkan" in args.device:
# set runtime flags for vulkan.
set_iree_runtime_flags()
# set triple flag to avoid multiple calls to get_vulkan_triple_flag
device_name, args.device = map_device_to_name_path(args.device)
if not args.iree_vulkan_target_triple:
triple = get_vulkan_target_triple(device_name)
if triple is not None:
args.iree_vulkan_target_triple = triple
print(
f"Found device {device_name}. Using target triple {args.iree_vulkan_target_triple}."
)
elif "cuda" in args.device:
args.device = "cuda"
elif "cpu" in args.device:
args.device = "cpu"
# set max_length based on availability.
if args.hf_model_id in [
"Linaqruf/anything-v3.0",
"wavymulder/Analog-Diffusion",
"dreamlike-art/dreamlike-diffusion-1.0",
]:
args.max_length = 77
elif args.hf_model_id == "prompthero/openjourney":
args.max_length = 64
# Use tuned models only for fp16 on vulkan rdna2/rdna3 or supported cuda sm devices.
if args.ckpt_loc != "":
base_model_id = fetch_and_update_base_model_id(args.ckpt_loc)
else:
base_model_id = fetch_and_update_base_model_id(args.hf_model_id)
if base_model_id == "":
base_model_id = args.hf_model_id
if (
args.precision != "fp16"
or args.height != 512
or args.width != 512
or args.batch_size != 1
or ("vulkan" not in args.device and "cuda" not in args.device)
):
args.use_tuned = False
elif base_model_id not in [
"Linaqruf/anything-v3.0",
"dreamlike-art/dreamlike-diffusion-1.0",
"prompthero/openjourney",
"wavymulder/Analog-Diffusion",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-2-1-base",
"CompVis/stable-diffusion-v1-4",
"runwayml/stable-diffusion-v1-5",
"runwayml/stable-diffusion-inpainting",
"stabilityai/stable-diffusion-2-inpainting",
]:
args.use_tuned = False
elif "vulkan" in args.device and not any(
x in args.iree_vulkan_target_triple for x in ["rdna2", "rdna3"]
):
args.use_tuned = False
elif "cuda" in args.device and get_cuda_sm_cc() not in ["sm_80", "sm_89"]:
args.use_tuned = False
elif args.use_base_vae and args.hf_model_id not in [
"stabilityai/stable-diffusion-2-1-base",
"CompVis/stable-diffusion-v1-4",
]:
args.use_tuned = False
if args.use_tuned:
print(f"Using tuned models for {base_model_id}/fp16/{args.device}.")
else:
print("Tuned models are currently not supported for this setting.")
# set import_mlir to True for unuploaded models.
if args.ckpt_loc != "":
args.import_mlir = True
elif args.hf_model_id not in [
"Linaqruf/anything-v3.0",
"dreamlike-art/dreamlike-diffusion-1.0",
"prompthero/openjourney",
"wavymulder/Analog-Diffusion",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-2-1-base",
"CompVis/stable-diffusion-v1-4",
]:
args.import_mlir = True
elif args.height != 512 or args.width != 512 or args.batch_size != 1:
args.import_mlir = True
elif args.use_tuned and args.hf_model_id in [
"dreamlike-art/dreamlike-diffusion-1.0",
"prompthero/openjourney",
"stabilityai/stable-diffusion-2-1",
]:
args.import_mlir = True
elif (
args.use_tuned
and "vulkan" in args.device
and "rdna2" in args.iree_vulkan_target_triple
):
args.import_mlir = True
elif (
args.use_tuned
and "cuda" in args.device
and get_cuda_sm_cc() == "sm_89"
):
args.import_mlir = True
# Utility to get list of devices available.
def get_available_devices():
def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map
device_list = []
try:
driver_name = iree_device_map(driver_name)
device_list_dict = get_all_devices(driver_name)
print(f"{driver_name} devices are available.")
except Exception:
print(f"{driver_name} devices are not available.")
else:
for i, device in enumerate(device_list_dict):
device_list.append(f"{device['name']} => {driver_name}://{i}")
return device_list
set_iree_runtime_flags()
available_devices = []
vulkan_devices = get_devices_by_name("vulkan")
available_devices.extend(vulkan_devices)
cuda_devices = get_devices_by_name("cuda")
available_devices.extend(cuda_devices)
available_devices.append("cpu")
return available_devices
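# Illustrative return value (device names are made up):
#   ["AMD Radeon RX 7900 XTX => vulkan://0", "NVIDIA GeForce RTX 4090 => cuda://0", "cpu"]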
def disk_space_check(path, lim=20):
from shutil import disk_usage
du = disk_usage(path)
free = du.free / (1024 * 1024 * 1024)
if free <= lim:
print(f"[WARNING] Only {free:.2f}GB space available in {path}.")
def get_opt_flags(model, precision="fp16"):
iree_flags = []
is_tuned = "tuned" if args.use_tuned else "untuned"
if len(args.iree_vulkan_target_triple) > 0:
iree_flags.append(
f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
)
# Disable bindings fusion to work with moltenVK.
if sys.platform == "darwin":
iree_flags.append("-iree-stream-fuse-binding=false")
if "default_compilation_flags" in opt_flags[model][is_tuned][precision]:
iree_flags += opt_flags[model][is_tuned][precision][
"default_compilation_flags"
]
if "specified_compilation_flags" in opt_flags[model][is_tuned][precision]:
device = (
args.device
if "://" not in args.device
else args.device.split("://")[0]
)
if (
device
not in opt_flags[model][is_tuned][precision][
"specified_compilation_flags"
]
):
device = "default_device"
iree_flags += opt_flags[model][is_tuned][precision][
"specified_compilation_flags"
][device]
return iree_flags
def get_path_stem(path):
path = Path(path)
return path.stem
def get_path_to_diffusers_checkpoint(custom_weights):
path = Path(custom_weights)
diffusers_path = path.parent.absolute()
diffusers_directory_name = path.stem
complete_path_to_diffusers = diffusers_path / diffusers_directory_name
complete_path_to_diffusers.mkdir(parents=True, exist_ok=True)
path_to_diffusers = complete_path_to_diffusers.as_posix()
return path_to_diffusers
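# Illustrative: for a hypothetical custom_weights of "models/foo.safetensors",
# this creates (if needed) and returns the sibling directory "models/foo".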
def preprocessCKPT(custom_weights, is_inpaint=False):
path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights)
if next(Path(path_to_diffusers).iterdir(), None):
print("Checkpoint already loaded at : ", path_to_diffusers)
return
else:
print(
"Diffusers' checkpoint will be identified here:",
path_to_diffusers,
)
from_safetensors = custom_weights.lower().endswith(".safetensors")
# EMA weights usually yield higher quality images for inference but non-EMA weights have
# been yielding better results in our case.
# TODO: Add an option `--ema` (`--no-ema`) for users to specify if they want to go for EMA
# weight extraction or not.
extract_ema = False
print(
"Loading diffusers' pipeline from original stable diffusion checkpoint"
)
num_in_channels = 9 if is_inpaint else 4
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
checkpoint_path=custom_weights,
extract_ema=extract_ema,
from_safetensors=from_safetensors,
num_in_channels=num_in_channels,
)
pipe.save_pretrained(path_to_diffusers)
print("Loading complete")
def load_vmfb(vmfb_path, model, precision):
model = "vae" if "base_vae" in model or "vae_encode" in model else model
model = "unet" if "stencil" in model else model
precision = "fp32" if "clip" in model else precision
extra_args = get_opt_flags(model, precision)
shark_module = SharkInference(mlir_module=None, device=args.device)
shark_module.load_module(vmfb_path, extra_args=extra_args)
return shark_module
# This utility returns the vmfbs of CLIP, UNet, VAE and VAE-encode if all of
# them are present; otherwise it deletes any partially compiled set.
def fetch_or_delete_vmfbs(extended_model_name, precision="fp32"):
vmfb_path = [
get_vmfb_path_name(extended_model_name[model])
for model in extended_model_name
]
number_of_vmfbs = len(vmfb_path)
vmfb_present = [os.path.isfile(vmfb) for vmfb in vmfb_path]
all_vmfb_present = True
compiled_models = [None] * number_of_vmfbs
for i in range(number_of_vmfbs):
all_vmfb_present = all_vmfb_present and vmfb_present[i]
# We need to delete vmfbs only if some of the models were compiled.
if not all_vmfb_present:
for i in range(number_of_vmfbs):
if vmfb_present[i]:
os.remove(vmfb_path[i])
print("Deleted: ", vmfb_path[i])
else:
model_name = [model for model in extended_model_name.keys()]
for i in range(number_of_vmfbs):
compiled_models[i] = load_vmfb(
vmfb_path[i], model_name[i], precision
)
return compiled_models
# `fetch_and_update_base_model_id` is a resource utility function that helps
# maintain the mapping from a model to run to its base model.
# If `base_model` is "", then this function tries to fetch the base model
# info for the `model_to_run`.
def fetch_and_update_base_model_id(model_to_run, base_model=""):
variants_path = os.path.join(os.getcwd(), "variants.json")
data = {model_to_run: base_model}
json_data = {}
if os.path.exists(variants_path):
with open(variants_path, "r", encoding="utf-8") as jsonFile:
json_data = json.load(jsonFile)
# Return with base_model's info if base_model is "".
if base_model == "":
if model_to_run in json_data:
base_model = json_data[model_to_run]
return base_model
elif base_model == "":
return base_model
# Update JSON data to contain an entry mapping model_to_run with base_model.
json_data.update(data)
with open(variants_path, "w", encoding="utf-8") as jsonFile:
json.dump(json_data, jsonFile)
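# Illustrative usage (model IDs below are examples only):
#   fetch_and_update_base_model_id("my.ckpt", "runwayml/stable-diffusion-v1-5")  # record mapping
#   fetch_and_update_base_model_id("my.ckpt")  # -> "runwayml/stable-diffusion-v1-5"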
# Generate and return a new seed if the provided one is not in the supported range (including -1)
def sanitize_seed(seed):
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max - 1)
return seed
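# Illustrative: sanitize_seed(-1) yields a fresh random uint32 seed, while an
# in-range value such as 42 is returned unchanged.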
# clear all the cached objects to recompile cleanly.
def clear_all():
print("CLEARING ALL, EXPECT SEVERAL MINUTES TO RECOMPILE")
from glob import glob
import shutil
vmfbs = glob(os.path.join(os.getcwd(), "*.vmfb"))
for vmfb in vmfbs:
if os.path.exists(vmfb):
os.remove(vmfb)
# Temporary workaround of deleting yaml files to incorporate diffusers' pipeline.
# TODO: Remove this once we have better weight update logic.
inference_yaml = ["v2-inference-v.yaml", "v1-inference.yaml"]
for yaml in inference_yaml:
if os.path.exists(yaml):
os.remove(yaml)
home = os.path.expanduser("~")
if os.name == "nt": # Windows
appdata = os.getenv("LOCALAPPDATA")
shutil.rmtree(os.path.join(appdata, "AMD/VkCache"), ignore_errors=True)
shutil.rmtree(os.path.join(home, "shark_tank"), ignore_errors=True)
elif os.name == "unix":
shutil.rmtree(os.path.join(home, ".cache/AMD/VkCache"))
shutil.rmtree(os.path.join(home, ".local/shark_tank"))
# Save output images and the inputs corresponding to them.
def save_output_img(output_img, img_seed, extra_info={}):
output_path = args.output_dir if args.output_dir else Path.cwd()
generated_imgs_path = Path(
output_path, "generated_imgs", dt.now().strftime("%Y%m%d")
)
generated_imgs_path.mkdir(parents=True, exist_ok=True)
csv_path = Path(generated_imgs_path, "imgs_details.csv")
prompt_slice = re.sub("[^a-zA-Z0-9]", "_", args.prompts[0][:15])
out_img_name = (
f"{prompt_slice}_{img_seed}_{dt.now().strftime('%y%m%d_%H%M%S')}"
)
img_model = args.hf_model_id
if args.ckpt_loc:
img_model = Path(os.path.basename(args.ckpt_loc)).stem
if args.output_img_format == "jpg":
out_img_path = Path(generated_imgs_path, f"{out_img_name}.jpg")
output_img.save(out_img_path, quality=95, subsampling=0)
else:
out_img_path = Path(generated_imgs_path, f"{out_img_name}.png")
pngInfo = PngImagePlugin.PngInfo()
if args.write_metadata_to_png:
pngInfo.add_text(
"parameters",
f"{args.prompts[0]}\nNegative prompt: {args.negative_prompts[0]}\nSteps:{args.steps}, Sampler: {args.scheduler}, CFG scale: {args.guidance_scale}, Seed: {img_seed}, Size: {args.width}x{args.height}, Model: {img_model}",
)
output_img.save(out_img_path, "PNG", pnginfo=pngInfo)
if args.output_img_format not in ["png", "jpg"]:
print(
f"[ERROR] Format {args.output_img_format} is not supported yet. "
"Image saved as png instead. Supported formats: png / jpg"
)
new_entry = {
"VARIANT": img_model,
"SCHEDULER": args.scheduler,
"PROMPT": args.prompts[0],
"NEG_PROMPT": args.negative_prompts[0],
"SEED": img_seed,
"CFG_SCALE": args.guidance_scale,
"PRECISION": args.precision,
"STEPS": args.steps,
"HEIGHT": args.height,
"WIDTH": args.width,
"MAX_LENGTH": args.max_length,
"OUTPUT": out_img_path,
}
new_entry.update(extra_info)
with open(csv_path, "a", encoding="utf-8") as csv_obj:
dictwriter_obj = DictWriter(csv_obj, fieldnames=list(new_entry.keys()))
dictwriter_obj.writerow(new_entry)
csv_obj.close()
if args.save_metadata_to_json:
del new_entry["OUTPUT"]
json_path = Path(generated_imgs_path, f"{out_img_name}.json")
with open(json_path, "w") as f:
json.dump(new_entry, f, indent=4)

View File

@@ -1,15 +0,0 @@
You first need to create your bot (https://core.telegram.org/bots#how-do-i-create-a-bot).
Then create a .env file in the web directory containing the record:
TG_TOKEN="your_token"
where the value is your bot's token from the previous step.
Then run telegram_bot.py with the same parameters that you use when running index.py, for example:
python telegram_bot.py --max_length=77 --vulkan_large_heap_block_size=0 --use_base_vae --local_tank_cache h:\shark\TEMP
Bot commands:
/select_model
/select_scheduler
/set_steps "integer number of steps"
/set_guidance_scale "integer number"
/set_negative_prompt "negative text"
Any other text triggers the creation of an image based on it.

View File

@@ -1,201 +0,0 @@
import os
import sys
if sys.platform == "darwin":
os.environ["DYLD_LIBRARY_PATH"] = "/usr/local/lib"
import gradio as gr
import apps.stable_diffusion.web.utils.global_obj as global_obj
from apps.stable_diffusion.src import args, clear_all
from apps.stable_diffusion.web.utils.gradio_configs import (
clear_gradio_tmp_imgs_folder,
)
from apps.stable_diffusion.web.ui.utils import get_custom_model_path
# Clear all gradio tmp images from the last session
clear_gradio_tmp_imgs_folder()
# Create the custom model folder if it doesn't already exist
get_custom_model_path().mkdir(parents=True, exist_ok=True)
if args.clear_all:
clear_all()
def resource_path(relative_path):
"""Get absolute path to resource, works for dev and for PyInstaller"""
base_path = getattr(
sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
)
return os.path.join(base_path, relative_path)
dark_theme = resource_path("ui/css/sd_dark_theme.css")
from apps.stable_diffusion.web.ui import (
txt2img_web,
txt2img_gallery,
txt2img_sendto_img2img,
txt2img_sendto_inpaint,
txt2img_sendto_outpaint,
txt2img_sendto_upscaler,
img2img_web,
img2img_gallery,
img2img_init_image,
img2img_sendto_inpaint,
img2img_sendto_outpaint,
img2img_sendto_upscaler,
inpaint_web,
inpaint_gallery,
inpaint_init_image,
inpaint_sendto_img2img,
inpaint_sendto_outpaint,
inpaint_sendto_upscaler,
outpaint_web,
outpaint_gallery,
outpaint_init_image,
outpaint_sendto_img2img,
outpaint_sendto_inpaint,
outpaint_sendto_upscaler,
upscaler_web,
upscaler_gallery,
upscaler_init_image,
upscaler_sendto_img2img,
upscaler_sendto_inpaint,
upscaler_sendto_outpaint,
)
# init global sd pipeline and config
global_obj.init()
def register_button_click(button, selectedid, inputs, outputs):
button.click(
lambda x: (
x[0]["name"] if len(x) != 0 else None,
gr.Tabs.update(selected=selectedid),
),
inputs,
outputs,
)
with gr.Blocks(
css=dark_theme, analytics_enabled=False, title="Stable Diffusion"
) as sd_web:
with gr.Tabs() as tabs:
with gr.TabItem(label="Text-to-Image", id=0):
txt2img_web.render()
with gr.TabItem(label="Image-to-Image", id=1):
img2img_web.render()
with gr.TabItem(label="Inpainting", id=2):
inpaint_web.render()
with gr.TabItem(label="Outpainting", id=3):
outpaint_web.render()
with gr.TabItem(label="Upscaler", id=4):
upscaler_web.render()
register_button_click(
txt2img_sendto_img2img,
1,
[txt2img_gallery],
[img2img_init_image, tabs],
)
register_button_click(
txt2img_sendto_inpaint,
2,
[txt2img_gallery],
[inpaint_init_image, tabs],
)
register_button_click(
txt2img_sendto_outpaint,
3,
[txt2img_gallery],
[outpaint_init_image, tabs],
)
register_button_click(
txt2img_sendto_upscaler,
4,
[txt2img_gallery],
[upscaler_init_image, tabs],
)
register_button_click(
img2img_sendto_inpaint,
2,
[img2img_gallery],
[inpaint_init_image, tabs],
)
register_button_click(
img2img_sendto_outpaint,
3,
[img2img_gallery],
[outpaint_init_image, tabs],
)
register_button_click(
img2img_sendto_upscaler,
4,
[img2img_gallery],
[upscaler_init_image, tabs],
)
register_button_click(
inpaint_sendto_img2img,
1,
[inpaint_gallery],
[img2img_init_image, tabs],
)
register_button_click(
inpaint_sendto_outpaint,
3,
[inpaint_gallery],
[outpaint_init_image, tabs],
)
register_button_click(
inpaint_sendto_upscaler,
4,
[inpaint_gallery],
[upscaler_init_image, tabs],
)
register_button_click(
outpaint_sendto_img2img,
1,
[outpaint_gallery],
[img2img_init_image, tabs],
)
register_button_click(
outpaint_sendto_inpaint,
2,
[outpaint_gallery],
[inpaint_init_image, tabs],
)
register_button_click(
outpaint_sendto_upscaler,
4,
[outpaint_gallery],
[upscaler_init_image, tabs],
)
register_button_click(
upscaler_sendto_img2img,
1,
[upscaler_gallery],
[img2img_init_image, tabs],
)
register_button_click(
upscaler_sendto_inpaint,
2,
[upscaler_gallery],
[inpaint_init_image, tabs],
)
register_button_click(
upscaler_sendto_outpaint,
3,
[upscaler_gallery],
[outpaint_init_image, tabs],
)
sd_web.queue()
sd_web.launch(
share=args.share,
inbrowser=True,
server_name="0.0.0.0",
server_port=args.server_port,
)

View File

@@ -1,40 +0,0 @@
from apps.stable_diffusion.web.ui.txt2img_ui import (
txt2img_web,
txt2img_gallery,
txt2img_sendto_img2img,
txt2img_sendto_inpaint,
txt2img_sendto_outpaint,
txt2img_sendto_upscaler,
)
from apps.stable_diffusion.web.ui.img2img_ui import (
img2img_web,
img2img_gallery,
img2img_init_image,
img2img_sendto_inpaint,
img2img_sendto_outpaint,
img2img_sendto_upscaler,
)
from apps.stable_diffusion.web.ui.inpaint_ui import (
inpaint_web,
inpaint_gallery,
inpaint_init_image,
inpaint_sendto_img2img,
inpaint_sendto_outpaint,
inpaint_sendto_upscaler,
)
from apps.stable_diffusion.web.ui.outpaint_ui import (
outpaint_web,
outpaint_gallery,
outpaint_init_image,
outpaint_sendto_img2img,
outpaint_sendto_inpaint,
outpaint_sendto_upscaler,
)
from apps.stable_diffusion.web.ui.upscaler_ui import (
upscaler_web,
upscaler_gallery,
upscaler_init_image,
upscaler_sendto_img2img,
upscaler_sendto_inpaint,
upscaler_sendto_outpaint,
)

View File

@@ -1,192 +0,0 @@
/*
Apply Gradio dark theme to the default Gradio theme.
Procedure to upgrade the dark theme:
- Using your browser, visit http://localhost:8080/?__theme=dark
- Open your browser inspector, search for the .dark css class
- Copy .dark class declarations, apply them here into :root
*/
:root {
--color-accent-soft: var(--neutral-700);
--color-background-primary: var(--neutral-950);
--color-background-secondary: var(--neutral-900);
--color-border-accent: var(--neutral-600);
--color-border-primary: var(--neutral-700);
--text-color-code-background: var(--neutral-800);
--text-color-link-active: var(--secondary-500);
--text-color-link: var(--secondary-500);
--text-color-link-hover: var(--secondary-400);
--text-color-link-visited: var(--secondary-600);
--text-color-subdued: var(--neutral-400);
--body-background-color: var(--color-background-primary);
--body-text-color: var(--neutral-100);
--shadow-spread: 1px;
--block-background: var(--neutral-800);
--block-border-color: var(--color-border-primary);
--block-border-width: 1px;
--block-info-color: var(--text-color-subdued);
--block-label-background: var(--color-background-secondary);
--block-label-border-color: var(--color-border-primary);
--block-label-border-width: 1px;
--block-label-color: var(--neutral-200);
--block-shadow: none;
--block-title-background: none;
--block-title-border-color: none;
--block-title-border-width: 0px;
--block-title-color: var(--neutral-200);
--panel-background: var(--color-background-secondary);
--panel-border-color: var(--color-border-primary);
--checkbox-background: var(--neutral-800);
--checkbox-background-focus: var(--checkbox-background);
--checkbox-background-hover: var(--checkbox-background);
--checkbox-background-selected: var(--secondary-600);
--checkbox-border-color: var(--neutral-700);
--checkbox-border-color-focus: var(--secondary-500);
--checkbox-border-color-hover: var(--neutral-600);
--checkbox-border-color-selected: var(--secondary-600);
--checkbox-label-background: linear-gradient(to top, var(--neutral-900), var(--neutral-800));
--checkbox-label-background-hover: linear-gradient(to top, var(--neutral-900), var(--neutral-800));
--checkbox-label-background-selected: var(--checkbox-label-background);
--checkbox-label-border-color: var(--color-border-primary);
--checkbox-label-border-color-hover: var(--color-border-primary);
--checkbox-text-color: var(--body-text-color);
--checkbox-text-color-selected: var(--checkbox-text-color);
--error-background: var(--color-background-primary);
--error-border-color: var(--color-border-primary);
--error-border-width: var(--error-border-width);
--error-color: #ef4444;
--input-background: var(--neutral-800);
--input-background-focus: var(--secondary-600);
--input-background-hover: var(--input-background);
--input-border-color: var(--color-border-primary);
--input-border-color-focus: var(--neutral-700);
--input-border-color-hover: var(--color-border-primary);
--input-placeholder-color: var(--neutral-500);
--input-shadow: var(--input-shadow);
--input-shadow-focus: 0 0 0 var(--shadow-spread) var(--neutral-700), var(--shadow-inset);
--loader-color: var(--color-accent);
--stat-color-background: linear-gradient(to right, var(--primary-400), var(--primary-600));
--table-border-color: var(--neutral-700);
--table-even-background: var(--neutral-950);
--table-odd-background: var(--neutral-900);
--table-row-focus: var(--color-accent-soft);
--button-cancel-background: linear-gradient(to bottom right, #dc2626, #b91c1c);
--button-cancel-background-hover: linear-gradient(to bottom right, #dc2626, #dc2626);
--button-cancel-border-color: #dc2626;
--button-cancel-border-color-hover: var(--button-cancel-border-color);
--button-cancel-text-color: white;
--button-cancel-text-color-hover: var(--button-cancel-text-color);
--button-primary-background: linear-gradient(to bottom right, var(--primary-600), var(--primary-700));
--button-primary-background-hover: linear-gradient(to bottom right, var(--primary-600), var(--primary-600));
--button-primary-border-color: var(--primary-600);
--button-primary-border-color-hover: var(--button-primary-border-color);
--button-primary-text-color: white;
--button-primary-text-color-hover: var(--button-primary-text-color);
--button-secondary-background: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-700));
--button-secondary-background-hover: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-600));
--button-secondary-border-color: var(--neutral-600);
--button-secondary-border-color-hover: var(--button-secondary-border-color);
--button-secondary-text-color: white;
--button-secondary-text-color-hover: var(--button-secondary-text-color);
}
/* SHARK theme */
body {
background-color: var(--color-background-primary);
}
/* display in full width for desktop devices */
@media (min-width: 1536px)
{
.gradio-container {
max-width: var(--size-full) !important;
}
}
.gradio-container .contain {
padding: 0 var(--size-4) !important;
}
.container {
background-color: black !important;
padding-top: var(--size-5) !important;
}
#ui_title {
padding: var(--size-2) 0 0 var(--size-1);
}
#top_logo {
background-color: transparent;
border-radius: 0 !important;
border: 0;
}
#demo_title_outer {
border-radius: 0;
}
#prompt_box_outer div:first-child {
border-radius: 0 !important
}
#prompt_box textarea, #negative_prompt_box textarea {
background-color: var(--color-background-primary) !important;
}
#prompt_examples {
margin: 0 !important;
}
#prompt_examples svg {
display: none !important;
}
#ui_body {
background-color: var(--color-background-secondary) !important;
padding: var(--size-2) !important;
border-radius: 0.5em !important;
}
#img_result+div {
display: none !important;
}
footer {
display: none !important;
}
#gallery + div {
border-radius: 0 !important;
}
/* Prevent the progress bar from blocking gallery navigation while building images (Gradio v3.19.0) */
#gallery .wrap.default {
pointer-events: none;
}
/* Import Png info box */
#txt2img_prompt_image .fixed-height {
height: var(--size-32);
}
/* Hide "remove buttons" from ui dropdowns */
#custom_model .token-remove.remove-all,
#scheduler .token-remove.remove-all,
#device .token-remove.remove-all,
#stencil_model .token-remove.remove-all {
display: none;
}
/* Hide selected items from ui dropdowns */
#custom_model .options .item .inner-item,
#scheduler .options .item .inner-item,
#device .options .item .inner-item,
#stencil_model .options .item .inner-item {
display:none;
}
/* Hide the download icon from the nod logo */
#top_logo .download {
display: none;
}

View File

@@ -1,241 +0,0 @@
from pathlib import Path
import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import img2img_inf
from apps.stable_diffusion.src import args
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
get_custom_model_path,
get_custom_model_files,
scheduler_list,
predefined_models,
)
with gr.Blocks(title="Image-to-Image") as img2img_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Row():
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=50)
with gr.Row(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=1, min_width=600):
with gr.Row():
custom_model = gr.Dropdown(
label=f"Models (Custom Model path: {get_custom_model_path()})",
elem_id="custom_model",
value=os.path.basename(args.ckpt_loc)
if args.ckpt_loc
else "None",
choices=["None"]
+ get_custom_model_files()
+ predefined_models,
)
hf_model_id = gr.Textbox(
elem_id="hf_model_id",
placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="Prompt",
value=args.prompts[0],
lines=1,
elem_id="prompt_box",
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
value=args.negative_prompts[0],
lines=1,
elem_id="negative_prompt_box",
)
img2img_init_image = gr.Image(
label="Input Image", type="pil"
).style(height=300)
with gr.Accordion(label="Stencil Options", open=False):
with gr.Row():
use_stencil = gr.Dropdown(
elem_id="stencil_model",
label="Stencil model",
value="None",
choices=["None", "canny", "openpose", "scribble"],
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
elem_id="scheduler",
label="Scheduler",
value="PNDM",
choices=scheduler_list,
)
with gr.Group():
save_metadata_to_png = gr.Checkbox(
label="Save prompt information to PNG",
value=args.write_metadata_to_png,
interactive=True,
)
save_metadata_to_json = gr.Checkbox(
label="Save prompt information to JSON file",
value=args.save_metadata_to_json,
interactive=True,
)
with gr.Row():
height = gr.Slider(
384, 768, value=args.height, step=8, label="Height"
)
width = gr.Slider(
384, 768, value=args.width, step=8, label="Width"
)
precision = gr.Radio(
label="Precision",
value=args.precision,
choices=[
"fp16",
"fp32",
],
visible=True,
)
max_length = gr.Radio(
label="Max Length",
value=args.max_length,
choices=[
64,
77,
],
visible=False,
)
with gr.Row():
steps = gr.Slider(
1, 100, value=args.steps, step=1, label="Steps"
)
strength = gr.Slider(
0,
1,
value=args.strength,
step=0.01,
label="Denoising Strength",
)
with gr.Row():
guidance_scale = gr.Slider(
0,
50,
value=args.guidance_scale,
step=0.1,
label="CFG Scale",
)
batch_count = gr.Slider(
1,
100,
value=args.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=args.batch_size,
step=1,
label="Batch Size",
interactive=False,
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
)
device = gr.Dropdown(
elem_id="device",
label="Device",
value=available_devices[0],
choices=available_devices,
)
with gr.Row():
with gr.Column(scale=2):
random_seed = gr.Button("Randomize Seed")
random_seed.click(
None,
inputs=[],
outputs=[seed],
_js="() => -1",
)
with gr.Column(scale=6):
stable_diffusion = gr.Button("Generate Image(s)")
with gr.Column(scale=1, min_width=150):
clear_queue = gr.Button("Clear Queue")
with gr.Column(scale=1, min_width=600):
with gr.Group():
img2img_gallery = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
).style(grid=[2])
std_output = gr.Textbox(
value="Nothing to show.",
lines=1,
show_label=False,
)
output_dir = args.output_dir if args.output_dir else Path.cwd()
output_dir = Path(output_dir, "generated_imgs")
output_loc = gr.Textbox(
label="Saving Images at",
value=output_dir,
interactive=False,
)
with gr.Row():
img2img_sendto_inpaint = gr.Button(value="SendTo Inpaint")
img2img_sendto_outpaint = gr.Button(
value="SendTo Outpaint"
)
img2img_sendto_upscaler = gr.Button(
value="SendTo Upscaler"
)
kwargs = dict(
fn=img2img_inf,
inputs=[
prompt,
negative_prompt,
img2img_init_image,
height,
width,
steps,
strength,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
custom_model,
hf_model_id,
precision,
device,
max_length,
use_stencil,
save_metadata_to_json,
save_metadata_to_png,
],
outputs=[img2img_gallery, std_output],
show_progress=args.progress_bar,
)
prompt_submit = prompt.submit(**kwargs)
neg_prompt_submit = negative_prompt.submit(**kwargs)
generate_click = stable_diffusion.click(**kwargs)
clear_queue.click(
fn=None, cancels=[prompt_submit, neg_prompt_submit, generate_click]
)

View File

@@ -1,243 +0,0 @@
from pathlib import Path
import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import inpaint_inf
from apps.stable_diffusion.src import args
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
get_custom_model_path,
get_custom_model_files,
scheduler_list,
predefined_paint_models,
)
with gr.Blocks(title="Inpainting") as inpaint_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Row():
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=50)
with gr.Row(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=1, min_width=600):
with gr.Row():
custom_model = gr.Dropdown(
label=f"Models (Custom Model path: {get_custom_model_path()})",
elem_id="custom_model",
value=os.path.basename(args.ckpt_loc)
if args.ckpt_loc
else "None",
choices=["None"]
+ get_custom_model_files()
+ predefined_paint_models,
)
hf_model_id = gr.Textbox(
elem_id="hf_model_id",
placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: ghunkins/stable-diffusion-liberty-inpainting",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="Prompt",
value=args.prompts[0],
lines=1,
elem_id="prompt_box",
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
value=args.negative_prompts[0],
lines=1,
elem_id="negative_prompt_box",
)
inpaint_init_image = gr.Image(
label="Masked Image",
source="upload",
tool="sketch",
type="pil",
).style(height=350)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
elem_id="scheduler",
label="Scheduler",
value="PNDM",
choices=scheduler_list,
)
with gr.Group():
save_metadata_to_png = gr.Checkbox(
label="Save prompt information to PNG",
value=args.write_metadata_to_png,
interactive=True,
)
save_metadata_to_json = gr.Checkbox(
label="Save prompt information to JSON file",
value=args.save_metadata_to_json,
interactive=True,
)
with gr.Row():
height = gr.Slider(
384, 768, value=args.height, step=8, label="Height"
)
width = gr.Slider(
384, 768, value=args.width, step=8, label="Width"
)
precision = gr.Radio(
label="Precision",
value=args.precision,
choices=[
"fp16",
"fp32",
],
visible=False,
)
max_length = gr.Radio(
label="Max Length",
value=args.max_length,
choices=[
64,
77,
],
visible=False,
)
with gr.Row():
inpaint_full_res = gr.Radio(
choices=["Whole picture", "Only masked"],
type="index",
value="Whole picture",
label="Inpaint area",
)
inpaint_full_res_padding = gr.Slider(
minimum=0,
maximum=256,
step=4,
value=32,
label="Only masked padding, pixels",
)
with gr.Row():
steps = gr.Slider(
1, 100, value=args.steps, step=1, label="Steps"
)
with gr.Row():
guidance_scale = gr.Slider(
0,
50,
value=args.guidance_scale,
step=0.1,
label="CFG Scale",
)
batch_count = gr.Slider(
1,
100,
value=args.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=args.batch_size,
step=1,
label="Batch Size",
interactive=False,
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
)
device = gr.Dropdown(
elem_id="device",
label="Device",
value=available_devices[0],
choices=available_devices,
)
with gr.Row():
with gr.Column(scale=2):
random_seed = gr.Button("Randomize Seed")
random_seed.click(
None,
inputs=[],
outputs=[seed],
_js="() => -1",
)
with gr.Column(scale=6):
stable_diffusion = gr.Button("Generate Image(s)")
with gr.Column(scale=1, min_width=150):
clear_queue = gr.Button("Clear Queue")
with gr.Column(scale=1, min_width=600):
with gr.Group():
inpaint_gallery = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
).style(grid=[2])
std_output = gr.Textbox(
value="Nothing to show.",
lines=1,
show_label=False,
)
output_dir = args.output_dir if args.output_dir else Path.cwd()
output_dir = Path(output_dir, "generated_imgs")
output_loc = gr.Textbox(
label="Saving Images at",
value=output_dir,
interactive=False,
)
with gr.Row():
inpaint_sendto_img2img = gr.Button(value="SendTo Img2Img")
inpaint_sendto_outpaint = gr.Button(
value="SendTo Outpaint"
)
inpaint_sendto_upscaler = gr.Button(
value="SendTo Upscaler"
)
kwargs = dict(
fn=inpaint_inf,
inputs=[
prompt,
negative_prompt,
inpaint_init_image,
height,
width,
inpaint_full_res,
inpaint_full_res_padding,
steps,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
custom_model,
hf_model_id,
precision,
device,
max_length,
save_metadata_to_json,
save_metadata_to_png,
],
outputs=[inpaint_gallery, std_output],
show_progress=args.progress_bar,
)
prompt_submit = prompt.submit(**kwargs)
neg_prompt_submit = negative_prompt.submit(**kwargs)
generate_click = stable_diffusion.click(**kwargs)
clear_queue.click(
fn=None, cancels=[prompt_submit, neg_prompt_submit, generate_click]
)

Binary file not shown.


View File

@@ -1,263 +0,0 @@
from pathlib import Path
import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import outpaint_inf
from apps.stable_diffusion.src import args
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
get_custom_model_path,
get_custom_model_files,
scheduler_list,
predefined_paint_models,
)
with gr.Blocks(title="Outpainting") as outpaint_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Row():
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=50)
with gr.Row(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=1, min_width=600):
with gr.Row():
custom_model = gr.Dropdown(
label=f"Models (Custom Model path: {get_custom_model_path()})",
elem_id="custom_model",
value=os.path.basename(args.ckpt_loc)
if args.ckpt_loc
else "None",
choices=["None"]
+ get_custom_model_files()
+ predefined_paint_models,
)
hf_model_id = gr.Textbox(
elem_id="hf_model_id",
placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: ghunkins/stable-diffusion-liberty-inpainting",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="Prompt",
value=args.prompts[0],
lines=1,
elem_id="prompt_box",
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
value=args.negative_prompts[0],
lines=1,
elem_id="negative_prompt_box",
)
outpaint_init_image = gr.Image(
label="Input Image", type="pil"
).style(height=300)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
elem_id="scheduler",
label="Scheduler",
value="PNDM",
choices=scheduler_list,
)
with gr.Group():
save_metadata_to_png = gr.Checkbox(
label="Save prompt information to PNG",
value=args.write_metadata_to_png,
interactive=True,
)
save_metadata_to_json = gr.Checkbox(
label="Save prompt information to JSON file",
value=args.save_metadata_to_json,
interactive=True,
)
with gr.Row():
pixels = gr.Slider(
8,
256,
value=args.pixels,
step=8,
label="Pixels to expand",
)
mask_blur = gr.Slider(
0,
64,
value=args.mask_blur,
step=1,
label="Mask blur",
)
with gr.Row():
directions = gr.CheckboxGroup(
label="Outpainting direction",
choices=["left", "right", "up", "down"],
value=["left", "right", "up", "down"],
)
with gr.Row():
noise_q = gr.Slider(
0.0,
4.0,
value=1.0,
step=0.01,
label="Fall-off exponent (lower=higher detail)",
)
color_variation = gr.Slider(
0.0,
1.0,
value=0.05,
step=0.01,
label="Color variation",
)
with gr.Row():
height = gr.Slider(
384, 768, value=args.height, step=8, label="Height"
)
width = gr.Slider(
384, 768, value=args.width, step=8, label="Width"
)
precision = gr.Radio(
label="Precision",
value=args.precision,
choices=[
"fp16",
"fp32",
],
visible=False,
)
max_length = gr.Radio(
label="Max Length",
value=args.max_length,
choices=[
64,
77,
],
visible=False,
)
with gr.Row():
steps = gr.Slider(
1, 100, value=20, step=1, label="Steps"
)
with gr.Row():
guidance_scale = gr.Slider(
0,
50,
value=args.guidance_scale,
step=0.1,
label="CFG Scale",
)
batch_count = gr.Slider(
1,
100,
value=args.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=args.batch_size,
step=1,
label="Batch Size",
interactive=False,
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
)
device = gr.Dropdown(
elem_id="device",
label="Device",
value=available_devices[0],
choices=available_devices,
)
with gr.Row():
with gr.Column(scale=2):
random_seed = gr.Button("Randomize Seed")
random_seed.click(
None,
inputs=[],
outputs=[seed],
_js="() => -1",
)
with gr.Column(scale=6):
stable_diffusion = gr.Button("Generate Image(s)")
with gr.Column(scale=1, min_width=150):
clear_queue = gr.Button("Clear Queue")
with gr.Column(scale=1, min_width=600):
with gr.Group():
outpaint_gallery = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
).style(grid=[2])
std_output = gr.Textbox(
value="Nothing to show.",
lines=1,
show_label=False,
)
output_dir = args.output_dir if args.output_dir else Path.cwd()
output_dir = Path(output_dir, "generated_imgs")
output_loc = gr.Textbox(
label="Saving Images at",
value=output_dir,
interactive=False,
)
with gr.Row():
outpaint_sendto_img2img = gr.Button(value="SendTo Img2Img")
outpaint_sendto_inpaint = gr.Button(value="SendTo Inpaint")
outpaint_sendto_upscaler = gr.Button(
value="SendTo Upscaler"
)
kwargs = dict(
fn=outpaint_inf,
inputs=[
prompt,
negative_prompt,
outpaint_init_image,
pixels,
mask_blur,
directions,
noise_q,
color_variation,
height,
width,
steps,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
custom_model,
hf_model_id,
precision,
device,
max_length,
save_metadata_to_json,
save_metadata_to_png,
],
outputs=[outpaint_gallery, std_output],
show_progress=args.progress_bar,
)
prompt_submit = prompt.submit(**kwargs)
neg_prompt_submit = negative_prompt.submit(**kwargs)
generate_click = stable_diffusion.click(**kwargs)
clear_queue.click(
fn=None, cancels=[prompt_submit, neg_prompt_submit, generate_click]
)

View File

@@ -1,278 +0,0 @@
from pathlib import Path
import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import txt2img_inf
from apps.stable_diffusion.src import prompt_examples, args
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
get_custom_model_path,
get_custom_model_files,
scheduler_list_txt2img,
predefined_models,
)
with gr.Blocks(title="Text-to-Image") as txt2img_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Row():
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=50)
with gr.Row(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=1, min_width=600):
with gr.Row():
with gr.Column(scale=10):
with gr.Row():
custom_model = gr.Dropdown(
label=f"Models (Custom Model path: {get_custom_model_path()})",
elem_id="custom_model",
value=os.path.basename(args.ckpt_loc)
if args.ckpt_loc
else "None",
choices=["None"]
+ get_custom_model_files()
+ predefined_models,
)
hf_model_id = gr.Textbox(
elem_id="hf_model_id",
placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Column(scale=1, min_width=170):
png_info_img = gr.Image(
label="Import PNG info",
elem_id="txt2img_prompt_image",
type="pil",
tool="None",
visible=True,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="Prompt",
value=args.prompts[0],
lines=1,
elem_id="prompt_box",
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
value=args.negative_prompts[0],
lines=1,
elem_id="negative_prompt_box",
)
with gr.Accordion(
label="Lora based inference option", open=False
):
with gr.Row():
lora_weights = gr.Dropdown(
label=f"Standlone LoRA weights (Path: {get_custom_model_path()})",
elem_id="lora_weights",
value="None",
choices=["None"] + get_custom_model_files(),
)
lora_hf_id = gr.Textbox(
elem_id="lora_hf_id",
placeholder="Select 'None' in the Standlone LoRA weights dropdown on the left if you want to use a standalone HuggingFace model ID for LoRA here e.g: sayakpaul/sd-model-finetuned-lora-t4",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
elem_id="scheduler",
label="Scheduler",
value=args.scheduler,
choices=scheduler_list_txt2img,
)
with gr.Group():
save_metadata_to_png = gr.Checkbox(
label="Save prompt information to PNG",
value=args.write_metadata_to_png,
interactive=True,
)
save_metadata_to_json = gr.Checkbox(
label="Save prompt information to JSON file",
value=args.save_metadata_to_json,
interactive=True,
)
with gr.Row():
height = gr.Slider(
384, 768, value=args.height, step=8, label="Height"
)
width = gr.Slider(
384, 768, value=args.width, step=8, label="Width"
)
precision = gr.Radio(
label="Precision",
value=args.precision,
choices=[
"fp16",
"fp32",
],
visible=False,
)
max_length = gr.Radio(
label="Max Length",
value=args.max_length,
choices=[
64,
77,
],
visible=False,
)
with gr.Row():
steps = gr.Slider(
1, 100, value=args.steps, step=1, label="Steps"
)
guidance_scale = gr.Slider(
0,
50,
value=args.guidance_scale,
step=0.1,
label="CFG Scale",
)
with gr.Row():
batch_count = gr.Slider(
1,
100,
value=args.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=args.batch_size,
step=1,
label="Batch Size",
interactive=True,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
)
device = gr.Dropdown(
elem_id="device",
label="Device",
value=available_devices[0],
choices=available_devices,
)
with gr.Row():
with gr.Column(scale=2):
random_seed = gr.Button("Randomize Seed")
random_seed.click(
None,
inputs=[],
outputs=[seed],
_js="() => -1",
)
with gr.Column(scale=6):
stable_diffusion = gr.Button("Generate Image(s)")
with gr.Column(scale=1, min_width=150):
clear_queue = gr.Button("Clear Queue")
with gr.Accordion(label="Prompt Examples!", open=False):
ex = gr.Examples(
examples=prompt_examples,
inputs=prompt,
cache_examples=False,
elem_id="prompt_examples",
)
with gr.Column(scale=1, min_width=600):
with gr.Group():
txt2img_gallery = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
).style(grid=[2])
std_output = gr.Textbox(
value="Nothing to show.",
lines=1,
show_label=False,
)
output_dir = args.output_dir if args.output_dir else Path.cwd()
output_dir = Path(output_dir, "generated_imgs")
output_loc = gr.Textbox(
label="Saving Images at",
value=output_dir,
interactive=False,
)
with gr.Row():
txt2img_sendto_img2img = gr.Button(value="SendTo Img2Img")
txt2img_sendto_inpaint = gr.Button(value="SendTo Inpaint")
txt2img_sendto_outpaint = gr.Button(
value="SendTo Outpaint"
)
txt2img_sendto_upscaler = gr.Button(
value="SendTo Upscaler"
)
kwargs = dict(
fn=txt2img_inf,
inputs=[
prompt,
negative_prompt,
height,
width,
steps,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
custom_model,
hf_model_id,
precision,
device,
max_length,
save_metadata_to_json,
save_metadata_to_png,
lora_weights,
lora_hf_id,
],
outputs=[txt2img_gallery, std_output],
show_progress=args.progress_bar,
)
prompt_submit = prompt.submit(**kwargs)
neg_prompt_submit = negative_prompt.submit(**kwargs)
generate_click = stable_diffusion.click(**kwargs)
clear_queue.click(
fn=None, cancels=[prompt_submit, neg_prompt_submit, generate_click]
)
from apps.stable_diffusion.web.utils.png_metadata import (
import_png_metadata,
)
png_info_img.change(
fn=import_png_metadata,
inputs=[
png_info_img,
],
outputs=[
png_info_img,
prompt,
negative_prompt,
steps,
scheduler,
guidance_scale,
seed,
width,
height,
custom_model,
hf_model_id,
],
)

View File

@@ -1,241 +0,0 @@
from pathlib import Path
import os
import gradio as gr
from PIL import Image
from apps.stable_diffusion.scripts import upscaler_inf
from apps.stable_diffusion.src import args
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
get_custom_model_path,
get_custom_model_files,
scheduler_list,
predefined_upscaler_models,
)
with gr.Blocks(title="Upscaler") as upscaler_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Row():
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=50)
with gr.Row(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=1, min_width=600):
with gr.Row():
custom_model = gr.Dropdown(
label=f"Models (Custom Model path: {get_custom_model_path()})",
elem_id="custom_model",
value=os.path.basename(args.ckpt_loc)
if args.ckpt_loc
else "None",
choices=["None"]
+ get_custom_model_files()
+ predefined_upscaler_models,
)
hf_model_id = gr.Textbox(
elem_id="hf_model_id",
placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
value="",
label="HuggingFace Model ID",
lines=3,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="Prompt",
value=args.prompts[0],
lines=1,
elem_id="prompt_box",
)
negative_prompt = gr.Textbox(
label="Negative Prompt",
value=args.negative_prompts[0],
lines=1,
elem_id="negative_prompt_box",
)
upscaler_init_image = gr.Image(
label="Input Image", type="pil"
).style(height=300)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
elem_id="scheduler",
label="Scheduler",
value="DDIM",
choices=scheduler_list,
)
with gr.Group():
save_metadata_to_png = gr.Checkbox(
label="Save prompt information to PNG",
value=args.write_metadata_to_png,
interactive=True,
)
save_metadata_to_json = gr.Checkbox(
label="Save prompt information to JSON file",
value=args.save_metadata_to_json,
interactive=True,
)
with gr.Row():
height = gr.Slider(
128,
512,
value=128,
step=128,
label="Height",
interactive=False,
)
width = gr.Slider(
128,
512,
value=128,
step=128,
label="Width",
interactive=False,
)
precision = gr.Radio(
label="Precision",
value=args.precision,
choices=[
"fp16",
"fp32",
],
visible=True,
interactive=False,
)
max_length = gr.Radio(
label="Max Length",
value=args.max_length,
choices=[
64,
77,
],
visible=False,
)
with gr.Row():
steps = gr.Slider(
1, 100, value=args.steps, step=1, label="Steps"
)
noise_level = gr.Slider(
0,
100,
value=args.noise_level,
step=1,
label="Noise Level",
)
with gr.Row():
guidance_scale = gr.Slider(
0,
50,
value=args.guidance_scale,
step=0.1,
label="CFG Scale",
)
batch_count = gr.Slider(
1,
100,
value=args.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=args.batch_size,
step=1,
label="Batch Size",
interactive=False,
visible=False,
)
with gr.Row():
seed = gr.Number(
value=args.seed, precision=0, label="Seed"
)
device = gr.Dropdown(
elem_id="device",
label="Device",
value=available_devices[0],
choices=available_devices,
)
with gr.Row():
with gr.Column(scale=2):
random_seed = gr.Button("Randomize Seed")
random_seed.click(
None,
inputs=[],
outputs=[seed],
_js="() => -1",
)
with gr.Column(scale=6):
stable_diffusion = gr.Button("Generate Image(s)")
with gr.Column(scale=1, min_width=150):
clear_queue = gr.Button("Clear Queue")
with gr.Column(scale=1, min_width=600):
with gr.Group():
upscaler_gallery = gr.Gallery(
label="Generated images",
show_label=False,
elem_id="gallery",
).style(grid=[2])
std_output = gr.Textbox(
value="Nothing to show.",
lines=1,
show_label=False,
)
output_dir = args.output_dir if args.output_dir else Path.cwd()
output_dir = Path(output_dir, "generated_imgs")
output_loc = gr.Textbox(
label="Saving Images at",
value=output_dir,
interactive=False,
)
with gr.Row():
upscaler_sendto_img2img = gr.Button(value="SendTo Img2Img")
upscaler_sendto_inpaint = gr.Button(value="SendTo Inpaint")
upscaler_sendto_outpaint = gr.Button(
value="SendTo Outpaint"
)
kwargs = dict(
fn=upscaler_inf,
inputs=[
prompt,
negative_prompt,
upscaler_init_image,
height,
width,
steps,
noise_level,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
custom_model,
hf_model_id,
precision,
device,
max_length,
save_metadata_to_json,
save_metadata_to_png,
],
outputs=[upscaler_gallery, std_output],
show_progress=args.progress_bar,
)
prompt_submit = prompt.submit(**kwargs)
neg_prompt_submit = negative_prompt.submit(**kwargs)
generate_click = stable_diffusion.click(**kwargs)
clear_queue.click(
fn=None, cancels=[prompt_submit, neg_prompt_submit, generate_click]
)

View File

@@ -1,93 +0,0 @@
import os
import sys
from apps.stable_diffusion.src import get_available_devices
import glob
from pathlib import Path
from apps.stable_diffusion.src import args
from dataclasses import dataclass
@dataclass
class Config:
mode: str
model_id: str
ckpt_loc: str
precision: str
batch_size: int
max_length: int
height: int
width: int
device: str
use_lora: str
use_stencil: str
custom_model_filetypes = (
"*.ckpt",
"*.safetensors",
) # supported custom checkpoint file types
scheduler_list = [
"DDIM",
"PNDM",
"DPMSolverMultistep",
"EulerAncestralDiscrete",
]
scheduler_list_txt2img = [
"DDIM",
"PNDM",
"LMSDiscrete",
"KDPM2Discrete",
"DPMSolverMultistep",
"EulerDiscrete",
"EulerAncestralDiscrete",
"SharkEulerDiscrete",
]
predefined_models = [
"Linaqruf/anything-v3.0",
"prompthero/openjourney",
"wavymulder/Analog-Diffusion",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-2-1-base",
"CompVis/stable-diffusion-v1-4",
]
predefined_paint_models = [
"runwayml/stable-diffusion-inpainting",
"stabilityai/stable-diffusion-2-inpainting",
]
predefined_upscaler_models = [
"stabilityai/stable-diffusion-x4-upscaler",
]
def resource_path(relative_path):
"""Get absolute path to resource, works for dev and for PyInstaller"""
base_path = getattr(
sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
)
return os.path.join(base_path, relative_path)
def get_custom_model_path():
return Path(args.ckpt_dir) if args.ckpt_dir else Path(Path.cwd(), "models")
def get_custom_model_pathfile(custom_model_name):
return os.path.join(get_custom_model_path(), custom_model_name)
def get_custom_model_files():
ckpt_files = []
for extn in custom_model_filetypes:
files = [
os.path.basename(x)
for x in glob.glob(os.path.join(get_custom_model_path(), extn))
]
ckpt_files.extend(files)
return sorted(ckpt_files, key=str.casefold)
nodlogo_loc = resource_path("logos/nod-logo.png")
available_devices = get_available_devices()

View File

@@ -1,46 +0,0 @@
import gc
"""
The global objects include SD pipeline and config.
Maintaining global objects avoids creating extra pipeline objects when switching modes.
Clearing the cache when switching models also avoids memory leaks.
"""
def init():
global sd_obj
global config_obj
sd_obj = None
config_obj = None
def set_sd_obj(value):
global sd_obj
sd_obj = value
def set_cfg_obj(value):
global config_obj
config_obj = value
def set_schedulers(value):
global sd_obj
sd_obj.scheduler = value
def get_sd_obj():
return sd_obj
def get_cfg_obj():
return config_obj
def clear_cache():
global sd_obj
global config_obj
del sd_obj
del config_obj
gc.collect()
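# Illustrative lifecycle (an assumption about the web code's usage): index.py
# calls init() once at startup, set_sd_obj() runs after a pipeline is built,
# and clear_cache() is called before switching models.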

View File

@@ -1,31 +0,0 @@
import os
import tempfile
import gradio
from os import listdir
gradio_tmp_imgs_folder = os.path.join(os.getcwd(), "shark_tmp/")
# Clear all gradio tmp images
def clear_gradio_tmp_imgs_folder():
if not os.path.exists(gradio_tmp_imgs_folder):
return
for fileName in listdir(gradio_tmp_imgs_folder):
# Delete tmp png files
if fileName.startswith("tmp") and fileName.endswith(".png"):
os.remove(gradio_tmp_imgs_folder + fileName)
# Override gradio's save_pil_to_file so that tmp images generated by gradio are saved into our own tmp folder
def save_pil_to_file(pil_image, dir=None):
if not os.path.exists(gradio_tmp_imgs_folder):
os.mkdir(gradio_tmp_imgs_folder)
file_obj = tempfile.NamedTemporaryFile(
delete=False, suffix=".png", dir=gradio_tmp_imgs_folder
)
pil_image.save(file_obj)
return file_obj
# Register save_pil_to_file override
gradio.processing_utils.save_pil_to_file = save_pil_to_file

View File

@@ -1,148 +0,0 @@
import re
from pathlib import Path
from apps.stable_diffusion.web.ui.txt2img_ui import (
png_info_img,
prompt,
negative_prompt,
steps,
scheduler,
guidance_scale,
seed,
width,
height,
custom_model,
hf_model_id,
)
from apps.stable_diffusion.web.ui.utils import (
get_custom_model_pathfile,
scheduler_list_txt2img,
predefined_models,
)
re_param_code = r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)'
re_param = re.compile(re_param_code)
re_imagesize = re.compile(r"^(\d+)x(\d+)$")
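# Illustrative example of the A1111-style "parameters" text these regexes
# parse (all values made up):
#   a cute corgi wearing sunglasses
#   Negative prompt: blurry, low quality
#   Steps: 20, Sampler: EulerDiscrete, CFG scale: 7.5, Seed: 42, Size: 512x512, Model: foo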
def parse_generation_parameters(x: str):
res = {}
prompt = ""
negative_prompt = ""
done_with_prompt = False
*lines, lastline = x.strip().split("\n")
if len(re_param.findall(lastline)) < 3:
lines.append(lastline)
lastline = ""
for i, line in enumerate(lines):
line = line.strip()
if line.startswith("Negative prompt:"):
done_with_prompt = True
line = line[16:].strip()
if done_with_prompt:
negative_prompt += ("" if negative_prompt == "" else "\n") + line
else:
prompt += ("" if prompt == "" else "\n") + line
res["Prompt"] = prompt
res["Negative prompt"] = negative_prompt
for k, v in re_param.findall(lastline):
v = v[1:-1] if v[0] == '"' and v[-1] == '"' else v
m = re_imagesize.match(v)
if m is not None:
res[k + "-1"] = m.group(1)
res[k + "-2"] = m.group(2)
else:
res[k] = v
# Missing CLIP skip means it was set to 1 (the default)
if "Clip skip" not in res:
res["Clip skip"] = "1"
hypernet = res.get("Hypernet", None)
if hypernet is not None:
res[
"Prompt"
] += f"""<hypernet:{hypernet}:{res.get("Hypernet strength", "1.0")}>"""
if "Hires resize-1" not in res:
res["Hires resize-1"] = 0
res["Hires resize-2"] = 0
return res
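To make the expected format concrete, here is a small worked example with illustrative values: prompt line(s), an optional `Negative prompt:` line, then one comma-separated line of `Key: value` pairs.

```python
sample = (
    "a photograph of an astronaut riding a horse\n"
    "Negative prompt: blurry\n"
    "Steps: 50, Sampler: EulerDiscrete, CFG scale: 7.5, "
    "Seed: 42, Size: 512x512, Model: stable-diffusion-2-1-base"
)
res = parse_generation_parameters(sample)
# res["Prompt"]           -> "a photograph of an astronaut riding a horse"
# res["Negative prompt"]  -> "blurry"
# res["Size-1"], res["Size-2"] -> "512", "512"  (WxH split by re_imagesize)
# res["Clip skip"]        -> "1"  (defaulted when absent from the metadata)
```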
def import_png_metadata(pil_data):
try:
png_info = pil_data.info["parameters"]
metadata = parse_generation_parameters(png_info)
png_hf_model_id = ""
png_custom_model = ""
if "Model" in metadata:
# Remove extension from model info
if metadata["Model"].endswith(".safetensors") or metadata[
"Model"
].endswith(".ckpt"):
metadata["Model"] = Path(metadata["Model"]).stem
# Check for the model name match with one of the local ckpt or safetensors files
if Path(
get_custom_model_pathfile(metadata["Model"] + ".ckpt")
).is_file():
png_custom_model = metadata["Model"] + ".ckpt"
if Path(
get_custom_model_pathfile(metadata["Model"] + ".safetensors")
).is_file():
png_custom_model = metadata["Model"] + ".safetensors"
# Check for a model match with one of the default model list (ex: "Linaqruf/anything-v3.0")
if metadata["Model"] in predefined_models:
png_custom_model = metadata["Model"]
# If nothing had matched, check vendor/hf_model_id
if not png_custom_model and metadata["Model"].count("/"):
png_hf_model_id = metadata["Model"]
# No matching model was found
if not png_custom_model and not png_hf_model_id:
print(
"Import PNG info: Unable to find a matching model for %s"
% metadata["Model"]
)
outputs = {
png_info_img: None,
negative_prompt: metadata["Negative prompt"],
steps: int(metadata["Steps"]),
guidance_scale: float(metadata["CFG scale"]),
seed: int(metadata["Seed"]),
width: float(metadata["Size-1"]),
height: float(metadata["Size-2"]),
}
if "Model" in metadata and png_custom_model:
outputs[custom_model] = png_custom_model
outputs[hf_model_id] = ""
if "Model" in metadata and png_hf_model_id:
outputs[custom_model] = "None"
outputs[hf_model_id] = png_hf_model_id
if "Prompt" in metadata:
outputs[prompt] = metadata["Prompt"]
if "Sampler" in metadata:
if metadata["Sampler"] in scheduler_list_txt2img:
outputs[scheduler] = metadata["Sampler"]
else:
print(
"Import PNG info: Unable to find a scheduler for %s"
% metadata["Sampler"]
)
return outputs
except Exception as ex:
if pil_data and pil_data.info.get("parameters"):
print("import_png_metadata failed with %s" % ex)
pass
return {
png_info_img: None,
}

View File

@@ -42,7 +42,7 @@ class TFHuggingFaceLanguage(tf.Module):
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=tf_bert_input, jit_compile=True)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)

View File

@@ -1,51 +0,0 @@
import argparse
from PIL import Image
import numpy as np
import requests
import shutil
import os
import subprocess
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--newfile")
parser.add_argument(
"-g",
"--golden_url",
default="https://storage.googleapis.com/shark_tank/testdata/cyberpunk_fores_42_0_230119_021148.png",
)
def get_image(url, local_filename):
res = requests.get(url, stream=True)
if res.status_code == 200:
with open(local_filename, "wb") as f:
shutil.copyfileobj(res.raw, f)
def compare_images(new_filename, golden_filename):
new = np.array(Image.open(new_filename)) / 255.0
golden = np.array(Image.open(golden_filename)) / 255.0
diff = np.abs(new - golden)
mean = np.mean(diff)
if mean > 0.1:
if os.name != "nt":
subprocess.run(
[
"gsutil",
"cp",
new_filename,
"gs://shark_tank/testdata/builder/",
]
)
raise SystemExit("new and golden not close")
else:
print("SUCCESS")
if __name__ == "__main__":
args = parser.parse_args()
tempfile_name = os.path.join(os.getcwd(), "golden.png")
get_image(args.golden_url, tempfile_name)
compare_images(args.newfile, tempfile_name)

View File

@@ -1,5 +1,5 @@
#!/bin/bash
IMPORTER=1 BENCHMARK=1 ./setup_venv.sh
IMPORTER=1 ./setup_venv.sh
source $GITHUB_WORKSPACE/shark.venv/bin/activate
python generate_sharktank.py
python generate_sharktank.py --upload=False --ci_tank_dir=True

View File

@@ -1,209 +0,0 @@
import os
from sys import executable
import subprocess
from apps.stable_diffusion.src.utils.resources import (
get_json_file,
)
from datetime import datetime as dt
from shark.shark_downloader import download_public_file
from image_comparison import compare_images
import argparse
from glob import glob
import shutil
import requests
model_config_dicts = get_json_file(
os.path.join(
os.getcwd(),
"apps/stable_diffusion/src/utils/resources/model_config.json",
)
)
def parse_sd_out(filename, command, device, use_tune, model_name, import_mlir):
with open(filename, "r+") as f:
lines = f.readlines()
metrics = {}
vals_to_read = [
"Clip Inference time",
"Average step",
"VAE Inference time",
"Total image generation",
]
for line in lines:
for val in vals_to_read:
if val in line:
metrics[val] = line.split(" ")[-1].strip("\n")
metrics["Average step"] = metrics["Average step"].strip("ms/it")
metrics["Total image generation"] = metrics[
"Total image generation"
].strip("sec")
metrics["device"] = device
metrics["use_tune"] = use_tune
metrics["model_name"] = model_name
metrics["import_mlir"] = import_mlir
metrics["command"] = command
return metrics
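For context, a sketch of the log format this scraper expects, with illustrative values written to a temporary dump file (the command/device/model arguments below are placeholders):

```python
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write(
        "Clip Inference time: 12.3ms\n"
        "Average step: 95.1ms/it\n"
        "VAE Inference time: 40.2ms\n"
        "Total image generation: 5.6sec\n"
    )
metrics = parse_sd_out(
    f.name, "txt2img ...", "vulkan", "--use_tuned", "some/model", "--import_mlir"
)
# metrics["Average step"]           -> "95.1"  (trailing "ms/it" stripped)
# metrics["Total image generation"] -> "5.6"   (trailing "sec" stripped)
```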
def get_inpaint_inputs():
os.mkdir("./test_images/inputs")
img_url = (
"https://huggingface.co/datasets/diffusers/test-arrays/resolve"
"/main/stable_diffusion_inpaint/input_bench_image.png"
)
mask_url = (
"https://huggingface.co/datasets/diffusers/test-arrays/resolve"
"/main/stable_diffusion_inpaint/input_bench_mask.png"
)
img = requests.get(img_url)
mask = requests.get(mask_url)
open("./test_images/inputs/image.png", "wb").write(img.content)
open("./test_images/inputs/mask.png", "wb").write(mask.content)
def test_loop(device="vulkan", beta=False, extra_flags=[]):
# Get golden values from tank
shutil.rmtree("./test_images", ignore_errors=True)
model_metrics = []
os.mkdir("./test_images")
os.mkdir("./test_images/golden")
get_inpaint_inputs()
hf_model_names = model_config_dicts[0].values()
tuned_options = ["--no-use_tuned", "--use_tuned"]
import_options = ["--import_mlir", "--no-import_mlir"]
prompt_text = "--prompt=cyberpunk forest by Salvador Dali"
inpaint_prompt_text = "--prompt=Face of a yellow cat, high resolution, sitting on a park bench"
if os.name == "nt":
prompt_text = '--prompt="cyberpunk forest by Salvador Dali"'
inpaint_prompt_text = '--prompt="Face of a yellow cat, high resolution, sitting on a park bench"'
if beta:
extra_flags.append("--beta_models=True")
extra_flags.append("--no-progress_bar")
to_skip = [
"Linaqruf/anything-v3.0",
"prompthero/openjourney",
"wavymulder/Analog-Diffusion",
"dreamlike-art/dreamlike-diffusion-1.0",
]
for import_opt in import_options:
for model_name in hf_model_names:
if model_name in to_skip:
continue
for use_tune in tuned_options:
command = (
[
executable, # executable is the python from the venv used to run this
"apps/stable_diffusion/scripts/txt2img.py",
"--device=" + device,
prompt_text,
"--negative_prompts=" + '""',
"--seed=42",
import_opt,
"--output_dir="
+ os.path.join(os.getcwd(), "test_images", model_name),
"--hf_model_id=" + model_name,
use_tune,
]
if "inpainting" not in model_name
else [
executable,
"apps/stable_diffusion/scripts/inpaint.py",
"--device=" + device,
inpaint_prompt_text,
"--negative_prompts=" + '""',
"--img_path=./test_images/inputs/image.png",
"--mask_path=./test_images/inputs/mask.png",
"--seed=42",
"--import_mlir",
"--output_dir="
+ os.path.join(os.getcwd(), "test_images", model_name),
"--hf_model_id=" + model_name,
use_tune,
]
)
command += extra_flags
if os.name == "nt":
command = " ".join(command)
dumpfile_name = "_".join(model_name.split("/")) + ".txt"
dumpfile_name = os.path.join(os.getcwd(), dumpfile_name)
with open(dumpfile_name, "w+") as f:
generated_image = not subprocess.call(
command,
stdout=f,
stderr=f,
)
if os.name != "nt":
command = " ".join(command)
if generated_image:
model_metrics.append(
parse_sd_out(
dumpfile_name,
command,
device,
use_tune,
model_name,
import_opt,
)
)
print(command)
print("Successfully generated image")
os.makedirs(
"./test_images/golden/" + model_name, exist_ok=True
)
download_public_file(
"gs://shark_tank/testdata/golden/" + model_name,
"./test_images/golden/" + model_name,
)
test_file_path = os.path.join(
os.getcwd(),
"test_images",
model_name,
"generated_imgs",
dt.now().strftime("%Y%m%d"),
"*.png",
)
test_file = glob(test_file_path)[0]
golden_path = (
"./test_images/golden/" + model_name + "/*.png"
)
golden_file = glob(golden_path)[0]
compare_images(test_file, golden_file)
else:
print(command)
print("failed to generate image for this configuration")
if "2_1_base" in model_name:
print("failed a known successful model.")
exit(1)
with open(os.path.join(os.getcwd(), "sd_testing_metrics.csv"), "w+") as f:
header = "model_name;device;use_tune;import_opt;Clip Inference time(ms);Average Step (ms/it);VAE Inference time(ms);total image generation(s);command\n"
f.write(header)
for metric in model_metrics:
output = [
metric["model_name"],
metric["device"],
metric["use_tune"],
metric["import_mlir"],
metric["Clip Inference time"],
metric["Average step"],
metric["VAE Inference time"],
metric["Total image generation"],
metric["command"],
]
f.write(";".join(output) + "\n")
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device", default="vulkan")
parser.add_argument(
"-b", "--beta", action=argparse.BooleanOptionalAction, default=False
)
if __name__ == "__main__":
args = parser.parse_args()
print(args)
test_loop(args.device, args.beta, [])

View File

@@ -60,13 +60,3 @@ def pytest_addoption(parser):
default="gs://shark_tank/latest",
help="URL to bucket from which to download SHARK tank artifacts. Default is gs://shark_tank/latest",
)
parser.addoption(
"--benchmark_dispatches",
default=None,
help="Benchmark individual dispatch kernels produced by IREE compiler. Use 'All' for all, or specific dispatches e.g. '0 1 2 10'",
)
parser.addoption(
"--dispatch_benchmarks_dir",
default="./temp_dispatch_benchmarks",
help="Directory in which dispatch benchmarks are saved.",
)

View File

@@ -40,7 +40,7 @@ cmake --build build/
*Prepare the model*
```bash
wget https://storage.googleapis.com/shark_tank/latest/resnet50_tf/resnet50_tf.mlir
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvmcpu-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 resnet50_tf.mlir -o resnet50_tf.vmfb
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvm-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 resnet50_tf.mlir -o resnet50_tf.vmfb
```
*Prepare the input*
@@ -65,18 +65,18 @@ A tool for benchmarking other models is built and can be invoked with a command
see `./build/vulkan_gui/iree-vulkan-gui --help` for an explanation of the function input. For example, the stable diffusion UNet can be tested with the following commands:
```bash
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 stable_diff_tf.mlir -o stable_diff_tf.vmfb
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 stable_diff_tf.mlir -o stable_diff_tf.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=2x4x64x64xf32 --function_input=1xf32 --function_input=2x77x768xf32
```
VAE and Autoencoder are also available
```bash
# VAE
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 vae.mlir -o vae.vmfb
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 vae.mlir -o vae.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x4x64x64xf32
# CLIP Autoencoder
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 clip_autoencoder.mlir -o clip_autoencoder.vmfb
iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 clip_autoencoder.mlir -o clip_autoencoder.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x77xi32 --function_input=1x77xi32
```

View File

@@ -1,6 +1,7 @@
import numpy as np
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
def load_and_preprocess_image(fname: str):

View File

@@ -1,27 +0,0 @@
# Dataset annotation tool
SHARK annotator for adding or modifying prompts of dataset images
## Set up
Activate SHARK Python virtual environment and install additional packages
```shell
source ../shark.venv/bin/activate
pip install -r requirements.txt
```
## Run annotator
```shell
python annotation_tool.py
```
<img width="1280" alt="annotator" src="https://user-images.githubusercontent.com/49575973/214521137-7ef6ae10-7cd8-46e6-b270-b6c0445157f1.png">
* Select a dataset from `Dataset` dropdown list
* Select an image from `Image` dropdown list
* Image and the existing prompt will be loaded
* Select a prompt from `Prompt` dropdown list to modify or "Add new" to add a prompt
* Click `Save` to save changes; click `Delete` to delete a prompt
* Click `Back` or `Next` to switch images; you can also select other images from `Image`
* Click `Finish` when you are done annotating, or before switching datasets

View File

@@ -1,247 +0,0 @@
import gradio as gr
import json
import jsonlines
import os
from args import args
from pathlib import Path
from PIL import Image
from utils import get_datasets
shark_root = Path(__file__).parent.parent
demo_css = shark_root.joinpath("web/demo.css").resolve()
nodlogo_loc = shark_root.joinpath(
"web/models/stable_diffusion/logos/nod-logo.png"
)
with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as shark_web:
with gr.Row(elem_id="ui_title"):
nod_logo = Image.open(nodlogo_loc)
with gr.Column(scale=1, elem_id="demo_title_outer"):
gr.Image(
value=nod_logo,
show_label=False,
interactive=False,
elem_id="top_logo",
).style(width=150, height=100)
datasets, images, ds_w_prompts = get_datasets(args.gs_url)
prompt_data = dict()
with gr.Row(elem_id="ui_body"):
# TODO: add multiselect dataset, there is a gradio version conflict
dataset = gr.Dropdown(label="Dataset", choices=datasets)
image_name = gr.Dropdown(label="Image", choices=[])
with gr.Row(elem_id="ui_body"):
# TODO: add ability to search image by typing
with gr.Column(scale=1, min_width=600):
image = gr.Image(type="filepath").style(height=512)
with gr.Column(scale=1, min_width=600):
prompts = gr.Dropdown(
label="Prompts",
choices=[],
)
prompt = gr.Textbox(
label="Editor",
lines=3,
)
with gr.Row():
save = gr.Button("Save")
delete = gr.Button("Delete")
with gr.Row():
back_image = gr.Button("Back")
next_image = gr.Button("Next")
finish = gr.Button("Finish")
def filter_datasets(dataset):
if dataset is None:
return gr.Dropdown.update(value=None, choices=[])
# create the dataset dir if doesn't exist and download prompt file
dataset_path = str(shark_root) + "/dataset/" + dataset
if not os.path.exists(dataset_path):
os.mkdir(dataset_path)
# read prompt jsonlines file
prompt_data.clear()
if dataset in ds_w_prompts:
prompt_gs_path = args.gs_url + "/" + dataset + "/metadata.jsonl"
os.system(f'gsutil cp "{prompt_gs_path}" "{dataset_path}"/')
with jsonlines.open(dataset_path + "/metadata.jsonl") as reader:
for line in reader.iter(type=dict, skip_invalid=True):
prompt_data[line["file_name"]] = (
[line["text"]]
if type(line["text"]) is str
else line["text"]
)
return gr.Dropdown.update(choices=images[dataset])
dataset.change(fn=filter_datasets, inputs=dataset, outputs=image_name)
def display_image(dataset, image_name):
if dataset is None or image_name is None:
return gr.Image.update(value=None), gr.Dropdown.update(value=None)
# download and load the image
img_gs_path = args.gs_url + "/" + dataset + "/" + image_name
img_sub_path = "/".join(image_name.split("/")[:-1])
img_dst_path = (
str(shark_root) + "/dataset/" + dataset + "/" + img_sub_path + "/"
)
if not os.path.exists(img_dst_path):
os.mkdir(img_dst_path)
os.system(f'gsutil cp "{img_gs_path}" "{img_dst_path}"')
img = Image.open(img_dst_path + image_name.split("/")[-1])
if image_name not in prompt_data.keys():
prompt_data[image_name] = []
prompt_choices = ["Add new"]
prompt_choices += prompt_data[image_name]
return gr.Image.update(value=img), gr.Dropdown.update(
choices=prompt_choices
)
image_name.change(
fn=display_image,
inputs=[dataset, image_name],
outputs=[image, prompts],
)
def edit_prompt(prompts):
if prompts == "Add new":
return gr.Textbox.update(value=None)
return gr.Textbox.update(value=prompts)
prompts.change(fn=edit_prompt, inputs=prompts, outputs=prompt)
def save_prompt(dataset, image_name, prompts, prompt):
if (
dataset is None
or image_name is None
or prompts is None
or prompt is None
):
return
if prompts == "Add new":
prompt_data[image_name].append(prompt)
else:
idx = prompt_data[image_name].index(prompts)
prompt_data[image_name][idx] = prompt
prompt_path = (
str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
)
# write prompt jsonlines file
with open(prompt_path, "w") as f:
for key, value in prompt_data.items():
if not value:
continue
v = value if len(value) > 1 else value[0]
f.write(json.dumps({"file_name": key, "text": v}))
f.write("\n")
prompt_choices = ["Add new"]
prompt_choices += prompt_data[image_name]
return gr.Dropdown.update(choices=prompt_choices, value=None)
save.click(
fn=save_prompt,
inputs=[dataset, image_name, prompts, prompt],
outputs=prompts,
)
def delete_prompt(dataset, image_name, prompts):
if dataset is None or image_name is None or prompts is None:
return
if prompts == "Add new":
return
prompt_data[image_name].remove(prompts)
prompt_path = (
str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
)
# write prompt jsonlines file
with open(prompt_path, "w") as f:
for key, value in prompt_data.items():
if not value:
continue
v = value if len(value) > 1 else value[0]
f.write(json.dumps({"file_name": key, "text": v}))
f.write("\n")
prompt_choices = ["Add new"]
prompt_choices += prompt_data[image_name]
return gr.Dropdown.update(choices=prompt_choices, value=None)
delete.click(
fn=delete_prompt,
inputs=[dataset, image_name, prompts],
outputs=prompts,
)
def get_back_image(dataset, image_name):
if dataset is None or image_name is None:
return
# remove local image
img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
os.system(f'rm "{img_path}"')
# get the index for the back image
idx = images[dataset].index(image_name)
if idx == 0:
return gr.Dropdown.update(value=None)
return gr.Dropdown.update(value=images[dataset][idx - 1])
back_image.click(
fn=get_back_image, inputs=[dataset, image_name], outputs=image_name
)
def get_next_image(dataset, image_name):
if dataset is None or image_name is None:
return
# remove local image
img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
os.system(f'rm "{img_path}"')
# get the index for the next image
idx = images[dataset].index(image_name)
if idx == len(images[dataset]) - 1:
return gr.Dropdown.update(value=None)
return gr.Dropdown.update(value=images[dataset][idx + 1])
next_image.click(
fn=get_next_image, inputs=[dataset, image_name], outputs=image_name
)
def finish_annotation(dataset):
if dataset is None:
return
# upload prompt and remove local data
dataset_path = str(shark_root) + "/dataset/" + dataset
dataset_gs_path = args.gs_url + "/" + dataset + "/"
os.system(
f'gsutil cp "{dataset_path}/metadata.jsonl" "{dataset_gs_path}"'
)
os.system(f'rm -rf "{dataset_path}"')
return gr.Dropdown.update(value=None)
finish.click(fn=finish_annotation, inputs=dataset, outputs=dataset)
if __name__ == "__main__":
shark_web.launch(
share=args.share,
inbrowser=True,
server_name="0.0.0.0",
server_port=args.server_port,
)

View File

@@ -1,34 +0,0 @@
import argparse
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
##############################################################################
### Dataset Annotator flags
##############################################################################
p.add_argument(
"--gs_url",
type=str,
required=True,
help="URL to datasets in GS bucket",
)
p.add_argument(
"--share",
default=False,
action=argparse.BooleanOptionalAction,
help="flag for generating a public URL",
)
p.add_argument(
"--server_port",
type=int,
default=8080,
help="flag for setting server port",
)
##############################################################################
args = p.parse_args()

View File

@@ -1,3 +0,0 @@
# SHARK Annotator
gradio==3.15.0
jsonlines

View File

@@ -1,29 +0,0 @@
from google.cloud import storage
def get_datasets(gs_url):
datasets = set()
images = dict()
ds_w_prompts = []
storage_client = storage.Client()
bucket_name = gs_url.split("/")[2]
source_blob_name = "/".join(gs_url.split("/")[3:])
blobs = storage_client.list_blobs(bucket_name, prefix=source_blob_name)
for blob in blobs:
dataset_name = blob.name.split("/")[1]
if dataset_name == "":
continue
datasets.add(dataset_name)
if dataset_name not in images.keys():
images[dataset_name] = []
# check if image or jsonl
file_sub_path = "/".join(blob.name.split("/")[2:])
if "/" in file_sub_path:
images[dataset_name] += [file_sub_path]
elif "metadata.jsonl" in file_sub_path:
ds_w_prompts.append(dataset_name)
return list(datasets), images, ds_w_prompts
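For reference, a sketch of what the helper returns for a hypothetical bucket layout (the bucket, dataset, and file names are illustrative):

```python
# Hypothetical layout under gs://my-bucket/datasets/:
#   datasets/cats/img/001.png
#   datasets/cats/metadata.jsonl
datasets, images, ds_w_prompts = get_datasets("gs://my-bucket/datasets")
# datasets     -> ["cats"]
# images       -> {"cats": ["img/001.png"]}
# ds_w_prompts -> ["cats"]  (datasets that ship a metadata.jsonl)
```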

View File

@@ -1,118 +0,0 @@
# Overview
This document is intended to provide a starting point for profiling with SHARK/IREE. At its core
[SHARK](https://github.com/nod-ai/SHARK/tree/main/tank) is a Python API that links the MLIR lowerings from various
frameworks + frontends (e.g. PyTorch -> Torch-MLIR) with the compiler + runtime offered by IREE. More information
on model coverage and framework support can be found [here](https://github.com/nod-ai/SHARK/tree/main/tank). The intended
use case for SHARK is the compilation and deployment of performant, state-of-the-art AI models.
![image](https://user-images.githubusercontent.com/22101546/217151219-9bb184a3-cfb9-4788-bb7e-5b502953525c.png)
## Benchmarking with SHARK
TODO: Expand this section.
SHARK offers native benchmarking support, although because it is model focused, fine-grained profiling is
less exposed than the common "model benchmarking suite" use case that SHARK is good at.
### SharkBenchmarkRunner
SharkBenchmarkRunner is a class designed for benchmarking models against other runtimes.
TODO: List supported runtimes for comparison + example on how to benchmark with it.
## Directly profiling IREE
A number of excellent developer resources on profiling with IREE can be
found [here](https://github.com/iree-org/iree/tree/main/docs/developers/developing_iree), so this section
focuses on bridging the gap between the two.
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling.md
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling_with_tracy.md
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling_vulkan_gpu.md
- https://github.com/iree-org/iree/blob/main/docs/developers/developing_iree/profiling_cpu_events.md
Internally, SHARK builds a pair of IREE commands to compile + run a model. At a high level, the flow starts with the
model represented in a high-level dialect (commonly Linalg), which is compiled to a flatbuffer (.vmfb) that
the runtime is capable of ingesting. At this point (with potentially a few runtime flags) the compiled model is then run
through the IREE runtime. This is all facilitated with the IREE Python bindings, which offer a convenient method
to capture the compile command SHARK comes up with. This is done by setting the environment variable
`IREE_SAVE_TEMPS` to point to a directory of choice, e.g. for stable diffusion
```
# Linux
$ export IREE_SAVE_TEMPS=/path/to/some/directory
# Windows
$ $env:IREE_SAVE_TEMPS="C:\path\to\some\directory"
$ python apps/stable_diffusion/scripts/txt2img.py -p "a photograph of an astronaut riding a horse" --save_vmfb
```
NOTE: Currently this will only save the compile command + input MLIR for a single model if run in a pipeline.
In the case of stable diffusion this should be UNet, so to get artifacts for the other models in the pipeline
they need to be extracted and tested individually.
The save temps directory should contain three files: `core-command-line.txt`, `core-input.mlir`, and `core-output.bin`.
The command line for compilation will start something like this, where the `-` needs to be replaced with the path to `core-input.mlir`.
```
/home/quinn/nod/iree-build/compiler/bindings/python/iree/compiler/tools/../_mlir_libs/iree-compile - --iree-input-type=none ...
```
The `-o output_filename.vmfb` flag can be used to specify the location to save the compiled vmfb. Note that a dump of the
dispatches that can be compiled + run in isolation can be generated by adding `--iree-hal-dump-executable-benchmarks-to=/some/directory`. Say, if they are in the `benchmarks` directory, the following compile/run commands would work for Vulkan on RDNA3.
```
iree-compile --iree-input-type=none --iree-hal-target-backends=vulkan --iree-vulkan-target-triple=rdna3-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 benchmarks/module_forward_dispatch_${NUM}_vulkan_spirv_fb.mlir -o benchmarks/module_forward_dispatch_${NUM}_vulkan_spirv_fb.vmfb
iree-benchmark-module --module=benchmarks/module_forward_dispatch_${NUM}_vulkan_spirv_fb.vmfb --function=forward --device=vulkan
```
Where `${NUM}` is the dispatch number that you want to benchmark/profile in isolation.
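Assuming the dumped dispatches follow the naming scheme above, a small driver loop can compile and benchmark each one in turn. This is only a sketch: the directory and flags mirror the commands above, and the target triple should be adjusted for your GPU.

```python
import glob
import subprocess

# Compile and benchmark every dumped Vulkan dispatch in ./benchmarks.
for mlir in sorted(glob.glob("benchmarks/module_forward_dispatch_*_vulkan_spirv_fb.mlir")):
    vmfb = mlir.replace(".mlir", ".vmfb")
    subprocess.run(
        ["iree-compile", "--iree-input-type=none",
         "--iree-hal-target-backends=vulkan",
         "--iree-vulkan-target-triple=rdna3-unknown-linux",
         "--iree-stream-resource-index-bits=64",
         "--iree-vm-target-index-bits=64", mlir, "-o", vmfb],
        check=True,
    )
    subprocess.run(
        ["iree-benchmark-module", f"--module={vmfb}",
         "--function=forward", "--device=vulkan"],
        check=True,
    )
```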
### Enabling Tracy for Vulkan profiling
To begin profiling with Tracy, a build of the IREE runtime with tracing enabled is needed. SHARK-Runtime builds an
instrumented version alongside the normal version nightly (.whls typically found [here](https://github.com/nod-ai/SHARK-Runtime/releases)), however this is only available for Linux. On Windows, tracing can be enabled by setting a CMake flag.
```
$env:IREE_ENABLE_RUNTIME_TRACING="ON"
```
Getting a trace can then be done by setting the environment variable `TRACY_NO_EXIT=1` and running the program to be
traced. Then, to actually capture the trace, use the `iree-tracy-capture` tool in a different terminal. Note that to get
the capture and profiler tools, the `IREE_BUILD_TRACY=ON` CMake flag needs to be set.
```
TRACY_NO_EXIT=1 python apps/stable_diffusion/scripts/txt2img.py -p "a photograph of an astronaut riding a horse"
# (in another terminal, either on the same machine or through ssh with a tunnel through port 8086)
iree-tracy-capture -o trace_filename.tracy
```
To do it over ssh, the flow looks like this
```
# From terminal 1 on local machine
ssh -L 8086:localhost:8086 <remote_server_name>
TRACY_NO_EXIT=1 python apps/stable_diffusion/scripts/txt2img.py -p "a photograph of an astronaut riding a horse"
# From terminal 2 on local machine. Requires having built IREE with the CMake flag `IREE_BUILD_TRACY=ON` to build the required tooling.
iree-tracy-capture -o /path/to/trace.tracy
```
The trace can then be viewed with
```
iree-tracy-profiler /path/to/trace.tracy
```
Capturing a runtime trace will work with any IREE tooling that uses the runtime. For example, `iree-benchmark-module`
can be used to benchmark an individual module. Importantly, this means that any SHARK script can be profiled with Tracy.
NOTE: Not all backends have the same Tracy support. This writeup focuses on the CPU/Vulkan backends, but support for tracing on CUDA was recently added (requires the `--cuda_tracing` flag).
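For example, an isolated module benchmark can be traced the same way as a full SHARK script. A sketch, assuming a tracing-enabled runtime build; the module path is illustrative:

```python
import os
import subprocess

# TRACY_NO_EXIT keeps the process alive until iree-tracy-capture connects.
env = dict(os.environ, TRACY_NO_EXIT="1")
subprocess.run(
    ["iree-benchmark-module", "--module=module.vmfb",
     "--function=forward", "--device=vulkan"],
    env=env,
    check=True,
)
# In another terminal: iree-tracy-capture -o trace.tracy
```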
## Experimental RGP support
TODO: This section is temporary until proper RGP support is added.
Currently, for stable diffusion there is a flag for enabling UNet to be visible to RGP with `--enable_rgp`. To get a proper capture though, the `DevModeSqttPrepareFrameCount=1` flag needs to be set for the driver (done with `VkPanel` on Windows).
With these two settings, a single iteration of UNet can be captured.
(AMD only) To get a dump of the pipelines (the result of the compiled SPIR-V), the `EnablePipelineDump=1` driver flag can be set. The
files will typically be dumped to a directory called `spvPipeline` (on Linux, `/var/tmp/spvPipeline`). The dumped files will
include header information that can be used to map back to the source dispatch/SPIR-V, e.g.
```
[Version]
version = 57
[CsSpvFile]
fileName = Shader_0x946C08DFD0C10D9A.spv
[CsInfo]
entryPoint = forward_dispatch_193_matmul_256x65536x2304
```

View File

@@ -2,26 +2,33 @@
"""SHARK Tank"""
# python generate_sharktank.py, you have to give a csv file with [model_name, model_download_url]
# will generate local shark tank folder like this:
# /SHARK
# /gen_shark_tank
# /albert_lite_base
# /...model_name...
# HOME
# /.local
# /shark_tank
# /albert_lite_base
# /...model_name...
#
import os
import csv
import argparse
from shark.shark_importer import SharkImporter
from shark.parser import shark_args
import tensorflow as tf
import subprocess as sp
import hashlib
import numpy as np
from pathlib import Path
from apps.stable_diffusion.src.models import (
model_wrappers as mw,
)
from apps.stable_diffusion.src.utils.stable_args import (
args,
)
visible_default = tf.config.list_physical_devices("GPU")
try:
tf.config.set_visible_devices([], "GPU")
visible_devices = tf.config.get_visible_devices()
for device in visible_devices:
assert device.device_type != "GPU"
except:
# Invalid device or cannot modify virtual devices once initialized.
pass
def create_hash(file_name):
@@ -34,12 +41,9 @@ def create_hash(file_name):
def save_torch_model(torch_model_list):
from tank.model_utils import (
get_hf_model,
get_vision_model,
get_hf_img_cls_model,
get_fp16_model,
)
from tank.model_utils import get_hf_model
from tank.model_utils import get_vision_model
from tank.model_utils import get_hf_img_cls_model
with open(torch_model_list) as csvfile:
torch_reader = csv.reader(csvfile, delimiter=",")
@@ -52,42 +56,16 @@ def save_torch_model(torch_model_list):
tracing_required = False if tracing_required == "False" else True
is_dynamic = False if is_dynamic == "False" else True
print("generating artifacts for: " + torch_model_name)
model = None
input = None
if model_type == "stable_diffusion":
args.use_tuned = False
args.import_mlir = True
args.use_tuned = False
args.local_tank_cache = WORKDIR
precision_values = ["fp16"]
seq_lengths = [64, 77]
for precision_value in precision_values:
args.precision = precision_value
for length in seq_lengths:
model = mw.SharkifyStableDiffusionModel(
model_id=torch_model_name,
custom_weights="",
precision=precision_value,
max_len=length,
width=512,
height=512,
use_base_vae=False,
debug=True,
sharktank_dir=WORKDIR,
generate_vmfb=False,
)
model()
continue
if model_type == "vision":
model, input, _ = get_vision_model(torch_model_name)
elif model_type == "hf":
model, input, _ = get_hf_model(torch_model_name)
elif model_type == "hf_img_cls":
model, input, _ = get_hf_img_cls_model(torch_model_name)
elif model_type == "fp16":
model, input, _ = get_fp16_model(torch_model_name)
torch_model_name = torch_model_name.replace("/", "_")
torch_model_dir = os.path.join(
WORKDIR, str(torch_model_name) + "_torch"
@@ -105,6 +83,12 @@ def save_torch_model(torch_model_list):
dir=torch_model_dir,
model_name=torch_model_name,
)
mlir_hash = create_hash(
os.path.join(
torch_model_dir, torch_model_name + "_torch" + ".mlir"
)
)
np.save(os.path.join(torch_model_dir, "hash"), np.array(mlir_hash))
# Generate torch dynamic models.
if is_dynamic:
mlir_importer.import_debug(
@@ -122,17 +106,6 @@ def save_tf_model(tf_model_list):
get_keras_model,
get_TFhf_model,
)
import tensorflow as tf
visible_default = tf.config.list_physical_devices("GPU")
try:
tf.config.set_visible_devices([], "GPU")
visible_devices = tf.config.get_visible_devices()
for device in visible_devices:
assert device.device_type != "GPU"
except:
# Invalid device or cannot modify virtual devices once initialized.
pass
with open(tf_model_list) as csvfile:
tf_reader = csv.reader(csvfile, delimiter=",")
@@ -156,13 +129,13 @@ def save_tf_model(tf_model_list):
tf_model_name = tf_model_name.replace("/", "_")
tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
os.makedirs(tf_model_dir, exist_ok=True)
mlir_importer = SharkImporter(
model,
inputs=input,
input,
frontend="tf",
)
mlir_importer.import_debug(
is_dynamic=False,
dir=tf_model_dir,
model_name=tf_model_name,
)
@@ -228,51 +201,51 @@ def is_valid_file(arg):
if __name__ == "__main__":
    # Note: all of these flags are overridden by the import of args from stable_args.py; flags are duplicated temporarily to preserve functionality
# parser = argparse.ArgumentParser()
# parser.add_argument(
# "--torch_model_csv",
# type=lambda x: is_valid_file(x),
# default="./tank/torch_model_list.csv",
# help="""Contains the file with torch_model name and args.
# Please see: https://github.com/nod-ai/SHARK/blob/main/tank/torch_model_list.csv""",
# )
# parser.add_argument(
# "--tf_model_csv",
# type=lambda x: is_valid_file(x),
# default="./tank/tf_model_list.csv",
# help="Contains the file with tf model name and args.",
# )
# parser.add_argument(
# "--tflite_model_csv",
# type=lambda x: is_valid_file(x),
# default="./tank/tflite/tflite_model_list.csv",
# help="Contains the file with tf model name and args.",
# )
# parser.add_argument(
# "--ci_tank_dir",
# type=bool,
# default=False,
# )
# parser.add_argument("--upload", type=bool, default=False)
parser = argparse.ArgumentParser()
parser.add_argument(
"--torch_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/torch_model_list.csv",
help="""Contains the file with torch_model name and args.
Please see: https://github.com/nod-ai/SHARK/blob/main/tank/torch_model_list.csv""",
)
parser.add_argument(
"--tf_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tf_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument(
"--tflite_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tflite/tflite_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument(
"--ci_tank_dir",
type=bool,
default=False,
)
parser.add_argument("--upload", type=bool, default=False)
# old_args = parser.parse_args()
args = parser.parse_args()
home = str(Path.home())
WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
torch_model_csv = os.path.join(
os.path.dirname(__file__), "tank", "torch_model_list.csv"
)
tf_model_csv = os.path.join(
os.path.dirname(__file__), "tank", "tf_model_list.csv"
)
tflite_model_csv = os.path.join(
os.path.dirname(__file__), "tank", "tflite", "tflite_model_list.csv"
)
if args.ci_tank_dir == True:
WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
else:
WORKDIR = os.path.join(home, ".local/shark_tank/")
save_torch_model(
os.path.join(os.path.dirname(__file__), "tank", "torch_sd_list.csv")
)
save_torch_model(torch_model_csv)
save_tf_model(tf_model_csv)
save_tflite_model(tflite_model_csv)
if args.torch_model_csv:
save_torch_model(args.torch_model_csv)
if args.tf_model_csv:
save_tf_model(args.tf_model_csv)
if args.tflite_model_csv:
save_tflite_model(args.tflite_model_csv)
if args.upload:
git_hash = sp.getoutput("git log -1 --format='%h'") + "/"
print("uploading files to gs://shark_tank/" + git_hash)
os.system(f"gsutil cp -r {WORKDIR}* gs://shark_tank/" + git_hash)

View File

@@ -1,78 +0,0 @@
# This script toggles commenting/uncommenting to work around the
# __file__ AttributeError that arises for a few modules in
# `torch/_dynamo/skipfiles.py` (within shark.venv)
from distutils.sysconfig import get_python_lib
import fileinput
from pathlib import Path
# Diffusers 0.13.1 fails with transformers __init__.py errors in BLIP. So remove it for now until we fork it
pix2pix_init = Path(get_python_lib() + "/diffusers/__init__.py")
for line in fileinput.input(pix2pix_init, inplace=True):
if "Pix2Pix" in line:
if not line.startswith("#"):
print(f"#{line}", end="")
else:
print(f"{line[1:]}", end="")
else:
print(line, end="")
pix2pix_init = Path(get_python_lib() + "/diffusers/pipelines/__init__.py")
for line in fileinput.input(pix2pix_init, inplace=True):
if "Pix2Pix" in line:
if not line.startswith("#"):
print(f"#{line}", end="")
else:
print(f"{line[1:]}", end="")
else:
print(line, end="")
pix2pix_init = Path(
get_python_lib() + "/diffusers/pipelines/stable_diffusion/__init__.py"
)
for line in fileinput.input(pix2pix_init, inplace=True):
if "StableDiffusionPix2PixZeroPipeline" in line:
if not line.startswith("#"):
print(f"#{line}", end="")
else:
print(f"{line[1:]}", end="")
else:
print(line, end="")
path_to_skipfiles = Path(get_python_lib() + "/torch/_dynamo/skipfiles.py")
modules_to_comment = ["abc,", "os,", "posixpath,", "_collections_abc,"]
startMonitoring = 0
for line in fileinput.input(path_to_skipfiles, inplace=True):
if "SKIP_DIRS = " in line:
startMonitoring = 1
print(line, end="")
elif startMonitoring in [1, 2]:
if "]" in line:
startMonitoring += 1
print(line, end="")
else:
flag = True
for module in modules_to_comment:
if module in line:
if not line.startswith("#"):
print(f"#{line}", end="")
else:
print(f"{line[1:]}", end="")
flag = False
break
if flag:
print(line, end="")
else:
print(line, end="")
# For getting around scikit-image's packaging, lazy_loader has had a patch merged but not yet released.
# Refer: https://github.com/scientific-python/lazy_loader
path_to_lazy_loader = Path(get_python_lib() + "/lazy_loader/__init__.py")
for line in fileinput.input(path_to_lazy_loader, inplace=True):
if 'stubfile = filename if filename.endswith("i")' in line:
print(
' stubfile = (filename if filename.endswith("i") else f"{os.path.splitext(filename)[0]}.pyi")',
end="",
)
else:
print(line, end="")

View File

@@ -10,8 +10,3 @@ requires = [
"iree-runtime>=20221022.190",
]
build-backend = "setuptools.build_meta"
[tool.black]
line-length = 79
include = '\.pyi?$'

View File

@@ -1,3 +1,3 @@
[pytest]
addopts = --verbose -p no:warnings
norecursedirs = inference tank/tflite examples benchmarks shark
norecursedirs = inference tank/tflite

View File

@@ -2,7 +2,7 @@
--pre
numpy
torch
torch==1.14.0.dev20221021
torchvision
tqdm
@@ -28,7 +28,6 @@ Pillow
# web dependencies.
gradio
altair
# Testing and support.
#lit

View File

@@ -1,10 +1,9 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
numpy>1.22.4
numpy==1.22.4
torch
torchvision
pytorch-triton
tabulate
tqdm
@@ -15,8 +14,8 @@ iree-tools-tf
# TensorFlow and JAX.
gin-config
tf-nightly
keras>=2.10
tensorflow==2.10
keras==2.10
#tf-models-nightly
#tensorflow-text-nightly
transformers
@@ -36,7 +35,6 @@ sacremoses
# web dependencies.
gradio
altair
scipy
#ONNX and ORT for benchmarking

View File

@@ -5,27 +5,10 @@ wheel
tqdm
# SHARK Downloader
google-cloud-storage
gsutil
# Testing
pytest
pytest-xdist
pytest-forked
Pillow
parameterized
# Add transformers, diffusers and scipy since they are most commonly used
transformers
diffusers
scipy
ftfy
gradio
altair
omegaconf
safetensors
opencv-python
scikit-image
# Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
pefile
pyinstaller

View File

@@ -2,12 +2,11 @@ from setuptools import find_packages
from setuptools import setup
import os
import glob
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.5"
PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
backend_deps = []
if "NO_BACKEND" in os.environ.keys():
backend_deps = [
@@ -35,7 +34,6 @@ setup(
],
packages=find_packages(exclude=("examples")),
python_requires=">=3.9",
data_files=glob.glob("apps/stable_diffusion/resources/**"),
install_requires=[
"numpy",
"PyYAML",

View File

@@ -1,54 +1,13 @@
<#
.SYNOPSIS
A script to update and install the SHARK runtime and its dependencies.
#Write-Host "Installing python"
.DESCRIPTION
This script updates and installs the SHARK runtime and its dependencies.
It checks the Python version installed and installs any required build
dependencies into a Python virtual environment.
If that environment does not exist, it creates it.
.PARAMETER update-src
git pulls latest version
#Start-Process winget install Python.Python.3.10 '/quiet InstallAllUsers=1 PrependPath=1' -wait -NoNewWindow
.PARAMETER force
removes and recreates venv to force update of all dependencies
.EXAMPLE
.\setup_venv.ps1 --force
#Write-Host "python installation completed successfully"
.EXAMPLE
.\setup_venv.ps1 --update-src
#Write-Host "Reload environment variables"
#$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
#Write-Host "Reloaded environment variables"
.INPUTS
None
.OUTPUTS
None
#>
param([string]$arguments)
if ($arguments -eq "--update-src"){
git pull
}
if ($arguments -eq "--force"){
if (Test-Path env:VIRTUAL_ENV) {
Write-Host "deactivating..."
Deactivate
}
if (Test-Path .\shark.venv\) {
Write-Host "removing and recreating venv..."
Remove-Item .\shark.venv -Force -Recurse
if (Test-Path .\shark.venv\) {
Write-Host 'could not remove .\shark.venv - please try running ".\setup_venv.ps1 --force" again!'
break
}
}
}
# redirect stderr into stdout
$p = &{python -V} 2>&1
@@ -60,38 +19,22 @@ $version = if($p -is [System.Management.Automation.ErrorRecord])
}
else
{
# otherwise return complete Python list
$ErrorActionPreference = 'SilentlyContinue'
$PyVer = py --list
# otherwise return as is
$p
}
# deactivate any activated venvs
if ($PyVer -like "*venv*")
{
deactivate # make sure we don't update the wrong venv
$PyVer = py --list # update list
}
Write-Host "Python version found is"
Write-Host $p
Write-Host "Python versions found are"
Write-Host ($PyVer | Out-String) # formatted output with line breaks
if (!($PyVer.length -ne 0)) {$p} # return Python --version String if py.exe is unavailable
if (!($PyVer -like "*3.11*") -and !($p -like "*3.11*")) # if 3.11 is not in any list
{
Write-Host "Please install Python 3.11 and try again"
break
}
Write-Host "Installing Build Dependencies"
# make sure we really use 3.11 from list, even if it's not the default.
if (!($PyVer.length -ne 0)) {py -3.11 -m venv .\shark.venv\}
else {python -m venv .\shark.venv\}
python -m venv .\shark.venv\
.\shark.venv\Scripts\activate
python -m pip install --upgrade pip
pip install wheel
pip install -r requirements.txt
pip install --pre torch-mlir torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/
pip install --pre torch-mlir torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116 -f https://llvm.github.io/torch-mlir/package-index/
pip install --upgrade -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html iree-compiler iree-runtime
Write-Host "Building SHARK..."
pip install -e . -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
pip install diffusers transformers scipy pillow gradio
Write-Host "Build and installation completed successfully"
Write-Host "Source your venv with ./shark.venv/Scripts/activate"

View File

@@ -42,7 +42,7 @@ Green=`tput setaf 2`
Yellow=`tput setaf 3`
# Assume no binary torch-mlir.
# Currently available for macOS m1&intel (3.11) and Linux(3.8,3.10,3.11)
# Currently available for macOS m1&intel (3.10) and Linux(3.7,3.8,3.9,3.10)
torch_mlir_bin=false
if [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}Apple macOS detected"
@@ -60,12 +60,12 @@ if [[ $(uname -s) = 'Darwin' ]]; then
fi
echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
if [ "$PYTHON_VERSION_X_Y" == "3.11" ]; then
if [ "$PYTHON_VERSION_X_Y" == "3.10" ]; then
torch_mlir_bin=true
fi
elif [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected"
if [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] || [ "$PYTHON_VERSION_X_Y" == "3.11" ] ; then
if [ "$PYTHON_VERSION_X_Y" == "3.7" ] || [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.9" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] ; then
torch_mlir_bin=true
fi
else
@@ -77,8 +77,7 @@ $PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
if [ "$torch_mlir_bin" = true ]; then
if [[ $(uname -s) = 'Darwin' ]]; then
echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
$PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
echo "MacOS detected. Please install torch-mlir from source or .whl, as dependency problems may occur otherwise."
else
$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
if [ $? -eq 0 ];then
@@ -89,51 +88,44 @@ if [ "$torch_mlir_bin" = true ]; then
fi
else
echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
echo "${Yello}Python 3.11 supported on macOS and 3.8,3.10 and 3.11 on Linux"
echo "${Yello}Python 3.10 supported on macOS and 3.7,3.8,3.9 and 3.10 on Linux"
echo "${Red}Please build torch-mlir from source in your environment"
exit 1
fi
if [[ -z "${USE_IREE}" ]]; then
rm .use-iree
RUNTIME="https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html"
else
touch ./.use-iree
RUNTIME="https://openxla.github.io/iree/pip-release-links.html"
RUNTIME="https://iree-org.github.io/iree/pip-release-links.html"
fi
if [[ -z "${NO_BACKEND}" ]]; then
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --pre --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
$PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
else
echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
fi
if [[ ! -z "${IMPORTER}" ]]; then
echo "${Yellow}Installing importer tools.."
if [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected.. installing Linux importer tools"
#Always get the importer tools from upstream IREE
$PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer.txt" -f https://openxla.github.io/iree/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://iree-org.github.io/iree/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
elif [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}macOS detected.. installing macOS importer tools"
#Conda seems to have some problems installing these packages; we hope they get resolved upstream.
$PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
$PYTHON -m pip install https://github.com/llvm/torch-mlir/releases/download/snapshot-20221024.636/torch_mlir-20221024.636-cp310-cp310-macosx_11_0_universal2.whl
fi
fi
$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f https://download.pytorch.org/whl/nightly/torch/
$PYTHON -m pip install -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME}
if [[ $(uname -s) = 'Linux' && ! -z "${BENCHMARK}" ]]; then
T_VER=$($PYTHON -m pip show torch | grep Version)
TORCH_VERSION=${T_VER:9:17}
TV_VER=$($PYTHON -m pip show torchvision | grep Version)
TV_VERSION=${TV_VER:9:18}
$PYTHON -m pip uninstall -y torch torchvision
$PYTHON -m pip install -U --pre --no-warn-conflicts triton
$PYTHON -m pip install --no-deps https://download.pytorch.org/whl/nightly/cu117/torch-${TORCH_VERSION}%2Bcu117-cp311-cp311-linux_x86_64.whl https://download.pytorch.org/whl/nightly/cu117/torchvision-${TV_VERSION}%2Bcu117-cp311-cp311-linux_x86_64.whl
$PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
if [ $? -eq 0 ];then
echo "Successfully Installed torch + cu117."
echo "Successfully Installed torch + cu116."
else
echo "Could not install torch + cu117." >&2
echo "Could not install torch + cu116." >&2
fi
fi

View File

@@ -1,6 +1,6 @@
import torchdynamo
import torch
import torch_mlir
import torch._dynamo as torchdynamo
from shark.sharkdynamo.utils import make_shark_compiler

View File

@@ -36,9 +36,7 @@
" from torchdynamo.optimizations.backends import create_backend\n",
" from torchdynamo.optimizations.subgraph import SubGraph\n",
"except ModuleNotFoundError:\n",
" print(\n",
" \"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\"\n",
" )\n",
" print(\"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\")\n",
" exit()\n",
"\n",
"# torch-mlir imports for compiling\n",
@@ -99,9 +97,7 @@
"\n",
" for node in fx_g.graph.nodes:\n",
" if node.op == \"output\":\n",
" assert (\n",
" len(node.args) == 1\n",
" ), \"Output node must have a single argument\"\n",
" assert len(node.args) == 1, \"Output node must have a single argument\"\n",
" node_arg = node.args[0]\n",
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
" node.args = (node_arg[0],)\n",
@@ -120,12 +116,8 @@
" if len(args) == 1 and isinstance(args[0], list):\n",
" args = args[0]\n",
"\n",
" linalg_module = compile(\n",
" ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS\n",
" )\n",
" callable, _ = get_iree_compiled_module(\n",
" linalg_module, \"cuda\", func_name=\"forward\"\n",
" )\n",
" linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)\n",
" callable, _ = get_iree_compiled_module(linalg_module, \"cuda\", func_name=\"forward\")\n",
"\n",
" def forward(*inputs):\n",
" return callable(*inputs)\n",
@@ -220,7 +212,6 @@
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
"\n",
"\n",
"@torchdynamo.optimize(\"torch_mlir\")\n",
"def toy_example2(*args):\n",
" a, b = args\n",

View File

@@ -22,7 +22,7 @@ class CLIPModule(tf.Module):
input_ids=x, attention_mask=y, pixel_values=z
)
@tf.function(input_signature=clip_vit_inputs, jit_compile=True)
@tf.function(input_signature=clip_vit_inputs)
def forward(self, input_ids, attention_mask, pixel_values):
return self.m.predict(
input_ids, attention_mask, pixel_values

View File

@@ -1,15 +0,0 @@
## Running ESRGAN
```
1. pip install numpy opencv-python
2. mkdir InputImages
(all input images go here)
3. mkdir OutputImages
(the model writes its generated images here)
4. mkdir models
(save the .pth checkpoint file here)
5. python esrgan.py
```
- Download [RRDB_ESRGAN_x4.pth](https://drive.google.com/drive/u/0/folders/17VYV_SoZZesU6mbxz2dMAIccSSlqLecY) and place it in the `models` directory as mentioned above in step 4.
- Credits : [ESRGAN](https://github.com/xinntao/ESRGAN)

View File

@@ -1,239 +0,0 @@
import os.path as osp
import glob
import cv2
import numpy as np
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference
import torch_mlir
import tempfile
import functools
import torch.nn as nn
import torch.nn.functional as F
def make_layer(block, n_layers):
layers = []
for _ in range(n_layers):
layers.append(block())
return nn.Sequential(*layers)
class ResidualDenseBlock_5C(nn.Module):
def __init__(self, nf=64, gc=32, bias=True):
super(ResidualDenseBlock_5C, self).__init__()
# gc: growth channel, i.e. intermediate channels
self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
# initialization
# mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
def forward(self, x):
x1 = self.lrelu(self.conv1(x))
x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
return x5 * 0.2 + x
class RRDB(nn.Module):
"""Residual in Residual Dense Block"""
def __init__(self, nf, gc=32):
super(RRDB, self).__init__()
self.RDB1 = ResidualDenseBlock_5C(nf, gc)
self.RDB2 = ResidualDenseBlock_5C(nf, gc)
self.RDB3 = ResidualDenseBlock_5C(nf, gc)
def forward(self, x):
out = self.RDB1(x)
out = self.RDB2(out)
out = self.RDB3(out)
return out * 0.2 + x
class RRDBNet(nn.Module):
def __init__(self, in_nc, out_nc, nf, nb, gc=32):
super(RRDBNet, self).__init__()
RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
self.RRDB_trunk = make_layer(RRDB_block_f, nb)
self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
#### upsampling
self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
def forward(self, x):
fea = self.conv_first(x)
trunk = self.trunk_conv(self.RRDB_trunk(fea))
fea = fea + trunk
fea = self.lrelu(
self.upconv1(F.interpolate(fea, scale_factor=2, mode="nearest"))
)
fea = self.lrelu(
self.upconv2(F.interpolate(fea, scale_factor=2, mode="nearest"))
)
out = self.conv_last(self.lrelu(self.HRconv(fea)))
return out
############### Parsing args #####################
import argparse
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
p.add_argument("--device", type=str, default="cpu", help="the device to use")
p.add_argument(
"--mlir_loc",
type=str,
default=None,
help="location of the model's mlir file",
)
args = p.parse_args()
###################################################
def inference(input_m):
return model(input_m)
def load_mlir(mlir_loc):
import os
if mlir_loc is None:
return None
print(f"Trying to load the model from {mlir_loc}.")
with open(os.path.join(mlir_loc)) as f:
mlir_module = f.read()
return mlir_module
def compile_through_fx(model, inputs, mlir_loc=None):
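# If a saved .mlir file is supplied, skip tracing entirely; otherwise trace
# the model with make_fx (decomposing unsupported ops), strip op overloads,
# script to TorchScript, and lower through torch-mlir to linalg before
# wrapping the result in a SharkInference module.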
module = load_mlir(mlir_loc)
if module is None:
fx_g = make_fx(
model,
decomposition_table=get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
torch.ops.aten.norm.ScalarOpt_dim,
torch.ops.aten.native_group_norm,
torch.ops.aten.upsample_bilinear2d.vec,
torch.ops.aten.split.Tensor,
torch.ops.aten.split_with_sizes,
]
),
)(inputs)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
def strip_overloads(gm):
"""
Modifies the target of graph nodes in :attr:`gm` to strip overloads.
Args:
gm(fx.GraphModule): The input Fx graph module to be modified
"""
for node in gm.graph.nodes:
if isinstance(node.target, torch._ops.OpOverload):
node.target = node.target.overloadpacket
gm.recompile()
strip_overloads(fx_g)
ts_g = torch.jit.script(fx_g)
print("Torchscript graph generated successfully")
module = torch_mlir.compile(
ts_g,
inputs,
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
mlir_model = str(module)
func_name = "forward"
shark_module = SharkInference(
mlir_model, func_name, device=args.device, mlir_dialect="linalg"
)
shark_module.compile()
return shark_module
model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
# device = torch.device("cuda")  # to run on GPU, change "cpu" -> "cuda" below
device = torch.device("cpu")
test_img_folder = "InputImages/*"
model = RRDBNet(3, 3, 64, 23, gc=32)
model.load_state_dict(torch.load(model_path), strict=True)
model.eval()
model = model.to(device)
print("Model path {:s}. \nTesting...".format(model_path))
if __name__ == "__main__":
idx = 0
for path in glob.glob(test_img_folder):
idx += 1
base = osp.splitext(osp.basename(path))[0]
print(idx, base)
# read images
img = cv2.imread(path, cv2.IMREAD_COLOR)
img = img * 1.0 / 255
img = torch.from_numpy(
np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))
).float()
img_LR = img.unsqueeze(0)
img_LR = img_LR.to(device)
with torch.no_grad():
shark_module = compile_through_fx(inference, img_LR, args.mlir_loc)
shark_output = shark_module.forward((img_LR,))
shark_output = torch.from_numpy(shark_output)
shark_output = (
shark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
)
esrgan_output = (
model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
)
# SHARK OUTPUT
shark_output = np.transpose(shark_output[[2, 1, 0], :, :], (1, 2, 0))
shark_output = (shark_output * 255.0).round()
cv2.imwrite(
"OutputImages/{:s}_rlt_shark_output.png".format(base), shark_output
)
print("Generated SHARK's output")
# ESRGAN OUTPUT
esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
esrgan_output = (esrgan_output * 255.0).round()
cv2.imwrite(
"OutputImages/{:s}_rlt_esrgan_output.png".format(base),
esrgan_output,
)
print("Generated ESRGAN's output")

View File

@@ -28,7 +28,7 @@ class AlbertModule(tf.Module):
         self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
         self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
-    @tf.function(input_signature=t5_inputs, jit_compile=True)
+    @tf.function(input_signature=t5_inputs)
     def forward(self, input_ids, attention_mask):
         return self.m.predict(input_ids, attention_mask)

View File

@@ -1,9 +1,7 @@
 from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
+from shark.shark_downloader import download_torch_model
-mlir_model, func_name, inputs, golden_out = download_model(
-    "bloom", frontend="torch"
-)
+mlir_model, func_name, inputs, golden_out = download_torch_model("bloom")
 shark_module = SharkInference(
     mlir_model, func_name, device="cpu", mlir_dialect="tm_tensor"

View File

@@ -19,7 +19,7 @@ class GPT2Module(tf.Module):
         self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
-    @tf.function(input_signature=gpt2_inputs, jit_compile=True)
+    @tf.function(input_signature=gpt2_inputs)
     def forward(self, input_ids, attention_mask):
         return self.m.predict(input_ids, attention_mask)

View File

@@ -1,18 +0,0 @@
# SHARK LLaMA
## TORCH-MLIR Version
```
https://github.com/nod-ai/torch-mlir.git
```
Check out the `complex` branch, run `git submodule update --init`, and then build with `.\build_tools\python_deploy\build_windows.ps1`. Assuming a fresh clone, the full sequence is:
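```
git clone https://github.com/nod-ai/torch-mlir.git
cd torch-mlir
git checkout complex
git submodule update --init
.\build_tools\python_deploy\build_windows.ps1
```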
### Setup & Run
```
git clone https://github.com/nod-ai/llama.git
```
Then in this repository
```
pip install -e .
python llama/shark_model.py
```

View File

@@ -26,7 +26,7 @@ class BertModule(tf.Module):
             input_ids=x, attention_mask=y, token_type_ids=z, training=False
         )
-    @tf.function(input_signature=bert_input, jit_compile=True)
+    @tf.function(input_signature=bert_input)
     def forward(self, input_ids, attention_mask, token_type_ids):
         return self.m.predict(input_ids, attention_mask, token_type_ids)

View File

@@ -1,10 +1,9 @@
 from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
+from shark.shark_downloader import download_torch_model
-mlir_model, func_name, inputs, golden_out = download_model(
-    "microsoft/MiniLM-L12-H384-uncased",
-    frontend="torch",
+mlir_model, func_name, inputs, golden_out = download_torch_model(
+    "microsoft/MiniLM-L12-H384-uncased"
 )

View File

@@ -26,7 +26,7 @@ class BertModule(tf.Module):
             input_ids=x, attention_mask=y, token_type_ids=z, training=False
         )
-    @tf.function(input_signature=bert_input, jit_compile=True)
+    @tf.function(input_signature=bert_input)
     def forward(self, input_ids, attention_mask, token_type_ids):
         return self.m.predict(input_ids, attention_mask, token_type_ids)

View File

@@ -5,7 +5,7 @@ import torchvision.models as models
 from torchvision import transforms
 import sys
 from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
+from shark.shark_downloader import download_torch_model
 ################################## Preprocessing inputs and model ############
@@ -66,9 +66,7 @@ labels = load_labels()
 ## Can pass any img or input to the forward module.
-mlir_model, func_name, inputs, golden_out = download_model(
-    "resnet50", frontend="torch"
-)
+mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
 shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
 shark_module.compile()

View File

@@ -1,842 +0,0 @@
####################################################################################
# Please make sure you have transformers 4.21.2 installed before running this demo
#
# -p --model_path: the directory in which you want to store the bloom files.
# -dl --device_list: the list of device indices you want to use. If you want to use only the first device, or you are running on CPU, leave this blank.
#              Otherwise, please give this argument in this format: "[0, 1, 2]"
# -de --device: the device you want to run bloom on, e.g. cpu or cuda.
# -c, --recompile: set to true if you want to recompile to vmfb.
# -d, --download: set to true if you want to redownload the mlir files.
# -cm, --create_mlirs: set to true if you want to create the mlir files from scratch. Please make sure you have transformers 4.21.2 before using this option.
# -t --token_count: the number of tokens you want to generate.
# -pr --prompt: the prompt you want to feed to the model.
# -m --model_name: the name of the model, e.g. bloom-560m.
#
# If you don't specify a prompt when you run this example, you will be able to give prompts through the terminal. Run the
# example in this way if you want to run multiple examples without reinitializing the model
#####################################################################################
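# Example invocation (the script file name below is illustrative; substitute
# the actual name of this file in your checkout):
#   python sharded_bloom.py -p ./bloom-560m -m bloom-560m -d True -de cpu -t 10 -pr "Hello"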
import os
import io
import torch
import torch.nn as nn
from collections import OrderedDict
import torch_mlir
from torch_mlir import TensorPlaceholder
import re
from transformers.models.bloom.configuration_bloom import BloomConfig
import json
import sys
import argparse
import json
import urllib.request
import subprocess
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_public_file
from transformers import (
BloomTokenizerFast,
BloomForSequenceClassification,
BloomForCausalLM,
)
from transformers.models.bloom.modeling_bloom import (
BloomBlock,
build_alibi_tensor,
)
IS_CUDA = False
class ShardedBloom:
def __init__(self, src_folder):
f = open(f"{src_folder}/config.json")
config = json.load(f)
f.close()
self.layers_initialized = False
self.src_folder = src_folder
try:
self.n_embed = config["n_embed"]
except KeyError:
self.n_embed = config["hidden_size"]
self.vocab_size = config["vocab_size"]
self.n_layer = config["n_layer"]
try:
self.n_head = config["num_attention_heads"]
except KeyError:
self.n_head = config["n_head"]
def _init_layer(self, layer_name, device, replace, device_idx):
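# Compile the layer's .mlir to a .vmfb only when a recompile is requested or
# no cached .vmfb exists; otherwise build an empty SharkInference shell whose
# module is loaded later from the cached .vmfb in load_layers().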
if replace or not os.path.exists(
f"{self.src_folder}/{layer_name}.vmfb"
):
f_ = open(f"{self.src_folder}/{layer_name}.mlir", encoding="utf-8")
module = f_.read()
f_.close()
module = bytes(module, "utf-8")
shark_module = SharkInference(
module,
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
shark_module.save_module(
module_name=f"{self.src_folder}/{layer_name}",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
else:
shark_module = SharkInference(
"",
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
return shark_module
def init_layers(self, device, replace=False, device_idx=[0]):
if device_idx is not None:
n_devices = len(device_idx)
self.word_embeddings_module = self._init_layer(
"word_embeddings",
device,
replace,
device_idx if device_idx is None else device_idx[0 % n_devices],
)
self.word_embeddings_layernorm_module = self._init_layer(
"word_embeddings_layernorm",
device,
replace,
device_idx if device_idx is None else device_idx[1 % n_devices],
)
self.ln_f_module = self._init_layer(
"ln_f",
device,
replace,
device_idx if device_idx is None else device_idx[2 % n_devices],
)
self.lm_head_module = self._init_layer(
"lm_head",
device,
replace,
device_idx if device_idx is None else device_idx[3 % n_devices],
)
self.block_modules = [
self._init_layer(
f"bloom_block_{i}",
device,
replace,
device_idx
if device_idx is None
else device_idx[(i + 4) % n_devices],
)
for i in range(self.n_layer)
]
self.layers_initialized = True
def load_layers(self):
assert self.layers_initialized
self.word_embeddings_module.load_module(
f"{self.src_folder}/word_embeddings.vmfb"
)
self.word_embeddings_layernorm_module.load_module(
f"{self.src_folder}/word_embeddings_layernorm.vmfb"
)
for block_module, i in zip(self.block_modules, range(self.n_layer)):
block_module.load_module(f"{self.src_folder}/bloom_block_{i}.vmfb")
self.ln_f_module.load_module(f"{self.src_folder}/ln_f.vmfb")
self.lm_head_module.load_module(f"{self.src_folder}/lm_head.vmfb")
def forward_pass(self, input_ids, device):
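# Each stage below is a separately compiled SHARK module, possibly pinned to
# a different device index; activations are handed between stages as host
# tensors (numpy in, torch out).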
if IS_CUDA:
cudaSetDevice(self.word_embeddings_module.device_idx)
input_embeds = self.word_embeddings_module(
inputs=(input_ids,), function_name="forward"
)
input_embeds = torch.tensor(input_embeds).float()
if IS_CUDA:
cudaSetDevice(self.word_embeddings_layernorm_module.device_idx)
hidden_states = self.word_embeddings_layernorm_module(
inputs=(input_embeds,), function_name="forward"
)
hidden_states = torch.tensor(hidden_states).float()
attention_mask = torch.ones(
[hidden_states.shape[0], len(input_ids[0])]
)
alibi = build_alibi_tensor(
attention_mask,
self.n_head,
hidden_states.dtype,
hidden_states.device,
)
causal_mask = _prepare_attn_mask(
attention_mask, input_ids.size(), input_embeds, 0
)
causal_mask = torch.tensor(causal_mask).float()
presents = ()
all_hidden_states = (hidden_states,)
for block_module, i in zip(self.block_modules, range(self.n_layer)):
if IS_CUDA:
cudaSetDevice(block_module.device_idx)
output = block_module(
inputs=(
hidden_states.detach().numpy(),
alibi.detach().numpy(),
causal_mask.detach().numpy(),
),
function_name="forward",
)
hidden_states = torch.tensor(output[0]).float()
all_hidden_states = all_hidden_states + (hidden_states,)
presents = presents + (
tuple(
(
output[1],
output[2],
)
),
)
if IS_CUDA:
cudaSetDevice(self.ln_f_module.device_idx)
hidden_states = self.ln_f_module(
inputs=(hidden_states,), function_name="forward"
)
if IS_CUDA:
cudaSetDevice(self.lm_head_module.device_idx)
logits = self.lm_head_module(
inputs=(hidden_states,), function_name="forward"
)
logits = torch.tensor(logits).float()
return torch.argmax(logits[:, -1, :], dim=-1)
def _make_causal_mask(
input_ids_shape: torch.Size,
dtype: torch.dtype,
past_key_values_length: int = 0,
):
"""
Make the causal mask used for unidirectional (autoregressive) self-attention.
"""
batch_size, target_length = input_ids_shape
mask = torch.full((target_length, target_length), torch.finfo(dtype).min)
mask_cond = torch.arange(mask.size(-1))
intermediate_mask = mask_cond < (mask_cond + 1).view(mask.size(-1), 1)
mask.masked_fill_(intermediate_mask, 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat(
[
torch.zeros(
target_length, past_key_values_length, dtype=dtype
),
mask,
],
dim=-1,
)
expanded_mask = mask[None, None, :, :].expand(
batch_size, 1, target_length, target_length + past_key_values_length
)
return expanded_mask
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: int = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
batch_size, source_length = mask.size()
tgt_len = tgt_len if tgt_len is not None else source_length
expanded_mask = (
mask[:, None, None, :]
.expand(batch_size, 1, tgt_len, source_length)
.to(dtype)
)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(
inverted_mask.to(torch.bool), torch.finfo(dtype).min
)
def _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
past_key_values_length=past_key_values_length,
).to(attention_mask.device)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(
attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
)
combined_attention_mask = (
expanded_attn_mask
if combined_attention_mask is None
else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
def download_model(destination_folder, model_name):
download_public_file(
f"gs://shark_tank/sharded_bloom/{model_name}/", destination_folder
)
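# dynamic_axes=[1] in the compile_* helpers below marks the sequence
# dimension as dynamic, so the compiled modules accept prompts of any length.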
def compile_embeddings(embeddings_layer, input_ids, path):
input_ids_placeholder = torch_mlir.TensorPlaceholder.like(
input_ids, dynamic_axes=[1]
)
module = torch_mlir.compile(
embeddings_layer,
(input_ids_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_word_embeddings_layernorm(
embeddings_layer_layernorm, embeds, path
):
embeds_placeholder = torch_mlir.TensorPlaceholder.like(
embeds, dynamic_axes=[1]
)
module = torch_mlir.compile(
embeddings_layer_layernorm,
(embeds_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def strip_overloads(gm):
"""
Modifies the target of graph nodes in :attr:`gm` to strip overloads.
Args:
gm(fx.GraphModule): The input Fx graph module to be modified
"""
for node in gm.graph.nodes:
if isinstance(node.target, torch._ops.OpOverload):
node.target = node.target.overloadpacket
gm.recompile()
def compile_to_mlir(
bblock,
hidden_states,
layer_past=None,
attention_mask=None,
head_mask=None,
use_cache=None,
output_attentions=False,
alibi=None,
block_index=0,
path=".",
):
fx_g = make_fx(
bblock,
decomposition_table=get_decompositions(
[
torch.ops.aten.split.Tensor,
torch.ops.aten.split_with_sizes,
]
),
tracing_mode="real",
_allow_non_fake_inputs=False,
)(hidden_states, alibi, attention_mask)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
strip_overloads(fx_g)
hidden_states_placeholder = TensorPlaceholder.like(
hidden_states, dynamic_axes=[1]
)
attention_mask_placeholder = TensorPlaceholder.like(
attention_mask, dynamic_axes=[2, 3]
)
alibi_placeholder = TensorPlaceholder.like(alibi, dynamic_axes=[2])
ts_g = torch.jit.script(fx_g)
module = torch_mlir.compile(
ts_g,
(
hidden_states_placeholder,
alibi_placeholder,
attention_mask_placeholder,
),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
module_placeholder = module
module_context = module_placeholder.context
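# The block was traced at a fixed sequence length of 17 (see create_mlirs),
# so the emitted MLIR is post-processed textually: check_valid_line drops
# lines that would not re-parse (private symbols, module attributes), and
# remove_constant_dim rewrites the hard-coded 17 into a dynamic dimension.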
def check_valid_line(line, line_n, mlir_file_len):
if "private" in line:
return False
if "attributes" in line:
return False
if mlir_file_len - line_n == 2:
return False
return True
mlir_file_len = len(str(module).split("\n"))
def remove_constant_dim(line):
if "17x" in line:
line = re.sub("17x", "?x", line)
line = re.sub("tensor.empty\(\)", "tensor.empty(%dim)", line)
if "tensor.empty" in line and "?x?" in line:
line = re.sub(
"tensor.empty\(%dim\)", "tensor.empty(%dim, %dim)", line
)
if "arith.cmpi eq" in line:
line = re.sub("c17", "dim", line)
if " 17," in line:
line = re.sub(" 17,", " %dim,", line)
return line
module = "\n".join(
[
remove_constant_dim(line)
for line, line_n in zip(
str(module).split("\n"), range(mlir_file_len)
)
if check_valid_line(line, line_n, mlir_file_len)
]
)
module = module_placeholder.parse(module, context=module_context)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_ln_f(ln_f, hidden_layers, path):
hidden_layers_placeholder = torch_mlir.TensorPlaceholder.like(
hidden_layers, dynamic_axes=[1]
)
module = torch_mlir.compile(
ln_f,
(hidden_layers_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_lm_head(lm_head, hidden_layers, path):
hidden_layers_placeholder = torch_mlir.TensorPlaceholder.like(
hidden_layers, dynamic_axes=[1]
)
module = torch_mlir.compile(
lm_head,
(hidden_layers_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def create_mlirs(destination_folder, model_name):
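# Export each pipeline stage of the HF BLOOM model to its own MLIR file:
# word embeddings, embedding layernorm, every BloomBlock, ln_f, and lm_head.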
model_config = "bigscience/" + model_name
sample_input_ids = torch.ones([1, 17], dtype=torch.int64)
urllib.request.urlretrieve(
f"https://huggingface.co/bigscience/{model_name}/resolve/main/config.json",
filename=f"{destination_folder}/config.json",
)
urllib.request.urlretrieve(
f"https://huggingface.co/bigscience/bloom/resolve/main/tokenizer.json",
filename=f"{destination_folder}/tokenizer.json",
)
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = BloomForCausalLM.from_pretrained(model_config)
def forward(self, tokens):
return self.model.forward(tokens)[0]
class HuggingFaceBlock(torch.nn.Module):
def __init__(self, block):
super().__init__()
self.model = block
def forward(self, tokens, alibi, attention_mask):
output = self.model(
hidden_states=tokens,
alibi=alibi,
attention_mask=attention_mask,
use_cache=True,
output_attentions=False,
)
return (output[0], output[1][0], output[1][1])
model = HuggingFaceLanguage()
compile_embeddings(
model.model.transformer.word_embeddings,
sample_input_ids,
f"{destination_folder}/word_embeddings.mlir",
)
inputs_embeds = model.model.transformer.word_embeddings(sample_input_ids)
compile_word_embeddings_layernorm(
model.model.transformer.word_embeddings_layernorm,
inputs_embeds,
f"{destination_folder}/word_embeddings_layernorm.mlir",
)
hidden_states = model.model.transformer.word_embeddings_layernorm(
inputs_embeds
)
input_shape = sample_input_ids.size()
current_sequence_length = hidden_states.shape[1]
past_key_values_length = 0
past_key_values = tuple([None] * len(model.model.transformer.h))
attention_mask = torch.ones(
(hidden_states.shape[0], current_sequence_length), device="cpu"
)
alibi = build_alibi_tensor(
attention_mask,
model.model.transformer.n_head,
hidden_states.dtype,
"cpu",
)
causal_mask = _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
)
head_mask = model.model.transformer.get_head_mask(
None, model.model.transformer.config.n_layer
)
output_attentions = model.model.transformer.config.output_attentions
all_hidden_states = ()
for i, (block, layer_past) in enumerate(
zip(model.model.transformer.h, past_key_values)
):
all_hidden_states = all_hidden_states + (hidden_states,)
proxy_model = HuggingFaceBlock(block)
compile_to_mlir(
proxy_model,
hidden_states,
layer_past=layer_past,
attention_mask=causal_mask,
head_mask=head_mask[i],
use_cache=True,
output_attentions=output_attentions,
alibi=alibi,
block_index=i,
path=f"{destination_folder}/bloom_block_{i}.mlir",
)
compile_ln_f(
model.model.transformer.ln_f,
hidden_states,
f"{destination_folder}/ln_f.mlir",
)
hidden_states = model.model.transformer.ln_f(hidden_states)
compile_lm_head(
model.model.lm_head,
hidden_states,
f"{destination_folder}/lm_head.mlir",
)
def run_large_model(
token_count,
recompile,
model_path,
prompt,
device_list,
script_path,
device,
):
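# Memory-efficient path: each pipeline stage runs in its own subprocess so
# only one layer's weights are resident at a time; the prompt and the
# intermediate hidden states are exchanged through files in model_path.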
f = open(f"{model_path}/prompt.txt", "w+")
f.write(prompt)
f.close()
for i in range(token_count):
if i == 0:
will_compile = recompile
else:
will_compile = False
f = open(f"{model_path}/prompt.txt", "r")
prompt = f.read()
f.close()
subprocess.run(
[
"python",
script_path,
model_path,
"start",
str(will_compile),
"cpu",
"None",
prompt,
]
)
for i in range(config["n_layer"]):
if device_list is not None:
device_idx = str(device_list[i % len(device_list)])
else:
device_idx = "None"
subprocess.run(
[
"python",
script_path,
model_path,
str(i),
str(will_compile),
device,
device_idx,
prompt,
]
)
subprocess.run(
[
"python",
script_path,
model_path,
"end",
str(will_compile),
"cpu",
"None",
prompt,
]
)
f = open(f"{model_path}/prompt.txt", "r")
output = f.read()
f.close()
print(output)
if __name__ == "__main__":
def str2bool(v):
    # argparse's type=bool treats any non-empty string (including "False")
    # as True, so parse boolean flag values explicitly instead.
    return str(v).lower().strip() in ("true", "1", "yes")
parser = argparse.ArgumentParser(prog="Bloom-560m")
parser.add_argument("-p", "--model_path")
parser.add_argument("-dl", "--device_list", default=None)
parser.add_argument("-de", "--device", default="cpu")
parser.add_argument("-c", "--recompile", default=False, type=str2bool)
parser.add_argument("-d", "--download", default=False, type=str2bool)
parser.add_argument("-t", "--token_count", default=10, type=int)
parser.add_argument("-m", "--model_name", default="bloom-560m")
parser.add_argument("-cm", "--create_mlirs", default=False, type=str2bool)
parser.add_argument(
    "-lm", "--large_model_memory_efficient", default=False, type=str2bool
)
parser.add_argument(
"-pr",
"--prompt",
default=None,
)
args = parser.parse_args()
if args.create_mlirs and args.large_model_memory_efficient:
print(
"Warning: If you need to use memory efficient mode, you probably want to use 'download' instead"
)
if not os.path.isdir(args.model_path):
os.mkdir(args.model_path)
if args.device_list is not None:
args.device_list = json.loads(args.device_list)
if args.device == "cuda" and args.device_list is not None:
IS_CUDA = True
from cuda.cudart import cudaSetDevice
if args.download and args.create_mlirs:
print(
"WARNING: It is not advised to turn on both download and create_mlirs"
)
if args.download:
download_model(args.model_path, args.model_name)
if args.create_mlirs:
create_mlirs(args.model_path, args.model_name)
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
tokenizer = AutoTokenizer.from_pretrained(args.model_path)
if args.prompt is not None:
input_ids = tokenizer.encode(args.prompt, return_tensors="pt")
if args.large_model_memory_efficient:
f = open(f"{args.model_path}/config.json")
config = json.load(f)
f.close()
self_path = os.path.dirname(os.path.abspath(__file__))
script_path = os.path.join(self_path, "sharded_bloom_large_models.py")
if args.prompt is not None:
run_large_model(
args.token_count,
args.recompile,
args.model_path,
args.prompt,
args.device_list,
script_path,
args.device,
)
else:
while True:
prompt = input("Enter Prompt: ")
try:
token_count = int(
input("Enter number of tokens you want to generate: ")
)
except ValueError:
print(
"Invalid integer entered. Using default value of 10"
)
token_count = 10
run_large_model(
token_count,
args.recompile,
args.model_path,
prompt,
args.device_list,
script_path,
args.device,
)
else:
shardedbloom = ShardedBloom(args.model_path)
shardedbloom.init_layers(
device=args.device,
replace=args.recompile,
device_idx=args.device_list,
)
shardedbloom.load_layers()
if args.prompt is not None:
for _ in range(args.token_count):
next_token = shardedbloom.forward_pass(
torch.tensor(input_ids), device=args.device
)
input_ids = torch.cat(
[input_ids, next_token.unsqueeze(-1)], dim=-1
)
print(tokenizer.decode(input_ids.squeeze()))
else:
while True:
prompt = input("Enter Prompt: ")
try:
token_count = int(
input("Enter number of tokens you want to generate: ")
)
except ValueError:
print(
"Invalid integer entered. Using default value of 10"
)
token_count = 10
input_ids = tokenizer.encode(prompt, return_tensors="pt")
for _ in range(token_count):
next_token = shardedbloom.forward_pass(
torch.tensor(input_ids), device=args.device
)
input_ids = torch.cat(
[input_ids, next_token.unsqueeze(-1)], dim=-1
)
print(tokenizer.decode(input_ids.squeeze()))

View File

@@ -1,381 +0,0 @@
import sys
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
import re
from shark.shark_inference import SharkInference
import torch
import torch.nn as nn
from collections import OrderedDict
from transformers.models.bloom.modeling_bloom import (
BloomBlock,
build_alibi_tensor,
)
import time
import json
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: int = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
batch_size, source_length = mask.size()
tgt_len = tgt_len if tgt_len is not None else source_length
expanded_mask = (
mask[:, None, None, :]
.expand(batch_size, 1, tgt_len, source_length)
.to(dtype)
)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(
inverted_mask.to(torch.bool), torch.finfo(dtype).min
)
def _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
past_key_values_length=past_key_values_length,
).to(attention_mask.device)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(
attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
)
combined_attention_mask = (
expanded_attn_mask
if combined_attention_mask is None
else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
def _make_causal_mask(
input_ids_shape: torch.Size,
dtype: torch.dtype,
past_key_values_length: int = 0,
):
"""
Make the causal mask used for unidirectional (autoregressive) self-attention.
"""
batch_size, target_length = input_ids_shape
mask = torch.full((target_length, target_length), torch.finfo(dtype).min)
mask_cond = torch.arange(mask.size(-1))
intermediate_mask = mask_cond < (mask_cond + 1).view(mask.size(-1), 1)
mask.masked_fill_(intermediate_mask, 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat(
[
torch.zeros(
target_length, past_key_values_length, dtype=dtype
),
mask,
],
dim=-1,
)
expanded_mask = mask[None, None, :, :].expand(
batch_size, 1, target_length, target_length + past_key_values_length
)
return expanded_mask
if __name__ == "__main__":
working_dir = sys.argv[1]
layer_name = sys.argv[2]
will_compile = sys.argv[3]
device = sys.argv[4]
device_idx = sys.argv[5]
prompt = sys.argv[6]
if device_idx.lower().strip() == "none":
device_idx = None
else:
device_idx = int(device_idx)
if will_compile.lower().strip() == "true":
will_compile = True
else:
will_compile = False
f = open(f"{working_dir}/config.json")
config = json.load(f)
f.close()
layers_initialized = False
try:
n_embed = config["n_embed"]
except KeyError:
n_embed = config["hidden_size"]
vocab_size = config["vocab_size"]
n_layer = config["n_layer"]
try:
n_head = config["num_attention_heads"]
except KeyError:
n_head = config["n_head"]
if not os.path.isdir(working_dir):
os.mkdir(working_dir)
if layer_name == "start":
tokenizer = AutoTokenizer.from_pretrained(working_dir)
input_ids = tokenizer.encode(prompt, return_tensors="pt")
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/word_embeddings.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/word_embeddings",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/word_embeddings.vmfb")
input_embeds = shark_module(
inputs=(input_ids,), function_name="forward"
)
input_embeds = torch.tensor(input_embeds).float()
mlir_str = ""
if will_compile:
f = open(
f"{working_dir}/word_embeddings_layernorm.mlir",
encoding="utf-8",
)
mlir_str = f.read()
f.close()
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/word_embeddings_layernorm",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(
f"{working_dir}/word_embeddings_layernorm.vmfb"
)
hidden_states = shark_module(
inputs=(input_embeds,), function_name="forward"
)
hidden_states = torch.tensor(hidden_states).float()
torch.save(hidden_states, f"{working_dir}/hidden_states_0.pt")
attention_mask = torch.ones(
[hidden_states.shape[0], len(input_ids[0])]
)
attention_mask = torch.tensor(attention_mask).float()
alibi = build_alibi_tensor(
attention_mask,
n_head,
hidden_states.dtype,
device="cpu",
)
torch.save(alibi, f"{working_dir}/alibi.pt")
causal_mask = _prepare_attn_mask(
attention_mask, input_ids.size(), input_embeds, 0
)
causal_mask = torch.tensor(causal_mask).float()
torch.save(causal_mask, f"{working_dir}/causal_mask.pt")
elif layer_name in [str(x) for x in range(n_layer)]:
hidden_states = torch.load(
f"{working_dir}/hidden_states_{layer_name}.pt"
)
alibi = torch.load(f"{working_dir}/alibi.pt")
causal_mask = torch.load(f"{working_dir}/causal_mask.pt")
mlir_str = ""
if will_compile:
f = open(
f"{working_dir}/bloom_block_{layer_name}.mlir",
encoding="utf-8",
)
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/bloom_block_{layer_name}",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(
f"{working_dir}/bloom_block_{layer_name}.vmfb"
)
output = shark_module(
inputs=(
hidden_states.detach().numpy(),
alibi.detach().numpy(),
causal_mask.detach().numpy(),
),
function_name="forward",
)
hidden_states = torch.tensor(output[0]).float()
torch.save(
hidden_states,
f"{working_dir}/hidden_states_{int(layer_name) + 1}.pt",
)
elif layer_name == "end":
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/ln_f.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/ln_f",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/ln_f.vmfb")
hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt")
hidden_states = shark_module(
inputs=(hidden_states,), function_name="forward"
)
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/lm_head.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
if config["n_embed"] == 14336:
def get_state_dict():
d = torch.load(
f"{working_dir}/pytorch_model_00001-of-00072.bin"
)
return OrderedDict(
(k.replace("word_embeddings.", ""), v)
for k, v in d.items()
)
def load_causal_lm_head():
linear = nn.utils.skip_init(
nn.Linear, 14336, 250880, bias=False, dtype=torch.float
)
linear.load_state_dict(get_state_dict(), strict=False)
return linear.float()
lm_head = load_causal_lm_head()
logits = lm_head(torch.tensor(hidden_states).float())
else:
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/lm_head",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/lm_head.vmfb")
logits = shark_module(
inputs=(hidden_states,), function_name="forward"
)
logits = torch.tensor(logits).float()
tokenizer = AutoTokenizer.from_pretrained(working_dir)
next_token = tokenizer.decode(torch.argmax(logits[:, -1, :], dim=-1))
f = open(f"{working_dir}/prompt.txt", "w+")
f.write(prompt + next_token)
f.close()

Some files were not shown because too many files have changed in this diff.