Update releases.

2026-01-11 14:58:11 -05:00 · 2023-01-31 15:15:57 +00:00 · 2022-12-07 06:07:38 +00:00 · 2022-11-01 18:31:37 +00:00 · 2022-10-13 18:12:28 +00:00 · 2022-10-12 16:53:10 +00:00
310 changed files with 115 additions and 45634 deletions
--- a/.github/workflows/gh-pages-releases.yml
+++ b/.github/workflows/gh-pages-releases.yml
@@ -23,7 +23,7 @@ jobs:
      - run: git fetch --all
      - run: git switch github-pages
      - run: git config --global user.email "none@none.com"
-      - run: git config --global user.name "nod-ai"
+      - run: git config --global user.name "nod-team"
      - run: mv /tmp/index.html package-index/index.html
      - run: git add package-index/index.html

--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -9,92 +9,13 @@ on:
  workflow_dispatch:

 jobs:
-  windows-build:
-    runs-on: 7950X
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.11"]
-
-    steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Compute version
-      shell: powershell
-      run: |
-        $package_version = $(Get-Date -UFormat "%Y%m%d")+"."+${{ github.run_number }}
-        $package_version_ = $(Get-Date -UFormat "%Y%m%d")+"_"+${{ github.run_number }}
-        $tag_name=$package_version
-        echo "package_version=$package_version" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
-        echo "package_version_=$package_version_" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
-        echo "tag_name=$tag_name" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append
-
-    - name: Create Release
-      id: create_release
-      uses: actions/create-release@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
-      with:
-        tag_name: ${{ env.tag_name }}
-        release_name: nod.ai SHARK ${{ env.tag_name }}
-        body: |
-          Automatic snapshot release of nod.ai SHARK.
-        draft: true
-        prerelease: false
-
-    - name: Build Package 
-      shell: powershell
-      run: |
-        ./setup_venv.ps1
-        python process_skipfiles.py
-        pyinstaller .\apps\stable_diffusion\shark_sd.spec
-        mv ./dist/shark_sd.exe ./dist/shark_sd_${{ env.package_version_ }}.exe
-        signtool sign /f c:\g\shark_02152023.cer /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/shark_sd_${{ env.package_version_ }}.exe
-        pyinstaller .\apps\stable_diffusion\shark_sd_cli.spec
-        python process_skipfiles.py
-        mv ./dist/shark_sd_cli.exe ./dist/shark_sd_cli_${{ env.package_version_ }}.exe
-        signtool sign /f c:\g\shark_02152023.cer /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/shark_sd_cli_${{ env.package_version_ }}.exe
-
-        
-    # GHA windows VM OOMs so disable for now
-    #- name: Build and validate the SHARK Runtime package
-    #  shell: powershell
-    #  run: |
-    #    $env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
-    #    pip wheel -v -w dist . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
-
-    - uses: actions/upload-artifact@v2
-      with:
-        path: dist/*
-    
-    - name: Upload Release Assets
-      id: upload-release-assets
-      uses: dwenegar/upload-release-assets@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
-      with:
-        release_id: ${{ steps.create_release.outputs.id }}
-        assets_path: ./dist/*
-
-    - name: Publish Release
-      id: publish_release
-      uses: eregon/publish-release@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
-      with:
-        release_id: ${{ steps.create_release.outputs.id }}
-
-  linux-build:
+  build:

    runs-on: a100
    strategy:
      fail-fast: false
      matrix:
-        python-version: ["3.11"]
+        python-version: ["3.10"]
        backend: [IREE, SHARK]

    steps:
@@ -111,13 +32,40 @@ jobs:
        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-
-
+    
+    - name: Compute version
+      run: |
+        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
+        tag_name="${package_version}"
+        echo "package_version=${package_version}" >> $GITHUB_ENV
+        echo "tag_name=${tag_name}" >> $GITHUB_ENV    
+    - name: Set Environment Variables
+      run: |
+        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
+        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+    - name: Create Release  # NOTE(review): no `if:` on matrix.backend here, unlike the upload/publish steps, so each backend matrix entry appears to create its own draft for the same tag - confirm this is intended
+      id: create_release
+      uses: actions/create-release@v1
+      env:
+        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
+      with:
+        tag_name: ${{ env.tag_name }}
+        release_name: nod.ai SHARK ${{ env.tag_name }}
+        body: |
+          Automatic snapshot release of nod.ai SHARK.
+        draft: true
+        prerelease: false
+    - name: Find Torch-MLIR Release  # exports TM_RELEASE_DIR, which later steps pass to pip via -f
+      run: |
+        TM_HTML_URL="$(python3 -c "import urllib.request, json, sys; u=json.loads(urllib.request.urlopen('https://api.github.com/repos/llvm/torch-mlir/releases/latest').read().decode()).get('html_url', False); print(u) if u else sys.exit(1);")"
+        TM_RELEASE_DIR=${TM_HTML_URL/"tag"/"expanded_assets"}  # replace the first "tag" in the release URL with "expanded_assets"
+        echo "TM_RELEASE_DIR=${TM_RELEASE_DIR}" >> $GITHUB_ENV
    - name: Install dependencies
      run: |
-        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+        echo "Torch-MLIR Release DIR is ${{ env.TM_RELEASE_DIR }}"
        python -m pip install --upgrade pip
        python -m pip install flake8 pytest toml
-        if [ -f requirements.txt ]; then pip install -r requirements.txt -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html; fi
+        if [ -f requirements.txt ]; then pip install -r requirements.txt -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
@@ -126,26 +74,25 @@ jobs:
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py 
    - name: Build and validate the IREE package
      if: ${{ matrix.backend == 'IREE' }}
-      continue-on-error: true
      run: |
        cd $GITHUB_WORKSPACE
        USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
        source iree.venv/bin/activate
        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
        SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://iree-org.github.io/iree/pip-release-links.html
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/iree-org/iree/releases
        # Install the built wheel
        pip install ./wheelhouse/nodai*
        # Validate the Models
        /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
-        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" -k "not metal" |
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
          tail -n 1 |
          tee -a pytest_results.txt
        if !(grep -Fxq " failed" pytest_results.txt) 
          then 
            export SHA=$(git log -1 --format='%h')
-            gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/${DATE}_$SHA
-            gsutil -m cp -r gs://shark_tank/${DATE}_$SHA/* gs://shark_tank/nightly/
+            gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
+            gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
        fi
        rm -rf ./wheelhouse/nodai*

@@ -157,10 +104,29 @@ jobs:
        source shark.venv/bin/activate
        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
        SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases
        # Install the built wheel
        pip install ./wheelhouse/nodai*
        # Validate the Models
-        pytest --ci --ci_sha=${SHORT_SHA} -k "not metal" |
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
          tail -n 1 |
          tee -a pytest_results.txt
+    
+    - name: Upload Release Assets  # attach the SHARK-backend wheel(s) to the draft release made by the create_release step
+      if: ${{ matrix.backend == 'SHARK' }}
+      id: upload-release-assets
+      uses: dwenegar/upload-release-assets@v1
+      env:
+        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
+      with:
+        release_id: ${{ steps.create_release.outputs.id }}
+        assets_path: ./wheelhouse/nodai_*.whl  # workspace-relative glob; `with:` inputs are not shell-expanded, so a ${GITHUB_WORKSPACE} prefix would be passed to the action literally
+
+    - name: Publish Release
+      if: ${{ matrix.backend == 'SHARK' }}
+      id: publish_release
+      uses: eregon/publish-release@v1
+      env:
+        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
+      with:
+        release_id: ${{ steps.create_release.outputs.id }}
--- a/.github/workflows/test-models.yml
+++ b/.github/workflows/test-models.yml
@@ -6,32 +6,18 @@ name: Validate Models on Shark Runtime
 on:
  push:
    branches: [ main ]
-    paths-ignore:
-      - '**.md'
-      - 'shark/examples/**'
  pull_request:
    branches: [ main ]
-    paths-ignore:
-      - '**.md'
-      - 'shark/examples/**'
  workflow_dispatch:

-# Ensure that only a single job or workflow using the same
-# concurrency group will run at a time. This would cancel
-# any in-progress jobs in the same github workflow and github
-# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
 jobs:
  build-validate:
    strategy:
      fail-fast: true
      matrix:
-        os: [7950x, icelake, a100, MacStudio, ubuntu-latest]
+        os: [icelake, a100, MacStudio, ubuntu-latest]
        suite: [cpu,cuda,vulkan]
-        python-version: ["3.11"]
+        python-version: ["3.10"]
        include:
          - os: ubuntu-latest
            suite: lint
@@ -46,25 +32,21 @@ jobs:
            suite: cuda
          - os: MacStudio
            suite: cpu
+          - os: MacStudio
+            suite: vulkan
          - os: icelake
            suite: vulkan
          - os: icelake
            suite: cuda
          - os: a100
            suite: cpu
-          - os: 7950x
-            suite: cpu
-          - os: 7950x
-            suite: cuda

    runs-on: ${{ matrix.os }}

    steps:
    - uses: actions/checkout@v3
-      if: matrix.os != '7950x'
    
    - name: Set Environment Variables
-      if: matrix.os != '7950x'
      run: |
        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
@@ -84,9 +66,6 @@ jobs:
        #cache-dependency-path: |
        #  **/requirements-importer.txt
        #  **/requirements.txt
-    
-    - uses: actions/checkout@v2
-      if: matrix.os == '7950x'
          
    - name: Install dependencies
      if: matrix.suite == 'lint'
@@ -109,9 +88,9 @@ jobs:
      if: matrix.suite == 'cpu'
      run: |
        cd $GITHUB_WORKSPACE
-        PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank -k cpu
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv

@@ -121,41 +100,14 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank -k cuda
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
-        # Disabled due to black image bug
-        # python build_tools/stable_diffusion_testing.py --device=cuda 

-    - name: Validate Vulkan Models (MacOS)
-      if: matrix.suite == 'vulkan' && matrix.os == 'MacStudio'
+    - name: Validate Vulkan Models
+      if: matrix.suite == 'vulkan'
      run: |
        cd $GITHUB_WORKSPACE
-        PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        export DYLD_LIBRARY_PATH=/usr/local/lib/
-        echo $PATH
-        pip list | grep -E "torch|iree"
-        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" -k vulkan --update_tank
-
-    - name: Validate Vulkan Models (a100)
-      if: matrix.suite == 'vulkan' && matrix.os == 'a100'
-      run: |
-        cd $GITHUB_WORKSPACE
-        PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
-        source shark.venv/bin/activate
-        pytest --forked --benchmark --ci --ci_sha=${SHORT_SHA} --update_tank -k vulkan
-        python build_tools/stable_diffusion_testing.py --device=vulkan
-
-    - name: Validate Vulkan Models (Windows)
-      if: matrix.suite == 'vulkan' && matrix.os == '7950x'
-      run: |
-        ./setup_venv.ps1
-        pytest --benchmark -k vulkan -s
-        type bench_results.csv
-
-    - name: Validate Stable Diffusion Models (Windows)
-      if: matrix.suite == 'vulkan' && matrix.os == '7950x'
-      run: |
-        ./setup_venv.ps1
-        python build_tools/stable_diffusion_testing.py --device=vulkan
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,7 @@ MANIFEST
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
+*.spec

 # Installer logs
 pip-log.txt
@@ -159,26 +160,10 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

-# vscode related
-.vscode
-
 # Shark related artefacts
 *venv/
 shark_tmp/
-*.vmfb
-.use-iree
-tank/dict_configs.py

 # ORT related artefacts
 cache_models/
 onnx_models/
-
-# Generated images
-generated_imgs/
-
-# Custom model related artefacts
-variants.json
-models/
-
-# models folder
-apps/stable_diffusion/web/models/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +0,0 @@
-[submodule "inference/thirdparty/shark-runtime"]
-	path = inference/thirdparty/shark-runtime
-	url =https://github.com/nod-ai/SHARK-Runtime.git
-	branch = shark-06032022
--- a/.style.yapf
+++ b/.style.yapf
@@ -1,3 +0,0 @@
-[style]
-  based_on_style = google
-  column_limit = 80
--- a/218
+++ b/218
@@ -1,218 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-    1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-    2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-    3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-    4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-    5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-    6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-    7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-    8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-    9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-    END OF TERMS AND CONDITIONS
-
-    APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-    Copyright [yyyy] [name of copyright owner]
-
-    Licensed under the Apache License, Version 2.0 (the "License");
-    you may not use this file except in compliance with the License.
-    You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
-
-
---- LLVM Exceptions to the Apache 2.0 License ----
-
-As an exception, if, as a result of your compiling your source code, portions
-of this Software are embedded into an Object form of such source code, you
-may redistribute such embedded portions in such Object form without complying
-with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
-
-In addition, if you combine or link compiled forms of this Software with
-software that is licensed under the GPLv2 ("Combined Software") and if a
-court of competent jurisdiction determines that the patent provision (Section
-3), the indemnity provision (Section 9) or other Section of the License
-conflicts with the conditions of the GPLv2, you may retroactively and
-prospectively choose to deem waived or otherwise exclude such Section(s) of
-the License, but only in their entirety and only with respect to the Combined
-Software.
--- a/README.md
+++ b/README.md
@@ -1,376 +0,0 @@
-# SHARK
-
-High Performance Machine Learning Distribution
-
-[![Nightly Release](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
-[![Validate torch-models on Shark Runtime](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml/badge.svg)](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)
-
-
-<details>
-  <summary>Prerequisites - Drivers </summary>
-  
-#### Install your Windows hardware drivers
-* [AMD RDNA Users] Download the latest driver [here](https://www.amd.com/en/support/kb/release-notes/rn-rad-win-23-2-1).
-* [macOS Users] Download and install the 1.3.216 Vulkan SDK from [here](https://sdk.lunarg.com/sdk/download/1.3.216.0/mac/vulkansdk-macos-1.3.216.0.dmg). Newer versions of the SDK will not work. 
-* [Nvidia Users] Download and install the latest CUDA / Vulkan drivers from [here](https://developer.nvidia.com/cuda-downloads)
-  
-#### Linux Drivers
-* MESA / RADV drivers won't work with FP16. Please use the latest AMDGPU-PRO drivers (the non-pro OSS drivers also won't work) or the latest NVIDIA Linux drivers.
-
-Other users: please ensure you have the latest vendor drivers and the Vulkan SDK from [here](https://vulkan.lunarg.com/sdk/home), and if you are using Vulkan, check that `vulkaninfo` works in a terminal window.
-
-</details>
-
-
- 
-### Quick Start for SHARK Stable Diffusion for Windows 10/11 Users
-
-Install the Driver from [Prerequisites](https://github.com/nod-ai/SHARK#install-your-hardware-drivers) above 
-
-Download the stable release [539](https://github.com/nod-ai/SHARK/releases/download/20230216.539/shark_sd_20230216_539.exe) or if you are adventurous the latest .exe from [releases page](https://github.com/nod-ai/SHARK/releases).
-
-Double click the .exe and you should have the [UI](http://localhost:8080/) in the browser. 
-
-If you have custom models put them in a `models/` directory where the .exe is. 
-
-Enjoy. 
-
-<details>
-  <summary>More installation notes</summary>
-* We recommend that you download EXE in a new folder, whenever you download a new EXE version. If you download it in the same folder as a previous install, you must delete the old `*.vmfb` files with `rm *.vmfb`. You can also use `--clear_all` flag once to clean all the old files. 
-* If you recently updated the driver or this binary (EXE file), we recommend you clear all the local artifacts with `--clear_all` 
-
-## Running
-
-* Open a Command Prompt or Powershell terminal, change folder (`cd`) to the .exe folder. Then run the EXE from the command prompt. That way, if an error occurs, you'll be able to cut-and-paste it to ask for help. (if it always works for you without error, you may simply double-click the EXE)
-* The first run may take a few minutes while the models are downloaded and compiled. Your patience is appreciated. The download could be about 5GB.
-* You will likely see a Windows Defender message asking you to give permission to open a web server port. Accept it.
-* Open a browser to access the Stable Diffusion web server. By default, the port is 8080, so you can go to http://localhost:8080/.
-
-## Stopping
-
-* Select the command prompt that's running the EXE. Press CTRL-C and wait a moment or close the terminal. 
-</details>
-
-<details>
-  <summary>Advanced Installation (Only for developers)</summary>
-  
-## Advanced Installation (Windows, Linux and macOS) for developers
-
-## Check out the code
-
-```shell
-git clone https://github.com/nod-ai/SHARK.git
-cd SHARK
-```
-
-## Setup your Python VirtualEnvironment and Dependencies
-
-### Windows 10/11 Users
-
-* Install the latest Python 3.11.x version from [here](https://www.python.org/downloads/windows/)
-
-* Install Git for Windows from [here](https://git-scm.com/download/win)
-
-#### Allow the install script to run in Powershell
-```powershell
-set-executionpolicy remotesigned
-```
-
-#### Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...)
-```powershell
-./setup_venv.ps1 #You can re-run this script to get the latest version
-```
-
-### Linux / macOS Users
-
-```shell
-./setup_venv.sh
-source shark.venv/bin/activate
-```
-
-
-### Run Stable Diffusion on your device - WebUI
-
-#### Windows 10/11 Users
-```powershell
-(shark.venv) PS C:\g\shark> cd .\apps\stable_diffusion\web\
-(shark.venv) PS C:\g\shark\apps\stable_diffusion\web> python .\index.py
-```
-#### Linux / macOS Users
-```shell
-(shark.venv) > cd apps/stable_diffusion/web
-(shark.venv) > python index.py
-```
-
-#### Access Stable Diffusion on http://localhost:8080/?__theme=dark
-
-
-<img width="1607" alt="webui" src="https://user-images.githubusercontent.com/74956/204939260-b8308bc2-8dc4-47f6-9ac0-f60b66edab99.png">
-
-
-
-### Run Stable Diffusion on your device - Commandline
-
-#### Windows 10/11 Users
-```powershell
-(shark.venv) PS C:\g\shark> python .\apps\stable_diffusion\scripts\txt2img.py --precision="fp16" --prompt="tajmahal, snow, sunflowers, oil on canvas" --device="vulkan"
-```
-
-#### Linux / macOS Users
-```shell
-python3.11 apps/stable_diffusion/scripts/txt2img.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd"
-```
-
-You can replace `vulkan` with `cpu` to run on your CPU or with `cuda` to run on CUDA devices. If you have multiple vulkan devices you can address them with `--device=vulkan://1` etc
-</details>
-
-The output on an AMD 7900XTX would look something like:
-
-```shell
-Average step time: 47.19188690185547ms/it
-Clip Inference time (ms) = 109.531
-VAE Inference time (ms): 78.590
-
-Total image generation time: 2.5788655281066895sec
-```
-
-Here are some samples generated:
-
-![tajmahal, snow, sunflowers, oil on canvas_0](https://user-images.githubusercontent.com/74956/204934186-141f7e43-6eb2-4e89-a99c-4704d20444b3.jpg)
-
-![a photo of a crab playing a trumpet](https://user-images.githubusercontent.com/74956/204933258-252e7240-8548-45f7-8253-97647d38313d.jpg)
-
-
-Find us on [SHARK Discord server](https://discord.gg/RUqY2h2s9u) if you have any trouble with running it on your hardware. 
-
-
-<details>
-  <summary>Binary Installation</summary>
-
-### Setup a new pip Virtual Environment
-
-This step sets up a new VirtualEnv for Python
-
-```shell
-python --version #Check you have 3.11 on Linux, macOS or Windows Powershell
-python -m venv shark_venv
-source shark_venv/bin/activate   # Use shark_venv/Scripts/activate on Windows
-
-# If you are using conda create and activate a new conda env
-
-# Some older pip installs may not be able to handle the recent PyTorch deps
-python -m pip install --upgrade pip
-```
-
-*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"
-
-### Install SHARK
-
-This step pip installs SHARK and related packages on Linux Python 3.8, 3.10 and 3.11 and macOS / Windows Python 3.11
-
-```shell
-pip install nodai-shark -f https://nod-ai.github.io/SHARK/package-index/ -f https://llvm.github.io/torch-mlir/package-index/ -f  https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-```
-
-### Run shark tank model tests.
-```shell
-pytest tank/test_models.py
-```
-See tank/README.md for a more detailed walkthrough of our pytest suite and CLI.
-
-### Download and run Resnet50 sample
-
-```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
-#Install deps for test script
-pip install --pre torch torchvision torchaudio tqdm pillow gsutil --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-python ./resnet50_script.py --device="cpu"  #use cuda or vulkan or metal
-```
-
-### Download and run BERT (MiniLM) sample
-```shell
-curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
-#Install deps for test script
-pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-python ./minilm_jit.py --device="cpu"  #use cuda or vulkan or metal
-```
-</details>
-
-
-
-<details>
-  <summary>Development, Testing and Benchmarks</summary>
-
-If you want to use Python 3.11 together with the TF import tools, you can set environment variables as shown below.
-Set `USE_IREE=1` to use upstream IREE.
-```
-# PYTHON=python3.11 VENV_DIR=0617_venv IMPORTER=1 ./setup_venv.sh 
-```
-
-### Run any of the hundreds of SHARK tank models via the test framework
-```shell
-python -m  shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
-# Or a pytest
-pytest tank/test_models.py -k "MiniLM"
-```
-  
-### How to use your locally built IREE / Torch-MLIR with SHARK
-If you are a *Torch-mlir developer or an IREE developer* and want to test local changes you can uninstall
-the provided packages with `pip uninstall torch-mlir` and / or `pip uninstall iree-compiler iree-runtime` and build locally
-with Python bindings and set your PYTHONPATH as mentioned [here](https://github.com/iree-org/iree/tree/main/docs/api_docs/python#install-iree-binaries)
-for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
-for Torch-MLIR.
-
-How to use your locally built Torch-MLIR with SHARK:
-```shell
-1.) Run `./setup_venv.sh` in SHARK and activate `shark.venv` virtual env.
-2.) Run `pip uninstall torch-mlir`.
-3.) Go to your local Torch-MLIR directory.
-4.) Activate mlir_venv virtual environment.
-5.) Run `pip uninstall -r requirements.txt`.
-6.) Run `pip install -r requirements.txt`.
-7.) Build Torch-MLIR.
-8.) Activate shark.venv virtual environment from the Torch-MLIR directory.
-9.) Run `export PYTHONPATH=`pwd`/build/tools/torch-mlir/python_packages/torch_mlir:`pwd`/examples` in the Torch-MLIR directory.
-10.) Go to the SHARK directory.
-```
-Now SHARK will use your locally built Torch-MLIR repo.
-
-
-## Benchmarking Dispatches
-
-To produce benchmarks of individual dispatches, you can add `--dispatch_benchmarks=All --dispatch_benchmarks_dir=<output_dir>` to your pytest command line argument.  
-If you only want to compile specific dispatches, you can specify them with a space-separated string instead of `"All"`, e.g. `--dispatch_benchmarks="0 1 2 10"`.
-
-For example, to generate and run dispatch benchmarks for MiniLM on CUDA:
-```
-pytest -k "MiniLM and torch and static and cuda" --dispatch_benchmarks=All -s --dispatch_benchmarks_dir=./my_dispatch_benchmarks
-```
-The given command will populate `<dispatch_benchmarks_dir>/<model_name>/` with an `ordered_dispatches.txt` that lists and orders the dispatches and their latencies, as well as folders for each dispatch that contain .mlir, .vmfb, and results of the benchmark for that dispatch.
-
-If you instead want to incorporate this into a Python script, you can pass the `dispatch_benchmarks` and `dispatch_benchmarks_dir` arguments when initializing `SharkInference`, and the benchmarks will be generated when the module is compiled, e.g.:
-
-```
-shark_module = SharkInference(
-        mlir_model,
-        func_name,
-        device=args.device,
-        mlir_dialect="tm_tensor",
-        dispatch_benchmarks="all",
-        dispatch_benchmarks_dir="results"
-    )
-```
-
-Output will include:
- An ordered list `ordered_dispatches.txt` of all the dispatches with their runtime
- Inside the specified directory, there will be a directory for each dispatch (there will be mlir files for all dispatches, but only compiled binaries and benchmark data for the specified dispatches)
- An .mlir file containing the dispatch benchmark 
- A compiled .vmfb file containing the dispatch benchmark
- An .mlir file containing just the hal executable
- A compiled .vmfb file of the hal executable
- A .txt file containing benchmark output
-
-
-See tank/README.md for further instructions on how to run model tests and benchmarks from the SHARK tank.
-
-</details>
-
-<details>
-  <summary>API Reference</summary>
-
-### Shark Inference API
-
-```
-
-from shark.shark_importer import SharkImporter
-
-# SharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
-
-mlir_importer = SharkImporter(
-    torch_module,
-    (input),
-    frontend="torch",  #tf, #tf-lite
-)
-torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
-
-# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
-
-from shark.shark_inference import SharkInference
-shark_module = SharkInference(torch_mlir, func_name, device="cpu", mlir_dialect="linalg")
-shark_module.compile()
-result = shark_module.forward((input))
-
-```
-
-
-### Example demonstrating running MHLO IR.
-
-```
-from shark.shark_inference import SharkInference
-import numpy as np
-
-mhlo_ir = r"""builtin.module  {
-      func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
-        %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
-        %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
-        return %1 : tensor<4x4xf32>
-      }
-}"""
-
-arg0 = np.ones((1, 4)).astype(np.float32)
-arg1 = np.ones((4, 1)).astype(np.float32)
-shark_module = SharkInference(mhlo_ir, func_name="forward", device="cpu", mlir_dialect="mhlo")
-shark_module.compile()
-result = shark_module.forward((arg0, arg1))
-```
-</details>
-
-## Supported and Validated Models
-
-SHARK is maintained to support the latest innovations in ML Models: 
-
-| TF HuggingFace Models | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
-|---------------------|----------|----------|-------------|
-| BERT                | :green_heart:         | :green_heart:         | :green_heart:            |
-| DistilBERT         | :green_heart:         | :green_heart:         | :green_heart:            |
-| GPT2         | :green_heart:         | :green_heart:         | :green_heart:            |
-| BLOOM         | :green_heart:         | :green_heart:         | :green_heart:            |
-| Stable Diffusion         | :green_heart:         | :green_heart:         | :green_heart:            |
-| Vision Transformer       | :green_heart:         | :green_heart:         | :green_heart:            |
-| ResNet50         | :green_heart:         | :green_heart:         | :green_heart:            |
-
-For a complete list of the models supported in SHARK, please refer to [tank/README.md](https://github.com/nod-ai/SHARK/blob/main/tank/README.md).
-
-## Communication Channels
-
-*   [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real time discussions with the SHARK team and other users
-*   [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs etc
-
-## Related Projects
-
-<details>
-  <summary>IREE Project Channels</summary>
-
-*   [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests,
-    bugs, and other work tracking
-*   [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development
-    discussions with the core team and collaborators
-*   [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
-    Announcements, general and low-priority discussion
-</details>
-
-<details>
-  <summary>MLIR and Torch-MLIR Project Channels</summary>
-
-* `#torch-mlir` channel on the LLVM [Discord](https://discord.gg/xS7Z362) - this is the most active communication channel
-* Torch-MLIR Github issues [here](https://github.com/llvm/torch-mlir/issues)
-* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
-*  Weekly meetings on Mondays 9AM PST. See [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information.
-* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31). SHARK and IREE are enabled by and heavily rely on [MLIR](https://mlir.llvm.org).
-</details>
-  
-## License
-
-nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
-See [LICENSE](LICENSE) for more information.
--- a/apps/init.py
+++ b/apps/init.py
--- a/apps/stable_diffusion/init.py
+++ b/apps/stable_diffusion/init.py
--- a/apps/stable_diffusion/profiling_with_iree.md
+++ b/apps/stable_diffusion/profiling_with_iree.md
@@ -1,87 +0,0 @@
-Compile / Run Instructions:
-
-To compile .vmfb for SD (vae, unet, CLIP), run the following commands with the .mlir in your local shark_tank cache (default location for Linux users is `~/.local/shark_tank`). These will be available once the script from [this README](https://github.com/nod-ai/SHARK/blob/main/shark/examples/shark_inference/stable_diffusion/README.md) is run once.
-Running the script mentioned above with the `--save_vmfb` flag will also save the .vmfb in your SHARK base directory if you want to skip straight to benchmarks.
-
-Compile Commands FP32/FP16: 
-
-```shell
-Vulkan AMD: 
-iree-compile --iree-input-type=none --iree-hal-target-backends=vulkan --iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
-
-#  add --mlir-print-debuginfo --mlir-print-op-on-diagnostic=true for debug
-#  use --iree-input-type=mhlo for tf models
-
-CUDA NVIDIA:
-iree-compile --iree-input-type=none --iree-hal-target-backends=cuda --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
-
-CPU:
-iree-compile --iree-input-type=none --iree-hal-target-backends=llvm-cpu  --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 /path/to/input/mlir -o /path/to/output/vmfb
-```
-
-
-
-Run / Benchmark Command (FP32 - NCHW):
-(NEED to use BS=2 since we do two forward passes to unet as a result of classifier free guidance.)
-
-```shell
-## Vulkan AMD:
-iree-benchmark-module --module=/path/to/output/vmfb --function=forward --device=vulkan --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
-
-## CUDA:
-iree-benchmark-module --module=/path/to/vmfb --function=forward --device=cuda  --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
-
-## CPU:
-iree-benchmark-module --module=/path/to/vmfb --function=forward --device=local-task  --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
-
-```
-
-Run via vulkan_gui for RGP Profiling:
-
-To build the vulkan app for profiling UNet follow the instructions [here](https://github.com/nod-ai/SHARK/tree/main/cpp) and then run the following command from the cpp directory with your compiled stable_diff.vmfb
-```shell
-./build/vulkan_gui/iree-vulkan-gui --module=/path/to/unet.vmfb --input=1x4x64x64xf32 --input=1xf32 --input=2x77x768xf32 --input=f32=1.0 --input=f32=1.0
-```
-
-</details>
-  <details>
-  <summary>Debug Commands</summary>
-
-## Debug commands and other advanced usage follows.
-
-```shell
-python txt2img.py --precision="fp32"|"fp16" --device="cpu"|"cuda"|"vulkan" --import_mlir|--no-import_mlir --prompt "enter the text" 
-```
-
-## dump all dispatch .spv and isa using amdllpc
-
-```shell
-python txt2img.py --precision="fp16" --device="vulkan" --iree-vulkan-target-triple=rdna3-unknown-linux --no-load_vmfb --dispatch_benchmarks="all" --dispatch_benchmarks_dir="SD_dispatches" --dump_isa
-```
-
-## Compile and save the .vmfb (using vulkan fp16 as an example):
-
-```shell
-python txt2img.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb
-```
-
-## Capture an RGP trace
-
-```shell
-python txt2img.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb --enable_rgp
-```
-
-## Run the vae module with iree-benchmark-module (NCHW, fp16, vulkan, for example):
-
-```shell
-iree-benchmark-module --module=/path/to/output/vmfb --function=forward --device=vulkan --input=1x4x64x64xf16  
-```
-
-## Run the unet module with iree-benchmark-module (same config as above):
-```shell
-##if you want to use .npz inputs:
-unzip ~/.local/shark_tank/<your unet>/inputs.npz
-iree-benchmark-module --module=/path/to/output/vmfb --function=forward --input=@arr_0.npy --input=1xf16 --input=@arr_2.npy --input=@arr_3.npy --input=@arr_4.npy  
-```
-
-</details>
--- a/apps/stable_diffusion/scripts/init.py
+++ b/apps/stable_diffusion/scripts/init.py
@@ -1,2 +0,0 @@
-from apps.stable_diffusion.scripts.txt2img import txt2img_inf
-from apps.stable_diffusion.scripts.img2img import img2img_inf
--- a/apps/stable_diffusion/scripts/img2img.py
+++ b/apps/stable_diffusion/scripts/img2img.py
@@ -1,264 +0,0 @@
-import sys
-import torch
-import time
-from PIL import Image
-from dataclasses import dataclass
-from apps.stable_diffusion.src import (
-    args,
-    Image2ImagePipeline,
-    get_schedulers,
-    set_init_device_flags,
-    utils,
-    clear_all,
-    save_output_img,
-)
-
-
-@dataclass
-class Config:
-    """Snapshot of the settings the cached img2img pipeline was built with.
-
-    `img2img_inf` builds one of these per request, positionally, and compares
-    it (dataclass equality) against the previous one to decide whether the
-    module-level pipeline has to be rebuilt. Field order therefore matters.
-    """
-
-    # Hugging Face model id (empty string when a checkpoint file is used).
-    model_id: str
-    # Checkpoint location (empty string when a Hugging Face id is used).
-    ckpt_loc: str
-    # Precision string as received from the caller, e.g. "fp32" or "fp16".
-    precision: str
-    batch_size: int
-    max_length: int
-    height: int
-    width: int
-    # Raw device string as received from the UI (before the "=>" split).
-    device: str
-
-
-img2img_obj = None
-config_obj = None
-schedulers = None
-
-
-# Exposed to UI.
-def img2img_inf(
-    prompt: str,
-    negative_prompt: str,
-    init_image: str,
-    height: int,
-    width: int,
-    steps: int,
-    strength: float,
-    guidance_scale: float,
-    seed: int,
-    batch_count: int,
-    batch_size: int,
-    scheduler: str,
-    custom_model: str,
-    hf_model_id: str,
-    precision: str,
-    device: str,
-    max_length: int,
-    save_metadata_to_json: bool,
-    save_metadata_to_png: bool,
-):
-    """Run image-to-image generation on behalf of the web UI.
-
-    Copies the request onto the shared `args` namespace, (re)builds the
-    module-level `Image2ImagePipeline` when the configuration differs from the
-    previous call, and then generates `batch_count` batches of images.
-
-    Args:
-        prompt: Text prompt; stored as a one-element list in `args.prompts`.
-        negative_prompt: Negative prompt; stored as a one-element list.
-        init_image: Source image handed to `PIL.Image.open` and converted to
-            RGB. A missing value yields an error tuple instead of raising.
-        height, width, batch_size, max_length, precision: Pipeline settings;
-            together with the model and device they form the rebuild key.
-        steps, strength, guidance_scale: Forwarded to `generate_images`.
-        seed: Seed for the first batch (passed through
-            `utils.sanitize_seed`). Later batches use `sanitize_seed(-1)`
-            (presumably a fresh random seed; confirm in `utils`).
-        batch_count: Number of `generate_images` calls to make.
-        scheduler: Key into the dict returned by `get_schedulers`.
-        custom_model: The literal string "None" selects `hf_model_id`; a value
-            containing ".ckpt" or ".safetensors" is used as the checkpoint
-            location; anything else is used as a Hugging Face model id.
-        hf_model_id: Hugging Face model id used when `custom_model` is "None".
-        device: UI device string; the text after the first "=>" (or the whole
-            string when there is no "=>") becomes `args.device`.
-        save_metadata_to_json, save_metadata_to_png: Copied onto `args`.
-
-    Returns:
-        `(generated_imgs, text_output)` on success, where `text_output` is a
-        human-readable run summary, or `(None, error_message)` when the
-        request is invalid.
-    """
-    global img2img_obj
-    global config_obj
-    global schedulers
-
-    args.prompts = [prompt]
-    args.negative_prompts = [negative_prompt]
-    args.guidance_scale = guidance_scale
-    args.seed = seed
-    args.steps = steps
-    args.strength = strength
-    args.scheduler = scheduler
-    args.img_path = init_image
-
-    # Validate *before* opening: Image.open() never returns None, so checking
-    # its result (as the code used to) could not catch a missing image.
-    if not init_image:
-        return None, "An Initial Image is required"
-    image = Image.open(args.img_path).convert("RGB")
-
-    # set ckpt_loc and hf_model_id.
-    checkpoint_exts = (
-        ".ckpt",
-        ".safetensors",
-    )  # the tuple of file types
-    args.ckpt_loc = ""
-    args.hf_model_id = ""
-    if custom_model == "None":
-        if not hf_model_id:
-            return (
-                None,
-                "Please provide either custom model or huggingface model ID, both must not be empty",
-            )
-        args.hf_model_id = hf_model_id
-    elif any(ext in custom_model for ext in checkpoint_exts):
-        args.ckpt_loc = custom_model
-    else:
-        args.hf_model_id = custom_model
-
-    args.save_metadata_to_json = save_metadata_to_json
-    args.write_metadata_to_png = save_metadata_to_png
-
-    dtype = torch.float32 if precision == "fp32" else torch.half
-    cpu_scheduling = not scheduler.startswith("Shark")
-    new_config_obj = Config(
-        args.hf_model_id,
-        args.ckpt_loc,
-        precision,
-        batch_size,
-        max_length,
-        height,
-        width,
-        device,
-    )
-    # Rebuild the (expensive) pipeline only when there is none yet or any
-    # setting captured in Config changed since the previous request.
-    if not img2img_obj or config_obj != new_config_obj:
-        config_obj = new_config_obj
-        args.precision = precision
-        args.batch_size = batch_size
-        args.max_length = max_length
-        args.height = height
-        args.width = width
-        # UI strings look like "<label> => <device>"; [-1] keeps that result
-        # and falls back to the whole string when no "=>" is present instead
-        # of raising IndexError.
-        args.device = device.split("=>", 1)[-1].strip()
-        args.iree_vulkan_target_triple = ""
-        args.use_tuned = True
-        args.import_mlir = True
-        set_init_device_flags()
-        model_id = (
-            args.hf_model_id
-            if args.hf_model_id
-            else "runwayml/stable-diffusion-inpainting"
-        )
-        schedulers = get_schedulers(model_id)
-        scheduler_obj = schedulers[scheduler]
-        img2img_obj = Image2ImagePipeline.from_pretrained(
-            scheduler_obj,
-            args.import_mlir,
-            args.hf_model_id,
-            args.ckpt_loc,
-            args.custom_vae,
-            args.precision,
-            args.max_length,
-            args.batch_size,
-            args.height,
-            args.width,
-            args.use_base_vae,
-            args.use_tuned,
-            low_cpu_mem_usage=args.low_cpu_mem_usage,
-        )
-
-    # The scheduler is not part of Config, so it is swapped on every call.
-    img2img_obj.scheduler = schedulers[scheduler]
-
-    start_time = time.time()
-    img2img_obj.log = ""
-    generated_imgs = []
-    seeds = []
-    img_seed = utils.sanitize_seed(seed)
-    for current_batch in range(batch_count):
-        # Only the first batch honours the requested seed.
-        if current_batch > 0:
-            img_seed = utils.sanitize_seed(-1)
-        out_imgs = img2img_obj.generate_images(
-            prompt,
-            negative_prompt,
-            image,
-            batch_size,
-            height,
-            width,
-            steps,
-            strength,
-            guidance_scale,
-            img_seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        # Only the first image of each batch is written out here.
-        save_output_img(out_imgs[0], img_seed)
-        generated_imgs.extend(out_imgs)
-        seeds.append(img_seed)
-        img2img_obj.log += "\n"
-
-    total_time = time.time() - start_time
-    text_output = f"prompt={args.prompts}"
-    text_output += f"\nnegative prompt={args.negative_prompts}"
-    text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-    text_output += f"\nscheduler={args.scheduler}, device={device}"
-    text_output += f"\nsteps={steps}, strength={args.strength}, guidance_scale={guidance_scale}, seed={seeds}"
-    text_output += f"\nsize={height}x{width}, batch_count={batch_count}, batch_size={batch_size}, max_length={args.max_length}"
-    text_output += img2img_obj.log
-    text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-    return generated_imgs, text_output
-
-
-if __name__ == "__main__":
-    # Command-line entry point: run a single img2img generation from the
-    # parsed `args` and print a summary of the run.
-    if args.clear_all:
-        clear_all()
-
-    # sys.exit(message) is used instead of print() + exit(): exit() is a
-    # helper installed by the `site` module for interactive use, and it would
-    # also report success (status 0) for what is a usage error.
-    if args.img_path is None:
-        sys.exit("Flag --img_path is required.")
-
-    # When the models get uploaded, it should be default to False.
-    args.import_mlir = True
-
-    dtype = torch.float32 if args.precision == "fp32" else torch.half
-    # NOTE(review): this is computed before a "Shark*" scheduler is replaced
-    # by PNDM below, so it stays False in that case - confirm this is intended.
-    cpu_scheduling = not args.scheduler.startswith("Shark")
-    set_init_device_flags()
-    schedulers = get_schedulers(args.hf_model_id)
-    if args.scheduler != "PNDM":
-        if "Shark" in args.scheduler:
-            print(
-                "SharkEulerDiscrete scheduler not supported. Switching to PNDM scheduler"
-            )
-            args.scheduler = "PNDM"
-        else:
-            sys.exit(
-                "Img2Img works best with PNDM scheduler. Other schedulers are not supported yet."
-            )
-
-    scheduler_obj = schedulers[args.scheduler]
-    image = Image.open(args.img_path).convert("RGB")
-    seed = utils.sanitize_seed(args.seed)
-
-    # Adjust for height and width based on model
-
-    img2img_obj = Image2ImagePipeline.from_pretrained(
-        scheduler_obj,
-        args.import_mlir,
-        args.hf_model_id,
-        args.ckpt_loc,
-        args.custom_vae,
-        args.precision,
-        args.max_length,
-        args.batch_size,
-        args.height,
-        args.width,
-        args.use_base_vae,
-        args.use_tuned,
-        low_cpu_mem_usage=args.low_cpu_mem_usage,
-    )
-
-    start_time = time.time()
-    generated_imgs = img2img_obj.generate_images(
-        args.prompts,
-        args.negative_prompts,
-        image,
-        args.batch_size,
-        args.height,
-        args.width,
-        args.steps,
-        args.strength,
-        args.guidance_scale,
-        seed,
-        args.max_length,
-        dtype,
-        args.use_base_vae,
-        cpu_scheduling,
-    )
-    total_time = time.time() - start_time
-    text_output = f"prompt={args.prompts}"
-    text_output += f"\nnegative prompt={args.negative_prompts}"
-    text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-    text_output += f"\nscheduler={args.scheduler}, device={args.device}"
-    text_output += f"\nsteps={args.steps}, strength={args.strength}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
-    text_output += (
-        f", batch size={args.batch_size}, max_length={args.max_length}"
-    )
-    text_output += img2img_obj.log
-    text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-    # Only the first generated image is saved.
-    save_output_img(generated_imgs[0], seed)
-    print(text_output)
--- a/apps/stable_diffusion/scripts/inpaint.py
+++ b/apps/stable_diffusion/scripts/inpaint.py
@@ -1,253 +0,0 @@
-import sys
-import torch
-import time
-from PIL import Image
-from dataclasses import dataclass
-from apps.stable_diffusion.src import (
-    args,
-    InpaintPipeline,
-    get_schedulers,
-    set_init_device_flags,
-    utils,
-    clear_all,
-    save_output_img,
-)
-
-
-@dataclass
-class Config:
-    """Snapshot of the settings the cached inpaint pipeline was built with.
-
-    `inpaint_inf` builds one of these per request, positionally, and compares
-    it (dataclass equality) against the previous one to decide whether the
-    module-level pipeline has to be rebuilt. Field order therefore matters.
-    """
-
-    # Hugging Face model id (empty string when a checkpoint file is used).
-    model_id: str
-    # Checkpoint location (empty string when a Hugging Face id is used).
-    ckpt_loc: str
-    # Precision string as received from the caller, e.g. "fp32" or "fp16".
-    precision: str
-    batch_size: int
-    max_length: int
-    height: int
-    width: int
-    # Raw device string as received from the UI (before the "=>" split).
-    device: str
-
-
-inpaint_obj = None
-config_obj = None
-schedulers = None
-
-
-# Exposed to UI.
-def inpaint_inf(
-    prompt: str,
-    negative_prompt: str,
-    image: Image,
-    mask_image: Image,
-    height: int,
-    width: int,
-    steps: int,
-    guidance_scale: float,
-    seed: int,
-    batch_count: int,
-    batch_size: int,
-    scheduler: str,
-    custom_model: str,
-    hf_model_id: str,
-    precision: str,
-    device: str,
-    max_length: int,
-    save_metadata_to_json: bool,
-    save_metadata_to_png: bool,
-):
-    """Run inpainting on behalf of the web UI.
-
-    Copies the request onto the shared `args` namespace, (re)builds the
-    module-level `InpaintPipeline` when the configuration differs from the
-    previous call, and then generates `batch_count` batches of images.
-
-    Args:
-        prompt: Text prompt; stored as a one-element list in `args.prompts`.
-        negative_prompt: Negative prompt; stored as a one-element list.
-        image, mask_image: Forwarded unchanged to `generate_images`.
-        height, width, batch_size, max_length, precision: Pipeline settings;
-            together with the model and device they form the rebuild key.
-        steps, guidance_scale: Forwarded to `generate_images`.
-        seed: Seed for the first batch (passed through
-            `utils.sanitize_seed`). Later batches use `sanitize_seed(-1)`
-            (presumably a fresh random seed; confirm in `utils`).
-        batch_count: Number of `generate_images` calls to make.
-        scheduler: Key into the dict returned by `get_schedulers`.
-        custom_model: The literal string "None" selects `hf_model_id`; a value
-            containing ".ckpt" or ".safetensors" is used as the checkpoint
-            location; anything else is used as a Hugging Face model id.
-        hf_model_id: Hugging Face model id used when `custom_model` is "None".
-        device: UI device string; the text after the first "=>" (or the whole
-            string when there is no "=>") becomes `args.device`.
-        save_metadata_to_json, save_metadata_to_png: Copied onto `args`.
-
-    Returns:
-        `(generated_imgs, text_output)` on success, where `text_output` is a
-        human-readable run summary, or `(None, error_message)` when neither a
-        custom model nor a Hugging Face model id was supplied.
-    """
-    global inpaint_obj
-    global config_obj
-    global schedulers
-
-    args.prompts = [prompt]
-    args.negative_prompts = [negative_prompt]
-    args.guidance_scale = guidance_scale
-    args.steps = steps
-    args.scheduler = scheduler
-
-    # set ckpt_loc and hf_model_id.
-    checkpoint_exts = (
-        ".ckpt",
-        ".safetensors",
-    )  # the tuple of file types
-    args.ckpt_loc = ""
-    args.hf_model_id = ""
-    if custom_model == "None":
-        if not hf_model_id:
-            return (
-                None,
-                "Please provide either custom model or huggingface model ID, both must not be empty",
-            )
-        args.hf_model_id = hf_model_id
-    elif any(ext in custom_model for ext in checkpoint_exts):
-        args.ckpt_loc = custom_model
-    else:
-        args.hf_model_id = custom_model
-
-    args.save_metadata_to_json = save_metadata_to_json
-    args.write_metadata_to_png = save_metadata_to_png
-
-    dtype = torch.float32 if precision == "fp32" else torch.half
-    cpu_scheduling = not scheduler.startswith("Shark")
-    new_config_obj = Config(
-        args.hf_model_id,
-        args.ckpt_loc,
-        precision,
-        batch_size,
-        max_length,
-        height,
-        width,
-        device,
-    )
-    # Rebuild the (expensive) pipeline only when there is none yet or any
-    # setting captured in Config changed since the previous request.
-    if not inpaint_obj or config_obj != new_config_obj:
-        config_obj = new_config_obj
-        args.precision = precision
-        args.batch_size = batch_size
-        args.max_length = max_length
-        args.height = height
-        args.width = width
-        # UI strings look like "<label> => <device>"; [-1] keeps that result
-        # and falls back to the whole string when no "=>" is present instead
-        # of raising IndexError.
-        args.device = device.split("=>", 1)[-1].strip()
-        args.iree_vulkan_target_triple = ""
-        args.use_tuned = True
-        args.import_mlir = False
-        set_init_device_flags()
-        model_id = (
-            args.hf_model_id
-            if args.hf_model_id
-            else "stabilityai/stable-diffusion-2-inpainting"
-        )
-        schedulers = get_schedulers(model_id)
-        scheduler_obj = schedulers[scheduler]
-        inpaint_obj = InpaintPipeline.from_pretrained(
-            scheduler_obj,
-            args.import_mlir,
-            args.hf_model_id,
-            args.ckpt_loc,
-            args.custom_vae,
-            args.precision,
-            args.max_length,
-            args.batch_size,
-            args.height,
-            args.width,
-            args.use_base_vae,
-            args.use_tuned,
-        )
-
-    # The scheduler is not part of Config, so it is swapped on every call.
-    inpaint_obj.scheduler = schedulers[scheduler]
-
-    start_time = time.time()
-    inpaint_obj.log = ""
-    generated_imgs = []
-    seeds = []
-    img_seed = utils.sanitize_seed(seed)
-    for i in range(batch_count):
-        # Only the first batch honours the requested seed.
-        if i > 0:
-            img_seed = utils.sanitize_seed(-1)
-        out_imgs = inpaint_obj.generate_images(
-            prompt,
-            negative_prompt,
-            image,
-            mask_image,
-            batch_size,
-            height,
-            width,
-            steps,
-            guidance_scale,
-            img_seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        # Only the first image of each batch is written out here.
-        save_output_img(out_imgs[0], img_seed)
-        generated_imgs.extend(out_imgs)
-        seeds.append(img_seed)
-        inpaint_obj.log += "\n"
-
-    total_time = time.time() - start_time
-    text_output = f"prompt={args.prompts}"
-    text_output += f"\nnegative prompt={args.negative_prompts}"
-    text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-    text_output += f"\nscheduler={args.scheduler}, device={device}"
-    text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seeds}"
-    text_output += f"\nsize={args.height}x{args.width}, batch-count={batch_count}, batch-size={args.batch_size}, max_length={args.max_length}"
-    text_output += inpaint_obj.log
-    text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-    return generated_imgs, text_output
-
-
-if __name__ == "__main__":
-    if args.clear_all:
-        clear_all()
-
-    if args.img_path is None:
-        print("Flag --img_path is required.")
-        exit()
-    if args.mask_path is None:
-        print("Flag --mask_path is required.")
-        exit()
-    if "inpaint" not in args.hf_model_id:
-        print("Please use inpainting model with --hf_model_id.")
-        exit()
-
-    dtype = torch.float32 if args.precision == "fp32" else torch.half
-    cpu_scheduling = not args.scheduler.startswith("Shark")
-    set_init_device_flags()
-    schedulers = get_schedulers(args.hf_model_id)
-    scheduler_obj = schedulers[args.scheduler]
-    seed = args.seed
-    image = Image.open(args.img_path)
-    mask_image = Image.open(args.mask_path)
-
-    inpaint_obj = InpaintPipeline.from_pretrained(
-        scheduler_obj,
-        args.import_mlir,
-        args.hf_model_id,
-        args.ckpt_loc,
-        args.custom_vae,
-        args.precision,
-        args.max_length,
-        args.batch_size,
-        args.height,
-        args.width,
-        args.use_base_vae,
-        args.use_tuned,
-    )
-
-    for current_batch in range(args.batch_count):
-        if current_batch > 0:
-            seed = -1
-        seed = utils.sanitize_seed(seed)
-
-        start_time = time.time()
-        generated_imgs = inpaint_obj.generate_images(
-            args.prompts,
-            args.negative_prompts,
-            image,
-            mask_image,
-            args.batch_size,
-            args.height,
-            args.width,
-            args.steps,
-            args.guidance_scale,
-            seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        total_time = time.time() - start_time
-        text_output = f"prompt={args.prompts}"
-        text_output += f"\nnegative prompt={args.negative_prompts}"
-        text_output += (
-            f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-        )
-        text_output += f"\nscheduler={args.scheduler}, device={args.device}"
-        text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
-        text_output += (
-            f", batch size={args.batch_size}, max_length={args.max_length}"
-        )
-        text_output += inpaint_obj.log
-        text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-        save_output_img(generated_imgs[0], seed)
-        print(text_output)
--- a/apps/stable_diffusion/scripts/telegram_bot.py
+++ b/apps/stable_diffusion/scripts/telegram_bot.py
@@ -1,240 +0,0 @@
-import logging
-import os
-from models.stable_diffusion.main import stable_diff_inf
-from models.stable_diffusion.utils import get_available_devices
-from dotenv import load_dotenv
-from telegram import Update, InlineKeyboardButton, InlineKeyboardMarkup
-from telegram import BotCommand
-from telegram.ext import Application, ApplicationBuilder, CallbackQueryHandler
-from telegram.ext import ContextTypes, MessageHandler, CommandHandler, filters
-from io import BytesIO
-import random
-
-log = logging.getLogger("TG.Bot")
-logging.basicConfig()
-log.warning("Start")
-load_dotenv()
-os.environ["AMD_ENABLE_LLPC"] = "0"
-TG_TOKEN = os.getenv("TG_TOKEN")
-SELECTED_MODEL = "stablediffusion"
-SELECTED_SCHEDULER = "EulerAncestralDiscrete"
-STEPS = 30
-NEGATIVE_PROMPT = (
-    "Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra"
-    " limbs,Gross proportions,Missing arms,Mutated hands,Long"
-    " neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad"
-    " anatomy,Cloned face,Malformed limbs,Missing legs,Too many"
-    " fingers,blurry, lowres, text, error, cropped, worst quality, low"
-    " quality, jpeg artifacts, out of frame, extra fingers, mutated hands,"
-    " poorly drawn hands, poorly drawn face, bad anatomy, extra limbs, cloned"
-    " face, malformed limbs, missing arms, missing legs, extra arms, extra"
-    " legs, fused fingers, too many fingers"
-)
-GUIDANCE_SCALE = 6
-available_devices = get_available_devices()
-models_list = [
-    "stablediffusion",
-    "anythingv3",
-    "analogdiffusion",
-    "openjourney",
-    "dreamlike",
-]
-sheds_list = [
-    "DDIM",
-    "PNDM",
-    "LMSDiscrete",
-    "DPMSolverMultistep",
-    "EulerDiscrete",
-    "EulerAncestralDiscrete",
-    "SharkEulerDiscrete",
-]
-
-
-def image_to_bytes(image):
-    bio = BytesIO()
-    bio.name = "image.jpeg"
-    image.save(bio, "JPEG")
-    bio.seek(0)
-    return bio
-
-
-def get_try_again_markup():
-    keyboard = [[InlineKeyboardButton("Try again", callback_data="TRYAGAIN")]]
-    reply_markup = InlineKeyboardMarkup(keyboard)
-    return reply_markup
-
-
-def generate_image(prompt):
-    seed = random.randint(1, 10000)
-    log.warning(SELECTED_MODEL)
-    log.warning(STEPS)
-    image, text = stable_diff_inf(
-        prompt=prompt,
-        negative_prompt=NEGATIVE_PROMPT,
-        steps=STEPS,
-        guidance_scale=GUIDANCE_SCALE,
-        seed=seed,
-        scheduler_key=SELECTED_SCHEDULER,
-        variant=SELECTED_MODEL,
-        device_key=available_devices[0],
-    )
-
-    return image, seed
-
-
-async def generate_and_send_photo(
-    update: Update, context: ContextTypes.DEFAULT_TYPE
-) -> None:
-    progress_msg = await update.message.reply_text(
-        "Generating image...", reply_to_message_id=update.message.message_id
-    )
-    im, seed = generate_image(prompt=update.message.text)
-    await context.bot.delete_message(
-        chat_id=progress_msg.chat_id, message_id=progress_msg.message_id
-    )
-    await context.bot.send_photo(
-        update.effective_user.id,
-        image_to_bytes(im),
-        caption=f'"{update.message.text}" (Seed: {seed})',
-        reply_markup=get_try_again_markup(),
-        reply_to_message_id=update.message.message_id,
-    )
-
-
-async def button(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    query = update.callback_query
-    if query.data in models_list:
-        global SELECTED_MODEL
-        SELECTED_MODEL = query.data
-        await query.answer()
-        await query.edit_message_text(text=f"Selected model: {query.data}")
-        return
-    if query.data in sheds_list:
-        global SELECTED_SCHEDULER
-        SELECTED_SCHEDULER = query.data
-        await query.answer()
-        await query.edit_message_text(text=f"Selected scheduler: {query.data}")
-        return
-    replied_message = query.message.reply_to_message
-    await query.answer()
-    progress_msg = await query.message.reply_text(
-        "Generating image...", reply_to_message_id=replied_message.message_id
-    )
-
-    if query.data == "TRYAGAIN":
-        prompt = replied_message.text
-        im, seed = generate_image(prompt)
-
-    await context.bot.delete_message(
-        chat_id=progress_msg.chat_id, message_id=progress_msg.message_id
-    )
-    await context.bot.send_photo(
-        update.effective_user.id,
-        image_to_bytes(im),
-        caption=f'"{prompt}" (Seed: {seed})',
-        reply_markup=get_try_again_markup(),
-        reply_to_message_id=replied_message.message_id,
-    )
-
-
-async def select_model_handler(update, context):
-    text = "Select model"
-    keyboard = []
-    for model in models_list:
-        keyboard.append(
-            [
-                InlineKeyboardButton(text=model, callback_data=model),
-            ]
-        )
-    markup = InlineKeyboardMarkup(keyboard)
-    await update.message.reply_text(text=text, reply_markup=markup)
-
-
-async def select_scheduler_handler(update, context):
-    text = "Select schedule"
-    keyboard = []
-    for shed in sheds_list:
-        keyboard.append(
-            [
-                InlineKeyboardButton(text=shed, callback_data=shed),
-            ]
-        )
-    markup = InlineKeyboardMarkup(keyboard)
-    await update.message.reply_text(text=text, reply_markup=markup)
-
-
-async def set_steps_handler(update, context):
-    input_mex = update.message.text
-    log.warning(input_mex)
-    try:
-        input_args = input_mex.split("/set_steps ")[1]
-        global STEPS
-        STEPS = int(input_args)
-    except Exception:
-        input_args = (
-            "Invalid parameter for command. Correct command looks like\n"
-            " /set_steps 30"
-        )
-    await update.message.reply_text(input_args)
-
-
-async def set_negative_prompt_handler(update, context):
-    input_mex = update.message.text
-    log.warning(input_mex)
-    try:
-        input_args = input_mex.split("/set_negative_prompt ")[1]
-        global NEGATIVE_PROMPT
-        NEGATIVE_PROMPT = input_args
-    except Exception:
-        input_args = (
-            "Invalid parameter for command. Correct command looks like\n"
-            " /set_negative_prompt ugly, bad art, mutated"
-        )
-    await update.message.reply_text(input_args)
-
-
-async def set_guidance_scale_handler(update, context):
-    input_mex = update.message.text
-    log.warning(input_mex)
-    try:
-        input_args = input_mex.split("/set_guidance_scale ")[1]
-        global GUIDANCE_SCALE
-        GUIDANCE_SCALE = int(input_args)
-    except Exception:
-        input_args = (
-            "Invalid parameter for command. Correct command looks like\n"
-            " /set_guidance_scale 7"
-        )
-    await update.message.reply_text(input_args)
-
-
-async def setup_bot_commands(application: Application) -> None:
-    await application.bot.set_my_commands(
-        [
-            BotCommand("select_model", "to select model"),
-            BotCommand("select_scheduler", "to select scheduler"),
-            BotCommand("set_steps", "to set steps"),
-            BotCommand("set_guidance_scale", "to set guidance scale"),
-            BotCommand("set_negative_prompt", "to set negative prompt"),
-        ]
-    )
-
-
-app = (
-    ApplicationBuilder().token(TG_TOKEN).post_init(setup_bot_commands).build()
-)
-app.add_handler(CommandHandler("select_model", select_model_handler))
-app.add_handler(CommandHandler("select_scheduler", select_scheduler_handler))
-app.add_handler(CommandHandler("set_steps", set_steps_handler))
-app.add_handler(
-    CommandHandler("set_guidance_scale", set_guidance_scale_handler)
-)
-app.add_handler(
-    CommandHandler("set_negative_prompt", set_negative_prompt_handler)
-)
-app.add_handler(
-    MessageHandler(filters.TEXT & ~filters.COMMAND, generate_and_send_photo)
-)
-app.add_handler(CallbackQueryHandler(button))
-log.warning("Start bot")
-app.run_polling()
--- a/apps/stable_diffusion/scripts/txt2img.py
+++ b/apps/stable_diffusion/scripts/txt2img.py
@@ -1,240 +0,0 @@
-import sys
-import torch
-import time
-from dataclasses import dataclass
-from apps.stable_diffusion.src import (
-    args,
-    Text2ImagePipeline,
-    get_schedulers,
-    set_init_device_flags,
-    utils,
-    clear_all,
-    save_output_img,
-)
-
-
-@dataclass
-class Config:
-    model_id: str
-    ckpt_loc: str
-    precision: str
-    batch_size: int
-    max_length: int
-    height: int
-    width: int
-    device: str
-
-
-txt2img_obj = None
-config_obj = None
-schedulers = None
-
-
-# Exposed to UI.
-def txt2img_inf(
-    prompt: str,
-    negative_prompt: str,
-    height: int,
-    width: int,
-    steps: int,
-    guidance_scale: float,
-    seed: int,
-    batch_count: int,
-    batch_size: int,
-    scheduler: str,
-    custom_model: str,
-    hf_model_id: str,
-    precision: str,
-    device: str,
-    max_length: int,
-    save_metadata_to_json: bool,
-    save_metadata_to_png: bool,
-):
-    global txt2img_obj
-    global config_obj
-    global schedulers
-
-    args.prompts = [prompt]
-    args.negative_prompts = [negative_prompt]
-    args.guidance_scale = guidance_scale
-    args.steps = steps
-    args.scheduler = scheduler
-
-    # set ckpt_loc and hf_model_id.
-    types = (
-        ".ckpt",
-        ".safetensors",
-    )  # the tuple of file types
-    args.ckpt_loc = ""
-    args.hf_model_id = ""
-    if custom_model == "None":
-        if not hf_model_id:
-            return (
-                None,
-                "Please provide either custom model or huggingface model ID, both must not be empty",
-            )
-        args.hf_model_id = hf_model_id
-    elif ".ckpt" in custom_model or ".safetensors" in custom_model:
-        args.ckpt_loc = custom_model
-    else:
-        args.hf_model_id = custom_model
-
-    args.save_metadata_to_json = save_metadata_to_json
-    args.write_metadata_to_png = save_metadata_to_png
-
-    dtype = torch.float32 if precision == "fp32" else torch.half
-    cpu_scheduling = not scheduler.startswith("Shark")
-    new_config_obj = Config(
-        args.hf_model_id,
-        args.ckpt_loc,
-        precision,
-        batch_size,
-        max_length,
-        height,
-        width,
-        device,
-    )
-    if not txt2img_obj or config_obj != new_config_obj:
-        config_obj = new_config_obj
-        args.precision = precision
-        args.batch_size = batch_size
-        args.max_length = max_length
-        args.height = height
-        args.width = width
-        args.device = device.split("=>", 1)[1].strip()
-        args.iree_vulkan_target_triple = ""
-        args.use_tuned = True
-        args.import_mlir = False
-        args.img_path = None
-        set_init_device_flags()
-        model_id = (
-            args.hf_model_id
-            if args.hf_model_id
-            else "stabilityai/stable-diffusion-2-1-base"
-        )
-        schedulers = get_schedulers(model_id)
-        scheduler_obj = schedulers[scheduler]
-        txt2img_obj = Text2ImagePipeline.from_pretrained(
-            scheduler_obj,
-            args.import_mlir,
-            args.hf_model_id,
-            args.ckpt_loc,
-            args.custom_vae,
-            args.precision,
-            args.max_length,
-            args.batch_size,
-            args.height,
-            args.width,
-            args.use_base_vae,
-            args.use_tuned,
-            low_cpu_mem_usage=args.low_cpu_mem_usage,
-        )
-
-    txt2img_obj.scheduler = schedulers[scheduler]
-
-    start_time = time.time()
-    txt2img_obj.log = ""
-    generated_imgs = []
-    seeds = []
-    img_seed = utils.sanitize_seed(seed)
-    for i in range(batch_count):
-        if i > 0:
-            img_seed = utils.sanitize_seed(-1)
-        out_imgs = txt2img_obj.generate_images(
-            prompt,
-            negative_prompt,
-            batch_size,
-            height,
-            width,
-            steps,
-            guidance_scale,
-            img_seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        save_output_img(out_imgs[0], img_seed)
-        generated_imgs.extend(out_imgs)
-        seeds.append(img_seed)
-        txt2img_obj.log += "\n"
-
-    total_time = time.time() - start_time
-    text_output = f"prompt={args.prompts}"
-    text_output += f"\nnegative prompt={args.negative_prompts}"
-    text_output += f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-    text_output += f"\nscheduler={args.scheduler}, device={device}"
-    text_output += (
-        f"\nsteps={steps}, guidance_scale={guidance_scale}, seed={seeds}"
-    )
-    text_output += f"\nsize={height}x{width}, batch_count={batch_count}, batch_size={batch_size}, max_length={args.max_length}"
-    text_output += txt2img_obj.log
-    text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-    return generated_imgs, text_output
-
-
-if __name__ == "__main__":
-    if args.clear_all:
-        clear_all()
-
-    dtype = torch.float32 if args.precision == "fp32" else torch.half
-    cpu_scheduling = not args.scheduler.startswith("Shark")
-    set_init_device_flags()
-    schedulers = get_schedulers(args.hf_model_id)
-    scheduler_obj = schedulers[args.scheduler]
-    seed = args.seed
-
-    txt2img_obj = Text2ImagePipeline.from_pretrained(
-        scheduler_obj,
-        args.import_mlir,
-        args.hf_model_id,
-        args.ckpt_loc,
-        args.custom_vae,
-        args.precision,
-        args.max_length,
-        args.batch_size,
-        args.height,
-        args.width,
-        args.use_base_vae,
-        args.use_tuned,
-        low_cpu_mem_usage=args.low_cpu_mem_usage,
-    )
-
-    for current_batch in range(args.batch_count):
-        if current_batch > 0:
-            seed = -1
-        seed = utils.sanitize_seed(seed)
-
-        start_time = time.time()
-        generated_imgs = txt2img_obj.generate_images(
-            args.prompts,
-            args.negative_prompts,
-            args.batch_size,
-            args.height,
-            args.width,
-            args.steps,
-            args.guidance_scale,
-            seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        total_time = time.time() - start_time
-        text_output = f"prompt={args.prompts}"
-        text_output += f"\nnegative prompt={args.negative_prompts}"
-        text_output += (
-            f"\nmodel_id={args.hf_model_id}, ckpt_loc={args.ckpt_loc}"
-        )
-        text_output += f"\nscheduler={args.scheduler}, device={args.device}"
-        text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={seed}, size={args.height}x{args.width}"
-        text_output += (
-            f", batch size={args.batch_size}, max_length={args.max_length}"
-        )
-        # TODO: if using --batch_count=x txt2img_obj.log will output on each display every iteration infos from the start
-        text_output += txt2img_obj.log
-        text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-        save_output_img(generated_imgs[0], seed)
-        print(text_output)
--- a/apps/stable_diffusion/shark_sd.spec
+++ b/apps/stable_diffusion/shark_sd.spec
@@ -1,79 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
-from PyInstaller.utils.hooks import copy_metadata
-
-import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
-
-datas = []
-datas += collect_data_files('torch')
-datas += copy_metadata('torch')
-datas += copy_metadata('tqdm')
-datas += copy_metadata('regex')
-datas += copy_metadata('requests')
-datas += copy_metadata('packaging')
-datas += copy_metadata('filelock')
-datas += copy_metadata('numpy')
-datas += copy_metadata('tokenizers')
-datas += copy_metadata('importlib_metadata')
-datas += copy_metadata('torchvision')
-datas += copy_metadata('torch-mlir')
-datas += copy_metadata('diffusers')
-datas += copy_metadata('transformers')
-datas += copy_metadata('omegaconf')
-datas += copy_metadata('safetensors')
-datas += collect_data_files('gradio')
-datas += collect_data_files('iree')
-datas += collect_data_files('google-cloud-storage')
-datas += collect_data_files('shark')
-datas += [
-         ( 'src/utils/resources/prompts.json', 'resources' ),
-         ( 'src/utils/resources/model_db.json', 'resources' ),
-         ( 'src/utils/resources/opt_flags.json', 'resources' ),
-         ( 'src/utils/resources/base_model.json', 'resources' ),
-         ( 'web/ui/css/*', 'ui/css' ),
-         ( 'web/ui/logos/*', 'logos' )
-         ]
-
-binaries = []
-
-block_cipher = None
-
-
-a = Analysis(
-    ['web/index.py'],
-    pathex=['.'],
-    binaries=binaries,
-    datas=datas,
-    hiddenimports=['shark', 'shark.*', 'shark.shark_inference', 'shark_inference', 'iree.tools.core', 'gradio', 'apps'],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    win_no_prefer_redirects=False,
-    win_private_assemblies=False,
-    cipher=block_cipher,
-    noarchive=False,
-)
-pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
-
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.zipfiles,
-    a.datas,
-    [],
-    name='shark_sd',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)
--- a/apps/stable_diffusion/shark_sd_cli.spec
+++ b/apps/stable_diffusion/shark_sd_cli.spec
@@ -1,77 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
-from PyInstaller.utils.hooks import copy_metadata
-
-import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
-
-datas = []
-datas += collect_data_files('torch')
-datas += copy_metadata('torch')
-datas += copy_metadata('tqdm')
-datas += copy_metadata('regex')
-datas += copy_metadata('requests')
-datas += copy_metadata('packaging')
-datas += copy_metadata('filelock')
-datas += copy_metadata('numpy')
-datas += copy_metadata('tokenizers')
-datas += copy_metadata('importlib_metadata')
-datas += copy_metadata('torchvision')
-datas += copy_metadata('torch-mlir')
-datas += copy_metadata('diffusers')
-datas += copy_metadata('transformers')
-datas += copy_metadata('omegaconf')
-datas += copy_metadata('safetensors')
-datas += collect_data_files('gradio')
-datas += collect_data_files('iree')
-datas += collect_data_files('google-cloud-storage')
-datas += collect_data_files('shark')
-datas += [
-         ( 'src/utils/resources/prompts.json', 'resources' ),
-         ( 'src/utils/resources/model_db.json', 'resources' ),
-         ( 'src/utils/resources/opt_flags.json', 'resources' ),
-         ( 'src/utils/resources/base_model.json', 'resources' ),
-         ]
-
-binaries = []
-
-block_cipher = None
-
-
-a = Analysis(
-    ['scripts/txt2img.py'],
-    pathex=['.'],
-    binaries=binaries,
-    datas=datas,
-    hiddenimports=['shark', 'shark.*', 'shark.shark_inference', 'shark_inference', 'iree.tools.core', 'gradio', 'apps'],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    win_no_prefer_redirects=False,
-    win_private_assemblies=False,
-    cipher=block_cipher,
-    noarchive=False,
-)
-pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
-
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.zipfiles,
-    a.datas,
-    [],
-    name='shark_sd_cli',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)
--- a/apps/stable_diffusion/src/init.py
+++ b/apps/stable_diffusion/src/init.py
@@ -1,14 +0,0 @@
-from apps.stable_diffusion.src.utils import (
-    args,
-    set_init_device_flags,
-    prompt_examples,
-    get_available_devices,
-    clear_all,
-    save_output_img,
-)
-from apps.stable_diffusion.src.pipelines import (
-    Text2ImagePipeline,
-    InpaintPipeline,
-    Image2ImagePipeline,
-)
-from apps.stable_diffusion.src.schedulers import get_schedulers
--- a/apps/stable_diffusion/src/models/init.py
+++ b/apps/stable_diffusion/src/models/init.py
@@ -1,12 +0,0 @@
-from apps.stable_diffusion.src.models.model_wrappers import (
-    SharkifyStableDiffusionModel,
-)
-from apps.stable_diffusion.src.models.opt_params import (
-    get_vae_encode,
-    get_vae,
-    get_unet,
-    get_clip,
-    get_tokenizer,
-    get_params,
-    get_variant_version,
-)
--- a/apps/stable_diffusion/src/models/model_wrappers.py
+++ b/apps/stable_diffusion/src/models/model_wrappers.py
@@ -1,395 +0,0 @@
-from diffusers import AutoencoderKL, UNet2DConditionModel
-from transformers import CLIPTextModel
-from collections import defaultdict
-import torch
-import safetensors.torch
-import traceback
-import sys
-from apps.stable_diffusion.src.utils import (
-    compile_through_fx,
-    get_opt_flags,
-    base_models,
-    args,
-    fetch_or_delete_vmfbs,
-    preprocessCKPT,
-    get_path_to_diffusers_checkpoint,
-    fetch_and_update_base_model_id,
-    get_path_stem,
-    get_extended_name,
-)
-
-
-# These shapes are parameter dependent.
-def replace_shape_str(shape, max_len, width, height, batch_size):
-    new_shape = []
-    for i in range(len(shape)):
-        if shape[i] == "max_len":
-            new_shape.append(max_len)
-        elif shape[i] == "height":
-            new_shape.append(height)
-        elif shape[i] == "width":
-            new_shape.append(width)
-        elif isinstance(shape[i], str):
-            mul_val = int(shape[i].split("*")[0])
-            if "batch_size" in shape[i]:
-                new_shape.append(batch_size * mul_val)
-            elif "height" in shape[i]:
-                new_shape.append(height * mul_val)
-            elif "width" in shape[i]:
-                new_shape.append(width * mul_val)
-        else:
-            new_shape.append(shape[i])
-    return new_shape
-
-
-# Get the input info for various models i.e. "unet", "clip", "vae", "vae_encode".
-def get_input_info(model_info, max_len, width, height, batch_size):
-    dtype_config = {"f32": torch.float32, "i64": torch.int64}
-    input_map = defaultdict(list)
-    for k in model_info:
-        for inp in model_info[k]:
-            shape = model_info[k][inp]["shape"]
-            dtype = dtype_config[model_info[k][inp]["dtype"]]
-            tensor = None
-            if isinstance(shape, list):
-                clean_shape = replace_shape_str(
-                    shape, max_len, width, height, batch_size
-                )
-                if dtype == torch.int64:
-                    tensor = torch.randint(1, 3, tuple(clean_shape))
-                else:
-                    tensor = torch.randn(*clean_shape).to(dtype)
-            elif isinstance(shape, int):
-                tensor = torch.tensor(shape).to(dtype)
-            else:
-                sys.exit("shape isn't specified correctly.")
-            input_map[k].append(tensor)
-    return input_map
-
-
-class SharkifyStableDiffusionModel:
-    def __init__(
-        self,
-        model_id: str,
-        custom_weights: str,
-        custom_vae: str,
-        precision: str,
-        max_len: int = 64,
-        width: int = 512,
-        height: int = 512,
-        batch_size: int = 1,
-        use_base_vae: bool = False,
-        use_tuned: bool = False,
-        low_cpu_mem_usage: bool = False
-    ):
-        self.check_params(max_len, width, height)
-        self.max_len = max_len
-        self.height = height // 8
-        self.width = width // 8
-        self.batch_size = batch_size
-        self.custom_weights = custom_weights
-        if custom_weights != "":
-            assert custom_weights.lower().endswith(
-                (".ckpt", ".safetensors")
-            ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
-            custom_weights = get_path_to_diffusers_checkpoint(custom_weights)
-        self.model_id = model_id if custom_weights == "" else custom_weights
-        # TODO: remove the following line when stable-diffusion-2-1 works
-        if self.model_id == "stabilityai/stable-diffusion-2-1":
-            self.model_id = "stabilityai/stable-diffusion-2-1-base"
-        self.custom_vae = custom_vae
-        self.precision = precision
-        self.base_vae = use_base_vae
-        self.model_name = (
-            str(batch_size)
-            + "_"
-            + str(max_len)
-            + "_"
-            + str(height)
-            + "_"
-            + str(width)
-            + "_"
-            + precision
-        )
-        self.use_tuned = use_tuned
-        if use_tuned:
-            self.model_name = self.model_name + "_tuned"
-        self.model_name = self.model_name + "_" + get_path_stem(self.model_id)
-        self.low_cpu_mem_usage = low_cpu_mem_usage
-
-    def get_extended_name_for_all_model(self):
-        model_name = {}
-        sub_model_list = ["clip", "unet", "vae", "vae_encode"]
-        for model in sub_model_list:
-            sub_model = model
-            model_config = self.model_name
-            if "vae" == model:
-                if self.custom_vae != "":
-                    model_config = model_config + get_path_stem(self.custom_vae)
-                if self.base_vae:
-                    sub_model = "base_vae"
-            model_name[model] = get_extended_name(sub_model + model_config)
-        return model_name
-
-    def check_params(self, max_len, width, height):
-        if not (max_len >= 32 and max_len <= 77):
-            sys.exit("please specify max_len in the range [32, 77].")
-        if not (width % 8 == 0 and width >= 384):
-            sys.exit("width should be greater than 384 and multiple of 8")
-        if not (height % 8 == 0 and height >= 384):
-            sys.exit("height should be greater than 384 and multiple of 8")
-
-    def get_vae_encode(self):
-        class VaeEncodeModel(torch.nn.Module):
-            def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
-                super().__init__()
-                self.vae = AutoencoderKL.from_pretrained(
-                    model_id,
-                    subfolder="vae",
-                    low_cpu_mem_usage=low_cpu_mem_usage,
-                )
-
-            def forward(self, input):
-                latents = self.vae.encode(input).latent_dist.sample()
-                return 0.18215 * latents
-
-        vae_encode = VaeEncodeModel()
-        inputs = tuple(self.inputs["vae_encode"])
-        is_f16 = True if self.precision == "fp16" else False
-        shark_vae_encode = compile_through_fx(
-            vae_encode,
-            inputs,
-            is_f16=is_f16,
-            use_tuned=self.use_tuned,
-            model_name=self.model_name["vae_encode"],
-            extra_args=get_opt_flags("vae", precision=self.precision),
-        )
-        return shark_vae_encode
-
-    def get_vae(self):
-        class VaeModel(torch.nn.Module):
-            def __init__(self, model_id=self.model_id, base_vae=self.base_vae, custom_vae=self.custom_vae, low_cpu_mem_usage=False):
-                super().__init__()
-                self.vae = None
-                if custom_vae == "":
-                    self.vae = AutoencoderKL.from_pretrained(
-                        model_id,
-                        subfolder="vae",
-                        low_cpu_mem_usage=low_cpu_mem_usage,
-                    )
-                elif not isinstance(custom_vae, dict):
-                    self.vae = AutoencoderKL.from_pretrained(
-                        custom_vae,
-                        subfolder="vae",
-                        low_cpu_mem_usage=low_cpu_mem_usage,
-                    )
-                else:
-                    self.vae = AutoencoderKL.from_pretrained(
-                        model_id,
-                        subfolder="vae",
-                        low_cpu_mem_usage=low_cpu_mem_usage,
-                    )
-                    self.vae.load_state_dict(custom_vae)
-                self.base_vae = base_vae
-
-            def forward(self, input):
-                if not self.base_vae:
-                    input = 1 / 0.18215 * input
-                x = self.vae.decode(input, return_dict=False)[0]
-                x = (x / 2 + 0.5).clamp(0, 1)
-                if self.base_vae:
-                    return x
-                x = x * 255.0
-                return x.round()
-
-        vae = VaeModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
-        inputs = tuple(self.inputs["vae"])
-        is_f16 = True if self.precision == "fp16" else False
-        shark_vae = compile_through_fx(
-            vae,
-            inputs,
-            is_f16=is_f16,
-            use_tuned=self.use_tuned,
-            model_name=self.model_name["vae"],
-            extra_args=get_opt_flags("vae", precision=self.precision),
-        )
-        return shark_vae
-
-    def get_unet(self):
-        class UnetModel(torch.nn.Module):
-            def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
-                super().__init__()
-                self.unet = UNet2DConditionModel.from_pretrained(
-                    model_id,
-                    subfolder="unet",
-                    low_cpu_mem_usage=low_cpu_mem_usage,
-                )
-                self.in_channels = self.unet.in_channels
-                self.train(False)
-
-            def forward(
-                self, latent, timestep, text_embedding, guidance_scale
-            ):
-                # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-                latents = torch.cat([latent] * 2)
-                unet_out = self.unet.forward(
-                    latents, timestep, text_embedding, return_dict=False
-                )[0]
-                noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
-                noise_pred = noise_pred_uncond + guidance_scale * (
-                    noise_pred_text - noise_pred_uncond
-                )
-                return noise_pred
-
-        unet = UnetModel(low_cpu_mem_usage=self.low_cpu_mem_usage)
-        is_f16 = True if self.precision == "fp16" else False
-        inputs = tuple(self.inputs["unet"])
-        input_mask = [True, True, True, False]
-        shark_unet = compile_through_fx(
-            unet,
-            inputs,
-            model_name=self.model_name["unet"],
-            is_f16=is_f16,
-            f16_input_mask=input_mask,
-            use_tuned=self.use_tuned,
-            extra_args=get_opt_flags("unet", precision=self.precision),
-        )
-        return shark_unet
-
-    def get_clip(self):
-        class CLIPText(torch.nn.Module):
-            def __init__(self, model_id=self.model_id, low_cpu_mem_usage=False):
-                super().__init__()
-                self.text_encoder = CLIPTextModel.from_pretrained(
-                    model_id,
-                    subfolder="text_encoder",
-                    low_cpu_mem_usage=low_cpu_mem_usage,
-                )
-
-            def forward(self, input):
-                return self.text_encoder(input)[0]
-
-        clip_model = CLIPText(low_cpu_mem_usage=self.low_cpu_mem_usage)
-        shark_clip = compile_through_fx(
-            clip_model,
-            tuple(self.inputs["clip"]),
-            model_name=self.model_name["clip"],
-            extra_args=get_opt_flags("clip", precision="fp32"),
-        )
-        return shark_clip
-
-    def process_custom_vae(self):
-        custom_vae = self.custom_vae.lower()
-        if not custom_vae.endswith((".ckpt", ".safetensors")):
-            return self.custom_vae
-        try:
-            preprocessCKPT(self.custom_vae)
-            return get_path_to_diffusers_checkpoint(self.custom_vae)
-        except:
-            print("Processing standalone Vae checkpoint")
-            vae_checkpoint = None
-            vae_ignore_keys = {"model_ema.decay", "model_ema.num_updates"}
-            if custom_vae.endswith(".ckpt"):
-                vae_checkpoint = torch.load(self.custom_vae, map_location="cpu")
-            else:
-                vae_checkpoint = safetensors.torch.load_file(self.custom_vae, device="cpu")
-            if "state_dict" in vae_checkpoint:
-                vae_checkpoint = vae_checkpoint["state_dict"]
-            vae_dict = {k: v for k, v in vae_checkpoint.items() if k[0:4] != "loss" and k not in vae_ignore_keys}
-            return vae_dict
-        
-            
-    # Compiles Clip, Unet and Vae with `base_model_id` as defining their input
-    # configiration.
-    def compile_all(self, base_model_id, need_vae_encode):
-        self.inputs = get_input_info(
-            base_models[base_model_id],
-            self.max_len,
-            self.width,
-            self.height,
-            self.batch_size,
-        )
-        compiled_unet = self.get_unet()
-        if self.custom_vae != "":
-            print("Plugging in custom Vae")
-        compiled_vae = self.get_vae()
-        compiled_clip = self.get_clip()
-        if need_vae_encode:
-            compiled_vae_encode = self.get_vae_encode()
-            return compiled_clip, compiled_unet, compiled_vae, compiled_vae_encode
-
-        return compiled_clip, compiled_unet, compiled_vae
-
-    def __call__(self):
-        # Step 1:
-        # --  Fetch all vmfbs for the model, if present, else delete the lot.
-        need_vae_encode = args.img_path is not None
-        self.model_name = self.get_extended_name_for_all_model()
-        vmfbs = fetch_or_delete_vmfbs(self.model_name, need_vae_encode, self.precision)   
-        if vmfbs[0]:
-            # -- If all vmfbs are indeed present, we also try and fetch the base
-            #    model configuration for running SD with custom checkpoints.
-            if self.custom_weights != "":
-                args.hf_model_id = fetch_and_update_base_model_id(self.custom_weights)
-            if args.hf_model_id == "":
-                sys.exit("Base model configuration for the custom model is missing. Use `--clear_all` and re-run.")
-            print("Loaded vmfbs from cache and successfully fetched base model configuration.")
-            if not need_vae_encode:
-                return vmfbs[:3]
-            return vmfbs
-
-        # Step 2:
-        # -- If vmfbs weren't found, we try to see if the base model configuration
-        #    for the required SD run is known to us and bypass the retry mechanism.
-        model_to_run = ""
-        if self.custom_weights != "":
-            model_to_run = self.custom_weights
-            assert self.custom_weights.lower().endswith(
-                (".ckpt", ".safetensors")
-            ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
-            preprocessCKPT(self.custom_weights)
-        else:
-            model_to_run = args.hf_model_id
-        # For custom Vae user can provide either the repo-id or a checkpoint file,
-        # and for a checkpoint file we'd need to process it via Diffusers' script.
-        self.custom_vae = self.process_custom_vae()
-        base_model_fetched = fetch_and_update_base_model_id(model_to_run)
-        if base_model_fetched != "":
-            print("Compiling all the models with the fetched base model configuration.")
-            if args.ckpt_loc != "":
-                args.hf_model_id = base_model_fetched
-            return self.compile_all(base_model_fetched, need_vae_encode)
-
-        # Step 3:
-        # -- This is the retry mechanism where the base model's configuration is not
-        #    known to us and figure that out by trial and error.
-        print("Inferring base model configuration.")
-        for model_id in base_models:
-            try:
-                if need_vae_encode:
-                    compiled_clip, compiled_unet, compiled_vae, compiled_vae_encode = self.compile_all(model_id, need_vae_encode)
-                else:
-                    compiled_clip, compiled_unet, compiled_vae = self.compile_all(model_id, need_vae_encode)
-            except Exception as e:
-                print("Retrying with a different base model configuration")
-                continue
-            # -- Once a successful compilation has taken place we'd want to store
-            #    the base model's configuration inferred.
-            fetch_and_update_base_model_id(model_to_run, model_id)
-            # This is done just because in main.py we are basing the choice of tokenizer and scheduler
-            # on `args.hf_model_id`. Since now, we don't maintain 1:1 mapping of variants and the base
-            # model and rely on retrying method to find the input configuration, we should also update
-            # the knowledge of base model id accordingly into `args.hf_model_id`.
-            if args.ckpt_loc != "":
-                args.hf_model_id = model_id
-            if need_vae_encode:
-                return (
-                    compiled_clip,
-                    compiled_unet,
-                    compiled_vae,
-                    compiled_vae_encode,
-                )
-            return compiled_clip, compiled_unet, compiled_vae
-        sys.exit(
-            "Cannot compile the model. Please create an issue with the detailed log at https://github.com/nod-ai/SHARK/issues"
-        )
--- a/apps/stable_diffusion/src/models/opt_params.py
+++ b/apps/stable_diffusion/src/models/opt_params.py
@@ -1,108 +0,0 @@
-import sys
-from transformers import CLIPTokenizer
-from apps.stable_diffusion.src.utils import (
-    models_db,
-    args,
-    get_shark_model,
-    get_opt_flags,
-)
-
-
-hf_model_variant_map = {
-    "Linaqruf/anything-v3.0": ["anythingv3", "v1_4"],
-    "dreamlike-art/dreamlike-diffusion-1.0": ["dreamlike", "v1_4"],
-    "prompthero/openjourney": ["openjourney", "v1_4"],
-    "wavymulder/Analog-Diffusion": ["analogdiffusion", "v1_4"],
-    "stabilityai/stable-diffusion-2-1": ["stablediffusion", "v2_1base"],
-    "stabilityai/stable-diffusion-2-1-base": ["stablediffusion", "v2_1base"],
-    "CompVis/stable-diffusion-v1-4": ["stablediffusion", "v1_4"],
-    "runwayml/stable-diffusion-inpainting": ["stablediffusion", "inpaint_v1"],
-    "stabilityai/stable-diffusion-2-inpainting": ["stablediffusion", "inpaint_v2"],
-}
-
-
-def get_variant_version(hf_model_id):
-    return hf_model_variant_map[hf_model_id]
-
-
-def get_params(bucket_key, model_key, model, is_tuned, precision):
-    try:
-        bucket = models_db[0][bucket_key]
-        model_name = models_db[1][model_key]
-    except KeyError:
-        raise Exception(
-            f"{bucket_key}/{model_key} is not present in the models database"
-        )
-    iree_flags = get_opt_flags(model, precision="fp16")
-    return bucket, model_name, iree_flags
-
-
-def get_unet():
-    variant, version = get_variant_version(args.hf_model_id)
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    if "vulkan" not in args.device and args.use_tuned:
-        bucket_key = f"{variant}/{is_tuned}/{args.device}"
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}/{args.device}"
-    else:
-        bucket_key = f"{variant}/{is_tuned}"
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "unet", is_tuned, args.precision
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_vae_encode():
-    variant, version = get_variant_version(args.hf_model_id)
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    if "vulkan" not in args.device and args.use_tuned:
-        bucket_key = f"{variant}/{is_tuned}/{args.device}"
-        model_key = f"{variant}/{version}/vae_encode/{args.precision}/length_77/{is_tuned}/{args.device}"
-    else:
-        bucket_key = f"{variant}/{is_tuned}"
-        model_key = f"{variant}/{version}/vae_encode/{args.precision}/length_77/{is_tuned}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "vae", is_tuned, args.precision
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_vae():
-    variant, version = get_variant_version(args.hf_model_id)
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    is_base = "/base" if args.use_base_vae else ""
-    if "vulkan" not in args.device and args.use_tuned:
-        bucket_key = f"{variant}/{is_tuned}/{args.device}"
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}/{args.device}"
-    else:
-        bucket_key = f"{variant}/{is_tuned}"
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "vae", is_tuned, args.precision
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_clip():
-    variant, version = get_variant_version(args.hf_model_id)
-    bucket_key = f"{variant}/untuned"
-    model_key = (
-        f"{variant}/{version}/clip/fp32/length_{args.max_length}/untuned"
-    )
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "clip", "untuned", "fp32"
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_tokenizer():
-    tokenizer = CLIPTokenizer.from_pretrained(
-        args.hf_model_id, subfolder="tokenizer"
-    )
-    return tokenizer
--- a/apps/stable_diffusion/src/pipelines/init.py
+++ b/apps/stable_diffusion/src/pipelines/init.py
@@ -1,9 +0,0 @@
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_txt2img import (
-    Text2ImagePipeline,
-)
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_inpaint import (
-    InpaintPipeline,
-)
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_img2img import (
-    Image2ImagePipeline,
-)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_img2img.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_img2img.py
@@ -1,169 +0,0 @@
-import torch
-import time
-import numpy as np
-from tqdm.auto import tqdm
-from random import randint
-from PIL import Image
-from transformers import CLIPTokenizer
-from typing import Union
-from shark.shark_inference import SharkInference
-from diffusers import (
-    DDIMScheduler,
-    PNDMScheduler,
-    LMSDiscreteScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-    DPMSolverMultistepScheduler,
-)
-from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
-    StableDiffusionPipeline,
-)
-
-
-class Image2ImagePipeline(StableDiffusionPipeline):
-    def __init__(
-        self,
-        vae_encode: SharkInference,
-        vae: SharkInference,
-        text_encoder: SharkInference,
-        tokenizer: CLIPTokenizer,
-        unet: SharkInference,
-        scheduler: Union[
-            DDIMScheduler,
-            PNDMScheduler,
-            LMSDiscreteScheduler,
-            EulerDiscreteScheduler,
-            EulerAncestralDiscreteScheduler,
-            DPMSolverMultistepScheduler,
-            SharkEulerDiscreteScheduler,
-        ],
-    ):
-        super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
-        self.vae_encode = vae_encode
-
-    def prepare_image_latents(
-        self,
-        image,
-        batch_size,
-        height,
-        width,
-        generator,
-        num_inference_steps,
-        strength,
-        dtype,
-    ):
-        # Pre process image -> get image encoded -> process latents
-
-        # TODO: process with variable HxW combos
-
-        # Pre process image
-        image = image.resize((width, height))
-        image_arr = np.stack([np.array(i) for i in (image,)], axis=0)
-        image_arr = image_arr / 255.0
-        image_arr = torch.from_numpy(image_arr).permute(0, 3, 1, 2).to(dtype)
-        image_arr = 2 * (image_arr - 0.5)
-
-        # set scheduler steps
-        self.scheduler.set_timesteps(num_inference_steps)
-        init_timestep = min(
-            int(num_inference_steps * strength), num_inference_steps
-        )
-        t_start = max(num_inference_steps - init_timestep, 0)
-        # timesteps reduced as per strength
-        timesteps = self.scheduler.timesteps[t_start:]
-        # new number of steps to be used as per strength will be
-        # num_inference_steps = num_inference_steps - t_start
-
-        # image encode
-        latents = self.encode_image((image_arr,))
-        latents = torch.from_numpy(latents).to(dtype)
-        # add noise to data
-        noise = torch.randn(latents.shape, generator=generator, dtype=dtype)
-        latents = self.scheduler.add_noise(
-            latents, noise, timesteps[0].repeat(1)
-        )
-
-        return latents, timesteps
-
-    def encode_image(self, input_image):
-        vae_encode_start = time.time()
-        latents = self.vae_encode("forward", input_image)
-        vae_inf_time = (time.time() - vae_encode_start) * 1000
-        self.log += f"\nVAE Encode Inference time (ms): {vae_inf_time:.3f}"
-
-        return latents
-
-    def generate_images(
-        self,
-        prompts,
-        neg_prompts,
-        image,
-        batch_size,
-        height,
-        width,
-        num_inference_steps,
-        strength,
-        guidance_scale,
-        seed,
-        max_length,
-        dtype,
-        use_base_vae,
-        cpu_scheduling,
-    ):
-        # prompts and negative prompts must be a list.
-        if isinstance(prompts, str):
-            prompts = [prompts]
-
-        if isinstance(neg_prompts, str):
-            neg_prompts = [neg_prompts]
-
-        prompts = prompts * batch_size
-        neg_prompts = neg_prompts * batch_size
-
-        # seed generator to create the inital latent noise. Also handle out of range seeds.
-        uint32_info = np.iinfo(np.uint32)
-        uint32_min, uint32_max = uint32_info.min, uint32_info.max
-        if seed < uint32_min or seed >= uint32_max:
-            seed = randint(uint32_min, uint32_max)
-        generator = torch.manual_seed(seed)
-
-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
-
-        # guidance scale as a float32 tensor.
-        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
-
-        # Prepare input image latent
-        image_latents, final_timesteps = self.prepare_image_latents(
-            image=image,
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            generator=generator,
-            num_inference_steps=num_inference_steps,
-            strength=strength,
-            dtype=dtype,
-        )
-
-        # Get Image latents
-        latents = self.produce_img_latents(
-            latents=image_latents,
-            text_embeddings=text_embeddings,
-            guidance_scale=guidance_scale,
-            total_timesteps=final_timesteps,
-            dtype=dtype,
-            cpu_scheduling=cpu_scheduling,
-        )
-
-        # Img latents -> PIL images
-        all_imgs = []
-        for i in tqdm(range(0, latents.shape[0], batch_size)):
-            imgs = self.decode_latents(
-                latents=latents[i : i + batch_size],
-                use_base_vae=use_base_vae,
-                cpu_scheduling=cpu_scheduling,
-            )
-            all_imgs.extend(imgs)
-
-        return all_imgs
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_inpaint.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_inpaint.py
@@ -1,229 +0,0 @@
-import torch
-from tqdm.auto import tqdm
-import numpy as np
-from random import randint
-from PIL import Image
-from transformers import CLIPTokenizer
-from typing import Union
-from shark.shark_inference import SharkInference
-from diffusers import (
-    DDIMScheduler,
-    PNDMScheduler,
-    LMSDiscreteScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-    DPMSolverMultistepScheduler,
-)
-from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
-    StableDiffusionPipeline,
-)
-
-
-class InpaintPipeline(StableDiffusionPipeline):
-    def __init__(
-        self,
-        vae_encode: SharkInference,
-        vae: SharkInference,
-        text_encoder: SharkInference,
-        tokenizer: CLIPTokenizer,
-        unet: SharkInference,
-        scheduler: Union[
-            DDIMScheduler,
-            PNDMScheduler,
-            LMSDiscreteScheduler,
-            EulerDiscreteScheduler,
-            EulerAncestralDiscreteScheduler,
-            DPMSolverMultistepScheduler,
-            SharkEulerDiscreteScheduler,
-        ],
-    ):
-        super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
-        self.vae_encode = vae_encode
-
-    def prepare_mask_and_masked_image(self, image, mask):
-        # preprocess image
-        if isinstance(image, (Image.Image, np.ndarray)):
-            image = [image]
-
-        if isinstance(image, list) and isinstance(image[0], Image.Image):
-            image = [np.array(i.convert("RGB"))[None, :] for i in image]
-            image = np.concatenate(image, axis=0)
-        elif isinstance(image, list) and isinstance(image[0], np.ndarray):
-            image = np.concatenate([i[None, :] for i in image], axis=0)
-
-        image = image.transpose(0, 3, 1, 2)
-        image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
-
-        # preprocess mask
-        if isinstance(mask, (Image.Image, np.ndarray)):
-            mask = [mask]
-
-        if isinstance(mask, list) and isinstance(mask[0], Image.Image):
-            mask = np.concatenate(
-                [np.array(m.convert("L"))[None, None, :] for m in mask], axis=0
-            )
-            mask = mask.astype(np.float32) / 255.0
-        elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
-            mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
-
-        mask[mask < 0.5] = 0
-        mask[mask >= 0.5] = 1
-        mask = torch.from_numpy(mask)
-
-        masked_image = image * (mask < 0.5)
-
-        return mask, masked_image
-
-    def prepare_latents(
-        self,
-        batch_size,
-        height,
-        width,
-        generator,
-        num_inference_steps,
-        dtype,
-    ):
-        latents = torch.randn(
-            (
-                batch_size,
-                4,
-                height // 8,
-                width // 8,
-            ),
-            generator=generator,
-            dtype=torch.float32,
-        ).to(dtype)
-
-        self.scheduler.set_timesteps(num_inference_steps)
-        self.scheduler.is_scale_input_called = True
-        latents = latents * self.scheduler.init_noise_sigma
-        return latents
-
-    def prepare_mask_latents(
-        self,
-        mask,
-        masked_image,
-        batch_size,
-        height,
-        width,
-        dtype,
-    ):
-        mask = torch.nn.functional.interpolate(
-            mask, size=(height // 8, width // 8)
-        )
-        mask = mask.to(dtype)
-
-        masked_image = masked_image.to(dtype)
-        masked_image_latents = self.vae_encode("forward", (masked_image,))
-        masked_image_latents = torch.from_numpy(masked_image_latents)
-
-        # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
-        if mask.shape[0] < batch_size:
-            if not batch_size % mask.shape[0] == 0:
-                raise ValueError(
-                    "The passed mask and the required batch size don't match. Masks are supposed to be duplicated to"
-                    f" a total batch size of {batch_size}, but {mask.shape[0]} masks were passed. Make sure the number"
-                    " of masks that you pass is divisible by the total requested batch size."
-                )
-            mask = mask.repeat(batch_size // mask.shape[0], 1, 1, 1)
-        if masked_image_latents.shape[0] < batch_size:
-            if not batch_size % masked_image_latents.shape[0] == 0:
-                raise ValueError(
-                    "The passed images and the required batch size don't match. Images are supposed to be duplicated"
-                    f" to a total batch size of {batch_size}, but {masked_image_latents.shape[0]} images were passed."
-                    " Make sure the number of images that you pass is divisible by the total requested batch size."
-                )
-            masked_image_latents = masked_image_latents.repeat(
-                batch_size // masked_image_latents.shape[0], 1, 1, 1
-            )
-        return mask, masked_image_latents
-
-    def generate_images(
-        self,
-        prompts,
-        neg_prompts,
-        image,
-        mask_image,
-        batch_size,
-        height,
-        width,
-        num_inference_steps,
-        guidance_scale,
-        seed,
-        max_length,
-        dtype,
-        use_base_vae,
-        cpu_scheduling,
-    ):
-        # prompts and negative prompts must be a list.
-        if isinstance(prompts, str):
-            prompts = [prompts]
-
-        if isinstance(neg_prompts, str):
-            neg_prompts = [neg_prompts]
-
-        prompts = prompts * batch_size
-        neg_prompts = neg_prompts * batch_size
-
-        # seed generator to create the inital latent noise. Also handle out of range seeds.
-        uint32_info = np.iinfo(np.uint32)
-        uint32_min, uint32_max = uint32_info.min, uint32_info.max
-        if seed < uint32_min or seed >= uint32_max:
-            seed = randint(uint32_min, uint32_max)
-        generator = torch.manual_seed(seed)
-
-        # Get initial latents
-        init_latents = self.prepare_latents(
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            generator=generator,
-            num_inference_steps=num_inference_steps,
-            dtype=dtype,
-        )
-
-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
-
-        # guidance scale as a float32 tensor.
-        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
-
-        # Preprocess mask and image
-        mask, masked_image = self.prepare_mask_and_masked_image(
-            image, mask_image
-        )
-
-        # Prepare mask latent variables
-        mask, masked_image_latents = self.prepare_mask_latents(
-            mask=mask,
-            masked_image=masked_image,
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            dtype=dtype,
-        )
-
-        # Get Image latents
-        latents = self.produce_img_latents(
-            latents=init_latents,
-            text_embeddings=text_embeddings,
-            guidance_scale=guidance_scale,
-            total_timesteps=self.scheduler.timesteps,
-            dtype=dtype,
-            cpu_scheduling=cpu_scheduling,
-            mask=mask,
-            masked_image_latents=masked_image_latents,
-        )
-
-        # Img latents -> PIL images
-        all_imgs = []
-        for i in tqdm(range(0, latents.shape[0], batch_size)):
-            imgs = self.decode_latents(
-                latents=latents[i : i + batch_size],
-                use_base_vae=use_base_vae,
-                cpu_scheduling=cpu_scheduling,
-            )
-            all_imgs.extend(imgs)
-
-        return all_imgs
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_txt2img.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_txt2img.py
@@ -1,137 +0,0 @@
-import torch
-from tqdm.auto import tqdm
-import numpy as np
-from random import randint
-from transformers import CLIPTokenizer
-from typing import Union
-from shark.shark_inference import SharkInference
-from diffusers import (
-    DDIMScheduler,
-    PNDMScheduler,
-    LMSDiscreteScheduler,
-    KDPM2DiscreteScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-    DPMSolverMultistepScheduler,
-)
-from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
-from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
-    StableDiffusionPipeline,
-)
-
-
-class Text2ImagePipeline(StableDiffusionPipeline):
-    """Text-to-image pipeline.
-
-    Extends StableDiffusionPipeline with random initial-latent sampling and
-    a `generate_images` driver that runs prompt encoding, the denoising
-    loop and latent decoding in sequence.
-    """
-
-    def __init__(
-        self,
-        vae: SharkInference,
-        text_encoder: SharkInference,
-        tokenizer: CLIPTokenizer,
-        unet: SharkInference,
-        scheduler: Union[
-            DDIMScheduler,
-            PNDMScheduler,
-            LMSDiscreteScheduler,
-            KDPM2DiscreteScheduler,
-            EulerDiscreteScheduler,
-            EulerAncestralDiscreteScheduler,
-            DPMSolverMultistepScheduler,
-            SharkEulerDiscreteScheduler,
-        ],
-    ):
-        super().__init__(vae, text_encoder, tokenizer, unet, scheduler)
-
-    def prepare_latents(
-        self,
-        batch_size,
-        height,
-        width,
-        generator,
-        num_inference_steps,
-        dtype,
-    ):
-        """Sample the starting noise and configure the scheduler.
-
-        Draws a float32 tensor of shape
-        (batch_size, 4, height // 8, width // 8) from `generator`, casts it
-        to `dtype`, sets the scheduler to `num_inference_steps` steps and
-        scales the noise by the scheduler's `init_noise_sigma`.
-
-        Returns:
-            The scaled initial latents.
-        """
-        latents = torch.randn(
-            (
-                batch_size,
-                4,
-                height // 8,
-                width // 8,
-            ),
-            generator=generator,
-            dtype=torch.float32,
-        ).to(dtype)
-
-        self.scheduler.set_timesteps(num_inference_steps)
-        # NOTE(review): presumably marks the input as already scaled so the
-        # scheduler does not complain later -- confirm against the scheduler.
-        self.scheduler.is_scale_input_called = True
-        latents = latents * self.scheduler.init_noise_sigma
-        return latents
-
-    def generate_images(
-        self,
-        prompts,
-        neg_prompts,
-        batch_size,
-        height,
-        width,
-        num_inference_steps,
-        guidance_scale,
-        seed,
-        max_length,
-        dtype,
-        use_base_vae,
-        cpu_scheduling,
-    ):
-        """Generate images for the given prompts.
-
-        Args:
-            prompts: A prompt string or a list of prompt strings.
-            neg_prompts: A negative prompt string or a list of them.
-            batch_size: Number of times the prompt lists are repeated; also
-                the chunk size used when decoding latents.
-            height, width: Output size in pixels; latents use 1/8 of each.
-            num_inference_steps: Number of scheduler steps.
-            guidance_scale: Guidance scale; converted to a float32 tensor.
-            seed: Seed for the latent noise. Values outside the uint32
-                range are replaced by a random in-range seed.
-            max_length: Token length passed to the tokenizer.
-            dtype: Torch dtype for latents and text embeddings.
-            use_base_vae: Forwarded to `decode_latents`.
-            cpu_scheduling: Forwarded to the denoising loop and decoder.
-
-        Returns:
-            A list with everything `decode_latents` returned per chunk.
-        """
-        # prompts and negative prompts must be a list.
-        if isinstance(prompts, str):
-            prompts = [prompts]
-
-        if isinstance(neg_prompts, str):
-            neg_prompts = [neg_prompts]
-
-        prompts = prompts * batch_size
-        neg_prompts = neg_prompts * batch_size
-
-        # Seed the generator that creates the initial latent noise and
-        # handle out-of-range seeds. The upper bound is inclusive: uint32_max
-        # is a valid uint32 and is also a value the randint() fallback can
-        # return, so it must not be treated as out of range.
-        # TODO: Wouldn't it be preferable to just report an error instead of modifying the seed on the fly?
-        uint32_info = np.iinfo(np.uint32)
-        uint32_min, uint32_max = uint32_info.min, uint32_info.max
-        if seed < uint32_min or seed > uint32_max:
-            seed = randint(uint32_min, uint32_max)
-        generator = torch.manual_seed(seed)
-
-        # Get initial latents
-        init_latents = self.prepare_latents(
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            generator=generator,
-            num_inference_steps=num_inference_steps,
-            dtype=dtype,
-        )
-
-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
-
-        # guidance scale as a float32 tensor.
-        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
-
-        # Get Image latents
-        latents = self.produce_img_latents(
-            latents=init_latents,
-            text_embeddings=text_embeddings,
-            guidance_scale=guidance_scale,
-            total_timesteps=self.scheduler.timesteps,
-            dtype=dtype,
-            cpu_scheduling=cpu_scheduling,
-        )
-
-        # Img latents -> PIL images, decoded batch_size latents at a time.
-        all_imgs = []
-        for i in tqdm(range(0, latents.shape[0], batch_size)):
-            imgs = self.decode_latents(
-                latents=latents[i : i + batch_size],
-                use_base_vae=use_base_vae,
-                cpu_scheduling=cpu_scheduling,
-            )
-            all_imgs.extend(imgs)
-
-        return all_imgs
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py
@@ -1,261 +0,0 @@
-import torch
-import numpy as np
-from transformers import CLIPTokenizer
-from PIL import Image
-from tqdm.auto import tqdm
-import time
-from typing import Union
-from diffusers import (
-    DDIMScheduler,
-    PNDMScheduler,
-    LMSDiscreteScheduler,
-    KDPM2DiscreteScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-    DPMSolverMultistepScheduler,
-)
-from shark.shark_inference import SharkInference
-from apps.stable_diffusion.src.schedulers import SharkEulerDiscreteScheduler
-from apps.stable_diffusion.src.models import (
-    SharkifyStableDiffusionModel,
-    get_vae_encode,
-    get_vae,
-    get_clip,
-    get_unet,
-    get_tokenizer,
-)
-from apps.stable_diffusion.src.utils import (
-    start_profiling,
-    end_profiling,
-)
-
-
-class StableDiffusionPipeline:
-    """Shared building blocks for the Stable Diffusion pipelines.
-
-    Holds the VAE, text encoder and UNet SharkInference modules together
-    with the tokenizer and scheduler, and provides prompt encoding, the
-    denoising loop, latent decoding and a factory that obtains the modules.
-    """
-
-    # Subclasses (matched by class name) whose constructors take an extra
-    # leading `vae_encode` argument.
-    _VAE_ENCODE_PIPELINES = ("Image2ImagePipeline", "InpaintPipeline")
-
-    def __init__(
-        self,
-        vae: SharkInference,
-        text_encoder: SharkInference,
-        tokenizer: CLIPTokenizer,
-        unet: SharkInference,
-        scheduler: Union[
-            DDIMScheduler,
-            PNDMScheduler,
-            LMSDiscreteScheduler,
-            KDPM2DiscreteScheduler,
-            EulerDiscreteScheduler,
-            EulerAncestralDiscreteScheduler,
-            DPMSolverMultistepScheduler,
-            SharkEulerDiscreteScheduler,
-        ],
-    ):
-        self.vae = vae
-        self.text_encoder = text_encoder
-        self.tokenizer = tokenizer
-        self.unet = unet
-        self.scheduler = scheduler
-        # Timing messages are accumulated here as plain text.
-        # TODO: Implement using logging python utility.
-        self.log = ""
-
-    def encode_prompts(self, prompts, neg_prompts, max_length):
-        """Tokenize the prompts and run the text encoder on them.
-
-        Both prompt lists are padded/truncated to `max_length`. The negative
-        prompt token ids are concatenated *before* the positive ones and the
-        combined tensor is passed to the text encoder in a single call. The
-        inference time is appended to `self.log`.
-
-        Returns:
-            Whatever the text encoder's "forward" call returns.
-        """
-        # Tokenize text and get embeddings
-        text_input = self.tokenizer(
-            prompts,
-            padding="max_length",
-            max_length=max_length,
-            truncation=True,
-            return_tensors="pt",
-        )
-
-        # Get unconditional embeddings as well
-        uncond_input = self.tokenizer(
-            neg_prompts,
-            padding="max_length",
-            max_length=max_length,
-            truncation=True,
-            return_tensors="pt",
-        )
-
-        text_input = torch.cat([uncond_input.input_ids, text_input.input_ids])
-
-        clip_inf_start = time.time()
-        text_embeddings = self.text_encoder("forward", (text_input,))
-        clip_inf_time = (time.time() - clip_inf_start) * 1000
-        self.log += f"\nClip Inference time (ms) = {clip_inf_time:.3f}"
-
-        return text_embeddings
-
-    def decode_latents(self, latents, use_base_vae, cpu_scheduling):
-        """Decode latents with the VAE and convert the result to PIL images.
-
-        Args:
-            latents: Latents to decode.
-            use_base_vae: When true, the latents are divided by 0.18215
-                before decoding and the VAE output is multiplied by 255 and
-                rounded here before the uint8 conversion.
-            cpu_scheduling: When true, `latents` is a torch tensor and is
-                converted to a numpy array before the VAE call.
-
-        Returns:
-            A list of PIL images, one per decoded latent.
-        """
-        if use_base_vae:
-            latents = 1 / 0.18215 * latents
-
-        latents_numpy = latents
-        if cpu_scheduling:
-            latents_numpy = latents.detach().numpy()
-
-        profile_device = start_profiling(file_path="vae.rdc")
-        vae_start = time.time()
-        images = self.vae("forward", (latents_numpy,))
-        vae_inf_time = (time.time() - vae_start) * 1000
-        end_profiling(profile_device)
-        self.log += f"\nVAE Inference time (ms): {vae_inf_time:.3f}"
-
-        if use_base_vae:
-            images = torch.from_numpy(images)
-            images = (images.detach().cpu() * 255.0).numpy()
-            images = images.round()
-
-        # Reorder axes (0, 2, 3, 1) so each image is channel-last for PIL.
-        images = torch.from_numpy(images).to(torch.uint8).permute(0, 2, 3, 1)
-        pil_images = [Image.fromarray(image) for image in images.numpy()]
-        return pil_images
-
-    def produce_img_latents(
-        self,
-        latents,
-        text_embeddings,
-        guidance_scale,
-        total_timesteps,
-        dtype,
-        cpu_scheduling,
-        mask=None,
-        masked_image_latents=None,
-        return_all_latents=False,
-    ):
-        """Run the denoising loop over `total_timesteps`.
-
-        Args:
-            latents: Initial latents.
-            text_embeddings: Numpy array of text embeddings; cast to `dtype`.
-            guidance_scale: Passed to the UNet unchanged.
-            total_timesteps: Timesteps to iterate over.
-            dtype: Torch dtype used for timesteps, embeddings and, when a
-                mask is given, the concatenated UNet input.
-            cpu_scheduling: When true, the scheduler runs on torch tensors
-                (`.prev_sample` of the step result is used); otherwise the
-                scheduler's `step` return value is used directly.
-            mask, masked_image_latents: When both are given they are
-                concatenated to the scaled latents along dim 1.
-            return_all_latents: When true, return the latents of every step
-                (including the initial ones) concatenated along dim 0.
-
-        Returns:
-            The final latents, or all intermediate latents when
-            `return_all_latents` is true.
-        """
-        step_time_sum = 0
-        latent_history = [latents]
-        text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
-        text_embeddings_numpy = text_embeddings.detach().numpy()
-        for i, t in tqdm(enumerate(total_timesteps)):
-            step_start_time = time.time()
-            timestep = torch.tensor([t]).to(dtype).detach().numpy()
-            latent_model_input = self.scheduler.scale_model_input(latents, t)
-            if mask is not None and masked_image_latents is not None:
-                latent_model_input = torch.cat(
-                    [
-                        torch.from_numpy(np.asarray(latent_model_input)),
-                        mask,
-                        masked_image_latents,
-                    ],
-                    dim=1,
-                ).to(dtype)
-            if cpu_scheduling:
-                latent_model_input = latent_model_input.detach().numpy()
-
-            # Profiling Unet.
-            profile_device = start_profiling(file_path="unet.rdc")
-            noise_pred = self.unet(
-                "forward",
-                (
-                    latent_model_input,
-                    timestep,
-                    text_embeddings_numpy,
-                    guidance_scale,
-                ),
-                send_to_host=False,
-            )
-            end_profiling(profile_device)
-
-            if cpu_scheduling:
-                noise_pred = torch.from_numpy(noise_pred.to_host())
-                latents = self.scheduler.step(
-                    noise_pred, t, latents
-                ).prev_sample
-            else:
-                latents = self.scheduler.step(noise_pred, t, latents)
-
-            latent_history.append(latents)
-            step_time = (time.time() - step_start_time) * 1000
-            #  self.log += (
-            #      f"\nstep = {i} | timestep = {t} | time = {step_time:.2f}ms"
-            #  )
-            step_time_sum += step_time
-
-        # An empty schedule performs no steps; skip the average instead of
-        # dividing by zero.
-        if len(total_timesteps) > 0:
-            avg_step_time = step_time_sum / len(total_timesteps)
-            self.log += f"\nAverage step time: {avg_step_time}ms/it"
-
-        if not return_all_latents:
-            return latents
-        all_latents = torch.cat(latent_history, dim=0)
-        return all_latents
-
-    @classmethod
-    def _from_mlir_import(
-        cls,
-        scheduler,
-        model_id,
-        ckpt_loc,
-        custom_vae,
-        precision,
-        max_length,
-        batch_size,
-        height,
-        width,
-        use_base_vae,
-        use_tuned,
-        low_cpu_mem_usage,
-    ):
-        """Build the pipeline from modules produced by SharkifyStableDiffusionModel."""
-        mlir_import = SharkifyStableDiffusionModel(
-            model_id,
-            ckpt_loc,
-            custom_vae,
-            precision,
-            max_len=max_length,
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            use_base_vae=use_base_vae,
-            use_tuned=use_tuned,
-            low_cpu_mem_usage=low_cpu_mem_usage,
-        )
-        if cls.__name__ in cls._VAE_ENCODE_PIPELINES:
-            clip, unet, vae, vae_encode = mlir_import()
-            return cls(vae_encode, vae, clip, get_tokenizer(), unet, scheduler)
-        clip, unet, vae = mlir_import()
-        return cls(vae, clip, get_tokenizer(), unet, scheduler)
-
-    @classmethod
-    def from_pretrained(
-        cls,
-        scheduler: Union[
-            DDIMScheduler,
-            PNDMScheduler,
-            LMSDiscreteScheduler,
-            KDPM2DiscreteScheduler,
-            EulerDiscreteScheduler,
-            EulerAncestralDiscreteScheduler,
-            DPMSolverMultistepScheduler,
-            SharkEulerDiscreteScheduler,
-        ],
-        import_mlir: bool,
-        model_id: str,
-        ckpt_loc: str,
-        custom_vae: str,
-        precision: str,
-        max_length: int,
-        batch_size: int,
-        height: int,
-        width: int,
-        use_base_vae: bool,
-        use_tuned: bool,
-        low_cpu_mem_usage: bool = False,
-    ):
-        """Create a pipeline instance of `cls`.
-
-        When `import_mlir` is true the modules come from
-        SharkifyStableDiffusionModel. Otherwise the `get_*` helpers are
-        tried first and, if any of them raises, the MLIR import path is used
-        as a fallback.
-
-        Returns:
-            An instance of `cls`.
-        """
-        mlir_kwargs = dict(
-            scheduler=scheduler,
-            model_id=model_id,
-            ckpt_loc=ckpt_loc,
-            custom_vae=custom_vae,
-            precision=precision,
-            max_length=max_length,
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            use_base_vae=use_base_vae,
-            use_tuned=use_tuned,
-            low_cpu_mem_usage=low_cpu_mem_usage,
-        )
-        if import_mlir:
-            return cls._from_mlir_import(**mlir_kwargs)
-        try:
-            if cls.__name__ in cls._VAE_ENCODE_PIPELINES:
-                return cls(
-                    get_vae_encode(),
-                    get_vae(),
-                    get_clip(),
-                    get_tokenizer(),
-                    get_unet(),
-                    scheduler,
-                )
-            return cls(
-                get_vae(), get_clip(), get_tokenizer(), get_unet(), scheduler
-            )
-        # Catch Exception rather than everything so Ctrl-C and SystemExit
-        # still abort instead of silently triggering the fallback.
-        except Exception:
-            print("download pipeline failed, falling back to import_mlir")
-            return cls._from_mlir_import(**mlir_kwargs)
--- a/apps/stable_diffusion/src/schedulers/init.py
+++ b/apps/stable_diffusion/src/schedulers/init.py
@@ -1,4 +0,0 @@
-from apps.stable_diffusion.src.schedulers.sd_schedulers import get_schedulers
-from apps.stable_diffusion.src.schedulers.shark_eulerdiscrete import (
-    SharkEulerDiscreteScheduler,
-)
--- a/apps/stable_diffusion/src/schedulers/sd_schedulers.py
+++ b/apps/stable_diffusion/src/schedulers/sd_schedulers.py
@@ -1,56 +0,0 @@
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    KDPM2DiscreteScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-)
-from apps.stable_diffusion.src.schedulers.shark_eulerdiscrete import (
-    SharkEulerDiscreteScheduler,
-)
-
-
-def get_schedulers(model_id):
-    """Build every supported scheduler from `model_id`'s "scheduler" subfolder.
-
-    Returns a dict mapping scheduler name to an instance created with
-    `from_pretrained`. The "SharkEulerDiscrete" entry is additionally
-    compiled before the dict is returned.
-    """
-    # Listed in the order in which the schedulers are instantiated.
-    scheduler_classes = (
-        ("PNDM", PNDMScheduler),
-        ("KDPM2Discrete", KDPM2DiscreteScheduler),
-        ("LMSDiscrete", LMSDiscreteScheduler),
-        ("DDIM", DDIMScheduler),
-        ("DPMSolverMultistep", DPMSolverMultistepScheduler),
-        ("EulerDiscrete", EulerDiscreteScheduler),
-        ("EulerAncestralDiscrete", EulerAncestralDiscreteScheduler),
-        ("SharkEulerDiscrete", SharkEulerDiscreteScheduler),
-    )
-    schedulers = {
-        name: scheduler_cls.from_pretrained(model_id, subfolder="scheduler")
-        for name, scheduler_cls in scheduler_classes
-    }
-    schedulers["SharkEulerDiscrete"].compile()
-    return schedulers
--- a/apps/stable_diffusion/src/schedulers/shark_eulerdiscrete.py
+++ b/apps/stable_diffusion/src/schedulers/shark_eulerdiscrete.py
@@ -1,156 +0,0 @@
-import sys
-import numpy as np
-from typing import List, Optional, Tuple, Union
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    EulerDiscreteScheduler,
-)
-from diffusers.configuration_utils import register_to_config
-from apps.stable_diffusion.src.utils import (
-    compile_through_fx,
-    get_shark_model,
-    args,
-)
-import torch
-
-
-class SharkEulerDiscreteScheduler(EulerDiscreteScheduler):
-    """EulerDiscreteScheduler variant backed by SHARK modules.
-
-    `scale_model_input` and `step` are overridden to call modules obtained
-    through `compile_through_fx` or `get_shark_model`; `compile()` must be
-    called once before either of them is used.
-    """
-
-    @register_to_config
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        beta_start: float = 0.0001,
-        beta_end: float = 0.02,
-        beta_schedule: str = "linear",
-        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
-        prediction_type: str = "epsilon",
-    ):
-        super().__init__(
-            num_train_timesteps,
-            beta_start,
-            beta_end,
-            beta_schedule,
-            trained_betas,
-            prediction_type,
-        )
-
-    def compile(self):
-        """Populate `self.scaling_model` and `self.step_model`.
-
-        With `args.import_mlir` set, both modules are compiled locally via
-        `compile_through_fx`. Otherwise they are fetched with
-        `get_shark_model`; if fetching raises, `args.import_mlir` is set to
-        True and the modules are compiled locally instead.
-        """
-        SCHEDULER_BUCKET = "gs://shark_tank/stable_diffusion/schedulers"
-        BATCH_SIZE = args.batch_size
-
-        # Example inputs used for compilation. The latent is drawn before
-        # the output so the random stream is consumed in a fixed order.
-        latent_shape = (BATCH_SIZE, 4, args.height // 8, args.width // 8)
-        example_latent = torch.randn(*latent_shape)
-        example_output = torch.randn(*latent_shape)
-        example_sigma = torch.tensor(1).to(torch.float32)
-        example_dt = torch.tensor(1).to(torch.float32)
-        if args.precision == "fp16":
-            example_latent = example_latent.half()
-            example_output = example_output.half()
-
-        class ScalingModel(torch.nn.Module):
-            # Scales the latent by 1 / sqrt(sigma^2 + 1).
-            def forward(self, latent, sigma):
-                return latent / ((sigma**2 + 1) ** 0.5)
-
-        class SchedulerStepModel(torch.nn.Module):
-            # One Euler update of the latent over the sigma increment dt.
-            def forward(self, noise_pred, sigma, latent, dt):
-                pred_original_sample = latent - sigma * noise_pred
-                derivative = (latent - pred_original_sample) / sigma
-                return latent + derivative * dt
-
-        iree_flags = []
-        if len(args.iree_vulkan_target_triple) > 0:
-            iree_flags.append(
-                f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-            )
-        # Disable bindings fusion to work with moltenVK.
-        if sys.platform == "darwin":
-            iree_flags.append("-iree-stream-fuse-binding=false")
-
-        def _import(self):
-            # Compile both modules locally from the torch definitions above.
-            scaling_model = ScalingModel()
-            self.scaling_model = compile_through_fx(
-                scaling_model,
-                (example_latent, example_sigma),
-                model_name=f"euler_scale_model_input_{BATCH_SIZE}_{args.height}_{args.width}"
-                + args.precision,
-                extra_args=iree_flags,
-            )
-
-            step_model = SchedulerStepModel()
-            self.step_model = compile_through_fx(
-                step_model,
-                (example_output, example_sigma, example_latent, example_dt),
-                model_name=f"euler_step_{BATCH_SIZE}_{args.height}_{args.width}"
-                + args.precision,
-                extra_args=iree_flags,
-            )
-
-        if args.import_mlir:
-            _import(self)
-
-        else:
-            try:
-                self.scaling_model = get_shark_model(
-                    SCHEDULER_BUCKET,
-                    "euler_scale_model_input_" + args.precision,
-                    iree_flags,
-                )
-                self.step_model = get_shark_model(
-                    SCHEDULER_BUCKET,
-                    "euler_step_" + args.precision,
-                    iree_flags,
-                )
-            # Catch Exception rather than everything so Ctrl-C and
-            # SystemExit are not turned into a silent local recompile.
-            except Exception:
-                print(
-                    "failed to download model, falling back and using import_mlir"
-                )
-                args.import_mlir = True
-                _import(self)
-
-    def scale_model_input(self, sample, timestep):
-        """Scale `sample` for `timestep` using the scaling module.
-
-        Looks up the sigma whose index matches `timestep` in
-        `self.timesteps` and passes it with `sample` to the scaling module.
-        The call is made with `send_to_host=False`.
-        """
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        return self.scaling_model(
-            "forward",
-            (
-                sample,
-                sigma,
-            ),
-            send_to_host=False,
-        )
-
-    def step(self, noise_pred, timestep, latent):
-        """Advance `latent` by one step using the step module.
-
-        `dt` is the difference between the next sigma and the sigma at the
-        index matching `timestep`. Unlike the parent scheduler's usage in
-        the CPU path, the module output is returned directly (callers do
-        not read `.prev_sample` from it).
-        """
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        dt = self.sigmas[step_index + 1] - sigma
-        return self.step_model(
-            "forward",
-            (
-                noise_pred,
-                sigma,
-                latent,
-                dt,
-            ),
-            send_to_host=False,
-        )
--- a/apps/stable_diffusion/src/utils/init.py
+++ b/apps/stable_diffusion/src/utils/init.py
@@ -1,31 +0,0 @@
-from apps.stable_diffusion.src.utils.profiler import (
-    start_profiling,
-    end_profiling,
-)
-from apps.stable_diffusion.src.utils.resources import (
-    prompt_examples,
-    models_db,
-    base_models,
-    opt_flags,
-    resource_path,
-)
-from apps.stable_diffusion.src.utils.sd_annotation import sd_model_annotation
-from apps.stable_diffusion.src.utils.stable_args import args
-from apps.stable_diffusion.src.utils.utils import (
-    get_shark_model,
-    compile_through_fx,
-    set_iree_runtime_flags,
-    map_device_to_name_path,
-    set_init_device_flags,
-    get_available_devices,
-    get_opt_flags,
-    preprocessCKPT,
-    fetch_or_delete_vmfbs,
-    fetch_and_update_base_model_id,
-    get_path_to_diffusers_checkpoint,
-    sanitize_seed,
-    get_path_stem,
-    get_extended_name,
-    clear_all,
-    save_output_img,
-)
--- a/apps/stable_diffusion/src/utils/profiler.py
+++ b/apps/stable_diffusion/src/utils/profiler.py
@@ -1,18 +0,0 @@
-from apps.stable_diffusion.src.utils.stable_args import args
-
-
-# Helper function to profile the vulkan device.
-def start_profiling(file_path="foo.rdc", profiling_mode="queue"):
-    """Begin device profiling when Vulkan debugging is enabled.
-
-    Profiling starts only if `args.vulkan_debug_utils` is truthy and
-    "vulkan" occurs in `args.device`.
-
-    Args:
-        file_path: Forwarded to the device's `begin_profiling`.
-        profiling_mode: Forwarded to `begin_profiling` as `mode`.
-
-    Returns:
-        The device being profiled, or None when profiling was not started.
-    """
-    if args.vulkan_debug_utils and "vulkan" in args.device:
-        # Import the submodule explicitly: a bare `import iree` does not
-        # guarantee that `iree.runtime` has been loaded.
-        import iree.runtime
-
-        print(f"Profiling and saving to {file_path}.")
-        vulkan_device = iree.runtime.get_device(args.device)
-        vulkan_device.begin_profiling(mode=profiling_mode, file_path=file_path)
-        return vulkan_device
-    return None
-
-
-def end_profiling(device):
-    """Stop profiling on `device`; do nothing when `device` is falsy.
-
-    Returns the result of `device.end_profiling()`, or None if no device
-    was given.
-    """
-    if not device:
-        return None
-    return device.end_profiling()
--- a/apps/stable_diffusion/src/utils/resources.py
+++ b/apps/stable_diffusion/src/utils/resources.py
@@ -1,37 +0,0 @@
-import os
-import json
-import sys
-
-
-def resource_path(relative_path):
-    """Get absolute path to resource, works for dev and for PyInstaller"""
-    # Fall back to this module's directory when sys._MEIPASS is not set.
-    module_dir = os.path.dirname(os.path.abspath(__file__))
-    base_path = getattr(sys, "_MEIPASS", module_dir)
-    return os.path.join(base_path, relative_path)
-
-
-def get_json_file(path):
-    """Load the JSON resource at `path`, resolved through resource_path.
-
-    Returns the parsed content, or an empty list when the file does not
-    exist. A message is printed whenever the result is empty.
-    """
-    resolved = resource_path(path)
-    content = []
-    if os.path.exists(resolved):
-        with open(resolved, encoding="utf-8") as handle:
-            content = json.load(handle)
-
-    if not content:
-        print(f"Unable to fetch {path}")
-
-    return content
-
-
-# TODO: These loads should not live at module level: they run as a side
-# effect of importing this module, so every module-level assignment below
-# executes (and may print a warning) at import time.
-# Example prompts and the model database.
-prompt_examples = get_json_file("resources/prompts.json")
-models_db = get_json_file("resources/model_db.json")
-
-# The base_model contains the input configuration for the different
-# models and also helps in providing information for the variants.
-base_models = get_json_file("resources/base_model.json")
-
-# Contains optimization flags for different models.
-opt_flags = get_json_file("resources/opt_flags.json")
--- a/apps/stable_diffusion/src/utils/resources/base_model.json
+++ b/apps/stable_diffusion/src/utils/resources/base_model.json
@@ -1,226 +0,0 @@
-{
-    "stabilityai/stable-diffusion-2-1": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    4,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    1024
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae_encode": {
-            "image" : {
-                "shape" : [
-                    "1*batch_size",3,"8*height","8*width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    },
-    "CompVis/stable-diffusion-v1-4": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    4,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    768
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae_encode": {
-            "image" : {
-                "shape" : [
-                    "1*batch_size",3,"8*height","8*width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    },
-    "runwayml/stable-diffusion-inpainting": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    9,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    768
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae_encode": {
-            "image" : {
-                "shape" : [
-                    "1*batch_size",3,"8*height","8*width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    },
-    "stabilityai/stable-diffusion-2-inpainting": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    9,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    1024
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae_encode": {
-            "image" : {
-                "shape" : [
-                    "1*batch_size",3,"8*height","8*width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    }
-}
--- a/apps/stable_diffusion/src/utils/resources/model_config.json
+++ b/apps/stable_diffusion/src/utils/resources/model_config.json
@@ -1,23 +0,0 @@
-[
-  {
-    "stablediffusion/v1_4":"CompVis/stable-diffusion-v1-4",
-    "stablediffusion/v2_1base":"stabilityai/stable-diffusion-2-1-base",
-    "stablediffusion/v2_1":"stabilityai/stable-diffusion-2-1",
-    "stablediffusion/inpaint_v1":"runwayml/stable-diffusion-inpainting",
-    "stablediffusion/inpaint_v2":"stabilityai/stable-diffusion-2-inpainting",
-    "anythingv3/v1_4":"Linaqruf/anything-v3.0",
-    "analogdiffusion/v1_4":"wavymulder/Analog-Diffusion",
-    "openjourney/v1_4":"prompthero/openjourney",
-    "dreamlike/v1_4":"dreamlike-art/dreamlike-diffusion-1.0"
-  },
-  {
-    "stablediffusion/fp16":"fp16",
-    "stablediffusion/fp32":"main",
-    "anythingv3/fp16":"diffusers",
-    "anythingv3/fp32":"diffusers",
-    "analogdiffusion/fp16":"main",
-    "analogdiffusion/fp32":"main",
-    "openjourney/fp16":"main",
-    "openjourney/fp32":"main"
-  }
-]
--- a/apps/stable_diffusion/src/utils/resources/model_db.json
+++ b/apps/stable_diffusion/src/utils/resources/model_db.json
@@ -1,91 +0,0 @@
-[
-  {
-    "stablediffusion/untuned":"gs://shark_tank/sd_untuned",
-    "stablediffusion/tuned":"gs://shark_tank/sd_tuned",
-    "stablediffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
-    "anythingv3/tuned":"gs://shark_tank/sd_tuned",
-    "anythingv3/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
-    "analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
-    "analogdiffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "openjourney/untuned":"gs://shark_tank/sd_openjourney",
-    "openjourney/tuned":"gs://shark_tank/sd_tuned",
-    "dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
-  },
-  {
-    "stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned":"unet_8dec_fp16_tuned",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned/cuda":"unet_8dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned":"vae_19dec_fp16_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned/cuda":"vae_19dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
-    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"unet2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned/cuda":"unet_19dec_v2p1base_fp16_64_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"vae2base_19dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae2base_8dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base/cuda":"vae2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"vae2_8dec_fp16",
-    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/inpaint_v1/unet/fp16/length_77/untuned":"unet_inpaint_fp16",
-    "stablediffusion/inpaint_v1/unet/fp32/length_77/untuned":"unet_inpaint_fp32",
-    "stablediffusion/inpaint_v1/vae_encode/fp16/length_77/untuned":"vae_encode_inpaint_fp16",
-    "stablediffusion/inpaint_v1/vae_encode/fp32/length_77/untuned":"vae_encode_inpaint_fp32",
-    "stablediffusion/inpaint_v1/vae/fp16/length_77/untuned":"vae_inpaint_fp16",
-    "stablediffusion/inpaint_v1/vae/fp32/length_77/untuned":"vae_inpaint_fp32",
-    "stablediffusion/inpaint_v1/clip/fp32/length_77/untuned":"clip_inpaint_fp32",
-    "stablediffusion/inpaint_v2/unet/fp16/length_77/untuned":"unet_inpaint_fp16",
-    "stablediffusion/inpaint_v2/vae_encode/fp16/length_77/untuned":"vae_encode_inpaint_fp16",
-    "stablediffusion/inpaint_v2/vae/fp16/length_77/untuned":"vae_inpaint_fp16",
-    "stablediffusion/inpaint_v2/clip/fp32/length_77/untuned":"clip_inpaint_fp32",
-    "anythingv3/v1_4/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
-    "anythingv3/v1_4/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
-    "anythingv3/v1_4/unet/fp16/length_77/tuned/cuda":"av3_unet_19dec_fp16_cuda_tuned",
-    "anythingv3/v1_4/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
-    "anythingv3/v1_4/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
-    "anythingv3/v1_4/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
-    "anythingv3/v1_4/vae/fp16/length_77/tuned/cuda":"av3_vae_19dec_fp16_cuda_tuned",
-    "anythingv3/v1_4/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
-    "anythingv3/v1_4/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
-    "anythingv3/v1_4/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
-    "anythingv3/v1_4/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
-    "analogdiffusion/v1_4/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
-    "analogdiffusion/v1_4/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
-    "analogdiffusion/v1_4/unet/fp16/length_77/tuned/cuda":"ad_unet_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v1_4/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
-    "analogdiffusion/v1_4/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
-    "analogdiffusion/v1_4/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
-    "analogdiffusion/v1_4/vae/fp16/length_77/tuned/cuda":"ad_vae_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v1_4/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
-    "analogdiffusion/v1_4/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
-    "analogdiffusion/v1_4/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
-    "analogdiffusion/v1_4/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
-    "openjourney/v1_4/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
-    "openjourney/v1_4/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
-    "openjourney/v1_4/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
-    "openjourney/v1_4/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
-    "openjourney/v1_4/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
-    "openjourney/v1_4/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
-    "openjourney/v1_4/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
-    "dreamlike/v1_4/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
-    "dreamlike/v1_4/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
-    "dreamlike/v1_4/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
-    "dreamlike/v1_4/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
-    "dreamlike/v1_4/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
-    "dreamlike/v1_4/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
-    "dreamlike/v1_4/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
-  }
-]
--- a/apps/stable_diffusion/src/utils/resources/opt_flags.json
+++ b/apps/stable_diffusion/src/utils/resources/opt_flags.json
@@ -1,84 +0,0 @@
-{
-  "unet": {
-    "tuned": {
-      "fp16": {
-        "default_compilation_flags": []
-      },
-      "fp32": {
-        "default_compilation_flags": []
-      }
-    },
-    "untuned": {
-      "fp16": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
-        ]
-      },
-      "fp32": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      }
-    }
-  },
-  "vae": {
-    "tuned": {
-      "fp16": {
-        "default_compilation_flags": [],
-        "specified_compilation_flags": {
-          "cuda": [],
-          "default_device": [
-            "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32},iree-linalg-ext-convert-conv2d-to-winograd))"
-          ]
-        }
-      },
-      "fp32": {
-        "default_compilation_flags": [],
-        "specified_compilation_flags": {
-          "cuda": [],
-          "default_device": [
-            "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=16},iree-linalg-ext-convert-conv2d-to-winograd))"
-          ]
-        }
-      }
-    },
-    "untuned": {
-      "fp16": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
-        ]
-      },
-      "fp32": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      }
-    }
-  },
-  "clip": {
-    "tuned": {
-      "fp16": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      },
-      "fp32": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      }
-    },
-    "untuned": {
-      "fp16": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      },
-      "fp32": {
-        "default_compilation_flags": [
-          "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-preprocessing-pad-linalg-ops{pad-size=16}))"
-        ]
-      }
-    }
-  }
-}
--- a/apps/stable_diffusion/src/utils/resources/prompts.json
+++ b/apps/stable_diffusion/src/utils/resources/prompts.json
@@ -1,8 +0,0 @@
-[["A high tech solarpunk utopia in the Amazon rainforest"],
-["A pikachu fine dining with a view to the Eiffel Tower"],
-["A mecha robot in a favela in expressionist style"],
-["an insect robot preparing a delicious meal"],
-["A digital Illustration of the Babel tower, 4k, detailed, trending in artstation, fantasy vivid colors"],
-["Cluttered house in the woods, anime, oil painting, high resolution, cottagecore, ghibli inspired, 4k"],
-["A beautiful mansion beside a waterfall in the woods, by josef thoma, matte painting, trending on artstation HQ"],
-["portrait photo of a asia old warrior chief, tribal panther make up, blue on red, side profile, looking away, serious eyes"]]
--- a/apps/stable_diffusion/src/utils/sd_annotation.py
+++ b/apps/stable_diffusion/src/utils/sd_annotation.py
@@ -1,236 +0,0 @@
-import os
-import io
-from shark.model_annotation import model_annotation, create_context
-from shark.iree_utils._common import iree_target_map, run_cmd
-from shark.shark_downloader import (
-    download_model,
-    download_public_file,
-    WORKDIR,
-)
-from shark.parser import shark_args
-from apps.stable_diffusion.src.utils.stable_args import args
-
-
-def get_device():
-    device = (
-        args.device
-        if "://" not in args.device
-        else args.device.split("://")[0]
-    )
-    return device
-
-
-def get_device_args():
-    device = get_device()
-    device_spec_args = []
-    if device == "cuda":
-        from shark.iree_utils.gpu_utils import get_iree_gpu_args
-
-        gpu_flags = get_iree_gpu_args()
-        for flag in gpu_flags:
-            device_spec_args.append(flag)
-    elif device == "vulkan":
-        device_spec_args.append(
-            f"--iree-vulkan-target-triple={args.iree_vulkan_target_triple} "
-        )
-    return device, device_spec_args
-
-
-# Download the model (Unet or VAE fp16) from shark_tank
-def load_model_from_tank():
-    from apps.stable_diffusion.src.models import (
-        get_params,
-        get_variant_version,
-    )
-
-    variant, version = get_variant_version(args.hf_model_id)
-
-    shark_args.local_tank_cache = args.local_tank_cache
-    bucket_key = f"{variant}/untuned"
-    if args.annotation_model == "unet":
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/untuned"
-    elif args.annotation_model == "vae":
-        is_base = "/base" if args.use_base_vae else ""
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/untuned{is_base}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, args.annotation_model, "untuned", args.precision
-    )
-    mlir_model, func_name, inputs, golden_out = download_model(
-        model_name,
-        tank_url=bucket,
-        frontend="torch",
-    )
-    return mlir_model, model_name
-
-
-# Download the tuned config files from shark_tank
-def load_winograd_configs():
-    device = get_device()
-    config_bucket = "gs://shark_tank/sd_tuned/configs/"
-    config_name = f"{args.annotation_model}_winograd_{device}.json"
-    full_gs_url = config_bucket + config_name
-    winograd_config_dir = os.path.join(WORKDIR, "configs", config_name)
-    print("Loading Winograd config file from ", winograd_config_dir)
-    download_public_file(full_gs_url, winograd_config_dir, True)
-    return winograd_config_dir
-
-
-def load_lower_configs():
-    from apps.stable_diffusion.src.models import get_variant_version
-    from apps.stable_diffusion.src.utils.utils import (
-        fetch_and_update_base_model_id,
-    )
-
-    base_model_id = args.hf_model_id
-    if args.ckpt_loc != "":
-        base_model_id = fetch_and_update_base_model_id(args.ckpt_loc)
-    if base_model_id == "runwayml/stable-diffusion-v1-5":
-        base_model_id = "CompVis/stable-diffusion-v1-4"
-
-    variant, version = get_variant_version(base_model_id)
-
-    config_bucket = "gs://shark_tank/sd_tuned_configs/"
-
-    device, device_spec_args = get_device_args()
-    spec = ""
-    if device_spec_args:
-        spec = device_spec_args[-1].split("=")[-1].strip()
-        if device == "vulkan":
-            spec = spec.split("-")[0]
-
-    if args.annotation_model == "vae":
-        if not spec or spec in ["rdna3", "sm_80"]:
-            config_name = (
-                f"{args.annotation_model}_{args.precision}_{device}.json"
-            )
-        else:
-            config_name = f"{args.annotation_model}_{args.precision}_{device}_{spec}.json"
-    else:
-        if not spec or spec in ["rdna3", "sm_80"]:
-            config_name = f"{args.annotation_model}_{version}_{args.precision}_{device}.json"
-        else:
-            config_name = f"{args.annotation_model}_{version}_{args.precision}_{device}_{spec}.json"
-
-    full_gs_url = config_bucket + config_name
-    lowering_config_dir = os.path.join(WORKDIR, "configs", config_name)
-    print("Loading lowering config file from ", lowering_config_dir)
-    download_public_file(full_gs_url, lowering_config_dir, True)
-    return lowering_config_dir
-
-
-# Annotate the model with Winograd attribute on selected conv ops
-def annotate_with_winograd(input_mlir, winograd_config_dir, model_name):
-    with create_context() as ctx:
-        winograd_model = model_annotation(
-            ctx,
-            input_contents=input_mlir,
-            config_path=winograd_config_dir,
-            search_op="conv",
-            winograd=True,
-        )
-
-    bytecode_stream = io.BytesIO()
-    winograd_model.operation.write_bytecode(bytecode_stream)
-    bytecode = bytecode_stream.getvalue()
-
-    if args.save_annotation:
-        if model_name.split("_")[-1] != "tuned":
-            out_file_path = (
-                f"{args.annotation_output}/{model_name}_tuned_torch.mlir"
-            )
-        else:
-            out_file_path = f"{args.annotation_output}/{model_name}_torch.mlir"
-        with open(out_file_path, "w") as f:
-            f.write(str(winograd_model))
-            f.close()
-
-    return bytecode
-
-
-def dump_after_mlir(input_mlir, use_winograd):
-    import iree.compiler as ireec
-
-    device, device_spec_args = get_device_args()
-    if use_winograd:
-        preprocess_flag = "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32},iree-linalg-ext-convert-conv2d-to-winograd))"
-    else:
-        preprocess_flag = "--iree-preprocessing-pass-pipeline=builtin.module(func.func(iree-flow-detach-elementwise-from-named-ops,iree-flow-convert-1x1-filter-conv2d-to-matmul,iree-preprocessing-convert-conv2d-to-img2col,iree-preprocessing-pad-linalg-ops{pad-size=32}))"
-
-    dump_module = ireec.compile_str(
-        input_mlir,
-        target_backends=[iree_target_map(device)],
-        extra_args=device_spec_args
-        + [
-            preprocess_flag,
-            "--compile-to=preprocessing",
-        ],
-    )
-    return dump_module
-
-
-# For Unet annotate the model with tuned lowering configs
-def annotate_with_lower_configs(
-    input_mlir, lowering_config_dir, model_name, use_winograd
-):
-    # Dump IR after padding/img2col/winograd passes
-    dump_module = dump_after_mlir(input_mlir, use_winograd)
-    print("Applying tuned configs on", model_name)
-
-    # Annotate the model with lowering configs in the config file
-    with create_context() as ctx:
-        tuned_model = model_annotation(
-            ctx,
-            input_contents=dump_module,
-            config_path=lowering_config_dir,
-            search_op="all",
-        )
-
-    bytecode_stream = io.BytesIO()
-    tuned_model.operation.write_bytecode(bytecode_stream)
-    bytecode = bytecode_stream.getvalue()
-
-    if args.save_annotation:
-        if model_name.split("_")[-1] != "tuned":
-            out_file_path = (
-                f"{args.annotation_output}/{model_name}_tuned_torch.mlir"
-            )
-        else:
-            out_file_path = f"{args.annotation_output}/{model_name}_torch.mlir"
-        with open(out_file_path, "w") as f:
-            f.write(str(tuned_model))
-            f.close()
-
-    return bytecode
-
-
-def sd_model_annotation(mlir_model, model_name):
-    device = get_device()
-    if args.annotation_model == "unet" and device == "vulkan":
-        use_winograd = True
-        winograd_config_dir = load_winograd_configs()
-        winograd_model = annotate_with_winograd(
-            mlir_model, winograd_config_dir, model_name
-        )
-        lowering_config_dir = load_lower_configs()
-        tuned_model = annotate_with_lower_configs(
-            winograd_model, lowering_config_dir, model_name, use_winograd
-        )
-    elif args.annotation_model == "vae" and device == "vulkan":
-        use_winograd = True
-        winograd_config_dir = load_winograd_configs()
-        tuned_model = annotate_with_winograd(
-            mlir_model, winograd_config_dir, model_name
-        )
-    else:
-        use_winograd = False
-        lowering_config_dir = load_lower_configs()
-        tuned_model = annotate_with_lower_configs(
-            mlir_model, lowering_config_dir, model_name, use_winograd
-        )
-    return tuned_model
-
-
-if __name__ == "__main__":
-    mlir_model, model_name = load_model_from_tank()
-    sd_model_annotation(mlir_model, model_name)
--- a/apps/stable_diffusion/src/utils/stable_args.py
+++ b/apps/stable_diffusion/src/utils/stable_args.py
@@ -1,370 +0,0 @@
-import argparse
-from pathlib import Path
-
-
-def path_expand(s):
-    return Path(s).expanduser().resolve()
-
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-##############################################################################
-### Stable Diffusion Params
-##############################################################################
-
-p.add_argument(
-    "-p",
-    "--prompts",
-    nargs="+",
-    default=["cyberpunk forest by Salvador Dali"],
-    help="text of which images to be generated.",
-)
-
-p.add_argument(
-    "--negative_prompts",
-    nargs="+",
-    default=["trees, green"],
-    help="text you don't want to see in the generated image.",
-)
-
-p.add_argument(
-    "--img_path",
-    type=str,
-    help="Path to the image input for img2img/inpainting",
-)
-
-p.add_argument(
-    "--mask_path",
-    type=str,
-    help="Path to the mask image input for inpainting",
-)
-
-p.add_argument(
-    "--steps",
-    type=int,
-    default=50,
-    help="the no. of steps to do the sampling.",
-)
-
-p.add_argument(
-    "--seed",
-    type=int,
-    default=-1,
-    help="the seed to use. -1 for a random one.",
-)
-
-p.add_argument(
-    "--batch_size",
-    type=int,
-    default=1,
-    choices=range(1, 4),
-    help="the number of inferences to be made in a single `batch_count`.",
-)
-
-p.add_argument(
-    "--height",
-    type=int,
-    default=512,
-    help="the height of the output image.",
-)
-
-p.add_argument(
-    "--width",
-    type=int,
-    default=512,
-    help="the width of the output image.",
-)
-
-p.add_argument(
-    "--guidance_scale",
-    type=float,
-    default=7.5,
-    help="the value to be used for guidance scaling.",
-)
-
-p.add_argument(
-    "--max_length",
-    type=int,
-    default=64,
-    help="max length of the tokenizer output, options are 64 and 77.",
-)
-
-p.add_argument(
-    "--strength",
-    type=float,
-    default=0.8,
-    help="the strength of change applied on the given input image for img2img",
-)
-##############################################################################
-### Model Config and Usage Params
-##############################################################################
-
-p.add_argument(
-    "--device", type=str, default="vulkan", help="device to run the model."
-)
-
-p.add_argument(
-    "--precision", type=str, default="fp16", help="precision to run the model."
-)
-
-p.add_argument(
-    "--import_mlir",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.",
-)
-
-p.add_argument(
-    "--load_vmfb",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="attempts to load the model from a precompiled flatbuffer and compiles + saves it if not found.",
-)
-
-p.add_argument(
-    "--save_vmfb",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="saves the compiled flatbuffer to the local directory",
-)
-
-p.add_argument(
-    "--use_tuned",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="Download and use the tuned version of the model if available",
-)
-
-p.add_argument(
-    "--use_base_vae",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Do conversion from the VAE output to pixel space on cpu.",
-)
-
-p.add_argument(
-    "--scheduler",
-    type=str,
-    default="SharkEulerDiscrete",
-    help="other supported schedulers are [PNDM, DDIM, LMSDiscrete, EulerDiscrete, DPMSolverMultistep]",
-)
-
-p.add_argument(
-    "--output_img_format",
-    type=str,
-    default="png",
-    help="specify the format in which output image is save. Supported options: jpg / png",
-)
-
-p.add_argument(
-    "--output_dir",
-    type=str,
-    default=None,
-    help="Directory path to save the output images and json",
-)
-
-p.add_argument(
-    "--batch_count",
-    type=int,
-    default=1,
-    help="number of batch to be generated with random seeds in single execution",
-)
-
-p.add_argument(
-    "--ckpt_loc",
-    type=str,
-    default="",
-    help="Path to SD's .ckpt file.",
-)
-
-p.add_argument(
-    "--custom_vae",
-    type=str,
-    default="",
-    help="HuggingFace repo-id or path to SD model's checkpoint whose Vae needs to be plugged in.",
-)
-
-p.add_argument(
-    "--hf_model_id",
-    type=str,
-    default="stabilityai/stable-diffusion-2-1-base",
-    help="The repo-id of hugging face.",
-)
-
-p.add_argument(
-    "--low_cpu_mem_usage",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Use the accelerate package to reduce cpu memory consumption",
-)
-
-##############################################################################
-### IREE - Vulkan supported flags
-##############################################################################
-
-p.add_argument(
-    "--iree_vulkan_target_triple",
-    type=str,
-    default="",
-    help="Specify target triple for vulkan",
-)
-
-p.add_argument(
-    "--vulkan_debug_utils",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Profiles vulkan device and collects the .rdc info",
-)
-
-p.add_argument(
-    "--vulkan_large_heap_block_size",
-    default="4147483648",
-    help="flag for setting VMA preferredLargeHeapBlockSize for vulkan device, default is 4G",
-)
-
-p.add_argument(
-    "--vulkan_validation_layers",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for disabling vulkan validation layers when benchmarking",
-)
-
-##############################################################################
-### Misc. Debug and Optimization flags
-##############################################################################
-
-p.add_argument(
-    "--use_compiled_scheduler",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="use the default scheduler precompiled into the model if available",
-)
-
-p.add_argument(
-    "--local_tank_cache",
-    default="",
-    help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
-)
-
-p.add_argument(
-    "--dump_isa",
-    default=False,
-    action="store_true",
-    help="When enabled call amdllpc to get ISA dumps. use with dispatch benchmarks.",
-)
-
-p.add_argument(
-    "--dispatch_benchmarks",
-    default=None,
-    help='dispatches to return benchamrk data on.  use "All" for all, and None for none.',
-)
-
-p.add_argument(
-    "--dispatch_benchmarks_dir",
-    default="temp_dispatch_benchmarks",
-    help='directory where you want to store dispatch data generated with "--dispatch_benchmarks"',
-)
-
-p.add_argument(
-    "--enable_rgp",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for inserting debug frames between iterations for use with rgp.",
-)
-
-p.add_argument(
-    "--hide_steps",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for hiding the details of iteration/sec for each step.",
-)
-
-p.add_argument(
-    "--warmup_count",
-    type=int,
-    default=0,
-    help="flag setting warmup count for clip and vae [>= 0].",
-)
-
-p.add_argument(
-    "--clear_all",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag to clear all mlir and vmfb from common locations. Recompiling will take several minutes",
-)
-
-p.add_argument(
-    "--save_metadata_to_json",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save a generation information json file with the image.",
-)
-
-p.add_argument(
-    "--write_metadata_to_png",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save generation information in PNG chunk text to generated images.",
-)
-
-##############################################################################
-### Web UI flags
-##############################################################################
-
-p.add_argument(
-    "--progress_bar",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for removing the progress bar animation during image generation",
-)
-
-p.add_argument(
-    "--ckpt_dir",
-    type=str,
-    default="",
-    help="Path to directory where all .ckpts are stored in order to populate them in the web UI",
-)
-
-
-p.add_argument(
-    "--share",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for generating a public URL",
-)
-
-p.add_argument(
-    "--server_port",
-    type=int,
-    default=8080,
-    help="flag for setting server port",
-)
-
-##############################################################################
-### SD model auto-annotation flags
-##############################################################################
-
-p.add_argument(
-    "--annotation_output",
-    type=path_expand,
-    default="./",
-    help="Directory to save the annotated mlir file",
-)
-
-p.add_argument(
-    "--annotation_model",
-    type=str,
-    default="unet",
-    help="Options are unet and vae.",
-)
-
-p.add_argument(
-    "--save_annotation",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Save annotated mlir file",
-)
-
-args, unknown = p.parse_known_args()
--- a/apps/stable_diffusion/src/utils/utils.py
+++ b/apps/stable_diffusion/src/utils/utils.py
@@ -1,616 +0,0 @@
-import os
-import gc
-import json
-import re
-from PIL import PngImagePlugin
-from datetime import datetime as dt
-from csv import DictWriter
-from pathlib import Path
-import numpy as np
-from random import randint
-from shark.shark_inference import SharkInference
-from shark.shark_importer import import_with_fx
-from shark.iree_utils.vulkan_utils import (
-    set_iree_vulkan_runtime_flags,
-    get_vulkan_target_triple,
-)
-from shark.iree_utils.gpu_utils import get_cuda_sm_cc
-from apps.stable_diffusion.src.utils.stable_args import args
-from apps.stable_diffusion.src.utils.resources import opt_flags
-from apps.stable_diffusion.src.utils.sd_annotation import sd_model_annotation
-import sys
-from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
-    load_pipeline_from_original_stable_diffusion_ckpt,
-)
-
-
-def get_extended_name(model_name):
-    """Return `model_name` suffixed with the device taken from `args.device`.
-
-    Every "://" in the device string is replaced by "-" (e.g. "vulkan://0"
-    becomes "vulkan-0"); a device without that separator is used as-is.
-    """
-    # Joining the "://"-split pieces with "-" is the same as a plain replace,
-    # and leaves separator-free device strings untouched.
-    device_tag = args.device.replace("://", "-")
-    return f"{model_name}_{device_tag}"
-
-
-def get_vmfb_path_name(model_name):
-    """Return the path of `<model_name>.vmfb` in the current working directory."""
-    return os.path.join(os.getcwd(), model_name + ".vmfb")
-
-
-def _compile_module(shark_module, model_name, extra_args=[]):
-    """Compile `shark_module`, optionally going through a .vmfb file on disk.
-
-    Behaviour is driven by `args.load_vmfb` / `args.save_vmfb`:
-      * neither set: compile in memory with `extra_args`;
-      * load set, file present and save not set: load the existing .vmfb;
-      * otherwise: save a fresh .vmfb into the working directory and load it.
-
-    `extra_args` is only forwarded, never mutated here.
-    Returns the same `shark_module` object.
-    """
-    if not (args.load_vmfb or args.save_vmfb):
-        shark_module.compile(extra_args)
-        return shark_module
-
-    vmfb_path = get_vmfb_path_name(model_name)
-    reuse_existing = (
-        args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb
-    )
-    if reuse_existing:
-        print(f"loading existing vmfb from: {vmfb_path}")
-        shark_module.load_module(vmfb_path, extra_args=extra_args)
-        return shark_module
-
-    if args.save_vmfb:
-        print("Saving to {}".format(vmfb_path))
-    else:
-        print("No vmfb found. Compiling and saving to {}".format(vmfb_path))
-    saved_path = shark_module.save_module(os.getcwd(), model_name, extra_args)
-    shark_module.load_module(saved_path, extra_args=extra_args)
-    return shark_module
-
-
-# Downloads the model from shark_tank and returns the shark_module.
-def get_shark_model(tank_url, model_name, extra_args=[]):
-    """Download `model_name` from `tank_url` and return it compiled.
-
-    Args:
-        tank_url: forwarded to `download_model` as `tank_url`.
-        model_name: name of the model to download; also passed to
-            `_compile_module`.
-        extra_args: extra flags forwarded to `_compile_module`.
-
-    Returns:
-        Whatever `_compile_module` returns for the created SharkInference.
-
-    Side effects: writes `local_tank_cache` (and, for cuda devices,
-    `enable_tf32`) on the global `shark_args`.
-    """
-    from shark.parser import shark_args
-
-    # Set local shark_tank cache directory.
-    shark_args.local_tank_cache = args.local_tank_cache
-
-    # NOTE(review): imported only after the cache directory is set above;
-    # presumably the downloader reads it at import time - confirm.
-    from shark.shark_downloader import download_model
-
-    if "cuda" in args.device:
-        shark_args.enable_tf32 = True
-
-    # Only `mlir_model` is used below; the other three results are discarded.
-    mlir_model, func_name, inputs, golden_out = download_model(
-        model_name,
-        tank_url=tank_url,
-        frontend="torch",
-    )
-    shark_module = SharkInference(
-        mlir_model, device=args.device, mlir_dialect="linalg"
-    )
-    return _compile_module(shark_module, model_name, extra_args)
-
-
-# Converts the torch-module into a shark_module.
-def compile_through_fx(
-    model,
-    inputs,
-    model_name,
-    is_f16=False,
-    f16_input_mask=None,
-    use_tuned=False,
-    extra_args=[],
-):
-    """Import `model` via `import_with_fx` and return it compiled.
-
-    Args:
-        model, inputs, is_f16, f16_input_mask: forwarded unchanged to
-            `import_with_fx`.
-        model_name: used for the annotation step and by `_compile_module`.
-        use_tuned: when true, the imported module is passed through
-            `sd_model_annotation` before compilation.
-        extra_args: extra flags forwarded to `_compile_module`.
-
-    Returns:
-        Whatever `_compile_module` returns for the created SharkInference.
-
-    Side effects: may set `shark_args.enable_tf32` and
-    `args.annotation_model` on the shared global objects.
-    """
-    from shark.parser import shark_args
-
-    if "cuda" in args.device:
-        shark_args.enable_tf32 = True
-
-    # `func_name` is returned by the importer but not used here.
-    mlir_module, func_name = import_with_fx(
-        model, inputs, is_f16, f16_input_mask
-    )
-
-    if use_tuned:
-        # The text before the first "_" of the model name decides whether the
-        # global annotation target is switched to "vae"; it is not reset
-        # afterwards.
-        if "vae" in model_name.split("_")[0]:
-            args.annotation_model = "vae"
-        mlir_module = sd_model_annotation(mlir_module, model_name)
-
-    shark_module = SharkInference(
-        mlir_module,
-        device=args.device,
-        mlir_dialect="linalg",
-    )
-
-    # Drop the local reference and force a collection before compiling
-    # (presumably to release the imported module's memory early).
-    del mlir_module
-    gc.collect()
-
-    return _compile_module(shark_module, model_name, extra_args)
-
-
-def set_iree_runtime_flags():
-    """Build the Vulkan runtime flag list from `args` and pass it to IREE."""
-    validation = "true" if args.vulkan_validation_layers else "false"
-    runtime_flags = [
-        f"--vulkan_large_heap_block_size={args.vulkan_large_heap_block_size}",
-        f"--vulkan_validation_layers={validation}",
-    ]
-    # The RGP option adds two further flags.
-    if args.enable_rgp:
-        runtime_flags.extend(
-            ["--enable_rgp=true", "--vulkan_debug_utils=true"]
-        )
-    set_iree_vulkan_runtime_flags(flags=runtime_flags)
-
-
-def get_all_devices(driver_name):
-    """
-    Query the IREE driver `driver_name` for its available devices.
-    Returns the list reported by the driver, sorted in place by each
-    device's "path" entry (the ordering of iree's --list_devices option).
-    """
-    from iree.runtime import get_driver
-
-    devices = get_driver(driver_name).query_available_devices()
-    devices.sort(key=lambda dev: dev["path"])
-    return devices
-
-
-def get_device_mapping(driver, key_combination=3):
-    """This method ensures consistent device ordering when choosing
-    specific devices for execution
-    Args:
-        driver (str): execution driver (vulkan, cuda, rocm, etc)
-        key_combination (int, optional): choice for mapping value for device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Returns:
-        dict: map to possible device names user can input mapped to desired combination of name/path.
-        The dict is empty when the driver reports no devices.
-    """
-    from shark.iree_utils._common import iree_device_map
-
-    driver = iree_device_map(driver)
-    device_list = get_all_devices(driver)
-    device_map = dict()
-
-    def get_output_value(dev_dict):
-        # Any key_combination other than 1, 2 or 3 falls through and
-        # yields None.
-        if key_combination == 1:
-            return f"{driver}://{dev_dict['path']}"
-        if key_combination == 2:
-            return dev_dict["name"]
-        if key_combination == 3:
-            return (dev_dict["name"], f"{driver}://{dev_dict['path']}")
-
-    # A driver with no devices has no default device to map. Return the
-    # empty mapping instead of failing with IndexError on device_list[0], so
-    # callers can report the unknown device themselves.
-    if not device_list:
-        return device_map
-
-    # mapping driver name to default device (driver://0)
-    device_map[f"{driver}"] = get_output_value(device_list[0])
-    for i, device in enumerate(device_list):
-        # mapping with index
-        device_map[f"{driver}://{i}"] = get_output_value(device)
-        # mapping with full path
-        device_map[f"{driver}://{device['path']}"] = get_output_value(device)
-    return device_map
-
-
-def map_device_to_name_path(device, key_combination=3):
-    """Look up the supported name and/or path for a user-selected device.
-    Args:
-        device (str): device string chosen by the user; the part before
-        "://" is taken as the driver.
-        key_combination (int, optional): shape of the returned value.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Raises:
-        ValueError: if `device` is not a key of the driver's device mapping.
-    Returns:
-        str / tuple: the mapped string, or tuple of strings, depending on key_combination
-    """
-    driver_name = device.partition("://")[0]
-    known_devices = get_device_mapping(driver_name, key_combination)
-    try:
-        return known_devices[device]
-    except KeyError:
-        raise ValueError(f"Device '{device}' is not a valid device.")
-
-
-def set_init_device_flags():
-    """Normalise the global `args` before any model is built.
-
-    Works entirely through side effects on `args`, in four steps:
-      1. device: canonicalise `args.device` (and, for vulkan, set runtime
-         flags and fill in `args.iree_vulkan_target_triple` if it is empty);
-      2. `args.max_length`: overridden for a few specific model ids;
-      3. `args.use_tuned`: switched off for unsupported configurations (this
-         function never switches it on);
-      4. `args.import_mlir`: switched on for configurations listed below.
-    Also prints which device / tuning decision was taken.
-    """
-    if "vulkan" in args.device:
-        # set runtime flags for vulkan.
-        set_iree_runtime_flags()
-
-        # set triple flag to avoid multiple calls to get_vulkan_triple_flag
-        # (default key_combination=3 yields a (name, path) pair)
-        device_name, args.device = map_device_to_name_path(args.device)
-        if not args.iree_vulkan_target_triple:
-            triple = get_vulkan_target_triple(device_name)
-            if triple is not None:
-                args.iree_vulkan_target_triple = triple
-        print(
-            f"Found device {device_name}. Using target triple {args.iree_vulkan_target_triple}."
-        )
-    elif "cuda" in args.device:
-        args.device = "cuda"
-    elif "cpu" in args.device:
-        args.device = "cpu"
-
-    # set max_length based on availability.
-    if args.hf_model_id in [
-        "Linaqruf/anything-v3.0",
-        "wavymulder/Analog-Diffusion",
-        "dreamlike-art/dreamlike-diffusion-1.0",
-    ]:
-        args.max_length = 77
-    elif args.hf_model_id == "prompthero/openjourney":
-        args.max_length = 64
-
-    # Use tuned models in the case of fp16, vulkan rdna3 or cuda sm devices.
-    # For a custom checkpoint, the base model id is looked up via
-    # fetch_and_update_base_model_id instead of using hf_model_id.
-    base_model_id = args.hf_model_id
-    if args.ckpt_loc != "":
-        base_model_id = fetch_and_update_base_model_id(args.ckpt_loc)
-
-    # Each branch below is one reason to turn tuning off; the first matching
-    # branch wins and the rest are not evaluated.
-    if (
-        args.hf_model_id
-        in [
-            "runwayml/stable-diffusion-inpainting",
-            "stabilityai/stable-diffusion-2-inpainting",
-        ]
-        or args.precision != "fp16"
-        or args.height != 512
-        or args.width != 512
-        or args.batch_size != 1
-        or ("vulkan" not in args.device and "cuda" not in args.device)
-    ):
-        args.use_tuned = False
-
-    # Custom checkpoint whose base model is not in the list below.
-    elif args.ckpt_loc != "" and base_model_id not in [
-        "Linaqruf/anything-v3.0",
-        "dreamlike-art/dreamlike-diffusion-1.0",
-        "prompthero/openjourney",
-        "wavymulder/Analog-Diffusion",
-        "stabilityai/stable-diffusion-2-1",
-        "stabilityai/stable-diffusion-2-1-base",
-        "CompVis/stable-diffusion-v1-4",
-        "runwayml/stable-diffusion-v1-5",
-    ]:
-        args.use_tuned = False
-
-    # Vulkan target triple mentions neither rdna2 nor rdna3.
-    elif "vulkan" in args.device and not any(
-        x in args.iree_vulkan_target_triple for x in ["rdna2", "rdna3"]
-    ):
-        args.use_tuned = False
-
-    # CUDA device whose get_cuda_sm_cc() value is neither sm_80 nor sm_89.
-    elif "cuda" in args.device and get_cuda_sm_cc() not in ["sm_80", "sm_89"]:
-        args.use_tuned = False
-
-    # Base VAE requested for a model outside the two listed below.
-    elif args.use_base_vae and args.hf_model_id not in [
-        "stabilityai/stable-diffusion-2-1-base",
-        "CompVis/stable-diffusion-v1-4",
-    ]:
-        args.use_tuned = False
-
-    if args.use_tuned:
-        print(f"Using tuned models for {base_model_id}/fp16/{args.device}.")
-    else:
-        print("Tuned models are currently not supported for this setting.")
-
-    # set import_mlir to True for unuploaded models.
-    # As above, only the first matching branch runs.
-    if args.ckpt_loc != "":
-        args.import_mlir = True
-
-    elif args.hf_model_id not in [
-        "Linaqruf/anything-v3.0",
-        "dreamlike-art/dreamlike-diffusion-1.0",
-        "prompthero/openjourney",
-        "wavymulder/Analog-Diffusion",
-        "stabilityai/stable-diffusion-2-1",
-        "stabilityai/stable-diffusion-2-1-base",
-        "CompVis/stable-diffusion-v1-4",
-        "runwayml/stable-diffusion-inpainting",
-        "stabilityai/stable-diffusion-2-inpainting",
-    ]:
-        args.import_mlir = True
-
-    # Any non-default resolution or batch size.
-    elif args.height != 512 or args.width != 512 or args.batch_size != 1:
-        args.import_mlir = True
-
-    # Tuned variants of these three models.
-    elif args.use_tuned and args.hf_model_id in [
-        "dreamlike-art/dreamlike-diffusion-1.0",
-        "prompthero/openjourney",
-        "stabilityai/stable-diffusion-2-1",
-    ]:
-        args.import_mlir = True
-
-    # Tuned models on a vulkan rdna2 target.
-    elif (
-        args.use_tuned
-        and "vulkan" in args.device
-        and "rdna2" in args.iree_vulkan_target_triple
-    ):
-        args.import_mlir = True
-
-    # Tuned models on a cuda sm_89 device.
-    elif (
-        args.use_tuned
-        and "cuda" in args.device
-        and get_cuda_sm_cc() == "sm_89"
-    ):
-        args.import_mlir = True
-
-
-# Utility to get list of devices available.
-def get_available_devices():
-    """Return display strings for every usable device, always ending with "cpu".
-
-    Vulkan devices are listed first, then CUDA devices, each formatted as
-    "<device name> => <driver>://<index>". A driver that cannot be queried
-    is reported on stdout and contributes no entries.
-    """
-
-    def get_devices_by_name(driver_name):
-        from shark.iree_utils._common import iree_device_map
-
-        device_list = []
-        try:
-            driver_name = iree_device_map(driver_name)
-            device_list_dict = get_all_devices(driver_name)
-            print(f"{driver_name} devices are available.")
-        # Probing is best-effort, but only ordinary errors are swallowed:
-        # a bare `except:` would also hide KeyboardInterrupt / SystemExit.
-        except Exception:
-            print(f"{driver_name} devices are not available.")
-        else:
-            for i, device in enumerate(device_list_dict):
-                device_list.append(f"{device['name']} => {driver_name}://{i}")
-        return device_list
-
-    set_iree_runtime_flags()
-
-    available_devices = []
-    vulkan_devices = get_devices_by_name("vulkan")
-    available_devices.extend(vulkan_devices)
-    cuda_devices = get_devices_by_name("cuda")
-    available_devices.extend(cuda_devices)
-    available_devices.append("cpu")
-    return available_devices
-
-
-def disk_space_check(path, lim=20):
-    """Print a warning when the free space at `path` is `lim` GB or less.
-
-    Free space is measured in GB (2**30 bytes). Returns None.
-    """
-    from shutil import disk_usage
-
-    bytes_per_gb = 1024**3
-    free = disk_usage(path).free / bytes_per_gb
-    if free > lim:
-        return
-    print(f"[WARNING] Only {free:.2f}GB space available in {path}.")
-
-
-def get_opt_flags(model, precision="fp16"):
-    """Collect the IREE compilation flags for `model` at `precision`.
-
-    The result is built from, in order: the vulkan target triple (when
-    `args.iree_vulkan_target_triple` is non-empty), a binding-fusion switch
-    on macOS, and the "default" and per-device "specified" flag lists found
-    under `opt_flags[model][tuned|untuned][precision]`.
-    """
-    iree_flags = []
-    tuning_key = "tuned" if args.use_tuned else "untuned"
-    if len(args.iree_vulkan_target_triple) > 0:
-        iree_flags.append(
-            f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-        )
-
-    # Disable bindings fusion to work with moltenVK.
-    if sys.platform == "darwin":
-        iree_flags.append("-iree-stream-fuse-binding=false")
-
-    flag_table = opt_flags[model][tuning_key][precision]
-
-    if "default_compilation_flags" in flag_table:
-        iree_flags += flag_table["default_compilation_flags"]
-
-    if "specified_compilation_flags" in flag_table:
-        per_device = flag_table["specified_compilation_flags"]
-        # Only the driver part (text before "://") selects the flag set.
-        device = args.device.split("://")[0]
-        if device not in per_device:
-            device = "default_device"
-        iree_flags += per_device[device]
-    return iree_flags
-
-
-def get_path_stem(path):
-    """Return the final component of `path` without its last suffix."""
-    return Path(path).stem
-
-
-def get_path_to_diffusers_checkpoint(custom_weights):
-    """Return (and create) the directory that sits next to `custom_weights`.
-
-    The directory lives in the weight file's absolute parent directory and is
-    named after the file's stem. It is created if missing, and its path is
-    returned as a POSIX-style string.
-    """
-    weights_file = Path(custom_weights)
-    target_dir = weights_file.parent.absolute() / weights_file.stem
-    target_dir.mkdir(parents=True, exist_ok=True)
-    return target_dir.as_posix()
-
-
-def preprocessCKPT(custom_weights):
-    """Convert an original Stable Diffusion checkpoint into a diffusers directory.
-
-    The target directory comes from `get_path_to_diffusers_checkpoint`. If it
-    already contains anything, the conversion is skipped; otherwise the
-    pipeline is loaded from `custom_weights` and saved there. Returns None.
-    """
-    path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights)
-    first_entry = next(Path(path_to_diffusers).iterdir(), None)
-    if first_entry:
-        print("Checkpoint already loaded at : ", path_to_diffusers)
-        return
-    print(
-        "Diffusers' checkpoint will be identified here : ",
-        path_to_diffusers,
-    )
-    is_safetensors = custom_weights.lower().endswith(".safetensors")
-    # EMA weights usually yield higher quality images for inference but non-EMA weights have
-    # been yielding better results in our case.
-    # TODO: Add an option `--ema` (`--no-ema`) for users to specify if they want to go for EMA
-    #       weight extraction or not.
-    print(
-        "Loading diffusers' pipeline from original stable diffusion checkpoint"
-    )
-    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
-        checkpoint_path=custom_weights,
-        extract_ema=False,
-        from_safetensors=is_safetensors,
-    )
-    pipe.save_pretrained(path_to_diffusers)
-    print("Loading complete")
-
-
-def load_vmfb(vmfb_path, model, precision):
-    """Load the compiled module at `vmfb_path` and return the SharkInference.
-
-    Model names containing "base_vae" or "vae_encode" use the "vae" flag set,
-    and any model whose name contains "clip" is loaded with "fp32" flags.
-    """
-    if "base_vae" in model or "vae_encode" in model:
-        model = "vae"
-    if "clip" in model:
-        precision = "fp32"
-    flags = get_opt_flags(model, precision)
-    loaded = SharkInference(mlir_module=None, device=args.device)
-    loaded.load_module(vmfb_path, extra_args=flags)
-    return loaded
-
-
-# This utility returns vmfbs of Clip, Unet, Vae and Vae_encode, in case all of them
-# are present; deletes them otherwise.
-def fetch_or_delete_vmfbs(
-    extended_model_name, need_vae_encode, precision="fp32"
-):
-    """Load all expected .vmfb files, or delete the ones that exist.
-
-    `extended_model_name` maps model keys to extended names. Its first three
-    entries (four when `need_vae_encode` is true) are the expected models.
-    Returns a list with one slot per expected model: the loaded modules if
-    every file was present, otherwise all None.
-    """
-    model_keys = list(extended_model_name)
-    vmfb_path = [
-        get_vmfb_path_name(extended_model_name[key]) for key in model_keys
-    ]
-    vmfb_present = [os.path.isfile(path) for path in vmfb_path]
-
-    expected = 4 if need_vae_encode else 3
-    all_vmfb_present = all(vmfb_present[i] for i in range(expected))
-    compiled_models = [None] * expected
-
-    if all_vmfb_present:
-        for i in range(expected):
-            compiled_models[i] = load_vmfb(
-                vmfb_path[i], model_keys[i], precision
-            )
-        return compiled_models
-
-    # We need to delete vmfbs only if some of the models were compiled.
-    for i in range(expected):
-        if vmfb_present[i]:
-            os.remove(vmfb_path[i])
-            print("Deleted: ", vmfb_path[i])
-    return compiled_models
-
-
-# `fetch_and_update_base_model_id` keeps a model -> base model mapping in
-# `variants.json` in the current working directory.
-# With `base_model` == "" it is a lookup: returns the recorded base model for
-# `model_to_run`, or "" if none is recorded (nothing is written).
-# With a non-empty `base_model` it records the pair and returns None.
-def fetch_and_update_base_model_id(model_to_run, base_model=""):
-    variants_path = os.path.join(os.getcwd(), "variants.json")
-    lookup_only = base_model == ""
-    known_variants = {}
-
-    if os.path.exists(variants_path):
-        with open(variants_path, "r", encoding="utf-8") as variants_file:
-            known_variants = json.load(variants_file)
-        if lookup_only:
-            if model_to_run in known_variants:
-                return known_variants[model_to_run]
-            return base_model
-    elif lookup_only:
-        return base_model
-
-    # Update JSON data to contain an entry mapping model_to_run with base_model.
-    known_variants[model_to_run] = base_model
-    with open(variants_path, "w", encoding="utf-8") as variants_file:
-        json.dump(known_variants, variants_file)
-
-
-# Return `seed` unchanged when 0 <= seed < uint32 max; otherwise (including -1)
-# return a freshly drawn random seed.
-def sanitize_seed(seed):
-    limits = np.iinfo(np.uint32)
-    lowest, highest = limits.min, limits.max
-    out_of_range = seed < lowest or seed >= highest
-    return randint(lowest, highest) if out_of_range else seed
-
-
-# clear all the cached objects to recompile cleanly.
-def clear_all():
-    """Delete compiled and cached artifacts so everything is rebuilt.
-
-    Removes every `*.vmfb` in the current working directory, the two
-    inference yaml files if present, and the per-user AMD Vulkan cache and
-    shark_tank directories. Missing cache directories are ignored.
-    """
-    print("CLEARING ALL, EXPECT SEVERAL MINUTES TO RECOMPILE")
-    from glob import glob
-    import shutil
-
-    for vmfb in glob(os.path.join(os.getcwd(), "*.vmfb")):
-        if os.path.exists(vmfb):
-            os.remove(vmfb)
-    # Temporary workaround of deleting yaml files to incorporate diffusers' pipeline.
-    # TODO: Remove this once we have better weight updation logic.
-    for yaml_file in ("v2-inference-v.yaml", "v1-inference.yaml"):
-        if os.path.exists(yaml_file):
-            os.remove(yaml_file)
-    home = os.path.expanduser("~")
-    if os.name == "nt":  # Windows
-        appdata = os.getenv("LOCALAPPDATA")
-        # LOCALAPPDATA may be unset; os.path.join(None, ...) would raise.
-        if appdata:
-            shutil.rmtree(
-                os.path.join(appdata, "AMD/VkCache"), ignore_errors=True
-            )
-        shutil.rmtree(os.path.join(home, "shark_tank"), ignore_errors=True)
-    # os.name is "posix" on Linux/macOS (it is never "unix"), so the previous
-    # check could not match and these caches were never cleared.
-    elif os.name == "posix":
-        shutil.rmtree(
-            os.path.join(home, ".cache/AMD/VkCache"), ignore_errors=True
-        )
-        shutil.rmtree(
-            os.path.join(home, ".local/shark_tank"), ignore_errors=True
-        )
-
-
-# save output images and the inputs corresponding to it.
-def save_output_img(output_img, img_seed):
-    """Save `output_img` and record the generation settings next to it.
-
-    Files go to `<output_dir or cwd>/generated_imgs/<YYYYMMDD>/`:
-      * the image, as .jpg when `args.output_img_format` is "jpg", otherwise
-        as .png (an unsupported format is reported and saved as png);
-      * one row appended to `imgs_details.csv` (no header row is written);
-      * optionally a .json file with the same fields minus "OUTPUT", when
-        `args.save_metadata_to_json` is set.
-
-    Args:
-        output_img: image object providing a PIL-style `save` method.
-        img_seed: seed used for this image; part of the file name and of the
-            recorded metadata.
-    """
-    output_path = args.output_dir if args.output_dir else Path.cwd()
-    generated_imgs_path = Path(
-        output_path, "generated_imgs", dt.now().strftime("%Y%m%d")
-    )
-    generated_imgs_path.mkdir(parents=True, exist_ok=True)
-    csv_path = Path(generated_imgs_path, "imgs_details.csv")
-
-    # File name: first 15 prompt characters (non-alphanumerics -> "_"),
-    # the seed, and a timestamp.
-    prompt_slice = re.sub("[^a-zA-Z0-9]", "_", args.prompts[0][:15])
-    out_img_name = (
-        f"{prompt_slice}_{img_seed}_{dt.now().strftime('%y%m%d_%H%M%S')}"
-    )
-
-    img_model = args.hf_model_id
-    if args.ckpt_loc:
-        img_model = os.path.basename(args.ckpt_loc)
-
-    if args.output_img_format == "jpg":
-        out_img_path = Path(generated_imgs_path, f"{out_img_name}.jpg")
-        output_img.save(out_img_path, quality=95, subsampling=0)
-    else:
-        out_img_path = Path(generated_imgs_path, f"{out_img_name}.png")
-        pngInfo = PngImagePlugin.PngInfo()
-
-        if args.write_metadata_to_png:
-            pngInfo.add_text(
-                "parameters",
-                f"{args.prompts[0]}\nNegative prompt: {args.negative_prompts[0]}\nSteps:{args.steps}, Sampler: {args.scheduler}, CFG scale: {args.guidance_scale}, Seed: {img_seed}, Size: {args.width}x{args.height}, Model: {img_model}",
-            )
-
-        output_img.save(out_img_path, "PNG", pnginfo=pngInfo)
-
-        if args.output_img_format not in ["png", "jpg"]:
-            # Trailing space keeps the two sentences from running together
-            # ("...supported yet.Image saved...").
-            print(
-                f"[ERROR] Format {args.output_img_format} is not supported yet. "
-                "Image saved as png instead. Supported formats: png / jpg"
-            )
-
-    new_entry = {
-        "VARIANT": img_model,
-        "SCHEDULER": args.scheduler,
-        "PROMPT": args.prompts[0],
-        "NEG_PROMPT": args.negative_prompts[0],
-        "SEED": img_seed,
-        "CFG_SCALE": args.guidance_scale,
-        "PRECISION": args.precision,
-        "STEPS": args.steps,
-        "HEIGHT": args.height,
-        "WIDTH": args.width,
-        "MAX_LENGTH": args.max_length,
-        "OUTPUT": out_img_path,
-    }
-
-    # newline="" is what the csv module expects of files it writes to;
-    # without it, rows can gain an extra "\r" on platforms that translate
-    # newlines. The `with` block closes the file, so no explicit close().
-    with open(csv_path, "a", newline="") as csv_obj:
-        dictwriter_obj = DictWriter(csv_obj, fieldnames=list(new_entry.keys()))
-        dictwriter_obj.writerow(new_entry)
-
-    if args.save_metadata_to_json:
-        # The image path is not JSON-serialisable as a Path and is implied by
-        # the json file's own name, so it is dropped here.
-        del new_entry["OUTPUT"]
-        json_path = Path(generated_imgs_path, f"{out_img_name}.json")
-        with open(json_path, "w") as f:
-            json.dump(new_entry, f, indent=4)
--- a/apps/stable_diffusion/stable_diffusion_telegram_bot.md
+++ b/apps/stable_diffusion/stable_diffusion_telegram_bot.md
@@ -1,15 +0,0 @@
-First, create your bot (https://core.telegram.org/bots#how-do-i-create-a-bot).
-Then create a file named .env in the web directory.
-Add the following line to it:
-TG_TOKEN="your_token"
-replacing your_token with the bot token obtained in the previous step.
-Then run telegram_bot.py with the same parameters that you use when running index.py, for example:
-python telegram_bot.py --max_length=77 --vulkan_large_heap_block_size=0 --use_base_vae --local_tank_cache h:\shark\TEMP
-
-Bot commands:
-/select_model
-/select_scheduler
-/set_steps "integer number of steps"
-/set_guidance_scale "integer number"
-/set_negative_prompt "negative text"
-Any other text triggers the creation of an image based on it.
--- a/apps/stable_diffusion/web/index.py
+++ b/apps/stable_diffusion/web/index.py
@@ -1,44 +0,0 @@
-import os
-import sys
-
-if sys.platform == "darwin":
-    os.environ["DYLD_LIBRARY_PATH"] = "/usr/local/lib"
-
-import gradio as gr
-from apps.stable_diffusion.src import args, clear_all
-from apps.stable_diffusion.web.utils.gradio_configs import (
-    clear_gradio_tmp_imgs_folder,
-)
-
-# clear all gradio tmp images from the last session
-clear_gradio_tmp_imgs_folder()
-
-# When the clear_all option is set, run clear_all() before the UI modules
-# are imported further down in this script.
-if args.clear_all:
-    clear_all()
-
-
-def resource_path(relative_path):
-    """Resolve `relative_path` against the bundle directory (`sys._MEIPASS`,
-    set by PyInstaller) when present, else against this file's directory."""
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    bundle_root = getattr(sys, "_MEIPASS", script_dir)
-    return os.path.join(bundle_root, relative_path)
-
-
-# Stylesheet path, resolved so it also works from a PyInstaller bundle.
-dark_theme = resource_path("ui/css/sd_dark_theme.css")
-
-# Imported here rather than at the top of the file, i.e. only after the
-# start-up clean-up above has run.
-from apps.stable_diffusion.web.ui import txt2img_web, img2img_web
-
-# One tab per UI; the tab titles are matched to the interfaces by position.
-sd_web = gr.TabbedInterface(
-    [txt2img_web, img2img_web],
-    ["Text-to-Image", "Image-to-Image"],
-    css=dark_theme,
-)
-
-sd_web.queue()
-# NOTE(review): server_name "0.0.0.0" listens on every network interface, so
-# the UI is reachable from other machines even when --share is off - confirm
-# this is intended.
-sd_web.launch(
-    share=args.share,
-    inbrowser=True,
-    server_name="0.0.0.0",
-    server_port=args.server_port,
-)
--- a/apps/stable_diffusion/web/ui/init.py
+++ b/apps/stable_diffusion/web/ui/init.py
@@ -1,2 +0,0 @@
-from apps.stable_diffusion.web.ui.txt2img_ui import txt2img_web
-from apps.stable_diffusion.web.ui.img2img_ui import img2img_web
--- a/apps/stable_diffusion/web/ui/css/sd_dark_theme.css
+++ b/apps/stable_diffusion/web/ui/css/sd_dark_theme.css
@@ -1,216 +0,0 @@
-
-/* Overwrite the Gradio default theme with their .dark theme declarations */
-
-:root {
-    --color-focus-primary: var(--color-grey-700);
-    --color-focus-secondary: var(--color-grey-600);
-    --color-focus-ring: rgb(55 65 81);
-    --color-background-primary: var(--color-grey-950);
-    --color-background-secondary: var(--color-grey-900);
-    --color-background-tertiary: var(--color-grey-800);
-    --color-text-body: var(--color-grey-100);
-    --color-text-label: var(--color-grey-200);
-    --color-text-placeholder: var(--color-grey);
-    --color-text-subdued: var(--color-grey-400);
-    --color-text-link-base: var(--color-blue-500);
-    --color-text-link-hover: var(--color-blue-400);
-    --color-text-link-visited: var(--color-blue-600);
-    --color-text-link-active: var(--color-blue-500);
-    --color-text-code-background: var(--color-grey-800);
-    --color-text-code-border: color.border-primary;
-    --color-border-primary: var(--color-grey-700);
-    --color-border-secondary: var(--color-grey-600);
-    --color-border-highlight: var(--color-accent-base);
-    --color-accent-base: var(--color-orange-500);
-    --color-accent-light: var(--color-orange-300);
-    --color-accent-dark: var(--color-orange-700);
-    --color-functional-error-base: var(--color-red-400);
-    --color-functional-error-subdued: var(--color-red-300);
-    --color-functional-error-background: var(--color-background-primary);
-    --color-functional-info-base: var(--color-yellow);
-    --color-functional-info-subdued: var(--color-yellow-300);
-    --color-functional-success-base: var(--color-green);
-    --color-functional-success-subdued: var(--color-green-300);
-    --shadow-spread: 2px;
-    --api-background: linear-gradient(to bottom, rgba(255, 216, 180, .05), transparent);
-    --api-pill-background: var(--color-orange-400);
-    --api-pill-border: var(--color-orange-600);
-    --api-pill-text: var(--color-orange-900);
-    --block-border-color: var(--color-border-primary);
-    --block-background: var(--color-background-tertiary);
-    --uploadable-border-color-hover: var(--color-border-primary);
-    --uploadable-border-color-loaded: var(--color-functional-success);
-    --uploadable-text-color: var(--color-text-subdued);
-    --block_label-border-color: var(--color-border-primary);
-    --block_label-icon-color: var(--color-text-label);
-    --block_label-shadow: var(--shadow-drop);
-    --block_label-background: var(--color-background-secondary);
-    --icon_button-icon-color-base: var(--color-text-label);
-    --icon_button-icon-color-hover: var(--color-text-label);
-    --icon_button-background-base: var(--color-background-primary);
-    --icon_button-background-hover: var(--color-background-primary);
-    --icon_button-border-color-base: var(--color-background-primary);
-    --icon_button-border-color-hover: var(--color-border-secondary);
-    --input-text-color: var(--color-text-body);
-    --input-border-color-base: var(--color-border-primary);
-    --input-border-color-hover: var(--color-border-primary);
-    --input-border-color-focus: var(--color-border-primary);
-    --input-background-base: var(--color-background-tertiary);
-    --input-background-hover: var(--color-background-tertiary);
-    --input-background-focus: var(--color-background-tertiary);
-    --input-shadow: var(--shadow-inset);
-    --checkbox-border-color-base: var(--color-border-primary);
-    --checkbox-border-color-hover: var(--color-focus-primary);
-    --checkbox-border-color-focus: var(--color-blue-500);
-    --checkbox-background-base: var(--color-background-primary);
-    --checkbox-background-hover: var(--color-background-primary);
-    --checkbox-background-focus: var(--color-background-primary);
-    --checkbox-background-selected: var(--color-blue-600);
-    --checkbox-label-border-color-base: var(--color-border-primary);
-    --checkbox-label-border-color-hover: var(--color-border-primary);
-    --checkbox-label-border-color-focus: var(--color-border-secondary);
-    --checkbox-label-background-base: linear-gradient(to top, var(--color-grey-900), var(--color-grey-800));
-    --checkbox-label-background-hover: linear-gradient(to top, var(--color-grey-900), var(--color-grey-800));
-    --checkbox-label-background-focus: linear-gradient(to top, var(--color-grey-900), var(--color-grey-800));
-    --form-seperator-color: var(--color-border-primary);
-    --button-primary-border-color-base: var(--color-orange-600);
-    --button-primary-border-color-hover: var(--color-orange-600);
-    --button-primary-border-color-focus: var(--color-orange-600);
-    --button-primary-text-color-base: white;
-    --button-primary-text-color-hover: white;
-    --button-primary-text-color-focus: white;
-    --button-primary-background-base: linear-gradient(to bottom right, var(--color-orange-700), var(--color-orange-700));
-    --button-primary-background-hover: linear-gradient(to bottom right, var(--color-orange-700), var(--color-orange-500));
-    --button-primary-background-focus: linear-gradient(to bottom right, var(--color-orange-700), var(--color-orange-500));
-    --button-secondary-border-color-base: var(--color-grey-600);
-    --button-secondary-border-color-hover: var(--color-grey-600);
-    --button-secondary-border-color-focus: var(--color-grey-600);
-    --button-secondary-text-color-base: white;
-    --button-secondary-text-color-hover: white;
-    --button-secondary-text-color-focus: white;
-    --button-secondary-background-base: linear-gradient(to bottom right, var(--color-grey-600), var(--color-grey-700));
-    --button-secondary-background-hover: linear-gradient(to bottom right, var(--color-grey-600), var(--color-grey-600));
-    --button-secondary-background-focus: linear-gradient(to bottom right, var(--color-grey-600), var(--color-grey-600));
-    --button-cancel-border-color-base: var(--color-red-600);
-    --button-cancel-border-color-hover: var(--color-red-600);
-    --button-cancel-border-color-focus: var(--color-red-600);
-    --button-cancel-text-color-base: white;
-    --button-cancel-text-color-hover: white;
-    --button-cancel-text-color-focus: white;
-    --button-cancel-background-base: linear-gradient(to bottom right, var(--color-red-700), var(--color-red-700));
-    --button-cancel-background-focus: linear-gradient(to bottom right, var(--color-red-700), var(--color-red-500));
-    --button-cancel-background-hover: linear-gradient(to bottom right, var(--color-red-700), var(--color-red-500));
-    --button-plain-border-color-base: var(--color-grey-600);
-    --button-plain-border-color-hover: var(--color-grey-500);
-    --button-plain-border-color-focus: var(--color-grey-500);
-    --button-plain-text-color-base: var(--color-text-body);
-    --button-plain-text-color-hover: var(--color-text-body);
-    --button-plain-text-color-focus: var(--color-text-body);
-    --button-plain-background-base: var(--color-grey-700);
-    --button-plain-background-hover: var(--color-grey-700);
-    --button-plain-background-focus: var(--color-grey-700);
-    --gallery-label-background-base: var(--color-grey-50);
-    --gallery-label-background-hover: var(--color-grey-50);
-    --gallery-label-border-color-base: var(--color-border-primary);
-    --gallery-label-border-color-hover: var(--color-border-primary);
-    --gallery-thumb-background-base: var(--color-grey-900);
-    --gallery-thumb-background-hover: var(--color-grey-900);
-    --gallery-thumb-border-color-base: var(--color-border-primary);
-    --gallery-thumb-border-color-hover: var(--color-accent-base);
-    --gallery-thumb-border-color-focus: var(--color-blue-500);
-    --gallery-thumb-border-color-selected: var(--color-accent-base);
-    --chatbot-border-border-color-base: transparent;
-    --chatbot-border-border-color-latest: transparent;
-    --chatbot-user-background-base: ;
-    --chatbot-user-background-latest: ;
-    --chatbot-user-text-color-base: white;
-    --chatbot-user-text-color-latest: white;
-    --chatbot-bot-background-base: ;
-    --chatbot-bot-background-latest: ;
-    --chatbot-bot-text-color-base: white;
-    --chatbot-bot-text-color-latest: white;
-    --label-gradient-from: var(--color-orange-400);
-    --label-gradient-to: var(--color-orange-600);
-    --table-odd-background: var(--color-grey-900);
-    --table-even-background: var(--color-grey-950);
-    --table-background-edit: transparent;
-    --dataset-gallery-background-base: var(--color-background-primary);
-    --dataset-gallery-background-hover: var(--color-grey-800);
-    --dataset-dataframe-border-base: var(--color-border-primary);
-    --dataset-dataframe-border-hover: var(--color-border-secondary);
-    --dataset-table-background-base: transparent;
-    --dataset-table-background-hover: var(--color-grey-700);
-    --dataset-table-border-base: var(--color-grey-800);
-    --dataset-table-border-hover: var(--color-grey-800);
-}
-
-/* SHARK theme */
-
-/* display in full width for desktop devices */
-@media (min-width: 1536px)
-{
-    .gradio-container .contain {
-        max-width: var(--size-full) !important;
-    }
-}
-
-.gradio-container .contain {
-    padding: 0 var(--size-4) !important;
-}
-
-.gradio-container {
-    background-color: var(--color-background-primary);
-}
-
-.container {
-    background-color: black !important;
-    padding-top: var(--size-5) !important;
-}
-
-#ui_title {
-    padding: var(--size-2) 0 0 var(--size-1);
-}
-
-#top_logo {
-    background-color: transparent;
-    border-radius: 0 !important;
-    border: 0;
-}
-
-#demo_title_outer {
-    border-radius: 0;
-}
-
-/* square off the first child of the prompt box wrapper */
-#prompt_box_outer div:first-child {
-    border-radius: 0 !important;
-}
-
-#prompt_box textarea, #negative_prompt_box textarea {
-    background-color: var(--color-background-primary) !important;
-}
-
-#prompt_examples {
-    margin: 0 !important;
-}
-
-#prompt_examples svg {
-    display: none !important;
-}
-
-#ui_body {
-    background-color: var(--color-background-secondary) !important;
-    padding: var(--size-2) !important;
-    border-radius: 0.5em !important;
-}
-
-#img_result+div {
-    display: none !important;
-}
-
-footer {
-    display: none !important;
-}
-
-#gallery + div {
-    border-radius: 0 !important;
-}
--- a/apps/stable_diffusion/web/ui/img2img_ui.py
+++ b/apps/stable_diffusion/web/ui/img2img_ui.py
@@ -1,231 +0,0 @@
-import os
-import sys
-import glob
-from pathlib import Path
-import gradio as gr
-from PIL import Image
-from apps.stable_diffusion.scripts import img2img_inf
-from apps.stable_diffusion.src import args
-from apps.stable_diffusion.web.ui.utils import (
-    available_devices,
-    nodlogo_loc,
-)
-
-
-with gr.Blocks(title="Image-to-Image") as img2img_web:
-    with gr.Row(elem_id="ui_title"):
-        nod_logo = Image.open(nodlogo_loc)
-        with gr.Row():
-            with gr.Column(scale=1, elem_id="demo_title_outer"):
-                gr.Image(
-                    value=nod_logo,
-                    show_label=False,
-                    interactive=False,
-                    elem_id="top_logo",
-                ).style(width=150, height=50)
-    with gr.Row(elem_id="ui_body"):
-        with gr.Row():
-            with gr.Column(scale=1, min_width=600):
-                with gr.Row():
-                    ckpt_path = (
-                        Path(args.ckpt_dir)
-                        if args.ckpt_dir
-                        else Path(Path.cwd(), "models")
-                    )
-                    ckpt_path.mkdir(parents=True, exist_ok=True)
-                    types = (
-                        "*.ckpt",
-                        "*.safetensors",
-                    )  # the tuple of file types
-                    ckpt_files = ["None"]
-                    for extn in types:
-                        files = glob.glob(os.path.join(ckpt_path, extn))
-                        ckpt_files.extend(files)
-                    custom_model = gr.Dropdown(
-                        label=f"Models (Custom Model path: {ckpt_path})",
-                        value=args.ckpt_loc if args.ckpt_loc else "None",
-                        choices=ckpt_files
-                        + [
-                            "Linaqruf/anything-v3.0",
-                            "prompthero/openjourney",
-                            "wavymulder/Analog-Diffusion",
-                            "stabilityai/stable-diffusion-2-1",
-                            "stabilityai/stable-diffusion-2-1-base",
-                            "CompVis/stable-diffusion-v1-4",
-                        ],
-                    )
-                    hf_model_id = gr.Textbox(
-                        placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
-                        value="",
-                        label="HuggingFace Model ID",
-                        lines=3,
-                    )
-
-                with gr.Group(elem_id="prompt_box_outer"):
-                    prompt = gr.Textbox(
-                        label="Prompt",
-                        value=args.prompts[0],
-                        lines=1,
-                        elem_id="prompt_box",
-                    )
-                    negative_prompt = gr.Textbox(
-                        label="Negative Prompt",
-                        value=args.negative_prompts[0],
-                        lines=1,
-                        elem_id="negative_prompt_box",
-                    )
-
-                init_image = gr.Image(label="Input Image", type="filepath")
-
-                with gr.Accordion(label="Advanced Options", open=False):
-                    with gr.Row():
-                        scheduler = gr.Dropdown(
-                            label="Scheduler",
-                            value="PNDM",
-                            choices=[
-                                "DDIM",
-                                "PNDM",
-                                "DPMSolverMultistep",
-                                "EulerAncestralDiscrete",
-                            ],
-                        )
-                        with gr.Group():
-                            save_metadata_to_png = gr.Checkbox(
-                                label="Save prompt information to PNG",
-                                value=args.write_metadata_to_png,
-                                interactive=True,
-                            )
-                            save_metadata_to_json = gr.Checkbox(
-                                label="Save prompt information to JSON file",
-                                value=args.save_metadata_to_json,
-                                interactive=True,
-                            )
-                    with gr.Row():
-                        height = gr.Slider(
-                            384, 786, value=args.height, step=8, label="Height"
-                        )
-                        width = gr.Slider(
-                            384, 786, value=args.width, step=8, label="Width"
-                        )
-                        precision = gr.Radio(
-                            label="Precision",
-                            value=args.precision,
-                            choices=[
-                                "fp16",
-                                "fp32",
-                            ],
-                            visible=False,
-                        )
-                        max_length = gr.Radio(
-                            label="Max Length",
-                            value=args.max_length,
-                            choices=[
-                                64,
-                                77,
-                            ],
-                            visible=False,
-                        )
-                    with gr.Row():
-                        steps = gr.Slider(
-                            1, 100, value=args.steps, step=1, label="Steps"
-                        )
-                        strength = gr.Slider(
-                            0,
-                            1,
-                            value=args.strength,
-                            step=0.1,
-                            label="Strength",
-                        )
-                    with gr.Row():
-                        guidance_scale = gr.Slider(
-                            0,
-                            50,
-                            value=args.guidance_scale,
-                            step=0.1,
-                            label="CFG Scale",
-                        )
-                        batch_count = gr.Slider(
-                            1,
-                            100,
-                            value=args.batch_count,
-                            step=1,
-                            label="Batch Count",
-                            interactive=True,
-                        )
-                        batch_size = gr.Slider(
-                            1,
-                            4,
-                            value=args.batch_size,
-                            step=1,
-                            label="Batch Size",
-                            interactive=False,
-                            visible=False,
-                        )
-                with gr.Row():
-                    seed = gr.Number(
-                        value=args.seed, precision=0, label="Seed"
-                    )
-                    device = gr.Dropdown(
-                        label="Device",
-                        value=available_devices[0],
-                        choices=available_devices,
-                    )
-                with gr.Row():
-                    random_seed = gr.Button("Randomize Seed")
-                    random_seed.click(
-                        None,
-                        inputs=[],
-                        outputs=[seed],
-                        _js="() => Math.floor(Math.random() * 4294967295)",
-                    )
-                    stable_diffusion = gr.Button("Generate Image(s)")
-
-            with gr.Column(scale=1, min_width=600):
-                with gr.Group():
-                    gallery = gr.Gallery(
-                        label="Generated images",
-                        show_label=False,
-                        elem_id="gallery",
-                    ).style(grid=[2])
-                    std_output = gr.Textbox(
-                        value="Nothing to show.",
-                        lines=1,
-                        show_label=False,
-                    )
-                output_dir = args.output_dir if args.output_dir else Path.cwd()
-                output_dir = Path(output_dir, "generated_imgs")
-                output_loc = gr.Textbox(
-                    label="Saving Images at",
-                    value=output_dir,
-                    interactive=False,
-                )
-        kwargs = dict(
-            fn=img2img_inf,
-            inputs=[
-                prompt,
-                negative_prompt,
-                init_image,
-                height,
-                width,
-                steps,
-                strength,
-                guidance_scale,
-                seed,
-                batch_count,
-                batch_size,
-                scheduler,
-                custom_model,
-                hf_model_id,
-                precision,
-                device,
-                max_length,
-                save_metadata_to_json,
-                save_metadata_to_png,
-            ],
-            outputs=[gallery, std_output],
-            show_progress=args.progress_bar,
-        )
-
-        prompt.submit(**kwargs)
-        negative_prompt.submit(**kwargs)
-        stable_diffusion.click(**kwargs)
--- a/apps/stable_diffusion/web/ui/logos/nod-logo.png
+++ b/apps/stable_diffusion/web/ui/logos/nod-logo.png
--- a/apps/stable_diffusion/web/ui/txt2img_ui.py
+++ b/apps/stable_diffusion/web/ui/txt2img_ui.py
@@ -1,229 +0,0 @@
-import os
-import sys
-import glob
-from pathlib import Path
-import gradio as gr
-from PIL import Image
-from apps.stable_diffusion.scripts import txt2img_inf
-from apps.stable_diffusion.src import prompt_examples, args
-from apps.stable_diffusion.web.ui.utils import (
-    available_devices,
-    nodlogo_loc,
-)
-
-
-with gr.Blocks(title="Text-to-Image") as txt2img_web:
-    with gr.Row(elem_id="ui_title"):
-        nod_logo = Image.open(nodlogo_loc)
-        with gr.Row():
-            with gr.Column(scale=1, elem_id="demo_title_outer"):
-                gr.Image(
-                    value=nod_logo,
-                    show_label=False,
-                    interactive=False,
-                    elem_id="top_logo",
-                ).style(width=150, height=50)
-    with gr.Row(elem_id="ui_body"):
-        with gr.Row():
-            with gr.Column(scale=1, min_width=600):
-                with gr.Row():
-                    ckpt_path = (
-                        Path(args.ckpt_dir)
-                        if args.ckpt_dir
-                        else Path(Path.cwd(), "models")
-                    )
-                    ckpt_path.mkdir(parents=True, exist_ok=True)
-                    types = (
-                        "*.ckpt",
-                        "*.safetensors",
-                    )  # the tuple of file types
-                    ckpt_files = ["None"]
-                    for extn in types:
-                        files = glob.glob(os.path.join(ckpt_path, extn))
-                        ckpt_files.extend(files)
-                    custom_model = gr.Dropdown(
-                        label=f"Models (Custom Model path: {ckpt_path})",
-                        value=args.ckpt_loc if args.ckpt_loc else "None",
-                        choices=ckpt_files
-                        + [
-                            "Linaqruf/anything-v3.0",
-                            "prompthero/openjourney",
-                            "wavymulder/Analog-Diffusion",
-                            "stabilityai/stable-diffusion-2-1",
-                            "stabilityai/stable-diffusion-2-1-base",
-                            "CompVis/stable-diffusion-v1-4",
-                        ],
-                    )
-                    hf_model_id = gr.Textbox(
-                        placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
-                        value="",
-                        label="HuggingFace Model ID",
-                        lines=3,
-                    )
-
-                with gr.Group(elem_id="prompt_box_outer"):
-                    prompt = gr.Textbox(
-                        label="Prompt",
-                        value=args.prompts[0],
-                        lines=1,
-                        elem_id="prompt_box",
-                    )
-                    negative_prompt = gr.Textbox(
-                        label="Negative Prompt",
-                        value=args.negative_prompts[0],
-                        lines=1,
-                        elem_id="negative_prompt_box",
-                    )
-                with gr.Accordion(label="Advanced Options", open=False):
-                    with gr.Row():
-                        scheduler = gr.Dropdown(
-                            label="Scheduler",
-                            value=args.scheduler,
-                            choices=[
-                                "DDIM",
-                                "PNDM",
-                                "LMSDiscrete",
-                                "KDPM2Discrete",
-                                "DPMSolverMultistep",
-                                "EulerDiscrete",
-                                "EulerAncestralDiscrete",
-                                "SharkEulerDiscrete",
-                            ],
-                        )
-                        with gr.Group():
-                            save_metadata_to_png = gr.Checkbox(
-                                label="Save prompt information to PNG",
-                                value=args.write_metadata_to_png,
-                                interactive=True,
-                            )
-                            save_metadata_to_json = gr.Checkbox(
-                                label="Save prompt information to JSON file",
-                                value=args.save_metadata_to_json,
-                                interactive=True,
-                            )
-                    with gr.Row():
-                        height = gr.Slider(
-                            384, 786, value=args.height, step=8, label="Height"
-                        )
-                        width = gr.Slider(
-                            384, 786, value=args.width, step=8, label="Width"
-                        )
-                        precision = gr.Radio(
-                            label="Precision",
-                            value=args.precision,
-                            choices=[
-                                "fp16",
-                                "fp32",
-                            ],
-                            visible=False,
-                        )
-                        max_length = gr.Radio(
-                            label="Max Length",
-                            value=args.max_length,
-                            choices=[
-                                64,
-                                77,
-                            ],
-                            visible=False,
-                        )
-                    with gr.Row():
-                        steps = gr.Slider(
-                            1, 100, value=args.steps, step=1, label="Steps"
-                        )
-                        guidance_scale = gr.Slider(
-                            0,
-                            50,
-                            value=args.guidance_scale,
-                            step=0.1,
-                            label="CFG Scale",
-                        )
-                    with gr.Row():
-                        batch_count = gr.Slider(
-                            1,
-                            100,
-                            value=args.batch_count,
-                            step=1,
-                            label="Batch Count",
-                            interactive=True,
-                        )
-                        batch_size = gr.Slider(
-                            1,
-                            4,
-                            value=args.batch_size,
-                            step=1,
-                            label="Batch Size",
-                            interactive=True,
-                        )
-                with gr.Row():
-                    seed = gr.Number(
-                        value=args.seed, precision=0, label="Seed"
-                    )
-                    device = gr.Dropdown(
-                        label="Device",
-                        value=available_devices[0],
-                        choices=available_devices,
-                    )
-                with gr.Row():
-                    random_seed = gr.Button("Randomize Seed")
-                    random_seed.click(
-                        None,
-                        inputs=[],
-                        outputs=[seed],
-                        _js="() => Math.floor(Math.random() * 4294967295)",
-                    )
-                    stable_diffusion = gr.Button("Generate Image(s)")
-                with gr.Accordion(label="Prompt Examples!", open=False):
-                    ex = gr.Examples(
-                        examples=prompt_examples,
-                        inputs=prompt,
-                        cache_examples=False,
-                        elem_id="prompt_examples",
-                    )
-
-            with gr.Column(scale=1, min_width=600):
-                with gr.Group():
-                    gallery = gr.Gallery(
-                        label="Generated images",
-                        show_label=False,
-                        elem_id="gallery",
-                    ).style(grid=[2])
-                    std_output = gr.Textbox(
-                        value="Nothing to show.",
-                        lines=1,
-                        show_label=False,
-                    )
-                output_dir = args.output_dir if args.output_dir else Path.cwd()
-                output_dir = Path(output_dir, "generated_imgs")
-                output_loc = gr.Textbox(
-                    label="Saving Images at",
-                    value=output_dir,
-                    interactive=False,
-                )
-        kwargs = dict(
-            fn=txt2img_inf,
-            inputs=[
-                prompt,
-                negative_prompt,
-                height,
-                width,
-                steps,
-                guidance_scale,
-                seed,
-                batch_count,
-                batch_size,
-                scheduler,
-                custom_model,
-                hf_model_id,
-                precision,
-                device,
-                max_length,
-                save_metadata_to_json,
-                save_metadata_to_png,
-            ],
-            outputs=[gallery, std_output],
-            show_progress=args.progress_bar,
-        )
-
-        prompt.submit(**kwargs)
-        negative_prompt.submit(**kwargs)
-        stable_diffusion.click(**kwargs)
--- a/apps/stable_diffusion/web/ui/utils.py
+++ b/apps/stable_diffusion/web/ui/utils.py
@@ -1,15 +0,0 @@
-import os
-import sys
-from apps.stable_diffusion.src import get_available_devices
-
-
-def resource_path(relative_path):
-    """Get absolute path to resource, works for dev and for PyInstaller"""
-    base_path = getattr(
-        sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
-    )
-    return os.path.join(base_path, relative_path)
-
-
-nodlogo_loc = resource_path("logos/nod-logo.png")
-available_devices = get_available_devices()
--- a/apps/stable_diffusion/web/utils/gradio_configs.py
+++ b/apps/stable_diffusion/web/utils/gradio_configs.py
@@ -1,31 +0,0 @@
-import os
-import tempfile
-import gradio
-from os import listdir
-
-gradio_tmp_imgs_folder = os.path.join(os.getcwd(), "shark_tmp/")
-
-
-# Clear all gradio tmp images
-def clear_gradio_tmp_imgs_folder():
-    if not os.path.exists(gradio_tmp_imgs_folder):
-        return
-    for fileName in listdir(gradio_tmp_imgs_folder):
-        # Delete tmp png files
-        if fileName.startswith("tmp") and fileName.endswith(".png"):
-            os.remove(gradio_tmp_imgs_folder + fileName)
-
-
-# Overwrite save_pil_to_file from gradio to save tmp images generated by gradio into our own tmp folder
-def save_pil_to_file(pil_image, dir=None):
-    if not os.path.exists(gradio_tmp_imgs_folder):
-        os.mkdir(gradio_tmp_imgs_folder)
-    file_obj = tempfile.NamedTemporaryFile(
-        delete=False, suffix=".png", dir=gradio_tmp_imgs_folder
-    )
-    pil_image.save(file_obj)
-    return file_obj
-
-
-# Register save_pil_to_file override
-gradio.processing_utils.save_pil_to_file = save_pil_to_file
--- a/benchmarks/init.py
+++ b/benchmarks/init.py
--- a/benchmarks/hf_model_benchmark.py
+++ b/benchmarks/hf_model_benchmark.py
@@ -1,22 +0,0 @@
-import torch
-from shark.parser import parser
-from benchmarks.hf_transformer import SharkHFBenchmarkRunner
-
-parser.add_argument(
-    "--model_name",
-    type=str,
-    required=True,
-    help='Specifies name of HF model to benchmark. (For exmaple "microsoft/MiniLM-L12-H384-uncased"',
-)
-load_args, unknown = parser.parse_known_args()
-
-if __name__ == "__main__":
-    model_name = load_args.model_name
-    test_input = torch.randint(2, (1, 128))
-    shark_module = SharkHFBenchmarkRunner(
-        model_name, (test_input,), jit_trace=True
-    )
-    shark_module.benchmark_c()
-    shark_module.benchmark_python((test_input,))
-    shark_module.benchmark_torch(test_input)
-    shark_module.benchmark_onnx(test_input)
--- a/benchmarks/hf_transformer.py
+++ b/benchmarks/hf_transformer.py
@@ -1,181 +0,0 @@
-import torch
-from shark.shark_benchmark_runner import SharkBenchmarkRunner
-from shark.parser import shark_args
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from onnxruntime.transformers.benchmark import (
-    run_pytorch,
-    run_tensorflow,
-    run_onnxruntime,
-)
-from onnxruntime.transformers.huggingface_models import MODELS
-from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
-import os
-import psutil
-
-
-class OnnxFusionOptions(object):
-    def __init__(self):
-        self.disable_gelu = False
-        self.disable_layer_norm = False
-        self.disable_attention = False
-        self.disable_skip_layer_norm = False
-        self.disable_embed_layer_norm = False
-        self.disable_bias_skip_layer_norm = False
-        self.disable_bias_gelu = False
-        self.enable_gelu_approximation = False
-        self.use_mask_index = False
-        self.no_attention_mask = False
-
-
-class HuggingFaceLanguage(torch.nn.Module):
-    def __init__(self, hf_model_name):
-        super().__init__()
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            hf_model_name,  # The pretrained model.
-            num_labels=2,  # The number of output labels--2 for binary classification.
-            output_attentions=False,  # Whether the model returns attentions weights.
-            output_hidden_states=False,  # Whether the model returns all hidden-states.
-            torchscript=True,
-        )
-
-    def forward(self, tokens):
-        return self.model.forward(tokens)[0]
-
-
-class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
-    # SharkRunner derived class with Benchmarking capabilities.
-    def __init__(
-        self,
-        model_name: str,
-        input: tuple,
-        dynamic: bool = False,
-        device: str = None,
-        jit_trace: bool = False,
-        from_aot: bool = False,
-        frontend: str = "torch",
-    ):
-        self.device = device if device is not None else shark_args.device
-        if self.device == "gpu":
-            raise ValueError(
-                "Currently GPU Benchmarking is not supported due to OOM from ORT."
-            )
-        self.model_name = model_name
-        model = HuggingFaceLanguage(model_name)
-        SharkBenchmarkRunner.__init__(
-            self,
-            model,
-            input,
-            dynamic,
-            self.device,
-            jit_trace,
-            from_aot,
-            frontend,
-        )
-
-    def benchmark_torch(self, inputs):
-        use_gpu = self.device == "gpu"
-        # Set set the model's layer number to automatic.
-        config_modifier = ConfigModifier(None)
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        verbose = False
-        result = run_pytorch(
-            use_gpu,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            False,
-            cache_dir,
-            verbose,
-        )
-        print(
-            f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-
-    # TODO: Currently non-functional due to TF runtime error. There might be some issue with, initializing TF.
-    def benchmark_tf(self, inputs):
-        use_gpu = self.device == "gpu"
-        # Set set the model's layer number to automatic.
-        config_modifier = ConfigModifier(None)
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        verbose = False
-        result = run_tensorflow(
-            use_gpu,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            cache_dir,
-            verbose,
-        )
-        print(
-            f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-
-    def benchmark_onnx(self, inputs):
-        if self.model_name not in MODELS:
-            print(
-                f"{self.model_name} is currently not supported in ORT's HF. Check \
-https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
-for currently supported models. Exiting benchmark ONNX."
-            )
-            return
-        use_gpu = self.device == "gpu"
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        onnx_dir = os.path.join(".", "onnx_models")
-        verbose = False
-        input_counts = [1]
-        optimize_onnx = True
-        validate_onnx = False
-        disable_ort_io_binding = False
-        use_raw_attention_mask = True
-        model_fusion_statistics = {}
-        overwrite = False
-        model_source = "pt"  # Either "pt" or "tf"
-        provider = None
-        config_modifier = ConfigModifier(None)
-        onnx_args = OnnxFusionOptions()
-        result = run_onnxruntime(
-            use_gpu,
-            provider,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            input_counts,
-            optimize_onnx,
-            validate_onnx,
-            cache_dir,
-            onnx_dir,
-            verbose,
-            overwrite,
-            disable_ort_io_binding,
-            use_raw_attention_mask,
-            model_fusion_statistics,
-            model_source,
-            onnx_args,
-        )
-        print(
-            f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
--- a/benchmarks/tests/test_benchmark.py
+++ b/benchmarks/tests/test_benchmark.py
@@ -1,231 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers
-
-import torch
-import tensorflow as tf
-import numpy as np
-import torchvision.models as models
-from transformers import (
-    AutoModelForSequenceClassification,
-    BertTokenizer,
-    TFBertModel,
-)
-import importlib
-import pytest
-import unittest
-
-torch.manual_seed(0)
-gpus = tf.config.experimental.list_physical_devices("GPU")
-for gpu in gpus:
-    tf.config.experimental.set_memory_growth(gpu, True)
-
-##################### Tensorflow Hugging Face LM Models ###################################
-MAX_SEQUENCE_LENGTH = 512
-BATCH_SIZE = 1
-
-# Create a set of 2-dimensional inputs
-tf_bert_input = [
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-]
-
-
-class TFHuggingFaceLanguage(tf.Module):
-    def __init__(self, hf_model_name):
-        super(TFHuggingFaceLanguage, self).__init__()
-        # Create a BERT trainer with the created network.
-        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
-
-        # Invoke the trainer model on the inputs. This causes the layer to be built.
-        self.m.predict = lambda x, y, z: self.m.call(
-            input_ids=x, attention_mask=y, token_type_ids=z, training=False
-        )
-
-    @tf.function(input_signature=tf_bert_input, jit_compile=True)
-    def forward(self, input_ids, attention_mask, token_type_ids):
-        return self.m.predict(input_ids, attention_mask, token_type_ids)
-
-
-def get_TFhf_model(name):
-    model = TFHuggingFaceLanguage(name)
-    tokenizer = BertTokenizer.from_pretrained(name)
-    text = "Replace me by any text you'd like."
-    encoded_input = tokenizer(
-        text,
-        padding="max_length",
-        truncation=True,
-        max_length=MAX_SEQUENCE_LENGTH,
-    )
-    for key in encoded_input:
-        encoded_input[key] = tf.expand_dims(
-            tf.convert_to_tensor(encoded_input[key]), 0
-        )
-    test_input = (
-        encoded_input["input_ids"],
-        encoded_input["attention_mask"],
-        encoded_input["token_type_ids"],
-    )
-    actual_out = model.forward(*test_input)
-    return model, test_input, actual_out
-
-
-##################### Hugging Face LM Models ###################################
-
-
-class HuggingFaceLanguage(torch.nn.Module):
-    def __init__(self, hf_model_name):
-        super().__init__()
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            hf_model_name,  # The pretrained model.
-            num_labels=2,  # The number of output labels--2 for binary classification.
-            output_attentions=False,  # Whether the model returns attentions weights.
-            output_hidden_states=False,  # Whether the model returns all hidden-states.
-            torchscript=True,
-        )
-
-    def forward(self, tokens):
-        return self.model.forward(tokens)[0]
-
-
-def get_hf_model(name):
-    model = HuggingFaceLanguage(name)
-    # TODO: Currently the test input is set to (1,128)
-    test_input = torch.randint(2, (1, 128))
-    actual_out = model(test_input)
-    return model, test_input, actual_out
-
-
-################################################################################
-
-##################### Torch Vision Models    ###################################
-
-
-class VisionModule(torch.nn.Module):
-    def __init__(self, model):
-        super().__init__()
-        self.model = model
-        self.train(False)
-
-    def forward(self, input):
-        return self.model.forward(input)
-
-
-def get_vision_model(torch_model):
-    model = VisionModule(torch_model)
-    # TODO: Currently the test input is set to (1,128)
-    test_input = torch.randn(1, 3, 224, 224)
-    actual_out = model(test_input)
-    return model, test_input, actual_out
-
-
-#############################   Benchmark Tests ####################################
-
-pytest_benchmark_param = pytest.mark.parametrize(
-    ("dynamic", "device"),
-    [
-        pytest.param(False, "cpu"),
-        # TODO: Language models are failing for dynamic case..
-        pytest.param(True, "cpu", marks=pytest.mark.skip),
-        pytest.param(
-            False,
-            "gpu",
-            marks=pytest.mark.skipif(
-                check_device_drivers("gpu"), reason="nvidia-smi not found"
-            ),
-        ),
-        pytest.param(True, "gpu", marks=pytest.mark.skip),
-        pytest.param(
-            False,
-            "vulkan",
-            marks=pytest.mark.skipif(
-                check_device_drivers("vulkan"),
-                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
-            ),
-        ),
-        pytest.param(
-            True,
-            "vulkan",
-            marks=pytest.mark.skipif(
-                check_device_drivers("vulkan"),
-                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
-            ),
-        ),
-    ],
-)
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("iree.tools") is None,
-    reason="Cannot find tools to import TF",
-)
-@pytest_benchmark_param
-def test_bench_minilm_torch(dynamic, device):
-    model, test_input, act_out = get_hf_model(
-        "microsoft/MiniLM-L12-H384-uncased"
-    )
-    shark_module = SharkInference(
-        model,
-        (test_input,),
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.compile()
-        shark_module.benchmark_all((test_input,))
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("iree.tools") is None,
-    reason="Cannot find tools to import TF",
-)
-@pytest_benchmark_param
-def test_bench_distilbert(dynamic, device):
-    model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
-    shark_module = SharkInference(
-        model,
-        test_input,
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.set_frontend("tensorflow")
-        shark_module.compile()
-        shark_module.benchmark_all(test_input)
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
-
-
-@pytest.mark.skip(reason="XLM Roberta too large to test.")
-@pytest_benchmark_param
-def test_bench_xlm_roberta(dynamic, device):
-    model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
-    shark_module = SharkInference(
-        model,
-        test_input,
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.set_frontend("tensorflow")
-        shark_module.compile()
-        shark_module.benchmark_all(test_input)
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
--- a/benchmarks/tests/test_hf_benchmark.py
+++ b/benchmarks/tests/test_hf_benchmark.py
@@ -1,45 +0,0 @@
-import torch
-from benchmarks.hf_transformer import SharkHFBenchmarkRunner
-import importlib
-import pytest
-
-torch.manual_seed(0)
-
-############################# HF Benchmark Tests ####################################
-
-# Test running benchmark module without failing.
-pytest_benchmark_param = pytest.mark.parametrize(
-    ("dynamic", "device"),
-    [
-        pytest.param(False, "cpu"),
-        # TODO: Language models are failing for dynamic case..
-        pytest.param(True, "cpu", marks=pytest.mark.skip),
-    ],
-)
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("onnxruntime") is None,
-    reason="Cannot find ONNXRUNTIME.",
-)
-@pytest_benchmark_param
-def test_HFbench_minilm_torch(dynamic, device):
-    model_name = "bert-base-uncased"
-    test_input = torch.randint(2, (1, 128))
-    try:
-        shark_module = SharkHFBenchmarkRunner(
-            model_name,
-            (test_input,),
-            jit_trace=True,
-            dynamic=dynamic,
-            device=device,
-        )
-        shark_module.benchmark_c()
-        shark_module.benchmark_python((test_input,))
-        shark_module.benchmark_torch(test_input)
-        shark_module.benchmark_onnx(test_input)
-        # If becnhmarking succesful, assert success/True.
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
--- a/build_tools/image_comparison.py
+++ b/build_tools/image_comparison.py
@@ -1,51 +0,0 @@
-import argparse
-from PIL import Image
-import numpy as np
-
-import requests
-import shutil
-import os
-import subprocess
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument("-n", "--newfile")
-parser.add_argument(
-    "-g",
-    "--golden_url",
-    default="https://storage.googleapis.com/shark_tank/testdata/cyberpunk_fores_42_0_230119_021148.png",
-)
-
-
-def get_image(url, local_filename):
-    res = requests.get(url, stream=True)
-    if res.status_code == 200:
-        with open(local_filename, "wb") as f:
-            shutil.copyfileobj(res.raw, f)
-
-
-def compare_images(new_filename, golden_filename):
-    new = np.array(Image.open(new_filename)) / 255.0
-    golden = np.array(Image.open(golden_filename)) / 255.0
-    diff = np.abs(new - golden)
-    mean = np.mean(diff)
-    if mean > 0.1:
-        if os.name != "nt":
-            subprocess.run(
-                [
-                    "gsutil",
-                    "cp",
-                    new_filename,
-                    "gs://shark_tank/testdata/builder/",
-                ]
-            )
-        raise SystemExit("new and golden not close")
-    else:
-        print("SUCCESS")
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    tempfile_name = os.path.join(os.getcwd(), "golden.png")
-    get_image(args.golden_url, tempfile_name)
-    compare_images(args.newfile, tempfile_name)
--- a/build_tools/populate_sharktank_ci.sh
+++ b/build_tools/populate_sharktank_ci.sh
@@ -1,5 +0,0 @@
-#!/bin/bash
-
-IMPORTER=1 BENCHMARK=1 ./setup_venv.sh
-source $GITHUB_WORKSPACE/shark.venv/bin/activate
-python generate_sharktank.py
--- a/build_tools/scrape_releases.py
+++ b/build_tools/scrape_releases.py
@@ -1,37 +0,0 @@
-"""Scrapes the github releases API to generate a static pip-install-able releases page.
-
-See https://github.com/llvm/torch-mlir/issues/1374
-"""
-import argparse
-import json
-
-import requests
-
-# Parse arguments
-parser = argparse.ArgumentParser()
-parser.add_argument("owner", type=str)
-parser.add_argument("repo", type=str)
-args = parser.parse_args()
-
-# Get releases
-response = requests.get(
-    f"https://api.github.com/repos/{args.owner}/{args.repo}/releases"
-)
-body = json.loads(response.content)
-
-# Parse releases
-releases = []
-for row in body:
-    for asset in row["assets"]:
-        releases.append((asset["name"], asset["browser_download_url"]))
-
-# Output HTML
-html = """<!DOCTYPE html>
-<html>
-  <body>
-"""
-for name, url in releases:
-    html += f"    <a href='{url}'>{name}</a><br />\n"
-html += """  </body>
-</html>"""
-print(html)
--- a/build_tools/stable_diffusion_testing.py
+++ b/build_tools/stable_diffusion_testing.py
@@ -1,143 +0,0 @@
-import os
-from sys import executable
-import subprocess
-from apps.stable_diffusion.src.utils.resources import (
-    get_json_file,
-)
-from datetime import datetime as dt
-from shark.shark_downloader import download_public_file
-from image_comparison import compare_images
-import argparse
-from glob import glob
-import shutil
-import requests
-
-model_config_dicts = get_json_file(
-    os.path.join(
-        os.getcwd(),
-        "apps/stable_diffusion/src/utils/resources/model_config.json",
-    )
-)
-
-
-def get_inpaint_inputs():
-    os.mkdir("./test_images/inputs")
-    img_url = (
-        "https://huggingface.co/datasets/diffusers/test-arrays/resolve"
-        "/main/stable_diffusion_inpaint/input_bench_image.png"
-    )
-    mask_url = (
-        "https://huggingface.co/datasets/diffusers/test-arrays/resolve"
-        "/main/stable_diffusion_inpaint/input_bench_mask.png"
-    )
-    img = requests.get(img_url)
-    mask = requests.get(mask_url)
-    open("./test_images/inputs/image.png", "wb").write(img.content)
-    open("./test_images/inputs/mask.png", "wb").write(mask.content)
-
-
-def test_loop(device="vulkan", beta=False, extra_flags=[]):
-    # Get golden values from tank
-    shutil.rmtree("./test_images", ignore_errors=True)
-    os.mkdir("./test_images")
-    os.mkdir("./test_images/golden")
-    get_inpaint_inputs()
-    hf_model_names = model_config_dicts[0].values()
-    tuned_options = ["--no-use_tuned", "--use_tuned"]
-    import_options = ["--import_mlir", "--no-import_mlir"]
-    prompt_text = "--prompt=cyberpunk forest by Salvador Dali"
-    inpaint_prompt_text = "--prompt=Face of a yellow cat, high resolution, sitting on a park bench"
-    if os.name == "nt":
-        prompt_text = '--prompt="cyberpunk forest by Salvador Dali"'
-        inpaint_prompt_text = '--prompt="Face of a yellow cat, high resolution, sitting on a park bench"'
-    if beta:
-        extra_flags.append("--beta_models=True")
-    for import_opt in import_options:
-        for model_name in hf_model_names:
-            if model_name == "Linaqruf/anything-v3.0":
-                continue
-            for use_tune in tuned_options:
-                command = (
-                    [
-                        executable,  # executable is the python from the venv used to run this
-                        "apps/stable_diffusion/scripts/txt2img.py",
-                        "--device=" + device,
-                        prompt_text,
-                        "--negative_prompts=" + '""',
-                        "--seed=42",
-                        import_opt,
-                        "--output_dir="
-                        + os.path.join(os.getcwd(), "test_images", model_name),
-                        "--hf_model_id=" + model_name,
-                        use_tune,
-                    ]
-                    if "inpainting" not in model_name
-                    else [
-                        "python",
-                        "apps/stable_diffusion/scripts/inpaint.py",
-                        "--device=" + device,
-                        inpaint_prompt_text,
-                        "--negative_prompts=" + '""',
-                        "--img_path=./test_images/inputs/image.png",
-                        "--mask_path=./test_images/inputs/mask.png",
-                        "--seed=42",
-                        "--import_mlir",
-                        "--output_dir="
-                        + os.path.join(os.getcwd(), "test_images", model_name),
-                        "--hf_model_id=" + model_name,
-                        use_tune,
-                    ]
-                )
-                command += extra_flags
-                if os.name == "nt":
-                    command = " ".join(command)
-                generated_image = not subprocess.call(
-                    command, stdout=subprocess.DEVNULL
-                )
-                if os.name != "nt":
-                    command = " ".join(command)
-                if generated_image:
-                    print(command)
-                    print("Successfully generated image")
-                    os.makedirs(
-                        "./test_images/golden/" + model_name, exist_ok=True
-                    )
-                    download_public_file(
-                        "gs://shark_tank/testdata/golden/" + model_name,
-                        "./test_images/golden/" + model_name,
-                    )
-                    test_file_path = os.path.join(
-                        os.getcwd(),
-                        "test_images",
-                        model_name,
-                        "generated_imgs",
-                        dt.now().strftime("%Y%m%d"),
-                        "*.png",
-                    )
-                    test_file = glob(test_file_path)[0]
-
-                    golden_path = (
-                        "./test_images/golden/" + model_name + "/*.png"
-                    )
-                    golden_file = glob(golden_path)[0]
-                    compare_images(test_file, golden_file)
-                else:
-                    print(command)
-                    print("failed to generate image for this configuration")
-                    if "2_1_base" in model_name:
-                        print("failed a known successful model.")
-                        exit(1)
-
-
-parser = argparse.ArgumentParser()
-
-parser.add_argument("-d", "--device", default="vulkan")
-parser.add_argument(
-    "-b", "--beta", action=argparse.BooleanOptionalAction, default=False
-)
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-    print(args)
-    test_loop(args.device, args.beta, [])
--- a/conftest.py
+++ b/conftest.py
@@ -1,72 +0,0 @@
-def pytest_addoption(parser):
-    # Attaches SHARK command-line arguments to the pytest machinery.
-    parser.addoption(
-        "--benchmark",
-        action="store_true",
-        default="False",
-        help="Pass option to benchmark and write results.csv",
-    )
-    parser.addoption(
-        "--onnx_bench",
-        action="store_true",
-        default="False",
-        help="Add ONNX benchmark results to pytest benchmarks.",
-    )
-    parser.addoption(
-        "--tf32",
-        action="store_true",
-        default="False",
-        help="Use TensorFloat-32 calculations.",
-    )
-    parser.addoption(
-        "--save_repro",
-        action="store_true",
-        default="False",
-        help="Pass option to save reproduction artifacts to SHARK/shark_tmp/test_case/",
-    )
-    parser.addoption(
-        "--save_fails",
-        action="store_true",
-        default="False",
-        help="Save reproduction artifacts for a test case only if it fails. Default is False.",
-    )
-    parser.addoption(
-        "--ci",
-        action="store_true",
-        default="False",
-        help="Enables uploading of reproduction artifacts upon test case failure during iree-compile or validation. Must be passed with --ci_sha option ",
-    )
-    parser.addoption(
-        "--update_tank",
-        action="store_true",
-        default="False",
-        help="Update local shark tank with latest artifacts.",
-    )
-    parser.addoption(
-        "--ci_sha",
-        action="store",
-        default="None",
-        help="Passes the github SHA of the CI workflow to include in google storage directory for reproduction artifacts.",
-    )
-    parser.addoption(
-        "--local_tank_cache",
-        action="store",
-        default="",
-        help="Specify the directory in which all downloaded shark_tank artifacts will be cached.",
-    )
-    parser.addoption(
-        "--tank_url",
-        type=str,
-        default="gs://shark_tank/latest",
-        help="URL to bucket from which to download SHARK tank artifacts. Default is gs://shark_tank/latest",
-    )
-    parser.addoption(
-        "--benchmark_dispatches",
-        default=None,
-        help="Benchmark individual dispatch kernels produced by IREE compiler. Use 'All' for all, or specific dispatches e.g. '0 1 2 10'",
-    )
-    parser.addoption(
-        "--dispatch_benchmarks_dir",
-        default="./temp_dispatch_benchmarks",
-        help="Directory in which dispatch benchmarks are saved.",
-    )
--- a/cpp/.gitignore
+++ b/cpp/.gitignore
@@ -1,3 +0,0 @@
-*.mlir
-*.vmfb
-*.ini
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,52 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-cmake_minimum_required(VERSION 3.21...3.23)
-
-#-------------------------------------------------------------------------------
-# Project configuration
-#-------------------------------------------------------------------------------
-
-project(iree-samples C CXX)
-set(CMAKE_C_STANDARD 11)
-set(CMAKE_CXX_STANDARD 17)
-set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
-#-------------------------------------------------------------------------------
-# Core project dependency
-#-------------------------------------------------------------------------------
-
-message(STATUS "Fetching core IREE repo (this may take a few minutes)...")
-# Note: for log output, set -DFETCHCONTENT_QUIET=OFF,
-# see https://gitlab.kitware.com/cmake/cmake/-/issues/18238#note_440475
-
-include(FetchContent)
-
-FetchContent_Declare(
-  iree
-  GIT_REPOSITORY https://github.com/nod-ai/shark-runtime.git
-  GIT_TAG shark 
-  GIT_SUBMODULES_RECURSE OFF
-  GIT_SHALLOW OFF
-  GIT_PROGRESS ON
-  USES_TERMINAL_DOWNLOAD ON
-)
-
-# Extend module path to find MLIR CMake modules.
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/lib/cmake/mlir")
-
-# Disable core project features not needed for these out of tree samples.
-set(IREE_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-set(IREE_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
-
-FetchContent_MakeAvailable(iree)
-FetchContent_GetProperties(iree SOURCE_DIR IREE_SOURCE_DIR)
-
-#-------------------------------------------------------------------------------
-# Individual samples
-#-------------------------------------------------------------------------------
-
-add_subdirectory(vulkan_gui)
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -1,82 +0,0 @@
-# SHARK C/C++ Samples
-
-These C/C++ samples can be built using CMake. The samples depend on the main
-SHARK-Runtime project's C/C++ sources, including both the runtime and the compiler. 
-
-Individual samples may require additional dependencies. Watch CMake's output
-for information about which you are missing for individual samples.
-
-On Windows we recommend using https://github.com/microsoft/vcpkg to download packages for
-your system. The general setup flow looks like
-
-*Install and activate SHARK*
-
-```bash
-source shark.venv/bin/activate #follow main repo instructions to setup your venv
-```
-
-*Install Dependencies*
-
-```bash
-vcpkg install [library] --triplet [your platform]
-vcpkg integrate install
-
-# Then pass `-DCMAKE_TOOLCHAIN_FILE=[check logs for path]` when configuring CMake
-```
-
-In Ubuntu Linux you can install
-
-```bash
-sudo apt install libsdl2-dev
-```
-
-*Build*
-```bash
-cd cpp
-cmake -GNinja -B build/
-cmake --build build/
-```
-
-*Prepare the model*
-```bash
-wget https://storage.googleapis.com/shark_tank/latest/resnet50_tf/resnet50_tf.mlir
-iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvm-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 resnet50_tf.mlir -o resnet50_tf.vmfb
-```
-*Prepare the input*
-
-```bash
-python save_img.py
-```
-Note that this requires tensorflow, e.g.
-```bash
-python -m pip install tensorflow
-```
-
-*Run the vulkan_gui*
-```bash
-./build/vulkan_gui/iree-samples-resnet-vulkan-gui
-```
-
-## Other models
-A tool for benchmarking other models is built and can be invoked with a command like the following
-```bash
-./build/vulkan_gui/iree-vulkan-gui --module-file=path/to/.vmfb --function_input=...
-```
-see `./build/vulkan_gui/iree-vulkan-gui --help` for an explanation on the function input. For example, stable diffusion unet can be tested with the following commands:
-```bash
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
-iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 stable_diff_tf.mlir -o stable_diff_tf.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=2x4x64x64xf32 --function_input=1xf32 --function_input=2x77x768xf32
-```
-VAE and Autoencoder are also available
-```bash
-# VAE
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
-iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 vae.mlir -o vae.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x4x64x64xf32
-
-# CLIP Autoencoder
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
-iree-compile --iree-input-type=mhlo --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvm-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux --iree-stream-resource-index-bits=64 --iree-vm-target-index-bits=64 clip_autoencoder.mlir -o clip_autoencoder.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=1x77xi32 --function_input=1x77xi32
-```
--- a/cpp/dog_imagenet.jpg
+++ b/cpp/dog_imagenet.jpg
--- a/cpp/save_img.py
+++ b/cpp/save_img.py
@@ -1,18 +0,0 @@
-import numpy as np
-import tensorflow as tf
-from shark.shark_inference import SharkInference
-
-
-def load_and_preprocess_image(fname: str):
-    image = tf.io.read_file(fname)
-    image = tf.image.decode_image(image, channels=3)
-    image = tf.image.resize(image, (224, 224))
-    image = image[tf.newaxis, :]
-    # preprocessing pipeline
-    input_tensor = tf.keras.applications.resnet50.preprocess_input(image)
-    return input_tensor
-
-
-data = load_and_preprocess_image("dog_imagenet.jpg").numpy()
-
-data.tofile("dog.bin")
--- a/cpp/vision_inference/CMakeLists.txt
+++ b/cpp/vision_inference/CMakeLists.txt
@@ -1,84 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-if(NOT IREE_TARGET_BACKEND_LLVM_CPU OR
-   NOT IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF)
-  message(STATUS "Missing LLVM backend and/or embeddded elf loader, skipping vision_inference sample")
-  return()
-endif()
-
-# vcpkg install stb
-#   tested with version 2021-09-10
-find_package(Stb)
-if(NOT Stb_FOUND)
-  message(STATUS "Could not find Stb, skipping vision inference sample")
-  return()
-endif()
-
-# Compile mnist.mlir to mnist.vmfb.
-set(_COMPILE_TOOL_EXECUTABLE $<TARGET_FILE:iree-compile>)
-set(_COMPILE_ARGS)
-list(APPEND _COMPILE_ARGS "--iree-input-type=mhlo")
-list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu")
-list(APPEND _COMPILE_ARGS "${IREE_SOURCE_DIR}/samples/models/mnist.mlir")
-list(APPEND _COMPILE_ARGS "-o")
-list(APPEND _COMPILE_ARGS "mnist.vmfb")
-add_custom_command(
-  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
-  COMMAND ${_COMPILE_TOOL_EXECUTABLE} ${_COMPILE_ARGS}
-  DEPENDS ${_COMPILE_TOOL_EXECUTABLE} "${IREE_SOURCE_DIR}/samples/models/mnist.mlir"
-)
-# Embed mnist.vmfb into a C file as mnist_bytecode_module_c.[h/c]
-set(_EMBED_DATA_EXECUTABLE $<TARGET_FILE:generate_embed_data>)
-set(_EMBED_ARGS)
-list(APPEND _EMBED_ARGS "--output_header=mnist_bytecode_module_c.h")
-list(APPEND _EMBED_ARGS "--output_impl=mnist_bytecode_module_c.c")
-list(APPEND _EMBED_ARGS "--identifier=iree_samples_vision_inference_mnist_bytecode_module")
-list(APPEND _EMBED_ARGS "--flatten")
-list(APPEND _EMBED_ARGS "${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb")
-add_custom_command(
-  OUTPUT "mnist_bytecode_module_c.h" "mnist_bytecode_module_c.c"
-  COMMAND ${_EMBED_DATA_EXECUTABLE} ${_EMBED_ARGS}
-  DEPENDS ${_EMBED_DATA_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
-)
-# Define a library target for mnist_bytecode_module_c.
-add_library(iree_samples_vision_inference_mnist_bytecode_module_c OBJECT)
-target_sources(iree_samples_vision_inference_mnist_bytecode_module_c
-  PRIVATE
-    mnist_bytecode_module_c.h
-    mnist_bytecode_module_c.c
-)
-
-# Define the sample executable.
-set(_NAME "iree-run-mnist-module")
-add_executable(${_NAME} "")
-target_sources(${_NAME}
-  PRIVATE
-    "image_util.h"
-    "image_util.c"
-    "iree-run-mnist-module.c"
-)
-set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "iree-run-mnist-module")
-target_include_directories(${_NAME} PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
-)
-target_include_directories(${_NAME} PRIVATE
-    ${Stb_INCLUDE_DIR}
-)
-target_link_libraries(${_NAME}
-  iree_base_base
-  iree_base_tracing
-  iree_hal_hal
-  iree_runtime_runtime
-  iree_samples_vision_inference_mnist_bytecode_module_c
-)
-
-# Define a target that copies the test image into the build directory.
-add_custom_target(iree_samples_vision_inference_test_image
-  COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/mnist_test.png" "${CMAKE_CURRENT_BINARY_DIR}/mnist_test.png")
-add_dependencies(${_NAME} iree_samples_vision_inference_test_image)
-
-message(STATUS "Configured vision_inference sample successfully")
--- a/cpp/vision_inference/README.md
+++ b/cpp/vision_inference/README.md
@@ -1,8 +0,0 @@
-# Vision Inference Sample (C code)
-
-This sample demonstrates how to run a MNIST handwritten digit detection vision
-model on an image using IREE's C API.
-
-A similar sample is implemented using a Python script and IREE's command line
-tools over in the primary iree repository at
-https://github.com/iree-org/iree/tree/main/samples/vision_inference
--- a/cpp/vision_inference/image_util.c
+++ b/cpp/vision_inference/image_util.c
@@ -1,224 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "image_util.h"
-
-#include <math.h>
-
-#include "iree/base/internal/flags.h"
-#include "iree/base/tracing.h"
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
-
-iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
-    const uint8_t* pixel_data, iree_host_size_t buffer_length,
-    const float* input_range, iree_host_size_t range_length,
-    float* out_buffer) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  if (range_length != 2) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "range defined as 2-element [min, max] array.");
-  }
-  float input_scale = fabsf(input_range[1] - input_range[0]) / 2.0f;
-  float input_offset = (input_range[0] + input_range[1]) / 2.0f;
-  const float kUint8Mean = 127.5f;
-  for (int i = 0; i < buffer_length; ++i) {
-    out_buffer[i] =
-        (((float)(pixel_data[i])) - kUint8Mean) / kUint8Mean * input_scale +
-        input_offset;
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
-}
-
-iree_status_t iree_tools_utils_load_pixel_data_impl(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
-  int img_dims[3];
-  if (stbi_info(filename.data, img_dims, &(img_dims[1]), &(img_dims[2])) == 0) {
-    return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
-                            (int)filename.size, filename.data);
-  }
-  if (!(element_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 ||
-        element_type == IREE_HAL_ELEMENT_TYPE_SINT_8 ||
-        element_type == IREE_HAL_ELEMENT_TYPE_UINT_8)) {
-    char element_type_str[16];
-    IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
-        element_type, sizeof(element_type_str), element_type_str, NULL));
-    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                            "element type %s not supported", element_type_str);
-  }
-  switch (shape_rank) {
-    case 2: {  // Assume tensor <height x width>
-      if (img_dims[2] != 1 || (shape[0] != img_dims[1]) ||
-          (shape[1] != img_dims[0])) {
-        return iree_make_status(
-            IREE_STATUS_INVALID_ARGUMENT,
-            "image size: %dx%dx%d, expected: %" PRIdim "x%" PRIdim, img_dims[0],
-            img_dims[1], img_dims[2], shape[1], shape[0]);
-      }
-      break;
-    }
-    case 3: {  // Assume tensor <height x width x channel>
-      if (shape[0] != img_dims[1] || shape[1] != img_dims[0] ||
-          shape[2] != img_dims[2]) {
-        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                "image size: %dx%dx%d, expected: %" PRIdim
-                                "x%" PRIdim "x%" PRIdim,
-                                img_dims[0], img_dims[1], img_dims[2], shape[1],
-                                shape[0], shape[2]);
-      }
-      break;
-    }
-    case 4: {  // Assume tensor <batch x height x width x channel>
-      if (shape[1] != img_dims[1] || shape[2] != img_dims[0] ||
-          shape[3] != img_dims[2]) {
-        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                "image size: %dx%dx%d, expected: %" PRIdim
-                                "x%" PRIdim "x%" PRIdim,
-                                img_dims[0], img_dims[1], img_dims[2], shape[2],
-                                shape[1], shape[3]);
-      }
-      break;
-    }
-    default:
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "Input buffer shape rank %" PRIhsz " not supported", shape_rank);
-  }
-  // Drop the alpha channel if present.
-  int req_ch = (img_dims[2] >= 3) ? 3 : 0;
-  *out_pixel_data = stbi_load(filename.data, img_dims, &(img_dims[1]),
-                              &(img_dims[2]), req_ch);
-  if (*out_pixel_data == NULL) {
-    return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
-                            (int)filename.size, filename.data);
-  }
-  *out_buffer_length =
-      img_dims[0] * img_dims[1] * (img_dims[2] > 3 ? 3 : img_dims[2]);
-  return iree_ok_status();
-}
-
-iree_status_t iree_tools_utils_load_pixel_data(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  iree_status_t result = iree_tools_utils_load_pixel_data_impl(
-      filename, shape, shape_rank, element_type, out_pixel_data,
-      out_buffer_length);
-  IREE_TRACE_ZONE_END(z0);
-  return result;
-}
-
-iree_status_t iree_tools_utils_buffer_view_from_image(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  *out_buffer_view = NULL;
-  if (element_type != IREE_HAL_ELEMENT_TYPE_SINT_8 &&
-      element_type != IREE_HAL_ELEMENT_TYPE_UINT_8) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "element type should be i8 or u8");
-  }
-
-  iree_status_t result;
-  uint8_t* pixel_data = NULL;
-  iree_host_size_t buffer_length;
-  result = iree_tools_utils_load_pixel_data(
-      filename, shape, shape_rank, element_type, &pixel_data, &buffer_length);
-  if (iree_status_is_ok(result)) {
-    iree_host_size_t element_byte =
-        iree_hal_element_dense_byte_count(element_type);
-    // SINT_8 and UINT_8 perform direct buffer wrap.
-    result = iree_hal_buffer_view_allocate_buffer(
-        allocator, shape_rank, shape, element_type,
-        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
-        (iree_hal_buffer_params_t){
-            .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
-            .access = IREE_HAL_MEMORY_ACCESS_READ,
-            .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
-                     IREE_HAL_BUFFER_USAGE_TRANSFER,
-        },
-        iree_make_const_byte_span(pixel_data, element_byte * buffer_length),
-        out_buffer_view);
-  }
-  stbi_image_free(pixel_data);
-  IREE_TRACE_ZONE_END(z0);
-  return result;
-}
-
-typedef struct iree_tools_utils_buffer_view_load_params_t {
-  const uint8_t* pixel_data;
-  iree_host_size_t pixel_data_length;
-  const float* input_range;
-  iree_host_size_t input_range_length;
-} iree_tools_utils_buffer_view_load_params_t;
-static iree_status_t iree_tools_utils_buffer_view_load_image_rescaled(
-    iree_hal_buffer_mapping_t* mapping, void* user_data) {
-  iree_tools_utils_buffer_view_load_params_t* params =
-      (iree_tools_utils_buffer_view_load_params_t*)user_data;
-  return iree_tools_utils_pixel_rescaled_to_buffer(
-      params->pixel_data, params->pixel_data_length, params->input_range,
-      params->input_range_length, (float*)mapping->contents.data);
-}
-
-iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, const float* input_range,
-    iree_host_size_t input_range_length,
-    iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  *out_buffer_view = NULL;
-  if (element_type != IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "element type should be f32");
-  }
-
-  // Classic row-major image layout.
-  iree_hal_encoding_type_t encoding_type =
-      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR;
-
-  // Load pixel data from the file into a new host memory allocation (the only
-  // interface stb_image provides). A real application would want to use the
-  // generation callback to directly decode the image into the target mapped
-  // device buffer.
-  uint8_t* pixel_data = NULL;
-  iree_host_size_t buffer_length = 0;
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_tools_utils_load_pixel_data(filename, shape, shape_rank,
-                                           element_type, &pixel_data,
-                                           &buffer_length));
-
-  iree_tools_utils_buffer_view_load_params_t params = {
-      .pixel_data = pixel_data,
-      .pixel_data_length = buffer_length,
-      .input_range = input_range,
-      .input_range_length = input_range_length,
-  };
-  iree_status_t status = iree_hal_buffer_view_generate_buffer(
-      allocator, shape_rank, shape, element_type, encoding_type,
-      (iree_hal_buffer_params_t){
-          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
-                  IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-          .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
-                   IREE_HAL_BUFFER_USAGE_TRANSFER |
-                   IREE_HAL_BUFFER_USAGE_MAPPING,
-      },
-      iree_tools_utils_buffer_view_load_image_rescaled, &params,
-      out_buffer_view);
-
-  stbi_image_free(pixel_data);
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
--- a/cpp/vision_inference/image_util.h
+++ b/cpp/vision_inference/image_util.h
@@ -1,77 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
-#define IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
-
-#include "iree/base/api.h"
-#include "iree/hal/api.h"
-#include "iree/hal/buffer_view.h"
-
-#if __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// Loads the image at |filename| into |out_pixel_data| and sets
-// |out_buffer_length| to its length.
-//
-// The image dimension must match the width, height, and channel in|shape|,
-// while 2 <= |shape_rank| <= 4 to match the image tensor format.
-//
-// The file must be in a format supported by stb_image.h.
-// The returned |out_pixel_data| buffer must be released by the caller.
-iree_status_t iree_tools_utils_load_pixel_data(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length);
-
-// Parse the content in an image file in |filename| into a HAL buffer view
-// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
-// |shape_rank|, and |element_type|, while being allocated by |allocator|.
-//
-// The |element_type| has to be SINT_8 or UINT_8. For FLOAT_32, use
-// |iree_tools_utils_buffer_view_from_image_rescaled| instead.
-//
-// The returned |out_buffer_view| must be released by the caller.
-iree_status_t iree_tools_utils_buffer_view_from_image(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view);
-
-// Parse the content in an image file in |filename| into a HAL buffer view
-// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
-// |shape_rank|, and |element_type|, while being allocated by |allocator|.
-// The value in |out_buffer_view| is rescaled with |input_range|.
-//
-// The |element_type| has to be FLOAT_32, For SINT_8 or UINT_8, use
-// |iree_tools_utils_buffer_view_from_image| instead.
-//
-// The returned |out_buffer_view| must be released by the caller.
-iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, const float* input_range,
-    iree_host_size_t input_range_length,
-    iree_hal_buffer_view_t** out_buffer_view);
-
-// Normalize uint8_t |pixel_data| of the size |buffer_length| to float buffer
-// |out_buffer| with the range |input_range|.
-//
-// float32_x = (uint8_x - 127.5) / 127.5 * input_scale + input_offset, where
-// input_scale = abs(|input_range[0]| - |input_range[1]| / 2
-// input_offset = |input_range[0]| + |input_range[1]| / 2
-//
-// |out_buffer| needs to be allocated before the call.
-iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
-    const uint8_t* pixel_data, iree_host_size_t pixel_count,
-    const float* input_range, iree_host_size_t input_range_length,
-    float* out_buffer);
-
-#if __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
--- a/cpp/vision_inference/iree-run-mnist-module.c
+++ b/cpp/vision_inference/iree-run-mnist-module.c
@@ -1,121 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// This sample uses image_util to load a hand-written image as an
-// iree_hal_buffer_view_t then passes it to the bytecode module built from
-// mnist.mlir on the CPU backend with the local-task driver.
-
-#include <float.h>
-
-#include "image_util.h"
-#include "iree/runtime/api.h"
-#include "mnist_bytecode_module_c.h"
-
-iree_status_t Run(const iree_string_view_t image_path) {
-  iree_runtime_instance_options_t instance_options;
-  iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
-                                           &instance_options);
-  iree_runtime_instance_options_use_all_available_drivers(&instance_options);
-  iree_runtime_instance_t* instance = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_instance_create(
-      &instance_options, iree_allocator_system(), &instance));
-
-  // TODO(#5724): move device selection into the compiled modules.
-  iree_hal_device_t* device = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(
-      instance, iree_make_cstring_view("local-task"), &device));
-
-  // Create one session per loaded module to hold the module state.
-  iree_runtime_session_options_t session_options;
-  iree_runtime_session_options_initialize(&session_options);
-  iree_runtime_session_t* session = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_session_create_with_device(
-      instance, &session_options, device,
-      iree_runtime_instance_host_allocator(instance), &session));
-  iree_hal_device_release(device);
-
-  const struct iree_file_toc_t* module_file =
-      iree_samples_vision_inference_mnist_bytecode_module_create();
-
-  IREE_RETURN_IF_ERROR(iree_runtime_session_append_bytecode_module_from_memory(
-      session, iree_make_const_byte_span(module_file->data, module_file->size),
-      iree_allocator_null()));
-
-  iree_runtime_call_t call;
-  IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(
-      session, iree_make_cstring_view("module.predict"), &call));
-
-  // Prepare the input hal buffer view with image_util library.
-  // The input of the mmist model is single 28x28 pixel image as a
-  // tensor<1x28x28x1xf32>, with pixels in [0.0, 1.0].
-  iree_hal_buffer_view_t* buffer_view = NULL;
-  iree_hal_dim_t buffer_shape[] = {1, 28, 28, 1};
-  iree_hal_element_type_t hal_element_type = IREE_HAL_ELEMENT_TYPE_FLOAT_32;
-  float input_range[2] = {0.0f, 1.0f};
-  IREE_RETURN_IF_ERROR(
-      iree_tools_utils_buffer_view_from_image_rescaled(
-          image_path, buffer_shape, IREE_ARRAYSIZE(buffer_shape),
-          hal_element_type, iree_hal_device_allocator(device), input_range,
-          IREE_ARRAYSIZE(input_range), &buffer_view),
-      "load image");
-  IREE_RETURN_IF_ERROR(
-      iree_runtime_call_inputs_push_back_buffer_view(&call, buffer_view));
-  iree_hal_buffer_view_release(buffer_view);
-
-  IREE_RETURN_IF_ERROR(iree_runtime_call_invoke(&call, /*flags=*/0));
-
-  // Get the result buffers from the invocation.
-  iree_hal_buffer_view_t* ret_buffer_view = NULL;
-  IREE_RETURN_IF_ERROR(
-      iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret_buffer_view));
-
-  // Read back the results. The output of the mnist model is a 1x10 prediction
-  // confidence values for each digit in [0, 9].
-  float predictions[1 * 10] = {0.0f};
-  IREE_RETURN_IF_ERROR(iree_hal_device_transfer_d2h(
-      iree_runtime_session_device(session),
-      iree_hal_buffer_view_buffer(ret_buffer_view), 0, predictions,
-      sizeof(predictions), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
-      iree_infinite_timeout()));
-  iree_hal_buffer_view_release(ret_buffer_view);
-
-  // Get the highest index from the output.
-  float result_val = FLT_MIN;
-  int result_idx = 0;
-  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(predictions); ++i) {
-    if (predictions[i] > result_val) {
-      result_val = predictions[i];
-      result_idx = i;
-    }
-  }
-  fprintf(stdout, "Detected number: %d\n", result_idx);
-
-  iree_runtime_call_deinitialize(&call);
-  iree_runtime_session_release(session);
-  iree_runtime_instance_release(instance);
-  return iree_ok_status();
-}
-
-int main(int argc, char** argv) {
-  if (argc > 2) {
-    fprintf(stderr, "Usage: iree-run-mnist-module <image file>\n");
-    return -1;
-  }
-  iree_string_view_t image_path;
-  if (argc == 1) {
-    image_path = iree_make_cstring_view("mnist_test.png");
-  } else {
-    image_path = iree_make_cstring_view(argv[1]);
-  }
-  iree_status_t result = Run(image_path);
-  if (!iree_status_is_ok(result)) {
-    iree_status_fprint(stderr, result);
-    iree_status_ignore(result);
-    return -1;
-  }
-  iree_status_ignore(result);
-  return 0;
-}
--- a/cpp/vision_inference/mnist_test.png
+++ b/cpp/vision_inference/mnist_test.png
--- a/cpp/vulkan_gui/CMakeLists.txt
+++ b/cpp/vulkan_gui/CMakeLists.txt
@@ -1,116 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-if(NOT IREE_TARGET_BACKEND_VULKAN_SPIRV OR
-   NOT IREE_HAL_DRIVER_VULKAN)
-  message(STATUS "Missing Vulkan backend and/or driver, skipping vulkan_gui sample")
-  return()
-endif()
-
-# This target statically links against Vulkan.
-# One way to achieve this is by installing the Vulkan SDK from
-# https://vulkan.lunarg.com/.
-include(FindVulkan)
-if(NOT Vulkan_FOUND)
-  message(STATUS "Could not find Vulkan, skipping vulkan_gui sample")
-  return()
-endif()
-
-# vcpkg install sdl2[vulkan]
-#   tested with versions 2.0.14#4 - 2.0.22#1
-find_package(SDL2)
-if(NOT SDL2_FOUND)
-  message(STATUS "Could not find SDL2, skipping vulkan_gui sample")
-  return()
-endif()
-
-FetchContent_Declare(
-  imgui
-  GIT_REPOSITORY https://github.com/ocornut/imgui
-  GIT_TAG        master
-)
-
-FetchContent_MakeAvailable(imgui)
-
-# Dear ImGui
-set(IMGUI_DIR ${CMAKE_BINARY_DIR}/_deps/imgui-src)
-message("Looking for Imgui in ${IMGUI_DIR}")
-include_directories(${IMGUI_DIR} ${IMGUI_DIR}/backends ..)
-
-
-function(iree_vulkan_sample)
-
-  cmake_parse_arguments(
-    _RULE
-    ""
-    "NAME"
-    "SRCS"
-    ${ARGN}
-  )
-
-
-  # Define the sample executable.
-  set(_NAME "${_RULE_NAME}")
-  set(SRCS "${_RULE_SRCS}")
-  add_executable(${_NAME} "")
-  target_sources(${_NAME}
-    PRIVATE
-      ${SRCS}
-      "${IMGUI_DIR}/backends/imgui_impl_sdl.cpp"
-      "${IMGUI_DIR}/backends/imgui_impl_vulkan.cpp"
-      "${IMGUI_DIR}/imgui.cpp"
-      "${IMGUI_DIR}/imgui_draw.cpp"
-      "${IMGUI_DIR}/imgui_demo.cpp"
-      "${IMGUI_DIR}/imgui_tables.cpp"
-      "${IMGUI_DIR}/imgui_widgets.cpp"
-  )
-  set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "${_NAME}")
-  target_include_directories(${_NAME} PUBLIC
-      $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
-  )
-  target_link_libraries(${_NAME}
-    SDL2::SDL2
-    Vulkan::Vulkan
-    iree_runtime_runtime
-    iree_base_internal_main
-    iree_hal_drivers_vulkan_registration_registration
-    iree_modules_hal_hal
-    iree_vm_vm
-    iree_vm_bytecode_module
-    iree_vm_cc
-    iree_tooling_vm_util_cc
-    iree_tooling_context_util
-  )
-
-  if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
-    set(_GUI_LINKOPTS "-SUBSYSTEM:CONSOLE")
-  else()
-    set(_GUI_LINKOPTS "")
-  endif()
-
-  target_link_options(${_NAME}
-    PRIVATE
-      ${_GUI_LINKOPTS}
-  )
-endfunction()
-
-iree_vulkan_sample(
-    NAME
-      iree-samples-resnet-vulkan-gui
-
-    SRCS
-      vulkan_resnet_inference_gui.cc
-)
-
-iree_vulkan_sample(
-    NAME
-      iree-vulkan-gui
-
-    SRCS
-      vulkan_inference_gui.cc
-)
-
-message(STATUS "Configured vulkan_gui sample successfully")
--- a/cpp/vulkan_gui/simple_mul.mlir
+++ b/cpp/vulkan_gui/simple_mul.mlir
@@ -1,4 +0,0 @@
-func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
-  %0 = "arith.mulf"(%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
-  return %0 : tensor<4xf32>
-}
--- a/cpp/vulkan_gui/snail_imagenet.jpg
+++ b/cpp/vulkan_gui/snail_imagenet.jpg
--- a/cpp/vulkan_gui/stb_image.h
+++ b/cpp/vulkan_gui/stb_image.h
--- a/cpp/vulkan_gui/vulkan_inference_gui.cc
+++ b/cpp/vulkan_gui/vulkan_inference_gui.cc
@@ -1,957 +0,0 @@
-// Copyright 2019 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// Vulkan Graphics + IREE API Integration Sample.
-
-#include <SDL.h>
-#include <SDL_vulkan.h>
-#include <imgui.h>
-#include <imgui_impl_sdl.h>
-#include <imgui_impl_vulkan.h>
-#include <vulkan/vulkan.h>
-
-
-#include <cstring>
-#include <set>
-#include <vector>
-#include <fstream>
-#include <array>
-#include <cstdio>
-#include <cstdlib>
-#include <iterator>
-#include <string>
-#include <utility>
-
-#include "iree/hal/drivers/vulkan/api.h"
-
-// IREE's C API:
-#include "iree/base/api.h"
-#include "iree/hal/api.h"
-#include "iree/hal/drivers/vulkan/registration/driver_module.h"
-#include "iree/modules/hal/module.h"
-#include "iree/vm/api.h"
-#include "iree/vm/bytecode_module.h"
-#include "iree/vm/ref_cc.h"
-
-// iree-run-module
-#include "iree/base/internal/flags.h"
-#include "iree/base/status_cc.h"
-#include "iree/base/tracing.h"
-#include "iree/modules/hal/types.h"
-#include "iree/tooling/comparison.h"
-#include "iree/tooling/context_util.h"
-#include "iree/tooling/vm_util_cc.h"
-
-// Other dependencies (helpers, etc.)
-#include "iree/base/internal/main.h"
-
-#define IMGUI_UNLIMITED_FRAME_RATE
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
-
-IREE_FLAG(string, entry_function, "",
-          "Name of a function contained in the module specified by module_file "
-          "to run.");
-
-// TODO(benvanik): move --function_input= flag into a util.
-static iree_status_t parse_function_io(iree_string_view_t flag_name,
-                                       void* storage,
-                                       iree_string_view_t value) {
-  auto* list = (std::vector<std::string>*)storage;
-  list->push_back(std::string(value.data, value.size));
-  return iree_ok_status();
-}
-static void print_function_io(iree_string_view_t flag_name, void* storage,
-                              FILE* file) {
-  auto* list = (std::vector<std::string>*)storage;
-  if (list->empty()) {
-    fprintf(file, "# --%.*s=\n", (int)flag_name.size, flag_name.data);
-  } else {
-    for (size_t i = 0; i < list->size(); ++i) {
-      fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data,
-              list->at(i).c_str());
-    }
-  }
-}
-static std::vector<std::string> FLAG_function_inputs;
-IREE_FLAG_CALLBACK(
-    parse_function_io, print_function_io, &FLAG_function_inputs, function_input,
-    "An input (a) value or (b) buffer of the format:\n"
-    "  (a) scalar value\n"
-    "     value\n"
-    "     e.g.: --function_input=\"3.14\"\n"
-    "  (b) buffer:\n"
-    "     [shape]xtype=[value]\n"
-    "     e.g.: --function_input=\"2x2xi32=1 2 3 4\"\n"
-    "Optionally, brackets may be used to separate the element values:\n"
-    "  2x2xi32=[[1 2][3 4]]\n"
-    "Raw binary files can be read to provide buffer contents:\n"
-    "  2x2xi32=@some/file.bin\n"
-    "numpy npy files (from numpy.save) can be read to provide 1+ values:\n"
-    "  @some.npy\n"
-    "Each occurrence of the flag indicates an input in the order they were\n"
-    "specified on the command line.");
-
-typedef struct iree_file_toc_t {
-  const char* name;             // the file's original name
-  char* data;             // beginning of the file
-  size_t size;                  // length of the file
-} iree_file_toc_t;
-
-bool load_file(const char* filename, char** pOut, size_t* pSize)
-{
-    FILE* f = fopen(filename, "rb");
-    if (f == NULL)
-    {
-        fprintf(stderr, "Can't open %s\n", filename);
-        return false;
-    }
-
-    fseek(f, 0L, SEEK_END);
-    *pSize = ftell(f);
-    fseek(f, 0L, SEEK_SET);
-
-    *pOut = (char*)malloc(*pSize);
-
-    size_t size = fread(*pOut, *pSize, 1, f);
-
-    fclose(f);
-
-    return size != 0;
-}
-
-static VkAllocationCallbacks* g_Allocator = NULL;
-static VkInstance g_Instance = VK_NULL_HANDLE;
-static VkPhysicalDevice g_PhysicalDevice = VK_NULL_HANDLE;
-static VkDevice g_Device = VK_NULL_HANDLE;
-static uint32_t g_QueueFamily = (uint32_t)-1;
-static VkQueue g_Queue = VK_NULL_HANDLE;
-static VkPipelineCache g_PipelineCache = VK_NULL_HANDLE;
-static VkDescriptorPool g_DescriptorPool = VK_NULL_HANDLE;
-
-static ImGui_ImplVulkanH_Window g_MainWindowData;
-static uint32_t g_MinImageCount = 2;
-static bool g_SwapChainRebuild = false;
-static int g_SwapChainResizeWidth = 0;
-static int g_SwapChainResizeHeight = 0;
-
-static void check_vk_result(VkResult err) {
-  if (err == 0) return;
-  fprintf(stderr, "VkResult: %d\n", err);
-  abort();
-}
-
-// Returns the names of the Vulkan layers used for the given IREE
-// |extensibility_set| and |features|.
-std::vector<const char*> GetIreeLayers(
-    iree_hal_vulkan_extensibility_set_t extensibility_set,
-    iree_hal_vulkan_features_t features) {
-  iree_host_size_t required_count;
-  iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, /*string_capacity=*/0, &required_count,
-      /*out_string_values=*/NULL);
-  std::vector<const char*> layers(required_count);
-  iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
-                                          layers.size(), &required_count,
-                                          layers.data());
-  return layers;
-}
-
-// Returns the names of the Vulkan extensions used for the given IREE
-// |extensibility_set| and |features|.
-std::vector<const char*> GetIreeExtensions(
-    iree_hal_vulkan_extensibility_set_t extensibility_set,
-    iree_hal_vulkan_features_t features) {
-  iree_host_size_t required_count;
-  iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, /*string_capacity=*/0, &required_count,
-      /*out_string_values=*/NULL);
-  std::vector<const char*> extensions(required_count);
-  iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
-                                          extensions.size(), &required_count,
-                                          extensions.data());
-  return extensions;
-}
-
-// Returns the names of the Vulkan extensions used for the given IREE
-// |vulkan_features|.
-std::vector<const char*> GetDeviceExtensions(
-    VkPhysicalDevice physical_device,
-    iree_hal_vulkan_features_t vulkan_features) {
-  std::vector<const char*> iree_required_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
-      vulkan_features);
-  std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
-      vulkan_features);
-
-  uint32_t extension_count = 0;
-  check_vk_result(vkEnumerateDeviceExtensionProperties(
-      physical_device, nullptr, &extension_count, nullptr));
-  std::vector<VkExtensionProperties> extension_properties(extension_count);
-  check_vk_result(vkEnumerateDeviceExtensionProperties(
-      physical_device, nullptr, &extension_count, extension_properties.data()));
-
-  // Merge extensions lists, including optional and required for simplicity.
-  std::set<const char*> ext_set;
-  ext_set.insert("VK_KHR_swapchain");
-  ext_set.insert(iree_required_extensions.begin(),
-                 iree_required_extensions.end());
-  for (int i = 0; i < iree_optional_extensions.size(); ++i) {
-    const char* optional_extension = iree_optional_extensions[i];
-    for (int j = 0; j < extension_count; ++j) {
-      if (strcmp(optional_extension, extension_properties[j].extensionName) ==
-          0) {
-        ext_set.insert(optional_extension);
-        break;
-      }
-    }
-  }
-  std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
-  return extensions;
-}
-
-std::vector<const char*> GetInstanceLayers(
-    iree_hal_vulkan_features_t vulkan_features) {
-  // Query the layers that IREE wants / needs.
-  std::vector<const char*> required_layers = GetIreeLayers(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED, vulkan_features);
-  std::vector<const char*> optional_layers = GetIreeLayers(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL, vulkan_features);
-
-  // Query the layers that are available on the Vulkan ICD.
-  uint32_t layer_property_count = 0;
-  check_vk_result(
-      vkEnumerateInstanceLayerProperties(&layer_property_count, NULL));
-  std::vector<VkLayerProperties> layer_properties(layer_property_count);
-  check_vk_result(vkEnumerateInstanceLayerProperties(&layer_property_count,
-                                                     layer_properties.data()));
-
-  // Match between optional/required and available layers.
-  std::vector<const char*> layers;
-  for (const char* layer_name : required_layers) {
-    bool found = false;
-    for (const auto& layer_property : layer_properties) {
-      if (std::strcmp(layer_name, layer_property.layerName) == 0) {
-        found = true;
-        layers.push_back(layer_name);
-        break;
-      }
-    }
-    if (!found) {
-      fprintf(stderr, "Required layer %s not available\n", layer_name);
-      abort();
-    }
-  }
-  for (const char* layer_name : optional_layers) {
-    for (const auto& layer_property : layer_properties) {
-      if (std::strcmp(layer_name, layer_property.layerName) == 0) {
-        layers.push_back(layer_name);
-        break;
-      }
-    }
-  }
-
-  return layers;
-}
-
-std::vector<const char*> GetInstanceExtensions(
-    SDL_Window* window, iree_hal_vulkan_features_t vulkan_features) {
-  // Ask SDL for its list of required instance extensions.
-  uint32_t sdl_extensions_count = 0;
-  SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count, NULL);
-  std::vector<const char*> sdl_extensions(sdl_extensions_count);
-  SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count,
-                                   sdl_extensions.data());
-
-  std::vector<const char*> iree_required_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED,
-      vulkan_features);
-  std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
-      vulkan_features);
-
-  // Merge extensions lists, including optional and required for simplicity.
-  std::set<const char*> ext_set;
-  ext_set.insert(sdl_extensions.begin(), sdl_extensions.end());
-  ext_set.insert(iree_required_extensions.begin(),
-                 iree_required_extensions.end());
-  ext_set.insert(iree_optional_extensions.begin(),
-                 iree_optional_extensions.end());
-  std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
-  return extensions;
-}
-
-void SetupVulkan(iree_hal_vulkan_features_t vulkan_features,
-                 const char** instance_layers, uint32_t instance_layers_count,
-                 const char** instance_extensions,
-                 uint32_t instance_extensions_count,
-                 const VkAllocationCallbacks* allocator, VkInstance* instance,
-                 uint32_t* queue_family_index,
-                 VkPhysicalDevice* physical_device, VkQueue* queue,
-                 VkDevice* device, VkDescriptorPool* descriptor_pool) {
-  VkResult err;
-
-  // Create Vulkan Instance
-  {
-    VkInstanceCreateInfo create_info = {};
-    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    create_info.enabledLayerCount = instance_layers_count;
-    create_info.ppEnabledLayerNames = instance_layers;
-    create_info.enabledExtensionCount = instance_extensions_count;
-    create_info.ppEnabledExtensionNames = instance_extensions;
-    err = vkCreateInstance(&create_info, allocator, instance);
-    check_vk_result(err);
-  }
-
-  // Select GPU
-  {
-    uint32_t gpu_count;
-    err = vkEnumeratePhysicalDevices(*instance, &gpu_count, NULL);
-    check_vk_result(err);
-    IM_ASSERT(gpu_count > 0);
-
-    VkPhysicalDevice* gpus =
-        (VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * gpu_count);
-    err = vkEnumeratePhysicalDevices(*instance, &gpu_count, gpus);
-    check_vk_result(err);
-
-    // Use the first reported GPU for simplicity.
-    *physical_device = gpus[0];
-
-    VkPhysicalDeviceProperties properties;
-    vkGetPhysicalDeviceProperties(*physical_device, &properties);
-    fprintf(stdout, "Selected Vulkan device: '%s'\n", properties.deviceName);
-    free(gpus);
-  }
-
-  // Select queue family. We want a single queue with graphics and compute for
-  // simplicity, but we could also discover and use separate queues for each.
-  {
-    uint32_t count;
-    vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, NULL);
-    VkQueueFamilyProperties* queues = (VkQueueFamilyProperties*)malloc(
-        sizeof(VkQueueFamilyProperties) * count);
-    vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, queues);
-    for (uint32_t i = 0; i < count; i++) {
-      if (queues[i].queueFlags &
-          (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) {
-        *queue_family_index = i;
-        break;
-      }
-    }
-    free(queues);
-    IM_ASSERT(*queue_family_index != (uint32_t)-1);
-  }
-
-  // Create Logical Device (with 1 queue)
-  {
-    std::vector<const char*> device_extensions =
-        GetDeviceExtensions(*physical_device, vulkan_features);
-    const float queue_priority[] = {1.0f};
-    VkDeviceQueueCreateInfo queue_info = {};
-    queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-    queue_info.queueFamilyIndex = *queue_family_index;
-    queue_info.queueCount = 1;
-    queue_info.pQueuePriorities = queue_priority;
-    VkDeviceCreateInfo create_info = {};
-    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    create_info.queueCreateInfoCount = 1;
-    create_info.pQueueCreateInfos = &queue_info;
-    create_info.enabledExtensionCount =
-        static_cast<uint32_t>(device_extensions.size());
-    create_info.ppEnabledExtensionNames = device_extensions.data();
-
-    // Enable timeline semaphores.
-    VkPhysicalDeviceFeatures2 features2;
-    memset(&features2, 0, sizeof(features2));
-    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-    create_info.pNext = &features2;
-    VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
-    memset(&semaphore_features, 0, sizeof(semaphore_features));
-    semaphore_features.sType =
-        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
-    semaphore_features.pNext = features2.pNext;
-    features2.pNext = &semaphore_features;
-    semaphore_features.timelineSemaphore = VK_TRUE;
-
-    err = vkCreateDevice(*physical_device, &create_info, allocator, device);
-    check_vk_result(err);
-    vkGetDeviceQueue(*device, *queue_family_index, 0, queue);
-  }
-
-  // Create Descriptor Pool
-  {
-    VkDescriptorPoolSize pool_sizes[] = {
-        {VK_DESCRIPTOR_TYPE_SAMPLER, 1000},
-        {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1000},
-        {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1000},
-        {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1000}};
-    VkDescriptorPoolCreateInfo pool_info = {};
-    pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-    pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
-    pool_info.maxSets = 1000 * IREE_ARRAYSIZE(pool_sizes);
-    pool_info.poolSizeCount = (uint32_t)IREE_ARRAYSIZE(pool_sizes);
-    pool_info.pPoolSizes = pool_sizes;
-    err =
-        vkCreateDescriptorPool(*device, &pool_info, allocator, descriptor_pool);
-    check_vk_result(err);
-  }
-}
-
-void SetupVulkanWindow(ImGui_ImplVulkanH_Window* wd,
-                       const VkAllocationCallbacks* allocator,
-                       VkInstance instance, uint32_t queue_family_index,
-                       VkPhysicalDevice physical_device, VkDevice device,
-                       VkSurfaceKHR surface, int width, int height,
-                       uint32_t min_image_count) {
-  wd->Surface = surface;
-
-  // Check for WSI support
-  VkBool32 res;
-  vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index,
-                                       wd->Surface, &res);
-  if (res != VK_TRUE) {
-    fprintf(stderr, "Error no WSI support on physical device 0\n");
-    exit(-1);
-  }
-
-  // Select Surface Format
-  const VkFormat requestSurfaceImageFormat[] = {
-      VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM,
-      VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM};
-  const VkColorSpaceKHR requestSurfaceColorSpace =
-      VK_COLORSPACE_SRGB_NONLINEAR_KHR;
-  wd->SurfaceFormat = ImGui_ImplVulkanH_SelectSurfaceFormat(
-      physical_device, wd->Surface, requestSurfaceImageFormat,
-      (size_t)IREE_ARRAYSIZE(requestSurfaceImageFormat),
-      requestSurfaceColorSpace);
-
-  // Select Present Mode
-#ifdef IMGUI_UNLIMITED_FRAME_RATE
-  VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_MAILBOX_KHR,
-                                      VK_PRESENT_MODE_IMMEDIATE_KHR,
-                                      VK_PRESENT_MODE_FIFO_KHR};
-#else
-  VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_FIFO_KHR};
-#endif
-  wd->PresentMode = ImGui_ImplVulkanH_SelectPresentMode(
-      physical_device, wd->Surface, &present_modes[0],
-      IREE_ARRAYSIZE(present_modes));
-
-  // Create SwapChain, RenderPass, Framebuffer, etc.
-  IM_ASSERT(min_image_count >= 2);
-  ImGui_ImplVulkanH_CreateOrResizeWindow(instance, physical_device, device, wd,
-                                         queue_family_index, allocator, width,
-                                         height, min_image_count);
-
-  // Set clear color.
-  ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
-  memcpy(&wd->ClearValue.color.float32[0], &clear_color, 4 * sizeof(float));
-}
-
-void RenderFrame(ImGui_ImplVulkanH_Window* wd, VkDevice device, VkQueue queue) {
-  VkResult err;
-
-  VkSemaphore image_acquired_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].ImageAcquiredSemaphore;
-  VkSemaphore render_complete_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
-  err = vkAcquireNextImageKHR(device, wd->Swapchain, UINT64_MAX,
-                              image_acquired_semaphore, VK_NULL_HANDLE,
-                              &wd->FrameIndex);
-  check_vk_result(err);
-
-  ImGui_ImplVulkanH_Frame* fd = &wd->Frames[wd->FrameIndex];
-  {
-    err = vkWaitForFences(
-        device, 1, &fd->Fence, VK_TRUE,
-        UINT64_MAX);  // wait indefinitely instead of periodically checking
-    check_vk_result(err);
-
-    err = vkResetFences(device, 1, &fd->Fence);
-    check_vk_result(err);
-  }
-  {
-    err = vkResetCommandPool(device, fd->CommandPool, 0);
-    check_vk_result(err);
-    VkCommandBufferBeginInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    err = vkBeginCommandBuffer(fd->CommandBuffer, &info);
-    check_vk_result(err);
-  }
-  {
-    VkRenderPassBeginInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
-    info.renderPass = wd->RenderPass;
-    info.framebuffer = fd->Framebuffer;
-    info.renderArea.extent.width = wd->Width;
-    info.renderArea.extent.height = wd->Height;
-    info.clearValueCount = 1;
-    info.pClearValues = &wd->ClearValue;
-    vkCmdBeginRenderPass(fd->CommandBuffer, &info, VK_SUBPASS_CONTENTS_INLINE);
-  }
-
-  // Record Imgui Draw Data and draw funcs into command buffer
-  ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), fd->CommandBuffer);
-
-  // Submit command buffer
-  vkCmdEndRenderPass(fd->CommandBuffer);
-  {
-    VkPipelineStageFlags wait_stage =
-        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-    VkSubmitInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    info.waitSemaphoreCount = 1;
-    info.pWaitSemaphores = &image_acquired_semaphore;
-    info.pWaitDstStageMask = &wait_stage;
-    info.commandBufferCount = 1;
-    info.pCommandBuffers = &fd->CommandBuffer;
-    info.signalSemaphoreCount = 1;
-    info.pSignalSemaphores = &render_complete_semaphore;
-
-    err = vkEndCommandBuffer(fd->CommandBuffer);
-    check_vk_result(err);
-    err = vkQueueSubmit(queue, 1, &info, fd->Fence);
-    check_vk_result(err);
-  }
-}
-
-void PresentFrame(ImGui_ImplVulkanH_Window* wd, VkQueue queue) {
-  VkSemaphore render_complete_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
-  VkPresentInfoKHR info = {};
-  info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-  info.waitSemaphoreCount = 1;
-  info.pWaitSemaphores = &render_complete_semaphore;
-  info.swapchainCount = 1;
-  info.pSwapchains = &wd->Swapchain;
-  info.pImageIndices = &wd->FrameIndex;
-  VkResult err = vkQueuePresentKHR(queue, &info);
-  check_vk_result(err);
-  wd->SemaphoreIndex =
-      (wd->SemaphoreIndex + 1) %
-      wd->ImageCount;  // Now we can use the next set of semaphores
-}
-
-static void CleanupVulkan() {
-  vkDestroyDescriptorPool(g_Device, g_DescriptorPool, g_Allocator);
-
-  vkDestroyDevice(g_Device, g_Allocator);
-  vkDestroyInstance(g_Instance, g_Allocator);
-}
-
-static void CleanupVulkanWindow() {
-  ImGui_ImplVulkanH_DestroyWindow(g_Instance, g_Device, &g_MainWindowData,
-                                  g_Allocator);
-}
-
-namespace iree {
-
-extern "C" int iree_main(int argc, char** argv) {
-
-  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
-  if (argc > 1) {
-    // Avoid iree-run-module spinning endlessly on stdin if the user uses single
-    // dashes for flags.
-    printf(
-        "[ERROR] unexpected positional argument (expected none)."
-        " Did you use pass a flag with a single dash ('-')?"
-        " Use '--' instead.\n");
-    return 1;
-  }
-
-  // --------------------------------------------------------------------------
-  // Create a window.
-  if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) != 0) {
-    fprintf(stderr, "Failed to initialize SDL\n");
-    abort();
-    return 1;
-  }
-
-  // Setup window
-  // clang-format off
-  SDL_WindowFlags window_flags = (SDL_WindowFlags)(
-      SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
-  // clang-format on
-  SDL_Window* window = SDL_CreateWindow(
-      "IREE Samples - Vulkan Inference GUI", SDL_WINDOWPOS_CENTERED,
-      SDL_WINDOWPOS_CENTERED, 1280, 720, window_flags);
-  if (window == nullptr)
-  {
-    const char* sdl_err = SDL_GetError();
-    fprintf(stderr, "Error, SDL_CreateWindow returned: %s\n", sdl_err);
-    abort();
-    return 1;
-  }
-
-  // Setup Vulkan
-  iree_hal_vulkan_features_t iree_vulkan_features =
-      static_cast<iree_hal_vulkan_features_t>(
-          IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS |
-          IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
-  std::vector<const char*> layers = GetInstanceLayers(iree_vulkan_features);
-  std::vector<const char*> extensions =
-      GetInstanceExtensions(window, iree_vulkan_features);
-  SetupVulkan(iree_vulkan_features, layers.data(),
-              static_cast<uint32_t>(layers.size()), extensions.data(),
-              static_cast<uint32_t>(extensions.size()), g_Allocator,
-              &g_Instance, &g_QueueFamily, &g_PhysicalDevice, &g_Queue,
-              &g_Device, &g_DescriptorPool);
-
-  // Create Window Surface
-  VkSurfaceKHR surface;
-  VkResult err;
-  if (SDL_Vulkan_CreateSurface(window, g_Instance, &surface) == 0) {
-    fprintf(stderr, "Failed to create Vulkan surface.\n");
-    abort();
-    return 1;
-  }
-
-  // Create Framebuffers
-  int w, h;
-  SDL_GetWindowSize(window, &w, &h);
-  ImGui_ImplVulkanH_Window* wd = &g_MainWindowData;
-  SetupVulkanWindow(wd, g_Allocator, g_Instance, g_QueueFamily,
-                    g_PhysicalDevice, g_Device, surface, w, h, g_MinImageCount);
-
-  // Setup Dear ImGui context
-  IMGUI_CHECKVERSION();
-  ImGui::CreateContext();
-  ImGuiIO& io = ImGui::GetIO();
-  (void)io;
-
-  ImGui::StyleColorsDark();
-
-  // Setup Platform/Renderer bindings
-  ImGui_ImplSDL2_InitForVulkan(window);
-  ImGui_ImplVulkan_InitInfo init_info = {};
-  init_info.Instance = g_Instance;
-  init_info.PhysicalDevice = g_PhysicalDevice;
-  init_info.Device = g_Device;
-  init_info.QueueFamily = g_QueueFamily;
-  init_info.Queue = g_Queue;
-  init_info.PipelineCache = g_PipelineCache;
-  init_info.DescriptorPool = g_DescriptorPool;
-  init_info.Allocator = g_Allocator;
-  init_info.MinImageCount = g_MinImageCount;
-  init_info.ImageCount = wd->ImageCount;
-  init_info.CheckVkResultFn = check_vk_result;
-  ImGui_ImplVulkan_Init(&init_info, wd->RenderPass);
-
-  // Upload Fonts
-  {
-    // Use any command queue
-    VkCommandPool command_pool = wd->Frames[wd->FrameIndex].CommandPool;
-    VkCommandBuffer command_buffer = wd->Frames[wd->FrameIndex].CommandBuffer;
-
-    err = vkResetCommandPool(g_Device, command_pool, 0);
-    check_vk_result(err);
-    VkCommandBufferBeginInfo begin_info = {};
-    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    begin_info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    err = vkBeginCommandBuffer(command_buffer, &begin_info);
-    check_vk_result(err);
-
-    ImGui_ImplVulkan_CreateFontsTexture(command_buffer);
-
-    VkSubmitInfo end_info = {};
-    end_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    end_info.commandBufferCount = 1;
-    end_info.pCommandBuffers = &command_buffer;
-    err = vkEndCommandBuffer(command_buffer);
-    check_vk_result(err);
-    err = vkQueueSubmit(g_Queue, 1, &end_info, VK_NULL_HANDLE);
-    check_vk_result(err);
-
-    err = vkDeviceWaitIdle(g_Device);
-    check_vk_result(err);
-    ImGui_ImplVulkan_DestroyFontUploadObjects();
-  }
-
-  // Demo state.
-  bool show_iree_window = true;
-  // --------------------------------------------------------------------------
-  // Setup IREE.
-
-  // Check API version.
-  iree_api_version_t actual_version;
-  iree_status_t status =
-      iree_api_version_check(IREE_API_VERSION_LATEST, &actual_version);
-  if (iree_status_is_ok(status)) {
-    fprintf(stdout, "IREE runtime API version: %d\n", actual_version);
-  } else {
-    fprintf(stderr, "Unsupported runtime API version: %d\n", actual_version);
-    abort();
-  }
-
-  // Create a runtime Instance.
-  iree_vm_instance_t* iree_instance = nullptr;
-  IREE_CHECK_OK(
-      iree_vm_instance_create(iree_allocator_system(), &iree_instance));
-
-  // Register HAL drivers and VM module types.
-  IREE_CHECK_OK(iree_hal_vulkan_driver_module_register(
-      iree_hal_driver_registry_default()));
-  IREE_CHECK_OK(iree_hal_module_register_all_types(iree_instance));
-
-  // Create IREE Vulkan Driver and Device, sharing our VkInstance/VkDevice.
-  fprintf(stdout, "Creating Vulkan driver/device\n");
-  // Load symbols from our static `vkGetInstanceProcAddr` for IREE to use.
-  iree_hal_vulkan_syms_t* iree_vk_syms = nullptr;
-  IREE_CHECK_OK(iree_hal_vulkan_syms_create(
-      reinterpret_cast<void*>(&vkGetInstanceProcAddr), iree_allocator_system(),
-      &iree_vk_syms));
-  // Create the driver sharing our VkInstance.
-  iree_hal_driver_t* iree_vk_driver = nullptr;
-  iree_string_view_t driver_identifier = iree_make_cstring_view("vulkan");
-  iree_hal_vulkan_driver_options_t driver_options;
-  driver_options.api_version = VK_API_VERSION_1_0;
-  driver_options.requested_features = static_cast<iree_hal_vulkan_features_t>(
-      IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
-  IREE_CHECK_OK(iree_hal_vulkan_driver_create_using_instance(
-      driver_identifier, &driver_options, iree_vk_syms, g_Instance,
-      iree_allocator_system(), &iree_vk_driver));
-  // Create a device sharing our VkDevice and queue.
-  // We could also create a separate (possibly low priority) compute queue for
-  // IREE, and/or provide a dedicated transfer queue.
-  iree_string_view_t device_identifier = iree_make_cstring_view("vulkan");
-  iree_hal_vulkan_queue_set_t compute_queue_set;
-  compute_queue_set.queue_family_index = g_QueueFamily;
-  compute_queue_set.queue_indices = 1 << 0;
-  iree_hal_vulkan_queue_set_t transfer_queue_set;
-  transfer_queue_set.queue_indices = 0;
-  iree_hal_device_t* iree_vk_device = nullptr;
-  IREE_CHECK_OK(iree_hal_vulkan_wrap_device(
-      device_identifier, &driver_options.device_options, iree_vk_syms,
-      g_Instance, g_PhysicalDevice, g_Device, &compute_queue_set,
-      &transfer_queue_set, iree_allocator_system(), &iree_vk_device));
-  // Create a HAL module using the HAL device.
-  iree_vm_module_t* hal_module = nullptr;
-  IREE_CHECK_OK(iree_hal_module_create(iree_instance, iree_vk_device,
-                                       IREE_HAL_MODULE_FLAG_NONE,
-                                       iree_allocator_system(), &hal_module));
-
-
-  // Load bytecode module
-  //iree_file_toc_t module_file_toc;
-  //const char network_model[] = "resnet50_tf.vmfb";
-  //fprintf(stdout, "Loading: %s\n", network_model);
-  //if (load_file(network_model, &module_file_toc.data, &module_file_toc.size) == false)
-  //{
-  //    abort();
-  //    return 1;
-  //}
-  //fprintf(stdout, "module size: %zu\n", module_file_toc.size);
-
-  iree_vm_module_t* bytecode_module = nullptr;
-  iree_status_t module_status = iree_tooling_load_module_from_flags(
-      iree_instance, iree_allocator_system(), &bytecode_module);
-  if (!iree_status_is_ok(module_status))
-    return -1;
-  //IREE_CHECK_OK(iree_vm_bytecode_module_create(
-  //    iree_instance,
-  //    iree_const_byte_span_t{
-  //        reinterpret_cast<const uint8_t*>(module_file_toc.data),
-  //        module_file_toc.size},
-  //    iree_allocator_null(), iree_allocator_system(), &bytecode_module));
-  //// Query for details about what is in the loaded module.
-  //iree_vm_module_signature_t bytecode_module_signature =
-  //    iree_vm_module_signature(bytecode_module);
-  //fprintf(stdout, "Module loaded, have <%" PRIhsz "> exported functions:\n",
-  //        bytecode_module_signature.export_function_count);
-  //for (int i = 0; i < bytecode_module_signature.export_function_count; ++i) {
-  //  iree_vm_function_t function;
-  //  IREE_CHECK_OK(iree_vm_module_lookup_function_by_ordinal(
-  //      bytecode_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function));
-  //  auto function_name = iree_vm_function_name(&function);
-  //  auto function_signature = iree_vm_function_signature(&function);
-
-  //  fprintf(stdout, "  %d: '%.*s' with calling convention '%.*s'\n", i,
-  //          (int)function_name.size, function_name.data,
-  //          (int)function_signature.calling_convention.size,
-  //          function_signature.calling_convention.data);
-  //}
-
-  // Allocate a context that will hold the module state across invocations.
-  iree_vm_context_t* iree_context = nullptr;
-  std::vector<iree_vm_module_t*> modules = {hal_module, bytecode_module};
-  IREE_CHECK_OK(iree_vm_context_create_with_modules(
-      iree_instance, IREE_VM_CONTEXT_FLAG_NONE, modules.size(), modules.data(),
-      iree_allocator_system(), &iree_context));
-  fprintf(stdout, "Context with modules is ready for use\n");
-
-  // Lookup the entry point function.
-  iree_vm_function_t main_function;
-  const char kMainFunctionName[] = "module.forward";
-  IREE_CHECK_OK(iree_vm_context_resolve_function(
-      iree_context,
-      iree_string_view_t{kMainFunctionName, sizeof(kMainFunctionName) - 1},
-      &main_function));
-  iree_string_view_t main_function_name = iree_vm_function_name(&main_function);
-  fprintf(stdout, "Resolved main function named '%.*s'\n",
-          (int)main_function_name.size, main_function_name.data);
-
-  // --------------------------------------------------------------------------
-
-        // Write inputs into mappable buffers.
-        iree_hal_allocator_t* allocator =
-            iree_hal_device_allocator(iree_vk_device);
-        //iree_hal_memory_type_t input_memory_type =
-        //    static_cast<iree_hal_memory_type_t>(
-        //        IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-        //        IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
-        //iree_hal_buffer_usage_t input_buffer_usage =
-        //    static_cast<iree_hal_buffer_usage_t>(IREE_HAL_BUFFER_USAGE_DEFAULT);
-        //iree_hal_buffer_params_t buffer_params;
-        //buffer_params.type = input_memory_type;
-        //buffer_params.usage = input_buffer_usage;
-        //buffer_params.access = IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE;
-
-       // Wrap input buffers in buffer views.
-
-        vm::ref<iree_vm_list_t> inputs;
-        iree_status_t input_status = ParseToVariantList(
-            allocator,
-            iree::span<const std::string>{FLAG_function_inputs.data(),
-                                          FLAG_function_inputs.size()},
-            iree_allocator_system(), &inputs);
-        if (!iree_status_is_ok(input_status))
-            return -1;
-        //vm::ref<iree_vm_list_t> inputs;
-        //IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, 6, iree_allocator_system(), &inputs));
-
-        //iree_hal_buffer_view_t* input0_buffer_view = nullptr;
-        //constexpr iree_hal_dim_t input_buffer_shape[] = {1, 224, 224, 3};
-        //IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
-        //    allocator,
-        //    /*shape_rank=*/4, /*shape=*/input_buffer_shape,
-        //    IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-        //    IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
-        //    iree_make_const_byte_span(&input_res50, sizeof(input_res50)),
-        //    &input0_buffer_view));
-
-        //auto input0_buffer_view_ref = iree_hal_buffer_view_move_ref(input0_buffer_view);
-        //IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), &input0_buffer_view_ref));
-
-        // Prepare outputs list to accept results from the invocation.
-
-        vm::ref<iree_vm_list_t> outputs;
-        constexpr iree_hal_dim_t kOutputCount = 1000;
-        IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, kOutputCount * sizeof(float), iree_allocator_system(), &outputs));
-
-  // --------------------------------------------------------------------------
-
-  // Main loop.
-  bool done = false;
-  while (!done) {
-    SDL_Event event;
-
-    while (SDL_PollEvent(&event)) {
-      if (event.type == SDL_QUIT) {
-        done = true;
-      }
-
-      ImGui_ImplSDL2_ProcessEvent(&event);
-      if (event.type == SDL_QUIT) done = true;
-      if (event.type == SDL_WINDOWEVENT &&
-          event.window.event == SDL_WINDOWEVENT_RESIZED &&
-          event.window.windowID == SDL_GetWindowID(window)) {
-        g_SwapChainResizeWidth = (int)event.window.data1;
-        g_SwapChainResizeHeight = (int)event.window.data2;
-        g_SwapChainRebuild = true;
-      }
-    }
-
-    if (g_SwapChainRebuild) {
-      g_SwapChainRebuild = false;
-      ImGui_ImplVulkan_SetMinImageCount(g_MinImageCount);
-      ImGui_ImplVulkanH_CreateOrResizeWindow(
-          g_Instance, g_PhysicalDevice, g_Device, &g_MainWindowData,
-          g_QueueFamily, g_Allocator, g_SwapChainResizeWidth,
-          g_SwapChainResizeHeight, g_MinImageCount);
-      g_MainWindowData.FrameIndex = 0;
-    }
-
-    // Start the Dear ImGui frame
-    ImGui_ImplVulkan_NewFrame();
-    ImGui_ImplSDL2_NewFrame(window);
-    ImGui::NewFrame();
-
-    // Custom window.
-    {
-      ImGui::Begin("IREE Vulkan Integration Demo", &show_iree_window);
-
-      ImGui::Separator();
-
-      // ImGui Inputs for two input tensors.
-      // Run computation whenever any of the values changes.
-      static bool dirty = true;
-      if (dirty) {
-
-        // Synchronously invoke the function.
-        IREE_CHECK_OK(iree_vm_invoke(iree_context, main_function,
-                                     IREE_VM_INVOCATION_FLAG_NONE,
-                                     /*policy=*/nullptr, inputs.get(),
-                                     outputs.get(), iree_allocator_system()));
-
-
-        // we want to run continuously so we can use tools like RenderDoc, RGP, etc...
-        dirty = true;
-      }
-
-      // Framerate counter.
-      ImGui::Text("Application average %.3f ms/frame (%.1f FPS)",
-                  1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate);
-
-      ImGui::End();
-    }
-
-    // Rendering
-    ImGui::Render();
-    RenderFrame(wd, g_Device, g_Queue);
-
-    PresentFrame(wd, g_Queue);
-  }
-  // --------------------------------------------------------------------------
-
-  // --------------------------------------------------------------------------
-  // Cleanup
-  iree_vm_module_release(hal_module);
-  iree_vm_module_release(bytecode_module);
-  iree_vm_context_release(iree_context);
-  iree_hal_device_release(iree_vk_device);
-  iree_hal_allocator_release(allocator);
-  iree_hal_driver_release(iree_vk_driver);
-  iree_hal_vulkan_syms_release(iree_vk_syms);
-  iree_vm_instance_release(iree_instance);
-
-  err = vkDeviceWaitIdle(g_Device);
-  check_vk_result(err);
-  ImGui_ImplVulkan_Shutdown();
-  ImGui_ImplSDL2_Shutdown();
-  ImGui::DestroyContext();
-
-  CleanupVulkanWindow();
-  CleanupVulkan();
-
-  SDL_DestroyWindow(window);
-  SDL_Quit();
-  // --------------------------------------------------------------------------
-
-  return 0;
-}
-
-}  // namespace iree
--- a/cpp/vulkan_gui/vulkan_resnet_inference_gui.cc
+++ b/cpp/vulkan_gui/vulkan_resnet_inference_gui.cc
--- a/dataset/README.md
+++ b/dataset/README.md
@@ -1,27 +0,0 @@
-# Dataset annotation tool
-
-SHARK annotator for adding or modifying prompts of dataset images
-
-## Set up
-
-Activate SHARK Python virtual environment and install additional packages
-```shell
-source ../shark.venv/bin/activate
-pip install -r requirements.txt
-```
-
-## Run annotator
-
-```shell
-python annotation_tool.py
-```
-
-<img width="1280" alt="annotator" src="https://user-images.githubusercontent.com/49575973/214521137-7ef6ae10-7cd8-46e6-b270-b6c0445157f1.png">
-
-* Select a dataset from `Dataset` dropdown list
-* Select an image from `Image` dropdown list
-* Image and the existing prompt will be loaded
-* Select a prompt from `Prompt` dropdown list to modify or "Add new" to add a prompt
-* Click `Save` to save changes, click `Delete` to delete prompt
-* Click `Back` or `Next` to switch image, you could also select other images from `Image`
-* Click `Finish` when finishing annotation or before switching dataset
--- a/dataset/annotation_tool.py
+++ b/dataset/annotation_tool.py
@@ -1,247 +0,0 @@
-import gradio as gr
-import json
-import jsonlines
-import os
-from args import args
-from pathlib import Path
-from PIL import Image
-from utils import get_datasets
-
-
-shark_root = Path(__file__).parent.parent
-demo_css = shark_root.joinpath("web/demo.css").resolve()
-nodlogo_loc = shark_root.joinpath(
-    "web/models/stable_diffusion/logos/nod-logo.png"
-)
-
-
-with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as shark_web:
-    with gr.Row(elem_id="ui_title"):
-        nod_logo = Image.open(nodlogo_loc)
-        with gr.Column(scale=1, elem_id="demo_title_outer"):
-            gr.Image(
-                value=nod_logo,
-                show_label=False,
-                interactive=False,
-                elem_id="top_logo",
-            ).style(width=150, height=100)
-
-    datasets, images, ds_w_prompts = get_datasets(args.gs_url)
-    prompt_data = dict()
-
-    with gr.Row(elem_id="ui_body"):
-        # TODO: add multiselect dataset, there is a gradio version conflict
-        dataset = gr.Dropdown(label="Dataset", choices=datasets)
-        image_name = gr.Dropdown(label="Image", choices=[])
-
-    with gr.Row(elem_id="ui_body"):
-        # TODO: add ability to search image by typing
-        with gr.Column(scale=1, min_width=600):
-            image = gr.Image(type="filepath").style(height=512)
-
-        with gr.Column(scale=1, min_width=600):
-            prompts = gr.Dropdown(
-                label="Prompts",
-                choices=[],
-            )
-            prompt = gr.Textbox(
-                label="Editor",
-                lines=3,
-            )
-            with gr.Row():
-                save = gr.Button("Save")
-                delete = gr.Button("Delete")
-            with gr.Row():
-                back_image = gr.Button("Back")
-                next_image = gr.Button("Next")
-            finish = gr.Button("Finish")
-
-    def filter_datasets(dataset):
-        """Load the prompts of the selected dataset and list its images.
-
-        Creates the local working directory for ``dataset``, downloads its
-        ``metadata.jsonl`` from the bucket when the dataset is listed in
-        ``ds_w_prompts``, and rebuilds the shared ``prompt_data`` mapping
-        (image file name -> list of prompts).
-
-        Args:
-            dataset: Value of the `Dataset` dropdown, or None when cleared.
-
-        Returns:
-            A Dropdown update carrying the image choices for ``dataset``
-            (an empty, unselected dropdown when ``dataset`` is None).
-        """
-        # Imported locally because this module has no top-level subprocess
-        # import.
-        import subprocess
-
-        if dataset is None:
-            return gr.Dropdown.update(value=None, choices=[])
-
-        # create the dataset dir if doesn't exist and download prompt file
-        # makedirs(exist_ok=True) removes the check-then-create race of the
-        # previous exists()/mkdir() pair.
-        dataset_path = str(shark_root) + "/dataset/" + dataset
-        os.makedirs(dataset_path, exist_ok=True)
-
-        # read prompt jsonlines file
-        prompt_data.clear()
-        if dataset in ds_w_prompts:
-            prompt_gs_path = args.gs_url + "/" + dataset + "/metadata.jsonl"
-            # Argument-list invocation: no shell is involved, so quotes or
-            # metacharacters in the dataset name cannot alter the command.
-            subprocess.run(
-                ["gsutil", "cp", prompt_gs_path, dataset_path + "/"],
-                check=False,
-            )
-            with jsonlines.open(dataset_path + "/metadata.jsonl") as reader:
-                for line in reader.iter(type=dict, skip_invalid=True):
-                    text = line["text"]
-                    # A single prompt is stored as a bare string; normalize
-                    # it to a one-element list.
-                    prompt_data[line["file_name"]] = (
-                        [text] if isinstance(text, str) else text
-                    )
-
-        return gr.Dropdown.update(choices=images[dataset])
-
-    dataset.change(fn=filter_datasets, inputs=dataset, outputs=image_name)
-
-    def display_image(dataset, image_name):
-        """Download the selected image and list its existing prompts.
-
-        Args:
-            dataset: Selected dataset name, or None.
-            image_name: Path of the image inside the dataset, or None.
-
-        Returns:
-            A pair of updates: the Image component showing the downloaded
-            file, and the `Prompts` dropdown offering "Add new" followed by
-            the prompts already stored for the image. Both are cleared when
-            either argument is None.
-        """
-        # Imported locally because this module has no top-level subprocess
-        # import.
-        import subprocess
-
-        if dataset is None or image_name is None:
-            return gr.Image.update(value=None), gr.Dropdown.update(value=None)
-
-        # download and load the image
-        img_gs_path = args.gs_url + "/" + dataset + "/" + image_name
-        img_sub_path, _, img_file_name = image_name.rpartition("/")
-        img_dst_path = (
-            str(shark_root) + "/dataset/" + dataset + "/" + img_sub_path + "/"
-        )
-        # makedirs creates every missing intermediate directory; a plain
-        # mkdir fails when the image sits more than one level deep.
-        os.makedirs(img_dst_path, exist_ok=True)
-        # Argument-list invocation: the paths are never parsed by a shell.
-        subprocess.run(
-            ["gsutil", "cp", img_gs_path, img_dst_path],
-            check=False,
-        )
-        img = Image.open(img_dst_path + img_file_name)
-
-        if image_name not in prompt_data.keys():
-            prompt_data[image_name] = []
-        prompt_choices = ["Add new"]
-        prompt_choices += prompt_data[image_name]
-        return gr.Image.update(value=img), gr.Dropdown.update(
-            choices=prompt_choices
-        )
-
-    image_name.change(
-        fn=display_image,
-        inputs=[dataset, image_name],
-        outputs=[image, prompts],
-    )
-
-    def edit_prompt(prompts):
-        """Mirror the selected prompt into the editor textbox.
-
-        Choosing the "Add new" entry clears the editor; any other choice is
-        copied into it unchanged.
-        """
-        editor_text = None if prompts == "Add new" else prompts
-        return gr.Textbox.update(value=editor_text)
-
-    prompts.change(fn=edit_prompt, inputs=prompts, outputs=prompt)
-
-    def save_prompt(dataset, image_name, prompts, prompt):
-        """Store the edited prompt and rewrite the dataset's metadata file.
-
-        Args:
-            dataset: Selected dataset name.
-            image_name: Selected image; key into ``prompt_data``.
-            prompts: Current `Prompts` selection: "Add new" or an existing
-                prompt to overwrite.
-            prompt: Text from the editor textbox.
-
-        Returns:
-            A Dropdown update with the refreshed prompt choices and no
-            selection, or None when any argument is None.
-        """
-        required = (dataset, image_name, prompts, prompt)
-        if any(field is None for field in required):
-            return
-
-        image_prompts = prompt_data[image_name]
-        if prompts == "Add new":
-            image_prompts.append(prompt)
-        else:
-            image_prompts[image_prompts.index(prompts)] = prompt
-
-        # write prompt jsonlines file
-        prompt_path = (
-            str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
-        )
-        with open(prompt_path, "w") as f:
-            for file_name, texts in prompt_data.items():
-                if not texts:
-                    continue
-                # One prompt is written as a bare string, several as a list.
-                record = {
-                    "file_name": file_name,
-                    "text": texts if len(texts) > 1 else texts[0],
-                }
-                f.write(json.dumps(record))
-                f.write("\n")
-
-        return gr.Dropdown.update(
-            choices=["Add new"] + prompt_data[image_name], value=None
-        )
-
-    save.click(
-        fn=save_prompt,
-        inputs=[dataset, image_name, prompts, prompt],
-        outputs=prompts,
-    )
-
-    def delete_prompt(dataset, image_name, prompts):
-        """Remove the selected prompt and rewrite the dataset's metadata file.
-
-        Args:
-            dataset: Selected dataset name.
-            image_name: Selected image; key into ``prompt_data``.
-            prompts: Current `Prompts` selection to delete.
-
-        Returns:
-            A Dropdown update with the remaining prompt choices and no
-            selection, or None when nothing deletable is selected (an
-            argument is None or the selection is "Add new").
-        """
-        nothing_selected = (
-            dataset is None or image_name is None or prompts is None
-        )
-        if nothing_selected or prompts == "Add new":
-            return
-
-        prompt_data[image_name].remove(prompts)
-
-        # write prompt jsonlines file
-        prompt_path = (
-            str(shark_root) + "/dataset/" + dataset + "/metadata.jsonl"
-        )
-        with open(prompt_path, "w") as f:
-            for file_name, texts in prompt_data.items():
-                if texts:
-                    # One prompt is written as a bare string, several as a
-                    # list.
-                    text = texts if len(texts) > 1 else texts[0]
-                    f.write(json.dumps({"file_name": file_name, "text": text}))
-                    f.write("\n")
-
-        remaining = ["Add new"]
-        remaining.extend(prompt_data[image_name])
-        return gr.Dropdown.update(choices=remaining, value=None)
-
-    delete.click(
-        fn=delete_prompt,
-        inputs=[dataset, image_name, prompts],
-        outputs=prompts,
-    )
-
-    def get_back_image(dataset, image_name):
-        """Step the `Image` dropdown back to the previous image.
-
-        Deletes the local copy of the current image, then selects the entry
-        that precedes it in ``images[dataset]``.
-
-        Args:
-            dataset: Selected dataset name, or None.
-            image_name: Currently selected image, or None.
-
-        Returns:
-            A Dropdown update selecting the previous image (value=None when
-            the current image is the first one), or None when nothing is
-            selected.
-        """
-        if dataset is None or image_name is None:
-            return
-
-        # remove local image
-        img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
-        try:
-            # os.remove instead of shelling out to `rm`: portable, and the
-            # file name is never interpreted by a shell.
-            os.remove(img_path)
-        except OSError:
-            # Best effort, like the `rm` call this replaces: the local copy
-            # may not exist.
-            pass
-        # get the index for the back image
-        idx = images[dataset].index(image_name)
-        if idx == 0:
-            return gr.Dropdown.update(value=None)
-
-        return gr.Dropdown.update(value=images[dataset][idx - 1])
-
-    back_image.click(
-        fn=get_back_image, inputs=[dataset, image_name], outputs=image_name
-    )
-
-    def get_next_image(dataset, image_name):
-        """Advance the `Image` dropdown to the next image.
-
-        Deletes the local copy of the current image, then selects the entry
-        that follows it in ``images[dataset]``.
-
-        Args:
-            dataset: Selected dataset name, or None.
-            image_name: Currently selected image, or None.
-
-        Returns:
-            A Dropdown update selecting the next image (value=None when the
-            current image is the last one), or None when nothing is
-            selected.
-        """
-        if dataset is None or image_name is None:
-            return
-
-        # remove local image
-        img_path = str(shark_root) + "/dataset/" + dataset + "/" + image_name
-        try:
-            # os.remove instead of shelling out to `rm`: portable, and the
-            # file name is never interpreted by a shell.
-            os.remove(img_path)
-        except OSError:
-            # Best effort, like the `rm` call this replaces: the local copy
-            # may not exist.
-            pass
-        # get the index for the next image
-        idx = images[dataset].index(image_name)
-        if idx == len(images[dataset]) - 1:
-            return gr.Dropdown.update(value=None)
-
-        return gr.Dropdown.update(value=images[dataset][idx + 1])
-
-    next_image.click(
-        fn=get_next_image, inputs=[dataset, image_name], outputs=image_name
-    )
-
-    def finish_annotation(dataset):
-        """Upload the dataset's prompts and clean up the local working copy.
-
-        The local directory is removed only after ``metadata.jsonl`` has been
-        uploaded successfully (or when there is no metadata file to upload),
-        so a failed upload can no longer discard the annotations.
-
-        Args:
-            dataset: Selected dataset name, or None.
-
-        Returns:
-            A Dropdown update clearing the dataset selection on success, an
-            update keeping the current selection when the upload failed, or
-            None when no dataset is selected.
-        """
-        # Imported locally because this module imports neither name at the
-        # top.
-        import shutil
-        import subprocess
-
-        if dataset is None:
-            return
-
-        dataset_path = str(shark_root) + "/dataset/" + dataset
-        dataset_gs_path = args.gs_url + "/" + dataset + "/"
-        prompt_path = dataset_path + "/metadata.jsonl"
-
-        # upload prompt
-        if os.path.exists(prompt_path):
-            try:
-                # Argument-list invocation: the paths are never parsed by a
-                # shell.
-                upload = subprocess.run(
-                    ["gsutil", "cp", prompt_path, dataset_gs_path],
-                    check=False,
-                )
-                uploaded = upload.returncode == 0
-            except OSError:
-                # gsutil is missing or could not be started.
-                uploaded = False
-            if not uploaded:
-                # Keep the local annotations so the upload can be retried.
-                print(f"Failed to upload {prompt_path}; local data kept.")
-                return gr.Dropdown.update(value=dataset)
-
-        # remove local data
-        shutil.rmtree(dataset_path, ignore_errors=True)
-
-        return gr.Dropdown.update(value=None)
-
-    finish.click(fn=finish_annotation, inputs=dataset, outputs=dataset)
-
-
-# Start the annotation web app when this file is executed as a script.
-if __name__ == "__main__":
-    # NOTE(review): server_name="0.0.0.0" presumably makes the server listen
-    # on all network interfaces, not only localhost -- confirm this exposure
-    # is intended.
-    shark_web.launch(
-        share=args.share,
-        inbrowser=True,
-        server_name="0.0.0.0",
-        server_port=args.server_port,
-    )
--- a/dataset/args.py
+++ b/dataset/args.py
@@ -1,34 +0,0 @@
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-##############################################################################
-### Dataset Annotator flags
-##############################################################################
-
-p.add_argument(
-    "--gs_url",
-    type=str,
-    required=True,
-    help="URL to datasets in GS bucket",
-)
-
-p.add_argument(
-    "--share",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for generating a public URL",
-)
-
-p.add_argument(
-    "--server_port",
-    type=int,
-    default=8080,
-    help="flag for setting server port",
-)
-
-##############################################################################
-
-args = p.parse_args()
--- a/dataset/requirements.txt
+++ b/dataset/requirements.txt
@@ -1,3 +0,0 @@
-# SHARK Annotator
-gradio==3.15.0
-jsonlines
--- a/dataset/utils.py
+++ b/dataset/utils.py
@@ -1,29 +0,0 @@
-from google.cloud import storage
-
-
-def get_datasets(gs_url):
-    """List the datasets stored under ``gs_url`` and their image files.
-
-    The third "/"-separated field of ``gs_url`` is used as the bucket name
-    and the remainder as the listing prefix. For each listed blob, the second
-    path component of its name is taken as the dataset name.
-
-    Returns:
-        A tuple ``(datasets, images, ds_w_prompts)``: the dataset names as a
-        list, a dict mapping each dataset name to the sub-paths of its blobs
-        that live in a sub-directory, and the names of the datasets that
-        carry a top-level ``metadata.jsonl``.
-    """
-    storage_client = storage.Client()
-    url_parts = gs_url.split("/")
-    bucket_name = url_parts[2]
-    source_blob_name = "/".join(url_parts[3:])
-
-    dataset_names = set()
-    images = {}
-    ds_w_prompts = []
-
-    for blob in storage_client.list_blobs(
-        bucket_name, prefix=source_blob_name
-    ):
-        name_parts = blob.name.split("/")
-        dataset_name = name_parts[1]
-        if dataset_name == "":
-            continue
-        dataset_names.add(dataset_name)
-        dataset_images = images.setdefault(dataset_name, [])
-
-        # check if image or jsonl
-        file_sub_path = "/".join(name_parts[2:])
-        if "/" in file_sub_path:
-            dataset_images.append(file_sub_path)
-        elif "metadata.jsonl" in file_sub_path:
-            ds_w_prompts.append(dataset_name)
-
-    return list(dataset_names), images, ds_w_prompts
--- a/generate_sharktank.py
+++ b/generate_sharktank.py
@@ -1,281 +0,0 @@
-# Lint as: python3
-"""SHARK Tank"""
-# python generate_sharktank.py, you have to give a csv tile with [model_name, model_download_url]
-# will generate local shark tank folder like this:
-#   /SHARK
-#     /gen_shark_tank
-#       /albert_lite_base
-#       /...model_name...
-#
-
-import os
-import csv
-import argparse
-from shark.shark_importer import SharkImporter
-import subprocess as sp
-import hashlib
-import numpy as np
-from pathlib import Path
-from apps.stable_diffusion.src.models import (
-    model_wrappers as mw,
-)
-from apps.stable_diffusion.src.utils.stable_args import (
-    args,
-)
-
-
-def create_hash(file_name):
-    """Return the BLAKE2b hex digest of the file at ``file_name``."""
-    with open(file_name, "rb") as stream:
-        digest = hashlib.blake2b()
-        # Feed the file in 1 MiB pieces so it is never held in memory whole.
-        for block in iter(lambda: stream.read(2**20), b""):
-            digest.update(block)
-
-    return digest.hexdigest()
-
-
-def save_torch_model(torch_model_list):
-    """Import the torch models listed in a CSV file into the local tank.
-
-    The first CSV row is skipped as a header. Every following row supplies
-    the model name, a tracing flag, the model type and a dynamic flag (both
-    flags count as true unless the cell is exactly "False").
-    "stable_diffusion" rows are handled by ``SharkifyStableDiffusionModel``;
-    all other rows are imported with ``SharkImporter`` into
-    ``<WORKDIR>/<model name>_torch`` and a hash of the generated MLIR file is
-    saved next to it.
-
-    Args:
-        torch_model_list: Path of the CSV file to read.
-    """
-    from tank.model_utils import (
-        get_hf_model,
-        get_vision_model,
-        get_hf_img_cls_model,
-        get_fp16_model,
-    )
-
-    with open(torch_model_list) as csvfile:
-        torch_reader = csv.reader(csvfile, delimiter=",")
-        # Skip the header row; its contents are not used.
-        next(torch_reader)
-        for row in torch_reader:
-            torch_model_name = row[0]
-            tracing_required = row[1] != "False"
-            model_type = row[2]
-            is_dynamic = row[3] != "False"
-
-            model = None
-            input = None
-            if model_type == "stable_diffusion":
-                args.use_tuned = False
-                args.import_mlir = True
-                args.local_tank_cache = WORKDIR
-
-                precision_values = ["fp16"]
-                seq_lengths = [64, 77]
-                for precision_value in precision_values:
-                    args.precision = precision_value
-                    for length in seq_lengths:
-                        model = mw.SharkifyStableDiffusionModel(
-                            model_id=torch_model_name,
-                            custom_weights="",
-                            precision=precision_value,
-                            max_len=length,
-                            width=512,
-                            height=512,
-                            use_base_vae=False,
-                            debug=True,
-                            sharktank_dir=WORKDIR,
-                            generate_vmfb=False,
-                        )
-                        model()
-                # Stable diffusion models skip the generic import below.
-                continue
-            if model_type == "vision":
-                model, input, _ = get_vision_model(torch_model_name)
-            elif model_type == "hf":
-                model, input, _ = get_hf_model(torch_model_name)
-            elif model_type == "hf_img_cls":
-                model, input, _ = get_hf_img_cls_model(torch_model_name)
-            elif model_type == "fp16":
-                model, input, _ = get_fp16_model(torch_model_name)
-            # "/" in a model id would otherwise create nested directories.
-            torch_model_name = torch_model_name.replace("/", "_")
-            torch_model_dir = os.path.join(
-                WORKDIR, str(torch_model_name) + "_torch"
-            )
-            os.makedirs(torch_model_dir, exist_ok=True)
-
-            mlir_importer = SharkImporter(
-                model,
-                (input,),
-                frontend="torch",
-            )
-            mlir_importer.import_debug(
-                is_dynamic=False,
-                tracing_required=tracing_required,
-                dir=torch_model_dir,
-                model_name=torch_model_name,
-            )
-            mlir_hash = create_hash(
-                os.path.join(
-                    torch_model_dir, torch_model_name + "_torch" + ".mlir"
-                )
-            )
-            np.save(os.path.join(torch_model_dir, "hash"), np.array(mlir_hash))
-            # Generate torch dynamic models.
-            if is_dynamic:
-                mlir_importer.import_debug(
-                    is_dynamic=True,
-                    tracing_required=tracing_required,
-                    dir=torch_model_dir,
-                    model_name=torch_model_name + "_dynamic",
-                )
-
-
-def save_tf_model(tf_model_list):
-    """Import the TensorFlow models listed in a CSV file into the local tank.
-
-    GPUs are hidden from TensorFlow on a best-effort basis before any model
-    is built. The first CSV row is skipped as a header; every following row
-    supplies the model name and the model type. Each model is imported with
-    ``SharkImporter`` into ``<WORKDIR>/<model name>_tf`` and a hash of the
-    generated MLIR file is saved next to it.
-
-    Args:
-        tf_model_list: Path of the CSV file to read.
-    """
-    from tank.model_utils_tf import (
-        get_causal_image_model,
-        get_causal_lm_model,
-        get_keras_model,
-        get_TFhf_model,
-    )
-    import tensorflow as tf
-
-    # NOTE(review): the result is never read; the call is kept in case
-    # listing the devices matters to TensorFlow -- confirm before removing.
-    visible_default = tf.config.list_physical_devices("GPU")
-    try:
-        tf.config.set_visible_devices([], "GPU")
-        visible_devices = tf.config.get_visible_devices()
-        for device in visible_devices:
-            assert device.device_type != "GPU"
-    except Exception:
-        # Invalid device or cannot modify virtual devices once initialized.
-        # `except Exception` keeps this best-effort while letting
-        # KeyboardInterrupt and SystemExit propagate.
-        pass
-
-    with open(tf_model_list) as csvfile:
-        tf_reader = csv.reader(csvfile, delimiter=",")
-        # Skip the header row; its contents are not used.
-        next(tf_reader)
-        for row in tf_reader:
-            tf_model_name = row[0]
-            model_type = row[1]
-
-            model = None
-            input = None
-            print(f"Generating artifacts for model {tf_model_name}")
-            # The model types are mutually exclusive, so only one loader runs.
-            if model_type == "hf":
-                model, input, _ = get_causal_lm_model(tf_model_name)
-            elif model_type == "img":
-                model, input, _ = get_causal_image_model(tf_model_name)
-            elif model_type == "keras":
-                model, input, _ = get_keras_model(tf_model_name)
-            elif model_type == "TFhf":
-                model, input, _ = get_TFhf_model(tf_model_name)
-
-            # "/" in a model id would otherwise create nested directories.
-            tf_model_name = tf_model_name.replace("/", "_")
-            tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
-            os.makedirs(tf_model_dir, exist_ok=True)
-            mlir_importer = SharkImporter(
-                model,
-                inputs=input,
-                frontend="tf",
-            )
-            mlir_importer.import_debug(
-                is_dynamic=False,
-                dir=tf_model_dir,
-                model_name=tf_model_name,
-            )
-            mlir_hash = create_hash(
-                os.path.join(tf_model_dir, tf_model_name + "_tf" + ".mlir")
-            )
-            np.save(os.path.join(tf_model_dir, "hash"), np.array(mlir_hash))
-
-
-def save_tflite_model(tflite_model_list):
-    """Import the TFLite models listed in a CSV file into the local tank.
-
-    Every CSV row supplies a model name and a model link (no row is skipped).
-    Each model is preprocessed with ``TFLitePreprocessor``, imported with
-    ``SharkImporter`` into ``<WORKDIR>/<model name>_tflite`` and a hash of
-    the generated MLIR file is saved next to it.
-
-    Args:
-        tflite_model_list: Path of the CSV file to read.
-    """
-    from shark.tflite_utils import TFLitePreprocessor
-
-    with open(tflite_model_list) as csvfile:
-        for row in csv.reader(csvfile, delimiter=","):
-            print("\n")
-            model_name = row[0]
-            model_link = row[1]
-            print("tflite_model_name", model_name)
-            print("tflite_model_link", model_link)
-            model_dir = os.path.join(WORKDIR, str(model_name) + "_tflite")
-            os.makedirs(model_dir, exist_ok=True)
-            print(f"TMP_TFLITE_MODELNAME_DIR = {model_dir}")
-
-            # Preprocess to get SharkImporter input args
-            preprocessor = TFLitePreprocessor(str(model_name))
-            raw_model_file_path = preprocessor.get_raw_model_file()
-            inputs = preprocessor.get_inputs()
-            interpreter = preprocessor.get_interpreter()
-
-            # Use SharkImporter to get SharkInference input args
-            importer = SharkImporter(
-                module=interpreter,
-                inputs=inputs,
-                frontend="tflite",
-                raw_model_file=raw_model_file_path,
-            )
-            importer.import_debug(
-                dir=model_dir,
-                model_name=model_name,
-                func_name="main",
-            )
-
-            # Store a hash of the generated MLIR file next to it.
-            mlir_path = os.path.join(
-                model_dir, model_name + "_tflite" + ".mlir"
-            )
-            mlir_hash = create_hash(mlir_path)
-            np.save(os.path.join(model_dir, "hash"), np.array(mlir_hash))
-
-
-# Validates whether the file is present or not.
-def is_valid_file(arg):
-    """Return ``arg`` when that path exists, otherwise None."""
-    return arg if os.path.exists(arg) else None
-
-
-if __name__ == "__main__":
-    # Note, all of these flags are overridden by the import of args from stable_args.py, flags are duplicated temporarily to preserve functionality
-    # parser = argparse.ArgumentParser()
-    # parser.add_argument(
-    #    "--torch_model_csv",
-    #    type=lambda x: is_valid_file(x),
-    #    default="./tank/torch_model_list.csv",
-    #    help="""Contains the file with torch_model name and args.
-    #         Please see: https://github.com/nod-ai/SHARK/blob/main/tank/torch_model_list.csv""",
-    # )
-    # parser.add_argument(
-    #    "--tf_model_csv",
-    #    type=lambda x: is_valid_file(x),
-    #    default="./tank/tf_model_list.csv",
-    #    help="Contains the file with tf model name and args.",
-    # )
-    # parser.add_argument(
-    #    "--tflite_model_csv",
-    #    type=lambda x: is_valid_file(x),
-    #    default="./tank/tflite/tflite_model_list.csv",
-    #    help="Contains the file with tf model name and args.",
-    # )
-    # parser.add_argument(
-    #    "--ci_tank_dir",
-    #    type=bool,
-    #    default=False,
-    # )
-    # parser.add_argument("--upload", type=bool, default=False)
-
-    # old_args = parser.parse_args()
-
-    home = str(Path.home())
-    # Every input and output location is resolved relative to this script.
-    script_dir = os.path.dirname(__file__)
-    tank_dir = os.path.join(script_dir, "tank")
-    WORKDIR = os.path.join(script_dir, "gen_shark_tank")
-    torch_model_csv = os.path.join(tank_dir, "torch_model_list.csv")
-    tf_model_csv = os.path.join(tank_dir, "tf_model_list.csv")
-    tflite_model_csv = os.path.join(
-        tank_dir, "tflite", "tflite_model_list.csv"
-    )
-
-    # Generate the artifacts for each frontend in turn.
-    save_torch_model(torch_model_csv)
-    save_tf_model(tf_model_csv)
-    save_tflite_model(tflite_model_csv)
--- a/inference/CMakeLists.txt
+++ b/inference/CMakeLists.txt
@@ -1,192 +0,0 @@
-# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-cmake_minimum_required(VERSION 3.17)
-
-project(sharkbackend LANGUAGES C CXX)
-
-#
-# Options
-#
-
-option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
-option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
-
-set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
-set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
-set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
-
-if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE Release)
-endif()
-
-#
-# Dependencies
-#
-# FetchContent requires us to include the transitive closure of all
-# repos that we depend on so that we can override the tags.
-#
-include(FetchContent)
-
-FetchContent_Declare(
-  repo-common
-  GIT_REPOSITORY https://github.com/triton-inference-server/common.git
-  GIT_TAG ${TRITON_COMMON_REPO_TAG}
-  GIT_SHALLOW ON
-)
-FetchContent_Declare(
-  repo-core
-  GIT_REPOSITORY https://github.com/triton-inference-server/core.git
-  GIT_TAG ${TRITON_CORE_REPO_TAG}
-  GIT_SHALLOW ON
-)
-FetchContent_Declare(
-  repo-backend
-  GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
-  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
-  GIT_SHALLOW ON
-)
-FetchContent_MakeAvailable(repo-common repo-core repo-backend)
-
-#
-# The backend must be built into a shared library. Use an ldscript to
-# hide all symbols except for the TRITONBACKEND API.
-#
-configure_file(src/libtriton_dshark.ldscript libtriton_dshark.ldscript COPYONLY)
-
-add_library(
-  triton-dshark-backend SHARED
-  src/dshark.cc
-  #src/dshark_driver_module.c
-)
-
-add_library(
-  SharkBackend::triton-dshark-backend ALIAS triton-dshark-backend
-)
-
-target_include_directories(
-  triton-dshark-backend
-  PRIVATE
-    ${CMAKE_CURRENT_SOURCE_DIR}/src
-)
-
-list(APPEND CMAKE_MODULE_PATH "${PROJECT_BINARY_DIR}/lib/cmake/mlir")
-
-add_subdirectory(thirdparty/shark-runtime EXCLUDE_FROM_ALL)
-
-target_link_libraries(triton-dshark-backend PRIVATE iree_base_base
-  iree_hal_hal
-  iree_hal_cuda_cuda
-  iree_hal_cuda_registration_registration
-  iree_hal_vmvx_registration_registration
-  iree_hal_dylib_registration_registration
-  iree_modules_hal_hal
-  iree_vm_vm
-  iree_vm_bytecode_module
-  iree_hal_local_loaders_system_library_loader
-  iree_hal_local_loaders_vmvx_module_loader
-  )
-
-target_compile_features(triton-dshark-backend PRIVATE cxx_std_11)
-
-
-target_link_libraries(
-  triton-dshark-backend
-  PRIVATE
-    triton-core-serverapi   # from repo-core
-    triton-core-backendapi  # from repo-core
-    triton-core-serverstub  # from repo-core
-    triton-backend-utils    # from repo-backend
-)
-
-if(WIN32)
-  set_target_properties(
-    triton-dshark-backend PROPERTIES
-    POSITION_INDEPENDENT_CODE ON
-    OUTPUT_NAME triton_dshark
-  )
-else()
-  set_target_properties(
-    triton-dshark-backend PROPERTIES
-    POSITION_INDEPENDENT_CODE ON
-    OUTPUT_NAME triton_dshark
-    LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dshark.ldscript
-    LINK_FLAGS "-Wl,--version-script libtriton_dshark.ldscript"
-  )
-endif()
-
-
-
-#
-# Install
-#
-include(GNUInstallDirs)
-set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/SharkBackend)
-
-install(
-  TARGETS
-    triton-dshark-backend
-  EXPORT
-    triton-dshark-backend-targets
-  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
-  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
-)
-
-install(
-  EXPORT
-    triton-dshark-backend-targets
-  FILE
-    SharkBackendTargets.cmake
-  NAMESPACE
-    SharkBackend::
-  DESTINATION
-    ${INSTALL_CONFIGDIR}
-)
-
-include(CMakePackageConfigHelpers)
-configure_package_config_file(
-  ${CMAKE_CURRENT_LIST_DIR}/cmake/SharkBackendConfig.cmake.in
-  ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
-  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
-)
-
-install(
-  FILES
-  ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
-  DESTINATION ${INSTALL_CONFIGDIR}
-)
-
-#
-# Export from build tree
-#
-export(
-  EXPORT triton-dshark-backend-targets
-  FILE ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendTargets.cmake
-  NAMESPACE SharkBackend::
-)
-
-export(PACKAGE SharkBackend)
-
--- a/inference/README.md
+++ b/inference/README.md
@@ -1,100 +0,0 @@
-# SHARK Triton Backend
-
-The triton backend for shark.
-
-# Build
-
-Install SHARK
-
-```
-git clone https://github.com/nod-ai/SHARK.git
-# skip above step if dshark is already installed
-cd SHARK/inference
-```
-
-Install dependencies
-
-```
-apt-get install patchelf rapidjson-dev python3-dev
-git submodule update --init
-```
-
-update the submodules of iree
-
-```
-cd thirdparty/shark-runtime
-git submodule update --init
-```
-
-Next, make the backend and install it
-
-```
-cd ../..
-mkdir build && cd build
-cmake -DTRITON_ENABLE_GPU=ON \
-DIREE_HAL_DRIVER_CUDA=ON \
-DIREE_TARGET_BACKEND_CUDA=ON \
-DMLIR_ENABLE_CUDA_RUNNER=ON \
-DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
-DTRITON_BACKEND_REPO_TAG=r22.02 \
-DTRITON_CORE_REPO_TAG=r22.02 \
-DTRITON_COMMON_REPO_TAG=r22.02 ..
-make install
-```
-
-# Incorporating into Triton
-
-There are much more in-depth explanations for the following steps in Triton's documentation:
-https://github.com/triton-inference-server/server/blob/main/docs/compose.md#triton-with-unsupported-and-custom-backends
-
-There should be a file at /build/install/backends/dshark/libtriton_dshark.so.  You will need to copy it into your triton server image.  
-More documentation is in the link above, but to create the docker image, you need to run the compose.py command in the triton-backend server repo
-
-
-To first build your image, clone the tritonserver repo.
-
-```
-git clone https://github.com/triton-inference-server/server.git
-```
-
-then run `compose.py` to build a docker compose file 
-```
-cd server
-python3 compose.py --repoagent checksum --dry-run
-```
-
-Because dshark is a third-party backend, you will need to manually modify the `Dockerfile.compose` to include the dshark backend.  To do this, add the line below to the generated `Dockerfile.compose` file.
-The dshark backend is located in the build folder from earlier, under `/build/install/backends`.
-
-```
-COPY /path/to/build/install/backends/dshark /opt/tritonserver/backends/dshark
-```
-
-Next run 
-```
-docker build -t tritonserver_custom -f Dockerfile.compose .
-docker run -it --gpus=1 --net=host -v/path/to/model_repos:/models  tritonserver_custom:latest tritonserver --model-repository=/models
-```
-
-where `path/to/model_repos` is where you are storing the models you want to run
-
-If you're not using GPUs, omit `--gpus=1`
-
-```
-docker run -it  --net=host -v/path/to/model_repos:/models  tritonserver_custom:latest tritonserver --model-repository=/models
-```
-
-# Setting up a model
-
-to include a model in your backend, add a directory with your model name to your model repository directory.  examples of models can be seen here: https://github.com/triton-inference-server/backend/tree/main/examples/model_repos/minimal_models
-
-make sure to adjust the input correctly in the config.pbtxt file, and save a vmfb file under 1/model.vmfb
-
-# CUDA
-
-if you're having issues with cuda, make sure your correct drivers are installed, and that `nvidia-smi` works, and also make sure that the nvcc compiler is on the path.
-
-
-
-
-
--- a/inference/cmake/SharkBackendConfig.cmake.in
+++ b/inference/cmake/SharkBackendConfig.cmake.in
@@ -1,39 +0,0 @@
-# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-include(CMakeFindDependencyMacro)
-
-get_filename_component(
-  SHARKBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
-)
-
-list(APPEND CMAKE_MODULE_PATH ${SHARKBACKEND_CMAKE_DIR})
-
-if(NOT TARGET SharkBackend::triton-dshark-backend)
-  include("${SHARKBACKEND_CMAKE_DIR}/SharkBackendTargets.cmake")
-endif()
-
-set(SHARKBACKEND_LIBRARIES SharkBackend::triton-dshark-backend)
--- a/inference/src/dshark.cc
+++ b/inference/src/dshark.cc
--- a/inference/src/libtriton_dshark.ldscript
+++ b/inference/src/libtriton_dshark.ldscript
@@ -1,30 +0,0 @@
-# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-{
-  global:
-    TRITONBACKEND_*;
-  local: *;
-};
--- a/inference/thirdparty/shark-runtime
+++ b/inference/thirdparty/shark-runtime
--- a/package-index/index.html
+++ b/package-index/index.html
@@ -0,0 +1,45 @@
+<!DOCTYPE html>
+<html>
+  <body>
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_20230130_481.exe'>shark_sd_20230130_481.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_cli_20230130_481.exe'>shark_sd_cli_20230130_481.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_20230129_479.exe'>shark_sd_20230129_479.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_cli_20230129_479.exe'>shark_sd_cli_20230129_479.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_20230129_480.exe'>shark_sd_20230129_480.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_cli_20230129_480.exe'>shark_sd_cli_20230129_480.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_20230129_478.exe'>shark_sd_20230129_478.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_cli_20230129_478.exe'>shark_sd_cli_20230129_478.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_20230128_477.exe'>shark_sd_20230128_477.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_cli_20230128_477.exe'>shark_sd_cli_20230128_477.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_20230127_476.exe'>shark_sd_20230127_476.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_cli_20230127_476.exe'>shark_sd_cli_20230127_476.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_20230126_475.exe'>shark_sd_20230126_475.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_cli_20230126_475.exe'>shark_sd_cli_20230126_475.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_20230125_474.exe'>shark_sd_20230125_474.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_cli_20230125_474.exe'>shark_sd_cli_20230125_474.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_20230125_473.exe'>shark_sd_20230125_473.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_cli_20230125_473.exe'>shark_sd_cli_20230125_473.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.472/shark_sd_20230125_472.exe'>shark_sd_20230125_472.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.471/shark_sd_20230125_471.exe'>shark_sd_20230125_471.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230125.468/shark_sd_20230125_468.exe'>shark_sd_20230125_468.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_20230124_470.exe'>shark_sd_20230124_470.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_cli_20230124_470.exe'>shark_sd_cli_20230124_470.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.469/shark_sd_20230124_469.exe'>shark_sd_20230124_469.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.467/shark_sd_20230124_467.exe'>shark_sd_20230124_467.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.466/shark_sd_20230124_466.exe'>shark_sd_20230124_466.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230124.462/shark_sd_20230124_462.exe'>shark_sd_20230124_462.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230123.461/shark_sd_20230123_461.exe'>shark_sd_20230123_461.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230123.460/shark_sd_20230123_460.exe'>shark_sd_20230123_460.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230122.459/shark_sd_20230122_459.exe'>shark_sd_20230122_459.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230122.458/shark_sd_20230122_458.exe'>shark_sd_20230122_458.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230122.457/shark_sd_20230122_457.exe'>shark_sd_20230122_457.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230121.456/shark_sd_20230121_456.exe'>shark_sd_20230121_456.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230120.455/shark_sd_20230120_455.exe'>shark_sd_20230120_455.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230119.454/shark_sd_20230119_454.exe'>shark_sd_20230119_454.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230118.453/shark_sd_20230118_453.exe'>shark_sd_20230118_453.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230117.452/shark_sd_20230117_452.exe'>shark_sd_20230117_452.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230116.451/shark_sd_20230116_451.exe'>shark_sd_20230116_451.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230115.450/shark_sd_20230115_450.exe'>shark_sd_20230115_450.exe</a><br />
+    <a href='https://github.com/nod-ai/SHARK/releases/download/20230114.449/shark_sd_20230114_449.exe'>shark_sd_20230114_449.exe</a><br />
+  </body>
+</html>
--- a/process_skipfiles.py
+++ b/process_skipfiles.py
@@ -1,34 +0,0 @@
-# Toggles the commented/uncommented state of a few module entries that
-# follow the `SKIP_DIRS = ` line in `torch/_dynamo/skipfiles.py` (within
-# shark.venv), to deal with the __file__ AttributeError those modules cause.
-# Running it a second time flips the same lines back.
-
-import fileinput
-import sysconfig
-from pathlib import Path
-
-# sysconfig's "purelib" directory stands in for distutils' get_python_lib():
-# distutils is deprecated (PEP 632) and no longer ships with Python 3.12+.
-site_packages = Path(sysconfig.get_paths()["purelib"])
-path_to_skipfiles = site_packages / "torch" / "_dynamo" / "skipfiles.py"
-
-# Substrings (note the trailing commas) that mark a line to be toggled.
-modules_to_comment = ["abc,", "os,", "posixpath,", "_collections_abc,"]
-
-# State: 0 until the `SKIP_DIRS = ` line is seen; 1 or 2 while toggling is
-# active (each line containing "]" advances it); 3 afterwards.
-startMonitoring = 0
-
-# With inplace=True stdout is redirected into the file being read, so every
-# print() below writes the (possibly modified) line back in place.
-for line in fileinput.input(path_to_skipfiles, inplace=True):
-    if "SKIP_DIRS = " in line:
-        startMonitoring = 1
-    elif startMonitoring in [1, 2]:
-        if "]" in line:
-            startMonitoring += 1
-        elif any(module in line for module in modules_to_comment):
-            # Toggle: drop a leading "#" if there is one, otherwise add it.
-            line = line[1:] if line.startswith("#") else f"#{line}"
-    # Every line is emitted exactly once, changed or not.
-    print(line, end="")
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,12 +0,0 @@
-[build-system]
-requires = [
-    "setuptools>=42",
-    "wheel",
-    "packaging",
-
-    "numpy>=1.22.4",
-    "torch-mlir>=20221021.633",
-    "iree-compiler>=20221022.190",
-    "iree-runtime>=20221022.190",
-]
-build-backend = "setuptools.build_meta"
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +0,0 @@
-[pytest]
-addopts = --verbose -p no:warnings
-norecursedirs = inference tank/tflite examples benchmarks shark 
--- a/requirements-importer-macos.txt
+++ b/requirements-importer-macos.txt
@@ -1,45 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/
--pre
-
-numpy
-torch
-torchvision
-
-tqdm
-
-#iree-compiler  | iree-runtime should already be installed
-#these don't work on osx
-#iree-tools-tflite
-#iree-tools-xla
-#iree-tools-tf
-
-# TensorFlow and JAX.
-gin-config
-tensorflow-macos
-tensorflow-metal
-#tf-models-nightly
-#tensorflow-text-nightly
-transformers
-tensorflow-probability
-#jax[cpu]
-
-# tflitehub dependencies.
-Pillow
-
-# web dependencies.
-gradio
-altair
-
-# Testing and support.
-#lit
-#pyyaml
-
-#ONNX and ORT for benchmarking
-#--extra-index-url https://test.pypi.org/simple/
-#protobuf
-#coloredlogs
-#flatbuffers
-#sympy
-#psutil
-#onnx-weekly
-#ort-nightly
--- a/requirements-importer.txt
+++ b/requirements-importer.txt
@@ -1,50 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
-
-numpy>1.22.4
-torchvision
-pytorch-triton
-tabulate
-
-tqdm
-
-#iree-compiler  | iree-runtime should already be installed
-iree-tools-tflite
-iree-tools-xla
-iree-tools-tf
-
-# TensorFlow and JAX.
-gin-config
-tensorflow>=2.10.1
-keras>=2.10
-#tf-models-nightly
-#tensorflow-text-nightly
-transformers
-diffusers
-#tensorflow-probability
-#jax[cpu]
-
-
-# tflitehub dependencies.
-Pillow
-
-# Testing and support.
-lit
-pyyaml
-python-dateutil
-sacremoses
-
-# web dependencies.
-gradio
-altair
-scipy
-
-#ONNX and ORT for benchmarking
-#--extra-index-url https://test.pypi.org/simple/
-#protobuf
-#coloredlogs
-#flatbuffers
-#sympy
-#psutil
-#onnx-weekly
-#ort-nightly
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,29 +0,0 @@
-setuptools
-wheel
-
-# SHARK Runner
-tqdm
-
-# SHARK Downloader
-google-cloud-storage
-
-# Testing
-pytest
-pytest-xdist
-pytest-forked
-Pillow
-parameterized
-
-# Add transformers, diffusers and scipy since they are most commonly used
-transformers
-diffusers @ git+https://github.com/huggingface/diffusers@4c52982a0be7dd850fb9eac55b11509846e4bbe6
-scipy
-ftfy
-gradio
-altair
-omegaconf
-safetensors
-
-# Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
-pefile
-pyinstaller
--- a/setup.py
+++ b/setup.py
@@ -1,45 +0,0 @@
-# Packaging script for the nodai-SHARK distribution (setuptools).
-import glob
-import os
-
-from setuptools import find_packages, setup
-
-# README.md is used verbatim as the long description.
-with open("README.md", "r", encoding="utf-8") as fh:
-    long_description = fh.read()
-
-# SHARK_PACKAGE_VERSION overrides the default version when set and non-empty.
-PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.5"
-# NOTE(review): the IREE pins are added only when NO_BACKEND *is* set; looks inverted, confirm.
-backend_deps = []
-if "NO_BACKEND" in os.environ:
-    backend_deps = [
-        "iree-compiler>=20221022.190",
-        "iree-runtime>=20221022.190",
-    ]
-
-setup(
-    name="nodai-SHARK",
-    version=PACKAGE_VERSION,
-    description="SHARK provides a High Performance Machine Learning Framework",
-    author="nod.ai",
-    author_email="stdin@nod.ai",
-    url="https://nod.ai",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    project_urls={
-        "Code": "https://github.com/nod-ai/SHARK",
-        "Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
-    },
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    # exclude takes an iterable of patterns; ("examples") is a plain string, hence the comma.
-    packages=find_packages(exclude=("examples",)),
-    python_requires=">=3.9",
-    data_files=glob.glob("apps/stable_diffusion/resources/**"),
-    install_requires=["numpy", "PyYAML", "torch-mlir>=20221021.633"]
-    + backend_deps,
-)
--- a/setup_venv.ps1
+++ b/setup_venv.ps1
@@ -1,94 +0,0 @@
-<#
-.SYNOPSIS
-  A script to update and install the SHARK runtime and its dependencies.
-
-.DESCRIPTION
-  This script updates and installs the SHARK runtime and its dependencies.
-  It checks the Python version installed and installs any required build
-  dependencies into a Python virtual environment.
-  If that environment does not exist, it creates it.
-  
-.PARAMETER update-src
-  git pulls latest version
-
-.PARAMETER force
-  removes and recreates venv to force update of all dependencies
-  
-.EXAMPLE
-  .\setup_venv.ps1 --force
-
-.EXAMPLE
-  .\setup_venv.ps1 --update-src
-
-.INPUTS
-  None
-
-.OUTPUTS
-  None
-
-#>
-
-param([string]$arguments)
-
-if ($arguments -eq "--update-src"){
-	git pull
-}
-
-if ($arguments -eq "--force"){  # --force: remove the existing venv so it is recreated from scratch
-    if (Test-Path env:VIRTUAL_ENV) {
-        Write-Host "deactivating..."
-        Deactivate
-    }
-    # Delete the old venv and report it if the directory is still present afterwards.
-    if (Test-Path .\shark.venv\) {
-        Write-Host "removing and recreating venv..."
-        Remove-Item .\shark.venv -Force -Recurse
-        if (Test-Path .\shark.venv\) {
-            Write-Host 'could not remove .\shark.venv - please try running ".\setup_venv.ps1 --force" again!'
-            break
-        }
-    }
-}
-
-# redirect stderr into stdout
-$p = &{python -V} 2>&1
-# check if an ErrorRecord was returned
-$version = if($p -is [System.Management.Automation.ErrorRecord])
-{
-    # grab the version string from the error message
-    $p.Exception.Message
-}
-else
-{
-    # otherwise return complete Python list
-    $PyVer = py --list
-}
-
-# deactivate any activated venvs
-if ($PyVer -like "*venv*")
-{
-  deactivate # make sure we don't update the wrong venv
-  $PyVer = py --list # update list
-}
-
-Write-Host "Python versions found are"
-Write-Host ($PyVer | Out-String) # formatted output with line breaks
-if (!($PyVer -like "*3.11*")) # if 3.11 is not in list
-{
-    Write-Host "Please install Python 3.11 and try again"
-    break
-}
-
-Write-Host "Installing Build Dependencies"
-# make sure we really use 3.11 from list, even if it's not the default.
-py -3.11 -m venv .\shark.venv\
-.\shark.venv\Scripts\activate
-python -m pip install --upgrade pip
-pip install wheel
-pip install -r requirements.txt
-pip install --pre torch-mlir torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/
-pip install --upgrade -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html iree-compiler iree-runtime
-Write-Host "Building SHARK..."
-pip install -e . -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html
-Write-Host "Build and installation completed successfully"
-Write-Host "Source your venv with ./shark.venv/Scripts/activate"
--- a/setup_venv.sh
+++ b/setup_venv.sh
@@ -1,154 +0,0 @@
-#!/bin/bash
-# Sets up a venv suitable for running samples.
-# e.g:
-# ./setup_venv.sh  #setup a default $PYTHON3 shark.venv
-# Environment variables read by the script:
-# PYTHON=$PYTHON3.10 ./setup_venv.sh  #pass a version of $PYTHON to use
-# VENV_DIR=myshark.venv #create a venv called myshark.venv
-# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
-# IMPORTER=1 #Install importer deps
-# BENCHMARK=1 #Install benchmark deps
-# NO_BACKEND=1 #Don't install iree or shark backend
-# if you run the script from a conda env it will install in your conda env
-
-TD="$(cd $(dirname $0) && pwd)"
-if [ -z "$PYTHON" ]; then
-  PYTHON="$(which python3)"
-fi
-
-# Print an error message built from all arguments, then exit with status 1.
-die() {
-  printf 'Error executing command: %s\n' "$*"; exit 1
-}
-
-PYTHON_VERSION_X_Y=`${PYTHON} -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))'`
-
-echo "Python: $PYTHON"
-echo "Python version: $PYTHON_VERSION_X_Y"
-
-if [[ -z "${CONDA_PREFIX}" ]]; then
-  # Not a conda env. So create a new VENV dir
-  VENV_DIR=${VENV_DIR:-shark.venv}
-  echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
-  $PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
-  source "$VENV_DIR/bin/activate" || die "Could not activate venv"
-  PYTHON="$(which python3)"
-else
-  echo "Found conda env $CONDA_DEFAULT_ENV. Running pip install inside the conda env"
-fi
-
-Red=`tput setaf 1`
-Green=`tput setaf 2`
-Yellow=`tput setaf 3`
-
-# Assume no binary torch-mlir.
-# Currently available for macOS m1&intel (3.11) and Linux(3.8,3.10,3.11)
-torch_mlir_bin=false
-if [[ $(uname -s) = 'Darwin' ]]; then
-  echo "${Yellow}Apple macOS detected"
-  if [[ $(uname -m) == 'arm64' ]]; then
-    echo "${Yellow}Apple M1 Detected"
-    hash rustc 2>/dev/null
-    if [ $? -eq 0 ];then
-      echo "${Green}rustc found to compile HF tokenizers"
-    else
-      echo "${Red}Could not find rustc" >&2
-      echo "${Red}Please run:"
-      echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
-      exit 1
-    fi
-  fi
-  echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
-  echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
-  if [ "$PYTHON_VERSION_X_Y" == "3.11" ]; then
-    torch_mlir_bin=true
-  fi
-elif [[ $(uname -s) = 'Linux' ]]; then
-  echo "${Yellow}Linux detected"
-  if [ "$PYTHON_VERSION_X_Y" == "3.8" ]  || [ "$PYTHON_VERSION_X_Y" == "3.10" ] || [ "$PYTHON_VERSION_X_Y" == "3.11" ] ; then
-    torch_mlir_bin=true
-  fi
-else
-  echo "${Red}OS not detected. Pray and Play"
-fi
-
-# Upgrade pip and install requirements.
-$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
-$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
-# Install the prebuilt torch-mlir wheel, or exit if none exists for this Python/OS.
-if [ "$torch_mlir_bin" = true ]; then
-  if [[ $(uname -s) = 'Darwin' ]]; then
-    echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
-    $PYTHON -m pip install --pre --no-cache-dir  torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
-  elif $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/; then
-    echo "Successfully Installed torch-mlir"
-  else
-    echo "Could not install torch-mlir" >&2
-  fi
-else
-  echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
-  echo "${Yellow}Python 3.11 supported on macOS and 3.8,3.10 and 3.11 on Linux"
-  echo "${Red}Please build torch-mlir from source in your environment"
-  exit 1
-fi
-# Pick the pip find-links page: Nod.ai's SHARK-Runtime by default, stock IREE when USE_IREE is set.
-if [[ -z "${USE_IREE}" ]]; then
-  # -f: the marker file may not exist, and that is not an error.
-  rm -f .use-iree
-  RUNTIME="https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html"
-else
-  touch ./.use-iree
-  RUNTIME="https://iree-org.github.io/iree/pip-release-links.html"
-fi
-if [[ -z "${NO_BACKEND}" ]]; then
-  echo "Installing ${RUNTIME}..."
-  $PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
-else
-  echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
-fi
-
-if [[ ! -z "${IMPORTER}" ]]; then
-  echo "${Yellow}Installing importer tools.."
-  if [[ $(uname -s) = 'Linux' ]]; then
-    echo "${Yellow}Linux detected.. installing Linux importer tools"
-    #Always get the importer tools from upstream IREE
-    $PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer.txt" -f https://iree-org.github.io/iree/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-  elif [[ $(uname -s) = 'Darwin' ]]; then
-    echo "${Yellow}macOS detected.. installing macOS importer tools"
-    #Conda seems to have some problems installing these packages and hope they get resolved upstream.
-    $PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-  fi
-fi
-
-$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f https://download.pytorch.org/whl/nightly/torch/
-
-if [[ $(uname -s) = 'Linux' && ! -z "${BENCHMARK}" ]]; then
-  T_VER=$($PYTHON -m pip show torch | grep Version)
-  TORCH_VERSION=${T_VER:9:17}
-  TV_VER=$($PYTHON -m pip show torchvision | grep Version)
-  TV_VERSION=${TV_VER:9:18}
-  $PYTHON -m pip uninstall -y torch torchvision
-  $PYTHON -m pip install -U --pre --no-warn-conflicts triton
-  $PYTHON -m pip install --no-deps https://download.pytorch.org/whl/nightly/cu117/torch-${TORCH_VERSION}%2Bcu117-cp311-cp311-linux_x86_64.whl https://download.pytorch.org/whl/nightly/cu117/torchvision-${TV_VERSION}%2Bcu117-cp311-cp311-linux_x86_64.whl
-  if [ $? -eq 0 ];then
-    echo "Successfully Installed torch + cu117."
-  else
-    echo "Could not install torch + cu117." >&2
-  fi
-fi
-
-if [[ ! -z "${ONNX}" ]]; then
-  echo "${Yellow}Installing ONNX and onnxruntime for benchmarks..."
-  $PYTHON -m pip install onnx onnxruntime psutil
-  if [ $? -eq 0 ];then
-    echo "Successfully installed ONNX and ONNX runtime."
-  else
-    echo "Could not install ONNX." >&2
-  fi
-fi
-
-if [[ -z "${CONDA_PREFIX}" ]]; then
-  echo "${Green}Before running examples activate venv with:"
-  echo "  ${Green}source $VENV_DIR/bin/activate"
-fi
-
--- a/shark/init.py
+++ b/shark/init.py
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
nod-ai	d9c62e547c	Update releases.	2023-01-31 15:15:57 +00:00
nod-ai	d84a86f6d2	Update releases.	2022-12-07 06:07:38 +00:00
nod-ai	dadd6640fb	Update releases.	2022-11-01 18:31:37 +00:00
nod-ai	23501d34a1	Update releases.	2022-10-13 18:12:28 +00:00
nod-team	9b9eef1d22	Update releases.	2022-10-12 16:53:10 +00:00
Ean Garvey	e4b156f3b4	Add dummy index.html	2022-10-12 16:52:09 +00:00
Ean Garvey	ce26492a10	Remove SHARK source code for gh-pages workflow branch.	2022-10-12 16:37:01 +00:00