Mirror of https://github.com/nod-ai/SHARK-Studio.git (synced 2026-01-11 23:08:19 -05:00)

Compare commits: `minilmLoad...github-pages` (259 commits)
`.github/workflows/gh-pages-releases.yml` (vendored, new file, +37)

```yaml
# See: https://github.com/llvm/torch-mlir/issues/1374
name: Publish releases page

on:
  workflow_dispatch:

jobs:
  scrape_and_publish_releases:
    name: "Scrape and publish releases"
    runs-on: ubuntu-latest

    # Don't run this in everyone's forks.
    if: github.repository == 'nod-ai/SHARK'

    steps:
      - name: Checking out repository
        uses: actions/checkout@v2
        with:
          token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
      - name: Run scrape releases script
        run: python ./build_tools/scrape_releases.py nod-ai SHARK > /tmp/index.html
        shell: bash
      - run: git fetch --all
      - run: git switch github-pages
      - run: git config --global user.email "none@none.com"
      - run: git config --global user.name "nod-team"
      - run: mv /tmp/index.html package-index/index.html
      - run: git add package-index/index.html

      # Only try to make a commit if the file has changed.
      - run: git diff --cached --exit-code || git commit -m "Update releases."

      - name: GitHub Push
        uses: ad-m/github-push-action@v0.6.0
        with:
          github_token: ${{ secrets.NODAI_INVOCATION_TOKEN }}
          branch: github-pages
```
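The workflow pipes the output of `build_tools/scrape_releases.py` into a static `package-index/index.html` that pip can later consume via `-f`. The script itself is not part of this diff, so the following is only a hedged sketch of what such a scraper could look like, assuming it needs nothing beyond the public GitHub releases API and emits one anchor tag per wheel asset:

```python
# Hypothetical sketch of a release scraper in the spirit of
# build_tools/scrape_releases.py (the real script is not shown in this diff).
# Produces a pip "find-links" page: one <a> per wheel asset.
import json
import sys
import urllib.request


def scrape(owner: str, repo: str) -> str:
    url = f"https://api.github.com/repos/{owner}/{repo}/releases"
    with urllib.request.urlopen(url) as resp:
        releases = json.loads(resp.read().decode())
    links = []
    for release in releases:
        for asset in release.get("assets", []):
            name = asset["name"]
            if name.endswith(".whl"):
                links.append(
                    f'<a href="{asset["browser_download_url"]}">{name}</a><br>'
                )
    return "<!DOCTYPE html>\n<html><body>\n" + "\n".join(links) + "\n</body></html>"


if __name__ == "__main__":
    owner, repo = sys.argv[1], sys.argv[2]
    # Mirrors the workflow's usage: scrape_releases.py nod-ai SHARK > index.html
    print(scrape(owner, repo))
```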
`.github/workflows/nightly.yml` (vendored, 62 lines changed)

```diff
@@ -11,11 +11,12 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: a100
     strategy:
       fail-fast: false
       matrix:
         python-version: ["3.10"]
+        backend: [IREE, SHARK]
 
     steps:
     - uses: actions/checkout@v3
@@ -38,6 +39,10 @@ jobs:
         tag_name="${package_version}"
         echo "package_version=${package_version}" >> $GITHUB_ENV
         echo "tag_name=${tag_name}" >> $GITHUB_ENV
+    - name: Set Environment Variables
+      run: |
+        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
+        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
     - name: Create Release
       id: create_release
       uses: actions/create-release@v1
@@ -49,43 +54,76 @@ jobs:
         body: |
           Automatic snapshot release of nod.ai SHARK.
         draft: true
-        prerelease: false
+        prerelease: false
+    - name: Find Torch-MLIR Release
+      run: |
+        TM_HTML_URL="$(python3 -c "import urllib.request, json, sys; u=json.loads(urllib.request.urlopen('https://api.github.com/repos/llvm/torch-mlir/releases/latest').read().decode()).get('html_url', False); print(u) if u else sys.exit(1);")"
+        TM_RELEASE_DIR=${TM_HTML_URL/"tag"/"expanded_assets"}
+        echo "TM_RELEASE_DIR=${TM_RELEASE_DIR}" >> $GITHUB_ENV
     - name: Install dependencies
       run: |
+        echo "Torch-MLIR Release DIR is ${{ env.TM_RELEASE_DIR }}"
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest yapf toml
-        if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
+        python -m pip install flake8 pytest toml
+        if [ -f requirements.txt ]; then pip install -r requirements.txt -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
-        yapf -i --style .style.yapf shark/*.py
 
-    - name: Build and validate the package
+    - name: Build and validate the IREE package
+      if: ${{ matrix.backend == 'IREE' }}
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
-        source shark.venv/bin/activate
+        USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
+        source iree.venv/bin/activate
         package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
         SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/iree-org/iree/releases
         # Install the built wheel
         pip install ./wheelhouse/nodai*
         # Validate the Models
-        pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
+        /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
+        tail -n 1 |
+        tee -a pytest_results.txt
+        if !(grep -Fxq " failed" pytest_results.txt)
+        then
+          export SHA=$(git log -1 --format='%h')
+          gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/$SHA
+          gsutil -m cp -r gs://shark_tank/$SHA/* gs://shark_tank/latest/
+        fi
+        rm -rf ./wheelhouse/nodai*
+
+    - name: Build and validate the SHARK Runtime package
+      if: ${{ matrix.backend == 'SHARK' }}
+      run: |
+        cd $GITHUB_WORKSPACE
+        ./setup_venv.sh
+        source shark.venv/bin/activate
+        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
+        SHARK_PACKAGE_VERSION=${package_version} \
+        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f ${{ env.TM_RELEASE_DIR }} -f https://github.com/nod-ai/SHARK-Runtime/releases
+        # Install the built wheel
+        pip install ./wheelhouse/nodai*
+        # Validate the Models
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" tank/test_models.py |
+        tail -n 1 |
+        tee -a pytest_results.txt
 
     - name: Upload Release Assets
+      if: ${{ matrix.backend == 'SHARK' }}
       id: upload-release-assets
       uses: dwenegar/upload-release-assets@v1
       env:
         GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
       with:
         release_id: ${{ steps.create_release.outputs.id }}
-        assets_path: ./wheelhouse/nodai_*.whl
+        assets_path: ${GITHUB_WORKSPACE}/wheelhouse/nodai_*.whl
 
     - name: Publish Release
+      if: ${{ matrix.backend == 'SHARK' }}
       id: publish_release
       uses: eregon/publish-release@v1
       env:
```
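The new `Find Torch-MLIR Release` step packs its logic into a single `python3 -c` invocation plus a bash substitution. Unrolled into ordinary Python (same API call, same first-occurrence `tag` to `expanded_assets` rewrite; the function name is ours), the step does the equivalent of:

```python
# Readable equivalent of the workflow's "Find Torch-MLIR Release" one-liner.
# Only the API call and the URL rewrite come from the workflow itself;
# the helper name is illustrative.
import json
import sys
import urllib.request


def find_torch_mlir_release_dir() -> str:
    api = "https://api.github.com/repos/llvm/torch-mlir/releases/latest"
    with urllib.request.urlopen(api) as resp:
        release = json.loads(resp.read().decode())
    html_url = release.get("html_url")
    if not html_url:
        sys.exit(1)  # matches the one-liner's failure path
    # .../releases/tag/<version>  ->  .../releases/expanded_assets/<version>
    # (bash ${var/"tag"/"expanded_assets"} also replaces the first match only)
    return html_url.replace("tag", "expanded_assets", 1)


if __name__ == "__main__":
    print(find_torch_mlir_release_dir())  # exported to $GITHUB_ENV as TM_RELEASE_DIR
```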
`.github/workflows/test-models.yml` (vendored, 133 lines changed)

```diff
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: Validate torch-models on Shark Runtime
+name: Validate Models on Shark Runtime
 
 on:
   push:
@@ -11,92 +11,103 @@ on:
   workflow_dispatch:
 
 jobs:
-  build-linux:
-
-    runs-on: ubuntu-latest
+  build-validate:
     strategy:
-      fail-fast: false
+      fail-fast: true
       matrix:
+        os: [icelake, a100, MacStudio, ubuntu-latest]
+        suite: [cpu,cuda,vulkan]
         python-version: ["3.10"]
+        include:
+          - os: ubuntu-latest
+            suite: lint
+        exclude:
+          - os: ubuntu-latest
+            suite: vulkan
+          - os: ubuntu-latest
+            suite: cuda
+          - os: ubuntu-latest
+            suite: cpu
+          - os: MacStudio
+            suite: cuda
+          - os: MacStudio
+            suite: cpu
+          - os: MacStudio
+            suite: vulkan
+          - os: icelake
+            suite: vulkan
+          - os: icelake
+            suite: cuda
+          - os: a100
+            suite: cpu
+
+    runs-on: ${{ matrix.os }}
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Setup pip cache
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+    - name: Set Environment Variables
+      run: |
+        echo "SHORT_SHA=`git rev-parse --short=4 HEAD`" >> $GITHUB_ENV
+        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
+
+    - name: Set up Python Version File ${{ matrix.python-version }}
+      if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
+      run: |
+        # See https://github.com/actions/setup-python/issues/433
+        echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
+
+    - name: Set up Python ${{ matrix.python-version }}
+      if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest' || matrix.os == 'icelake'
+      uses: actions/setup-python@v4
+      with:
+        python-version: '${{ matrix.python-version }}'
+        #cache: 'pip'
+        #cache-dependency-path: |
+        #  **/requirements-importer.txt
+        #  **/requirements.txt
 
     - name: Install dependencies
+      if: matrix.suite == 'lint'
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest yapf toml
+        python -m pip install flake8 pytest toml black
 
     - name: Lint with flake8
+      if: matrix.suite == 'lint'
       run: |
+        # black format check
+        black --version
+        black --line-length 79 --check .
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
-        yapf -i --style .style.yapf shark/*.py
 
-    - name: Validate Models
+    - name: Validate Models on CPU
+      if: matrix.suite == 'cpu'
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
         source shark.venv/bin/activate
-        pytest -k 'not benchmark' --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
-
-  perf-macOS:
-    runs-on: MacStudio
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10"]
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
+        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
+        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
 
-    steps:
-    - uses: actions/checkout@v3
-    - name: Validate Models dependencies
+    - name: Validate Models on NVIDIA GPU
+      if: matrix.suite == 'cuda'
       run: |
         cd $GITHUB_WORKSPACE
-        PYTHON=python3.10 IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
         source shark.venv/bin/activate
-        pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
-
-  perf-linux:
-    runs-on: a100
-    timeout-minutes: 45
-    continue-on-error: true
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.10"]
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
+        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
+        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
 
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Setup pip cache
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-
-    - name: Validate Models
+    - name: Validate Vulkan Models
+      if: matrix.suite == 'vulkan'
       run: |
         cd $GITHUB_WORKSPACE
-        IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
```
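The new `include`/`exclude` rules prune the 4x3 os/suite grid down to just four effective jobs; note that `MacStudio` is excluded from every suite, so it runs nothing from this matrix. A quick sketch (ours, not part of the workflow) that enumerates the result:

```python
# Enumerate the effective jobs implied by the matrix above:
# os x suite, minus the excludes, plus the lint include.
from itertools import product

oses = ["icelake", "a100", "MacStudio", "ubuntu-latest"]
suites = ["cpu", "cuda", "vulkan"]
excluded = {
    ("ubuntu-latest", "vulkan"), ("ubuntu-latest", "cuda"), ("ubuntu-latest", "cpu"),
    ("MacStudio", "cuda"), ("MacStudio", "cpu"), ("MacStudio", "vulkan"),
    ("icelake", "vulkan"), ("icelake", "cuda"), ("a100", "cpu"),
}

jobs = [(o, s) for o, s in product(oses, suites) if (o, s) not in excluded]
jobs.append(("ubuntu-latest", "lint"))  # the `include` entry
print(jobs)
# [('icelake', 'cpu'), ('a100', 'cuda'), ('a100', 'vulkan'), ('ubuntu-latest', 'lint')]
```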
`.gitignore` (vendored, +1)

```diff
@@ -162,6 +162,7 @@ cython_debug/
 
 # Shark related artefacts
 *venv/
+shark_tmp/
 
 # ORT related artefacts
 cache_models/
```
`.gitmodules` (vendored, -4)

```diff
@@ -1,4 +0,0 @@
-[submodule "inference/thirdparty/shark-runtime"]
-	path = inference/thirdparty/shark-runtime
-	url = https://github.com/nod-ai/SHARK-Runtime.git
-	branch = shark-06032022
```

Deleted yapf style file (presumably `.style.yapf`, given the `--style .style.yapf` invocations removed from the workflows above; its filename header was not captured in this mirror):

```diff
@@ -1,3 +0,0 @@
-[style]
-based_on_style = google
-column_limit = 80
```
`README.md` (deleted, -260). The removed file read:

# SHARK

High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerators and Heterogeneous Clusters

[](https://github.com/nod-ai/SHARK/actions/workflows/nightly.yml)
[](https://github.com/nod-ai/SHARK/actions/workflows/test-models.yml)

## Communication Channels

* [SHARK Discord server](https://discord.gg/RUqY2h2s9u): Real-time discussions with the SHARK team and other users
* [GitHub issues](https://github.com/nod-ai/SHARK/issues): Feature requests, bugs, etc.

## Installation

<details>
  <summary>Installation (Linux and macOS)</summary>

### Set up a new pip virtual environment

This step sets up a new virtual environment for Python.

```shell
python --version  # Check that you have 3.7-3.10 on Linux, or 3.10 on macOS
python -m venv shark_venv
source shark_venv/bin/activate

# If you are using conda, create and activate a new conda env instead.

# Some older pip installs may not be able to handle the recent PyTorch deps
python -m pip install --upgrade pip
```

*macOS Metal* users, please install the Vulkan SDK from https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg

### Install SHARK

This step pip-installs SHARK and related packages on Linux (Python 3.7, 3.8, 3.9, 3.10) and macOS (Python 3.10).

```shell
pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/shark-runtime/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
```

If you are on an Intel macOS machine, you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.

### Download and run the Resnet50 sample

```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
# Install deps for the test script
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu"  # Use cuda, vulkan, or metal
```

### Download and run the BERT (MiniLM) sample

```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
# Install deps for the test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu"  # Use cuda, vulkan, or metal
```
</details>

<details>
  <summary>Source Installation</summary>

## Check out the code

```shell
git clone https://github.com/nod-ai/SHARK.git
```

## Set up your Python virtual environment and dependencies

```shell
# Set up the venv and install the necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
# Please activate the venv after installation.
```

### Run a demo script

```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu"  # Use gpu | vulkan
```

### Run all model tests on CPU/GPU/VULKAN/Metal

```shell
pytest shark/tests/models

# If on Linux, for quicker results:
pytest shark/tests/models -n auto
```

### Run all model benchmark tests on CPU/GPU/VULKAN/Metal

```shell
pytest shark/tests/benchmarks
```
</details>

<details>
  <summary>API Reference</summary>

### Shark Inference API

```
from shark_runner import SharkInference

shark_module = SharkInference(
    module,            # the model class
    (input,),          # inputs to the model (must be torch tensors)
    dynamic,           # (boolean) whether the input shapes are dynamic or static
    device,            # `cpu`, `gpu`, or `vulkan`
    tracing_required,  # (boolean) jit-trace the module with the given input;
                       # useful where jit.script doesn't work
)
shark_module.set_frontend("pytorch")  # or tensorflow, mhlo, linalg, tosa
shark_module.compile()

result = shark_module.forward(inputs)
```
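A concrete, runnable version of the call sketched above. It mirrors the keyword spellings (`dynamic=`, `device=`, `jit_trace=`) that the test files later in this diff actually pass to `SharkInference`; the Resnet18 choice and the import path follow this README's own examples, so treat it as an illustrative sketch rather than the canonical API:

```python
# Minimal PyTorch example of the SharkInference flow described above.
# Assumes the nodai-shark package from the install step; argument spellings
# follow the test files in this diff (jit_trace rather than tracing_required).
import torch
import torchvision.models as models

from shark.shark_inference import SharkInference

model = models.resnet18(pretrained=True).eval()
test_input = torch.randn(1, 3, 224, 224)  # a static (1, 3, 224, 224) image batch

shark_module = SharkInference(
    model,
    (test_input,),
    dynamic=False,   # treat input shapes as static
    device="cpu",    # or "gpu" / "vulkan"
    jit_trace=True,  # trace instead of scripting
)
shark_module.set_frontend("pytorch")
shark_module.compile()

result = shark_module.forward((test_input,))
print(result)
```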
### Example demonstrating running MHLO IR

```
from shark.shark_inference import SharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)

shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
```
</details>

## Supported and Validated Models

<details>
  <summary>PyTorch Models</summary>

### Huggingface PyTorch Models

| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| Albert | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| BigBird | :heavy_check_mark: (AOT) | | | |
| DistilBERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| GPT2 | :x: (AOT) | | | |

### Torchvision Models

| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|--------------------|----------------------|-----------|------------|-------------|
| AlexNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| DenseNet121 | :heavy_check_mark: (Script) | | | |
| MNasNet1_0 | :heavy_check_mark: (Script) | | | |
| MobileNetV2 | :heavy_check_mark: (Script) | | | |
| MobileNetV3 | :heavy_check_mark: (Script) | | | |
| Unet | :x: (Script) | | | |
| Resnet18 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet50 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet101 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnext50_32x4d | :heavy_check_mark: (Script) | | | |
| ShuffleNet_v2 | :x: (Script) | | | |
| SqueezeNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| EfficientNet | :heavy_check_mark: (Script) | | | |
| Regnet | :heavy_check_mark: (Script) | | | |
| Resnest | :x: (Script) | | | |
| Vision Transformer | :heavy_check_mark: (Script) | | | |
| VGG 16 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Wide Resnet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| RAFT | :x: (JIT) | | | |

For more information, refer to the [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0).

### PyTorch Training Models

| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>JAX Models</summary>

### JAX Models

| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|--------------------|-----------|------------|-------------|
| DALL-E | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>TFLite Models</summary>

### TFLite Models

| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|-------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

<details>
  <summary>TF Models</summary>

### Tensorflow Models

| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|-----------|------------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |

</details>

## Related Projects

<details>
  <summary>IREE Project Channels</summary>

* [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests, bugs, and other work tracking
* [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development discussions with the core team and collaborators
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss): Announcements, general and low-priority discussion
</details>

<details>
  <summary>MLIR and Torch-MLIR Project Channels</summary>

* `#torch-mlir` channel on the LLVM [Discord](https://discord.gg/xS7Z362), the most active communication channel
* Torch-MLIR GitHub issues [here](https://github.com/llvm/torch-mlir/issues)
* [`torch-mlir` section](https://llvm.discourse.group/c/projects-that-want-to-become-official-llvm-projects/torch-mlir/41) of LLVM Discourse
* Weekly meetings on Mondays, 9AM PST; see [here](https://discourse.llvm.org/t/community-meeting-developer-hour-refactoring-recurring-meetings/62575) for more information
* [MLIR topic within LLVM Discourse](https://llvm.discourse.group/c/llvm-project/mlir/31); SHARK and IREE are enabled by, and rely heavily on, [MLIR](https://mlir.llvm.org)
</details>

## License

nod.ai SHARK is licensed under the terms of the Apache 2.0 License with LLVM Exceptions.
See [LICENSE](LICENSE) for more information.
Deleted file (-22 lines; its filename was not captured in this mirror, but it is a small HF benchmark driver built on `SharkHFBenchmarkRunner`):

```python
import torch
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner

parser.add_argument(
    "--model_name",
    type=str,
    required=True,
    help='Specifies the name of the HF model to benchmark (for example "microsoft/MiniLM-L12-H384-uncased").',
)
load_args, unknown = parser.parse_known_args()

if __name__ == "__main__":
    model_name = load_args.model_name
    test_input = torch.randint(2, (1, 128))
    shark_module = SharkHFBenchmarkRunner(model_name, (test_input,), jit_trace=True)
    shark_module.benchmark_c()
    shark_module.benchmark_python((test_input,))
    shark_module.benchmark_torch(test_input)
    shark_module.benchmark_onnx(test_input)
```
Deleted file (-137 lines). Given that the driver above imports `SharkHFBenchmarkRunner` from `benchmarks.hf_transformer`, this is presumably `benchmarks/hf_transformer.py`:

```python
import torch
from shark.shark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import (
    run_pytorch,
    run_tensorflow,
    run_onnxruntime,
)
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
import psutil


class OnnxFusionOptions(object):
    def __init__(self):
        self.disable_gelu = False
        self.disable_layer_norm = False
        self.disable_attention = False
        self.disable_skip_layer_norm = False
        self.disable_embed_layer_norm = False
        self.disable_bias_skip_layer_norm = False
        self.disable_bias_gelu = False
        self.enable_gelu_approximation = False
        self.use_mask_index = False
        self.no_attention_mask = False


class HuggingFaceLanguage(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels; 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
    # SharkRunner-derived class with benchmarking capabilities.
    def __init__(
        self,
        model_name: str,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
    ):
        self.device = device if device is not None else shark_args.device
        if self.device == "gpu":
            raise ValueError(
                "Currently GPU benchmarking is not supported due to OOM from ORT."
            )
        self.model_name = model_name
        model = HuggingFaceLanguage(model_name)
        SharkBenchmarkRunner.__init__(
            self, model, input, dynamic, self.device, jit_trace, from_aot, frontend
        )

    def benchmark_torch(self, inputs):
        use_gpu = self.device == "gpu"
        # Set the model's layer number to automatic.
        config_modifier = ConfigModifier(None)
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        verbose = False
        result = run_pytorch(
            use_gpu, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes,
            sequence_lengths, shark_args.num_iterations, False,
            cache_dir, verbose,
        )
        print(
            f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    # TODO: Currently non-functional due to a TF runtime error. There might be
    # some issue with initializing TF.
    def benchmark_tf(self, inputs):
        use_gpu = self.device == "gpu"
        # Set the model's layer number to automatic.
        config_modifier = ConfigModifier(None)
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        verbose = False
        result = run_tensorflow(
            use_gpu, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
            shark_args.num_iterations, cache_dir, verbose,
        )
        print(
            f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_onnx(self, inputs):
        if self.model_name not in MODELS:
            print(
                f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX."
            )
            return
        use_gpu = self.device == "gpu"
        num_threads = psutil.cpu_count(logical=False)
        batch_sizes = [inputs.shape[0]]
        sequence_lengths = [inputs.shape[-1]]
        cache_dir = os.path.join(".", "cache_models")
        onnx_dir = os.path.join(".", "onnx_models")
        verbose = False
        input_counts = [1]
        optimize_onnx = True
        validate_onnx = False
        disable_ort_io_binding = False
        use_raw_attention_mask = True
        model_fusion_statistics = {}
        overwrite = False
        model_source = "pt"  # Either "pt" or "tf".
        provider = None
        config_modifier = ConfigModifier(None)
        onnx_args = OnnxFusionOptions()
        result = run_onnxruntime(
            use_gpu, provider, [self.model_name], None, config_modifier,
            Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
            shark_args.num_iterations, input_counts, optimize_onnx,
            validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
            disable_ort_io_binding, use_raw_attention_mask,
            model_fusion_statistics, model_source, onnx_args,
        )
        print(
            f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
```
Deleted file (-210 lines). Its contents match the model-benchmark test suite that the workflows above skip via `--ignore=benchmarks/tests/test_benchmark.py`, so that is presumably its path:

```python
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib
import pytest
import unittest

torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class TFHuggingFaceLanguage(tf.Module):
    def __init__(self, hf_model_name):
        super(TFHuggingFaceLanguage, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False
        )

    @tf.function(input_signature=tf_bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


def get_TFhf_model(name):
    model = TFHuggingFaceLanguage(name)
    tokenizer = BertTokenizer.from_pretrained(name)
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(
        text, padding="max_length", truncation=True, max_length=MAX_SEQUENCE_LENGTH
    )
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0
        )
    test_input = (
        encoded_input["input_ids"],
        encoded_input["attention_mask"],
        encoded_input["token_type_ids"],
    )
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out


##################### Hugging Face LM Models ###################################


class HuggingFaceLanguage(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels; 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_model(name):
    model = HuggingFaceLanguage(name)
    # TODO: Currently the test input is set to (1, 128).
    test_input = torch.randint(2, (1, 128))
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################

##################### Torch Vision Models ###################################


class VisionModule(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


def get_vision_model(torch_model):
    model = VisionModule(torch_model)
    # The test input is a (1, 3, 224, 224) image tensor.
    test_input = torch.randn(1, 3, 224, 224)
    actual_out = model(test_input)
    return model, test_input, actual_out


############################# Benchmark Tests ####################################

pytest_benchmark_param = pytest.mark.parametrize(
    ("dynamic", "device"),
    [
        pytest.param(False, "cpu"),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, "cpu", marks=pytest.mark.skip),
        pytest.param(
            False,
            "gpu",
            marks=pytest.mark.skipif(
                check_device_drivers("gpu"), reason="nvidia-smi not found"
            ),
        ),
        pytest.param(True, "gpu", marks=pytest.mark.skip),
        pytest.param(
            False,
            "vulkan",
            marks=pytest.mark.skipif(
                check_device_drivers("vulkan"),
                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
            ),
        ),
        pytest.param(
            True,
            "vulkan",
            marks=pytest.mark.skipif(
                check_device_drivers("vulkan"),
                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
            ),
        ),
    ],
)


@pytest.mark.skipif(
    importlib.util.find_spec("iree.tools") is None,
    reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
    model, test_input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
    shark_module = SharkInference(
        model,
        (test_input,),
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.compile()
        shark_module.benchmark_all((test_input,))
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False


@pytest.mark.skipif(
    importlib.util.find_spec("iree.tools") is None,
    reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
    model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
    shark_module = SharkInference(
        model,
        test_input,
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        shark_module.benchmark_all(test_input)
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False


@pytest.mark.skip(reason="XLM Roberta too large to test.")
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
    model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
    shark_module = SharkInference(
        model,
        test_input,
        device=device,
        dynamic=dynamic,
        jit_trace=True,
        benchmark_mode=True,
    )
    try:
        # If benchmarking is successful, assert success/True.
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        shark_module.benchmark_all(test_input)
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False
```
Deleted file (-39 lines; presumably `benchmarks/tests/test_hf_benchmark.py`, the other benchmark test the workflows pass to `--ignore`):

```python
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest

torch.manual_seed(0)

############################# HF Benchmark Tests ####################################

# Test running the benchmark module without failing.
pytest_benchmark_param = pytest.mark.parametrize(
    ("dynamic", "device"),
    [
        pytest.param(False, "cpu"),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, "cpu", marks=pytest.mark.skip),
    ],
)


@pytest.mark.skipif(
    importlib.util.find_spec("onnxruntime") is None,
    reason="Cannot find ONNXRUNTIME.",
)
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
    model_name = "bert-base-uncased"
    test_input = torch.randint(2, (1, 128))
    try:
        shark_module = SharkHFBenchmarkRunner(
            model_name,
            (test_input,),
            jit_trace=True,
            dynamic=dynamic,
            device=device,
        )
        shark_module.benchmark_c()
        shark_module.benchmark_python((test_input,))
        shark_module.benchmark_torch(test_input)
        shark_module.benchmark_onnx(test_input)
        # If benchmarking is successful, assert success/True.
        assert True
    except Exception as e:
        # If anything happens during benchmarking, assert False/failure.
        assert False
```
Deleted file (-192 lines): the Triton backend's `CMakeLists.txt` (the build instructions below run it from `SHARK/inference`):

```cmake
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required(VERSION 3.17)

project(sharkbackend LANGUAGES C CXX)

#
# Options
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)

set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

#
# Dependencies
#
# FetchContent requires us to include the transitive closure of all
# repos that we depend on so that we can override the tags.
#
include(FetchContent)

FetchContent_Declare(
  repo-common
  GIT_REPOSITORY https://github.com/triton-inference-server/common.git
  GIT_TAG ${TRITON_COMMON_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY https://github.com/triton-inference-server/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-backend
  GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# The backend must be built into a shared library. Use an ldscript to
# hide all symbols except for the TRITONBACKEND API.
#
configure_file(src/libtriton_dshark.ldscript libtriton_dshark.ldscript COPYONLY)

add_library(
  triton-dshark-backend SHARED
  src/dshark.cc
  #src/dshark_driver_module.c
)

add_library(
  SharkBackend::triton-dshark-backend ALIAS triton-dshark-backend
)

target_include_directories(
  triton-dshark-backend
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_BINARY_DIR}/lib/cmake/mlir")

add_subdirectory(thirdparty/shark-runtime EXCLUDE_FROM_ALL)

target_link_libraries(triton-dshark-backend PRIVATE
  iree_base_base
  iree_hal_hal
  iree_hal_cuda_cuda
  iree_hal_cuda_registration_registration
  iree_hal_vmvx_registration_registration
  iree_hal_dylib_registration_registration
  iree_modules_hal_hal
  iree_vm_vm
  iree_vm_bytecode_module
  iree_hal_local_loaders_system_library_loader
  iree_hal_local_loaders_vmvx_module_loader
)

target_compile_features(triton-dshark-backend PRIVATE cxx_std_11)

target_link_libraries(
  triton-dshark-backend
  PRIVATE
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
    triton-backend-utils    # from repo-backend
)

if(WIN32)
  set_target_properties(
    triton-dshark-backend PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME triton_dshark
  )
else()
  set_target_properties(
    triton-dshark-backend PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    OUTPUT_NAME triton_dshark
    LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dshark.ldscript
    LINK_FLAGS "-Wl,--version-script libtriton_dshark.ldscript"
  )
endif()

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/SharkBackend)

install(
  TARGETS triton-dshark-backend
  EXPORT triton-dshark-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
  RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/dshark
)

install(
  EXPORT triton-dshark-backend-targets
  FILE SharkBackendTargets.cmake
  NAMESPACE SharkBackend::
  DESTINATION ${INSTALL_CONFIGDIR}
)

include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/SharkBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-dshark-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/SharkBackendTargets.cmake
  NAMESPACE SharkBackend::
)

export(PACKAGE SharkBackend)
```
Deleted file (-100 lines): the Triton backend README. The removed file read:

# SHARK Triton Backend

The Triton backend for SHARK.

# Build

Install SHARK:

```
git clone https://github.com/nod-ai/SHARK.git
# Skip the step above if dshark is already installed.
cd SHARK/inference
```

Install dependencies:

```
apt-get install patchelf rapidjson-dev python3-dev
git submodule update --init
```

Update the submodules of IREE:

```
cd thirdparty/shark-runtime
git submodule update --init
```

Next, make the backend and install it:

```
cd ../..
mkdir build && cd build
cmake -DTRITON_ENABLE_GPU=ON \
      -DIREE_HAL_DRIVER_CUDA=ON \
      -DIREE_TARGET_BACKEND_CUDA=ON \
      -DMLIR_ENABLE_CUDA_RUNNER=ON \
      -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
      -DTRITON_BACKEND_REPO_TAG=r22.02 \
      -DTRITON_CORE_REPO_TAG=r22.02 \
      -DTRITON_COMMON_REPO_TAG=r22.02 ..
make install
```

# Incorporating into Triton

There are much more in-depth explanations of the following steps in Triton's documentation:
https://github.com/triton-inference-server/server/blob/main/docs/compose.md#triton-with-unsupported-and-custom-backends

There should be a file at /build/install/backends/dshark/libtriton_dshark.so. You will need to copy it into your Triton server image. More documentation is in the link above, but to create the Docker image you need to run the compose.py command in the triton-backend server repo.

To first build your image, clone the tritonserver repo:

```
git clone https://github.com/triton-inference-server/server.git
```

Then run `compose.py` to build a Docker compose file:

```
cd server
python3 compose.py --repoagent checksum --dry-run
```

Because dshark is a third-party backend, you will need to manually modify the `Dockerfile.compose` produced to include the dshark backend by copying in this line (the dshark backend is located in the build folder from earlier, under `/build/install/backends`):

```
COPY /path/to/build/install/backends/dshark /opt/tritonserver/backends/dshark
```

Next run:

```
docker build -t tritonserver_custom -f Dockerfile.compose .
docker run -it --gpus=1 --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```

where `path/to/model_repos` is where you are storing the models you want to run.

If you're not using GPUs, omit `--gpus=1`:

```
docker run -it --net=host -v/path/to/model_repos:/models tritonserver_custom:latest tritonserver --model-repository=/models
```

# Setting up a model

To include a model in your backend, add a directory with your model name to your model repository directory. Examples of models can be seen here: https://github.com/triton-inference-server/backend/tree/main/examples/model_repos/minimal_models

Make sure to adjust the input correctly in the config.pbtxt file, and save a vmfb file under 1/model.vmfb, as in the sketch below.
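The README does not spell out the repository layout, so here is a hedged sketch of setting one up, following the minimal_models examples linked above. The tensor names, dtypes, and shapes are placeholders to adapt to your model, and `backend: "dshark"` assumes the backend name matches the install directory used earlier:

```python
# Hypothetical helper that lays out a Triton model repository entry for the
# dshark backend: <repo>/<model>/config.pbtxt and <repo>/<model>/1/model.vmfb.
# The input/output names and dims below are placeholders, not real model metadata.
import os
import shutil
import textwrap


def make_model_repo(repo_dir: str, model_name: str, vmfb_path: str) -> None:
    model_dir = os.path.join(repo_dir, model_name)
    os.makedirs(os.path.join(model_dir, "1"), exist_ok=True)

    config = textwrap.dedent(f"""\
        name: "{model_name}"
        backend: "dshark"
        input [
          {{ name: "input0", data_type: TYPE_FP32, dims: [ 1, 3, 224, 224 ] }}
        ]
        output [
          {{ name: "output0", data_type: TYPE_FP32, dims: [ 1, 1000 ] }}
        ]
        """)
    with open(os.path.join(model_dir, "config.pbtxt"), "w") as f:
        f.write(config)

    # The compiled module goes in version directory "1", as the text above says.
    shutil.copy(vmfb_path, os.path.join(model_dir, "1", "model.vmfb"))


# Example: make_model_repo("/path/to/model_repos", "resnet50", "./resnet50.vmfb")
```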
# CUDA

If you're having issues with CUDA, make sure the correct drivers are installed, that `nvidia-smi` works, and that the nvcc compiler is on the path.
Deleted file (-39 lines; presumably `cmake/SharkBackendConfig.cmake.in`, which the CMakeLists above feeds to `configure_package_config_file`):

```cmake
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# (Same BSD-style license header as in the CMakeLists.txt above.)

include(CMakeFindDependencyMacro)

get_filename_component(
  SHARKBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)

list(APPEND CMAKE_MODULE_PATH ${SHARKBACKEND_CMAKE_DIR})

if(NOT TARGET SharkBackend::triton-dshark-backend)
  include("${SHARKBACKEND_CMAKE_DIR}/SharkBackendTargets.cmake")
endif()

set(SHARKBACKEND_LIBRARIES SharkBackend::triton-dshark-backend)
```
File diff suppressed because it is too large
@@ -1,30 +0,0 @@
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
  TRITONBACKEND_*;
local: *;
};
Submodule inference/thirdparty/shark-runtime deleted from 7b82d90c72
package-index/index.html
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
<body>
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_20230130_481.exe'>shark_sd_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230130.481/shark_sd_cli_20230130_481.exe'>shark_sd_cli_20230130_481.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_20230129_479.exe'>shark_sd_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.479/shark_sd_cli_20230129_479.exe'>shark_sd_cli_20230129_479.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_20230129_480.exe'>shark_sd_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.480/shark_sd_cli_20230129_480.exe'>shark_sd_cli_20230129_480.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_20230129_478.exe'>shark_sd_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230129.478/shark_sd_cli_20230129_478.exe'>shark_sd_cli_20230129_478.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_20230128_477.exe'>shark_sd_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230128.477/shark_sd_cli_20230128_477.exe'>shark_sd_cli_20230128_477.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_20230127_476.exe'>shark_sd_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230127.476/shark_sd_cli_20230127_476.exe'>shark_sd_cli_20230127_476.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_20230126_475.exe'>shark_sd_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230126.475/shark_sd_cli_20230126_475.exe'>shark_sd_cli_20230126_475.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_20230125_474.exe'>shark_sd_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.474/shark_sd_cli_20230125_474.exe'>shark_sd_cli_20230125_474.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_20230125_473.exe'>shark_sd_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.473/shark_sd_cli_20230125_473.exe'>shark_sd_cli_20230125_473.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.472/shark_sd_20230125_472.exe'>shark_sd_20230125_472.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.471/shark_sd_20230125_471.exe'>shark_sd_20230125_471.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230125.468/shark_sd_20230125_468.exe'>shark_sd_20230125_468.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_20230124_470.exe'>shark_sd_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.470/shark_sd_cli_20230124_470.exe'>shark_sd_cli_20230124_470.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.469/shark_sd_20230124_469.exe'>shark_sd_20230124_469.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.467/shark_sd_20230124_467.exe'>shark_sd_20230124_467.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.466/shark_sd_20230124_466.exe'>shark_sd_20230124_466.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230124.462/shark_sd_20230124_462.exe'>shark_sd_20230124_462.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.461/shark_sd_20230123_461.exe'>shark_sd_20230123_461.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230123.460/shark_sd_20230123_460.exe'>shark_sd_20230123_460.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.459/shark_sd_20230122_459.exe'>shark_sd_20230122_459.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.458/shark_sd_20230122_458.exe'>shark_sd_20230122_458.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230122.457/shark_sd_20230122_457.exe'>shark_sd_20230122_457.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230121.456/shark_sd_20230121_456.exe'>shark_sd_20230121_456.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230120.455/shark_sd_20230120_455.exe'>shark_sd_20230120_455.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230119.454/shark_sd_20230119_454.exe'>shark_sd_20230119_454.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230118.453/shark_sd_20230118_453.exe'>shark_sd_20230118_453.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230117.452/shark_sd_20230117_452.exe'>shark_sd_20230117_452.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230116.451/shark_sd_20230116_451.exe'>shark_sd_20230116_451.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230115.450/shark_sd_20230115_450.exe'>shark_sd_20230115_450.exe</a><br />
<a href='https://github.com/nod-ai/SHARK/releases/download/20230114.449/shark_sd_20230114_449.exe'>shark_sd_20230114_449.exe</a><br />
</body>
</html>
@@ -1,12 +0,0 @@
[build-system]
requires = [
    "setuptools>=42",
    "wheel",
    "packaging",

    "numpy==1.22.4",
    "torch-mlir>=20220428.420",
    "iree-compiler>=20220427.13",
    "iree-runtime>=20220427.13",
]
build-backend = "setuptools.build_meta"
@@ -1,3 +0,0 @@
[pytest]
addopts = --verbose -p no:warnings
norecursedirs = inference tank/tflite
@@ -1,40 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre

numpy
torch
torchvision

tqdm

#iree-compiler | iree-runtime should already be installed
#these don't work on osx
#iree-tools-tflite
#iree-tools-xla
#iree-tools-tf

# TensorFlow and JAX.
gin-config
tensorflow-macos
tensorflow-metal
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]

# tflitehub dependencies.
Pillow

# Testing and support.
#lit
#pyyaml

#ONNX and ORT for benchmarking
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly
@@ -1,39 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre

numpy==1.22.4
torch
torchvision

tqdm

#iree-compiler | iree-runtime should already be installed
iree-tools-tflite
iree-tools-xla
iree-tools-tf

# TensorFlow and JAX.
gin-config
tensorflow
tf-models-nightly
tensorflow-text-nightly
transformers==4.18.0
#jax[cpu]


# tflitehub dependencies.
Pillow

# Testing and support.
lit
pyyaml

#ONNX and ORT for benchmarking
--extra-index-url https://test.pypi.org/simple/
protobuf
coloredlogs
flatbuffers
sympy
psutil
onnx-weekly
ort-nightly
@@ -1,9 +0,0 @@
setuptools
wheel

#SHARK Runner
tqdm

#Testing
pytest
pytest-xdist
setup.py
@@ -1,38 +0,0 @@
from setuptools import find_packages
from setuptools import setup

import os

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"

setup(
    name="nodai-SHARK",
    version=f"{PACKAGE_VERSION}",
    description="SHARK provides a High Performance Machine Learning Framework",
    author="nod.ai",
    author_email="stdin@nod.ai",
    url="https://nod.ai",
    long_description=long_description,
    long_description_content_type="text/markdown",
    project_urls={
        "Code": "https://github.com/nod-ai/SHARK",
        "Bug Tracker": "https://github.com/nod-ai/SHARK/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    packages=find_packages(exclude=('examples')),
    python_requires=">=3.7",
    install_requires=[
        "numpy",
        "PyYAML",
        "torch-mlir>=20220428.420",
        "iree-compiler>=20220427.13",
        "iree-runtime>=20220427.13",
    ],
)
setup_venv.sh
@@ -1,115 +0,0 @@
#!/bin/bash
# Sets up a venv suitable for running samples.
# e.g:
# ./setup_venv.sh #setup a default $PYTHON3 shark.venv
# Environment variables used by the script:
# PYTHON=$PYTHON3.10 ./setup_venv.sh #pass a version of $PYTHON to use
# VENV_DIR=myshark.venv #create a venv called myshark.venv
# USE_IREE=1 #use stock IREE instead of Nod.ai's SHARK build
# IMPORTER=1 #Install importer deps
# if you run the script from a conda env it will install in your conda env

TD="$(cd $(dirname $0) && pwd)"
if [ -z "$PYTHON" ]; then
  PYTHON="$(which python3)"
fi

function die() {
  echo "Error executing command: $*"
  exit 1
}

PYTHON_VERSION_X_Y=`${PYTHON} -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))'`

echo "Python: $PYTHON"
echo "Python version: $PYTHON_VERSION_X_Y"

if [[ -z "${CONDA_PREFIX}" ]]; then
  # Not a conda env. So create a new VENV dir
  VENV_DIR=${VENV_DIR:-shark.venv}
  echo "Using pip venv.. Setting up venv dir: $VENV_DIR"
  $PYTHON -m venv "$VENV_DIR" || die "Could not create venv."
  source "$VENV_DIR/bin/activate" || die "Could not activate venv"
  PYTHON="$(which python3)"
else
  echo "Found conda env $CONDA_DEFAULT_ENV. Running pip install inside the conda env"
fi

Red=`tput setaf 1`
Green=`tput setaf 2`
Yellow=`tput setaf 3`

# Assume no binary torch-mlir.
# Currently available for macOS m1&intel (3.10) and Linux(3.7,3.8,3.9,3.10)
torch_mlir_bin=false
if [[ $(uname -s) = 'Darwin' ]]; then
  echo "${Yellow}Apple macOS detected"
  if [[ $(uname -m) == 'arm64' ]]; then
    echo "${Yellow}Apple M1 Detected"
    hash rustc 2>/dev/null
    if [ $? -eq 0 ];then
      echo "${Green}rustc found to compile HF tokenizers"
    else
      echo "${Red}Could not find rustc" >&2
      echo "${Red}Please run:"
      echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
      exit 1
    fi
  fi
  echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
  echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
  if [ "$PYTHON_VERSION_X_Y" == "3.10" ]; then
    torch_mlir_bin=true
  fi
elif [[ $(uname -s) = 'Linux' ]]; then
  echo "${Yellow}Linux detected"
  if [ "$PYTHON_VERSION_X_Y" == "3.7" ] || [ "$PYTHON_VERSION_X_Y" == "3.8" ] || [ "$PYTHON_VERSION_X_Y" == "3.9" ] || [ "$PYTHON_VERSION_X_Y" == "3.10" ] ; then
    torch_mlir_bin=true
  fi
else
  echo "${Red}OS not detected. Pray and Play"
fi

# Upgrade pip and install requirements.
$PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
$PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
if [ "$torch_mlir_bin" = true ]; then
  $PYTHON -m pip install --find-links https://github.com/llvm/torch-mlir/releases torch-mlir --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  if [ $? -eq 0 ];then
    echo "Successfully Installed torch-mlir"
  else
    echo "Could not install torch-mlir" >&2
  fi
else
  echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
  echo "${Yellow}Python 3.10 supported on macOS and 3.7,3.8,3.9 and 3.10 on Linux"
  echo "${Red}Please build torch-mlir from source in your environment"
  exit 1
fi
if [[ -z "${USE_IREE}" ]]; then
  RUNTIME="nod-ai/SHARK-Runtime"
else
  RUNTIME="google/iree"
fi
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --find-links https://github.com/${RUNTIME}/releases iree-compiler iree-runtime

if [[ ! -z "${IMPORTER}" ]]; then
  echo "${Yellow}Installing importer tools.."
  if [[ $(uname -s) = 'Linux' ]]; then
    echo "${Yellow}Linux detected.. installing Linux importer tools"
    $PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  elif [[ $(uname -s) = 'Darwin' ]]; then
    echo "${Yellow}macOS detected.. installing macOS importer tools"
    #Conda seems to have some problems installing these packages and hope they get resolved upstream.
    $PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://download.pytorch.org/whl/nightly/cpu
  fi
fi

$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases

if [[ -z "${CONDA_PREFIX}" ]]; then
  echo "${Green}Before running examples activate venv with:"
  echo "  ${Green}source $VENV_DIR/bin/activate"
fi
@@ -1,72 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from torch._decomp import get_decompositions
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import _stateless

from torch import fx
import copy
import tempfile


class MakeFxModule:

    def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
        self.model = model
        self.inputs = inputs
        self.custom_inference_fn = custom_inference_fn
        self.training_graph = None

    # Doesn't replace the None type.
    def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
        for node in fx_g.graph.nodes:
            if node.op == "output":
                # output nodes always have one argument
                node_arg = node.args[0]
                out_nodes = []
                if isinstance(node_arg, list):
                    # Don't return NoneType elements.
                    for out_node in node_arg:
                        if not isinstance(out_node, type(None)):
                            out_nodes.append(out_node)
                    # If there is a single tensor/element to be returned don't
                    # create a tuple for it.
                    if len(out_nodes) == 1:
                        node.args = out_nodes
                    else:
                        node.args = (tuple(out_nodes),)
        fx_g.graph.lint()
        fx_g.recompile()
        return fx_g

    def generate_graph(self):
        fx_g = make_fx(self.custom_inference_fn,
                       decomposition_table=get_decompositions([
                           torch.ops.aten.embedding_dense_backward,
                           torch.ops.aten.native_layer_norm_backward,
                           torch.ops.aten.slice_backward,
                           torch.ops.aten.select_backward
                       ]))(dict(self.model.named_parameters()),
                           dict(self.model.named_buffers()), self.inputs)
        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
        fx_g.recompile()
        fx_g = self.change_fx_graph_return_to_tuple(fx_g)
        ts_g = torch.jit.script(fx_g)
        temp = tempfile.NamedTemporaryFile(suffix='_shark_ts',
                                           prefix='temp_ts_')
        ts_g.save(temp.name)
        new_ts = torch.jit.load(temp.name)
        self.training_graph = new_ts
@@ -1,78 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import ctypes

#Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36


def get_cuda_sm_cc():
    libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError("could not load any of: " + ' '.join(libnames))

    nGpus = ctypes.c_int()
    name = b' ' * 100
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()

    result = ctypes.c_int()
    device = ctypes.c_int()
    context = ctypes.c_void_p()
    error_str = ctypes.c_char_p()

    result = cuda.cuInit(0)
    if result != CUDA_SUCCESS:
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        print("cuInit failed with error code %d: %s" %
              (result, error_str.value.decode()))
        return 1
    result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
    if result != CUDA_SUCCESS:
        cuda.cuGetErrorString(result, ctypes.byref(error_str))
        print("cuDeviceGetCount failed with error code %d: %s" %
              (result, error_str.value.decode()))
        return 1
    print("Found %d device(s)." % nGpus.value)
    for i in range(nGpus.value):
        result = cuda.cuDeviceGet(ctypes.byref(device), i)
        if result != CUDA_SUCCESS:
            cuda.cuGetErrorString(result, ctypes.byref(error_str))
            print("cuDeviceGet failed with error code %d: %s" %
                  (result, error_str.value.decode()))
            return 1
        print("Device: %d" % i)
        if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
                                device) == CUDA_SUCCESS:
            print("  Name: %s" % (name.split(b'\0', 1)[0].decode()))
        if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
                                          ctypes.byref(cc_minor),
                                          device) == CUDA_SUCCESS:
            print("  Compute Capability: %d.%d" %
                  (cc_major.value, cc_minor.value))
            sm = f"sm_{cc_major.value}{cc_minor.value}"
    return sm
@@ -1,300 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# standard imports\n",
|
||||
"import torch\n",
|
||||
"from shark.iree_utils import get_iree_compiled_module"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# torch dynamo related imports\n",
|
||||
"try:\n",
|
||||
" import torchdynamo\n",
|
||||
" from torchdynamo.optimizations.backends import create_backend\n",
|
||||
" from torchdynamo.optimizations.subgraph import SubGraph\n",
|
||||
"except ModuleNotFoundError:\n",
|
||||
" print(\"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\")\n",
|
||||
" exit()\n",
|
||||
"\n",
|
||||
"# torch-mlir imports for compiling\n",
|
||||
"from torch_mlir import compile, OutputType"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def toy_example(*args):\n",
|
||||
" a, b = args\n",
|
||||
"\n",
|
||||
" x = a / (torch.abs(a) + 1)\n",
|
||||
" if b.sum() < 0:\n",
|
||||
" b = b * -1\n",
|
||||
" return x * b"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# compiler that lowers fx_graph to through MLIR\n",
|
||||
"def __torch_mlir(fx_graph, *args, **kwargs):\n",
|
||||
" assert isinstance(\n",
|
||||
" fx_graph, torch.fx.GraphModule\n",
|
||||
" ), \"Model must be an FX GraphModule.\"\n",
|
||||
"\n",
|
||||
" def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
|
||||
" \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
|
||||
"\n",
|
||||
" for node in fx_g.graph.nodes:\n",
|
||||
" if node.op == \"output\":\n",
|
||||
" assert len(node.args) == 1, \"Output node must have a single argument\"\n",
|
||||
" node_arg = node.args[0]\n",
|
||||
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
|
||||
" node.args = (node_arg[0],)\n",
|
||||
" fx_g.graph.lint()\n",
|
||||
" fx_g.recompile()\n",
|
||||
" return fx_g\n",
|
||||
"\n",
|
||||
" fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
|
||||
" ts_graph = torch.jit.script(fx_graph)\n",
|
||||
"\n",
|
||||
" # torchdynamo does munges the args differently depending on whether you use\n",
|
||||
" # the @torchdynamo.optimize decorator or the context manager\n",
|
||||
" if isinstance(args, tuple):\n",
|
||||
" args = list(args)\n",
|
||||
" assert isinstance(args, list)\n",
|
||||
" if len(args) == 1 and isinstance(args[0], list):\n",
|
||||
" args = args[0]\n",
|
||||
"\n",
|
||||
" linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)\n",
|
||||
" callable, _ = get_iree_compiled_module(linalg_module, \"cuda\", func_name=\"forward\")\n",
|
||||
"\n",
|
||||
" def forward(*inputs):\n",
|
||||
" return callable(*inputs)\n",
|
||||
"\n",
|
||||
" return forward"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 device(s).\n",
|
||||
"Device: 0\n",
|
||||
" Name: NVIDIA GeForce RTX 3080\n",
|
||||
" Compute Capability: 8.6\n",
|
||||
"[-0.40066046 -0.4210303 0.03225489 -0.44849953 0.10370405 -0.04422468\n",
|
||||
" 0.33262825 -0.20109026 0.02102537 -0.24882983]\n",
|
||||
"[-0.07824923 -0.17004533 0.06439921 -0.06163602 0.26633525 -1.1560082\n",
|
||||
" -0.06660341 0.24227881 0.1462235 -0.32055548]\n",
|
||||
"[-0.01464001 0.442209 -0.0607936 -0.5477967 -0.25226554 -0.08588809\n",
|
||||
" -0.30497575 0.00061084 -0.50069696 0.2317973 ]\n",
|
||||
"[ 0.25726247 0.39388427 -0.24093066 0.12316308 -0.01981307 0.5661146\n",
|
||||
" 0.26199922 0.8123446 -0.01576749 0.30846444]\n",
|
||||
"[ 0.7878203 -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
|
||||
" -1.6837492 -0.38442805 0.28220773 -1.5325156 ]\n",
|
||||
"[ 0.07975311 0.67754704 -0.30927914 0.00347631 -0.07326564 0.01893554\n",
|
||||
" -0.7518105 -0.03078967 -0.07623022 0.38865626]\n",
|
||||
"[-0.7751679 -0.5841397 -0.6622711 0.18574935 -0.6049372 0.02844244\n",
|
||||
" -0.20471913 0.3337415 -0.3619432 -0.35087156]\n",
|
||||
"[-0.08569919 -0.10775139 -0.02338934 0.21933547 -0.46712473 0.00062137\n",
|
||||
" -0.58207744 0.06457533 0.18276742 0.03866556]\n",
|
||||
"[-0.2311981 -0.43036282 0.20561649 -0.10363232 -0.13248594 0.02885137\n",
|
||||
" -0.31241602 -0.36907142 0.08861586 0.2331427 ]\n",
|
||||
"[-0.07273526 -0.31246194 -0.24218291 -0.24145737 0.0364486 0.14382267\n",
|
||||
" -0.00531162 0.15447603 -0.5220248 -0.09016377]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with torchdynamo.optimize(__torch_mlir):\n",
|
||||
" for _ in range(10):\n",
|
||||
" print(toy_example(torch.randn(10), torch.randn(10)))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"It can also be used through a decorator:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@create_backend\n",
|
||||
"def torch_mlir(subgraph, *args, **kwargs):\n",
|
||||
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
|
||||
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
|
||||
"\n",
|
||||
"@torchdynamo.optimize(\"torch_mlir\")\n",
|
||||
"def toy_example2(*args):\n",
|
||||
" a, b = args\n",
|
||||
"\n",
|
||||
" x = a / (torch.abs(a) + 1)\n",
|
||||
" if b.sum() < 0:\n",
|
||||
" b = b * -1\n",
|
||||
" return x * b"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found 1 device(s).\n",
|
||||
"Device: 0\n",
|
||||
" Name: NVIDIA GeForce RTX 3080\n",
|
||||
" Compute Capability: 8.6\n",
|
||||
"[-0.35494277 0.03409214 -0.02271946 0.7335942 0.03122527 -0.41881397\n",
|
||||
" -0.6609761 -0.6418614 0.29336175 -0.01973678]\n",
|
||||
"[-2.7246824e-01 -3.5543957e-01 6.0087401e-01 -7.4570496e-03\n",
|
||||
" -4.2481605e-02 -5.0296803e-04 7.2928613e-01 -1.4673788e-03\n",
|
||||
" -2.7621329e-01 -6.0995776e-02]\n",
|
||||
"[-0.03165906 0.3889693 0.24052973 0.27279532 -0.02773128 -0.12602475\n",
|
||||
" -1.0124422 0.5720256 -0.35437614 -0.20992722]\n",
|
||||
"[-0.41831446 0.5525326 -0.29749998 -0.17044766 0.11804754 -0.05210691\n",
|
||||
" -0.46145165 -0.8776549 0.10090438 0.17463352]\n",
|
||||
"[ 0.02194221 0.20959911 0.26973712 0.12551276 -0.0020404 0.1490246\n",
|
||||
" -0.04456685 1.1100804 0.8105744 0.6676846 ]\n",
|
||||
"[ 0.06528181 -0.13591261 0.5370964 -0.4398162 -0.03372452 0.9691372\n",
|
||||
" -0.01120087 0.2947028 0.4804801 -0.3324341 ]\n",
|
||||
"[ 0.33549032 -0.23001772 -0.08681437 0.16490957 -0.11223086 0.09168988\n",
|
||||
" 0.02403045 0.17344482 0.46406478 -0.00129451]\n",
|
||||
"[-0.27475086 0.42384806 1.9090122 -0.41147137 -0.6888369 0.08435658\n",
|
||||
" -0.26628923 -0.17436793 -0.8058869 -0.02582378]\n",
|
||||
"[-0.10109414 0.08681287 -0.10055986 0.6858881 0.29267687 -0.02797117\n",
|
||||
" -0.01425194 0.4882803 0.3551982 -0.858935 ]\n",
|
||||
"[-0.22086617 0.524994 0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
|
||||
" 0.11938014 -0.01122053 0.39294165 -0.61770755]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for _ in range(10):\n",
|
||||
" print(toy_example2(torch.randn(10), torch.randn(10)))"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -1,84 +0,0 @@
import torch
from torch_mlir import compile, OutputType

from shark.iree_utils import get_iree_compiled_module

try:
    import torchdynamo
    from torchdynamo.optimizations.backends import create_backend
    from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
    print("Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo")
    exit()

NUM_ITERS = 10


def __torch_mlir(fx_graph, *args, **kwargs):
    assert isinstance(
        fx_graph, torch.fx.GraphModule
    ), "Model must be an FX GraphModule."

    def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
        """Replace tuple with tuple element in functions that return one-element tuples."""

        for node in fx_g.graph.nodes:
            if node.op == "output":
                assert len(node.args) == 1, "Output node must have a single argument"
                node_arg = node.args[0]
                if isinstance(node_arg, tuple) and len(node_arg) == 1:
                    node.args = (node_arg[0],)
        fx_g.graph.lint()
        fx_g.recompile()
        return fx_g

    fx_graph = _unwrap_single_tuple_return(fx_graph)
    ts_graph = torch.jit.script(fx_graph)

    if isinstance(args, tuple):
        args = list(args)
    assert isinstance(args, list)
    if len(args) == 1 and isinstance(args[0], list):
        args = args[0]

    linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)
    callable, _ = get_iree_compiled_module(linalg_module, "cuda", func_name="forward")

    def forward(*inputs):
        return callable(*inputs)

    return forward


def toy_example(*args):
    a, b = args

    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b


with torchdynamo.optimize(__torch_mlir):
    for _ in range(10):
        print(toy_example(torch.randn(10), torch.randn(10)))


@create_backend
def torch_mlir(subgraph, *args, **kwargs):
    assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
    return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))


@torchdynamo.optimize("torch_mlir")
def toy_example2(*args):
    a, b = args

    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b


for _ in range(10):
    print(toy_example2(torch.randn(10), torch.randn(10)))
@@ -1,805 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# standard imports\n",
|
||||
"import torch\n",
|
||||
"from torch_mlir.eager_mode import torch_mlir_tensor"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# eager mode imports\n",
|
||||
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
|
||||
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"and wrapping all your `torch.Tensor`s:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"NUM_ITERS = 10\n",
|
||||
"\n",
|
||||
"t = torch.ones((10, 10))\n",
|
||||
"u = 2 * torch.ones((10, 10))\n",
|
||||
"\n",
|
||||
"tt = TorchMLIRTensor(t)\n",
|
||||
"print(tt)\n",
|
||||
"uu = TorchMLIRTensor(u)\n",
|
||||
"print(uu)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in range(NUM_ITERS):\n",
|
||||
" yy = tt + uu\n",
|
||||
" print(type(yy))\n",
|
||||
" print(yy.elem.to_host())\n",
|
||||
" yy = tt * uu\n",
|
||||
" print(type(yy))\n",
|
||||
" print(yy.elem.to_host())"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
|
||||
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
|
||||
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
|
||||
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
|
||||
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
|
||||
"\n",
|
||||
"t = torch.ones((10, 10))\n",
|
||||
"u = 2 * torch.ones((10, 10))\n",
|
||||
"\n",
|
||||
"tt = TorchMLIRTensor(t)\n",
|
||||
"print(tt)\n",
|
||||
"uu = TorchMLIRTensor(u)\n",
|
||||
"print(uu)\n",
|
||||
"\n",
|
||||
"yy = tt + uu\n",
|
||||
"print(yy.elem.to_host())\n",
|
||||
"yy = tt * uu\n",
|
||||
"print(yy.elem.to_host())"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"for i in range(NUM_ITERS):\n",
"    yy = t + u\n",
"    print(type(yy))\n",
"    print(yy.elem.to_host())\n",
"    yy = t * u\n",
"    print(type(yy))\n",
"    print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization): it defines a \"deleter\" that runs when an instance of `SharkEagerMode` is garbage collected. The takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:\n",
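"\n",
"For intuition, the pattern looks roughly like the sketch below. This is a minimal illustration, not SHARK's actual implementation; `install_backend` and `uninstall_backend` are hypothetical stand-ins:\n",
"\n",
"```python\n",
"import weakref\n",
"\n",
"def install_backend(name):  # hypothetical stand-in\n",
"    print(f\"installed {name} backend\")\n",
"\n",
"def uninstall_backend():  # hypothetical stand-in\n",
"    print(\"uninstalled backend\")\n",
"\n",
"class EagerModeGuard:\n",
"    def __init__(self, backend_name):\n",
"        install_backend(backend_name)\n",
"        # weakref.finalize registers a callback that runs when this instance\n",
"        # is garbage collected -- the \"deleter\" in the RAII analogy.\n",
"        self._finalizer = weakref.finalize(self, uninstall_backend)\n",
"\n",
"guard = EagerModeGuard(\"cpu\")\n",
"del guard  # triggers the finalizer, just like `del shark_eager_mode` below\n",
"```"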
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"yy = t + u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())\n",
"yy = t * u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@@ -1,148 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor

from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode


def test_cpu():
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")

    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")

    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)

    for i in range(NUM_ITERS):
        yy = tt + uu
        print(type(yy))
        print(yy.elem.to_host())
        yy = tt * uu
        print(type(yy))
        print(yy.elem.to_host())


def test_gpu():
    source = """
    #include <iostream>
    #include "cuda.h"
    #include "cuda_runtime_api.h"

    using namespace std;

    void print_free_mem() {
        size_t free, total;
        cudaSetDevice(0);
        int id;
        cudaGetDevice(&id);
        cudaMemGetInfo(&free, &total);
        cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
    }
    """
    gpu_stats = load_inline(
        name="inline_extension",
        cpp_sources=[source],
        extra_include_paths=include_paths(cuda=True),
        functions=["print_free_mem"],
    )
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")

    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")

    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)

    for i in range(NUM_ITERS):
        yy = tt + uu
        print(yy.elem.to_host())
        yy = tt * uu
        print(yy.elem.to_host())
        gpu_stats.print_free_mem()


def test_python_mode_ref_backend():
    # hide this wherever you want?
    _ = SharkEagerMode("refbackend")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        print(i)
        yy = t + u
        print(yy.elem)
        yy = t * u
        print(yy.elem)


def test_python_mode_iree_cpu():
    # hide this wherever you want?
    _ = SharkEagerMode("cpu")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


def test_python_mode_iree_gpu():
    _ = SharkEagerMode("gpu")

    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")

    print(t)
    print(u)

    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


if __name__ == "__main__":
    NUM_ITERS = 10
    test_cpu()
    if torch.cuda.is_available():
        test_gpu()
    test_python_mode_ref_backend()
    test_python_mode_iree_cpu()
    test_python_mode_iree_gpu()
@@ -1,51 +0,0 @@
from PIL import Image
import requests

from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
clip_vit_inputs = [
    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]


class CLIPModule(tf.Module):

    def __init__(self):
        super(CLIPModule, self).__init__()
        self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")

        self.m.predict = lambda x, y, z: self.m(
            input_ids=x, attention_mask=y, pixel_values=z)

    @tf.function(input_signature=clip_vit_inputs)
    def forward(self, input_ids, attention_mask, pixel_values):
        return self.m.predict(input_ids, attention_mask,
                              pixel_values).logits_per_image


if __name__ == "__main__":
    # Prepping Data
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    image = Image.open(requests.get(url, stream=True).raw)

    inputs = processor(text=["a photo of a cat", "a photo of a dog"],
                       images=image,
                       return_tensors="tf",
                       padding=True)

    shark_module = SharkInference(
        CLIPModule(),
        (inputs["input_ids"], inputs["attention_mask"], inputs["pixel_values"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()

    print(
        shark_module.forward((inputs["input_ids"], inputs["attention_mask"],
                              inputs["pixel_values"])))
@@ -1,38 +0,0 @@
from PIL import Image
import requests

from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
gpt2_inputs = [
    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
]


class GPT2Module(tf.Module):

    def __init__(self):
        super(GPT2Module, self).__init__()
        self.m = TFGPT2Model.from_pretrained("distilgpt2")

        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)

    @tf.function(input_signature=gpt2_inputs)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
    text = "I love the distilled version of models."

    inputs = tokenizer(text, return_tensors='tf')
    shark_module = SharkInference(
        GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
@@ -1,18 +0,0 @@
from shark.shark_inference import SharkInference
import numpy as np

mhlo_ir = r"""builtin.module {
  func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
    %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
    %1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
    return %1 : tensor<4x4xf32>
  }
}"""

arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)

shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
@@ -1,36 +0,0 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference

torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


test_input = torch.randint(2, (1, 128))

shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
                              jit_trace=True,
                              benchmark_mode=True)

shark_module.compile()
shark_module.forward((test_input,))
shark_module.benchmark_all((test_input,))
@@ -1,58 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    shark_module = SharkInference(
        BertModule(),
        test_input,
        benchmark_mode=True)
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    shark_module.benchmark_all(test_input)
@@ -1,35 +0,0 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference

torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


test_input = torch.randint(2, (1, 128))

shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
                              jit_trace=True)

shark_module.compile()
result = shark_module.forward((test_input,))
print("Obtained result", result)
@@ -1,41 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
from shark.shark_importer import shark_load
from shark.parser import parser
import os

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

parser.add_argument(
    "--download_mlir_path",
    type=str,
    default="minilm_tf_inference.mlir",
    help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()

MAX_SEQUENCE_LENGTH = 512

if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)
    model_name = "minilm_tf_inference"
    minilm_mlir = shark_load(model_name, load_args.download_mlir_path)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    shark_module = SharkInference(
        minilm_mlir, test_input, benchmark_mode=True)
    shark_module.set_frontend("mhlo")
    shark_module.compile()
    shark_module.benchmark_all(test_input)
@@ -1,56 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    shark_module = SharkInference(
        BertModule(),
        (encoded_input["input_ids"], encoded_input["attention_mask"],
         encoded_input["token_type_ids"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()

    print(
        shark_module.forward(
            (encoded_input["input_ids"], encoded_input["attention_mask"],
             encoded_input["token_type_ids"])))
@@ -1,80 +0,0 @@
from PIL import Image
import requests
import torch
import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference


################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
    headers = {
        "User-Agent":
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
    }
    img = Image.open(requests.get(url, headers=headers,
                                  stream=True).raw).convert("RGB")
    # preprocessing pipeline
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    img_preprocessed = preprocess(img)
    return torch.unsqueeze(img_preprocessed, 0)


def load_labels():
    classes_text = requests.get(
        "https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
        stream=True,
    ).text
    labels = [line.strip() for line in classes_text.splitlines()]
    return labels


def top3_possibilities(res):
    _, indexes = torch.sort(res, descending=True)
    percentage = torch.nn.functional.softmax(res, dim=1)[0] * 100
    top3 = [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
    return top3


class Resnet50Module(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.resnet = models.resnet50(pretrained=True)
        self.train(False)

    def forward(self, img):
        return self.resnet.forward(img)


image_url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
print("load image from " + image_url, file=sys.stderr)
img = load_and_preprocess_image(image_url)
labels = load_labels()

##############################################################################

input = torch.randn(1, 3, 224, 224)
print(input.shape)

## The img is passed to determine the input shape.
shark_module = SharkInference(Resnet50Module(), (img,))
shark_module.compile()

## Can pass any img or input to the forward module.
results = shark_module.forward((img,))

print("The top 3 results obtained via shark_runner are:")
print(top3_possibilities(torch.from_numpy(results)))

print()

print("The top 3 results obtained via torch are:")
print(top3_possibilities(Resnet50Module()(img)))
@@ -1,38 +0,0 @@
from PIL import Image
import requests

from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of inputs
t5_inputs = [
    tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
    tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]


class T5Module(tf.Module):

    def __init__(self):
        super(T5Module, self).__init__()
        self.m = TFT5Model.from_pretrained("t5-small")
        self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)

    @tf.function(input_signature=t5_inputs)
    def forward(self, input_ids, decoder_input_ids):
        return self.m.predict(input_ids, decoder_input_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    text = "I love the distilled version of models."
    inputs = tokenizer(text, return_tensors="tf").input_ids

    shark_module = SharkInference(T5Module(), (inputs, inputs))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(shark_module.forward((inputs, inputs)))
@@ -1,44 +0,0 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference


class VisionModule(torch.nn.Module):

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


input = torch.randn(1, 3, 224, 224)

## The vision models present here: https://pytorch.org/vision/stable/models.html
vision_models_list = [
    models.resnet18(pretrained=True),
    models.alexnet(pretrained=True),
    models.vgg16(pretrained=True),
    models.squeezenet1_0(pretrained=True),
    models.densenet161(pretrained=True),
    models.inception_v3(pretrained=True),
    models.shufflenet_v2_x1_0(pretrained=True),
    models.mobilenet_v2(pretrained=True),
    models.mobilenet_v3_small(pretrained=True),
    models.resnext50_32x4d(pretrained=True),
    models.wide_resnet50_2(pretrained=True),
    models.mnasnet1_0(pretrained=True),
    models.efficientnet_b0(pretrained=True),
    models.regnet_y_400mf(pretrained=True),
    models.regnet_x_400mf(pretrained=True),
]

for i, vision_model in enumerate(vision_models_list):
    shark_module = SharkInference(
        VisionModule(vision_model),
        (input,),
    )
    shark_module.compile()
    shark_module.forward((input,))
@@ -1,32 +0,0 @@
import torch
from shark.shark_inference import SharkInference


# Currently not supported: aten.transpose_conv2d is missing.
class UnetModule(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = torch.hub.load(
            "mateuszbuda/brain-segmentation-pytorch",
            "unet",
            in_channels=3,
            out_channels=1,
            init_features=32,
            pretrained=True,
        )
        self.train(False)

    def forward(self, input):
        return self.model(input)


input = torch.randn(1, 3, 224, 224)

print(input)
shark_module = SharkInference(
    UnetModule(),
    (input,),
)
shark_module.benchmark_forward((input,))
print(input)
@@ -1,50 +0,0 @@
import torch
from torch.nn.utils import _stateless
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_runner import SharkTrainer


class MiniLMSequenceClassification(torch.nn.Module):

    def __init__(self):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased",  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


mod = MiniLMSequenceClassification()


def get_sorted_params(named_params):
    return [i[1] for i in sorted(named_params.items())]


print(dict(mod.named_buffers()))

inp = (torch.randint(2, (1, 128)),)


def forward(params, buffers, args):
    params_and_buffers = {**params, **buffers}
    _stateless.functional_call(mod, params_and_buffers, args,
                               {}).sum().backward()
    optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
    # optim.load_state_dict(optim_state)
    optim.step()
    return params, buffers


shark_module = SharkTrainer(mod, inp, custom_inference_fn=forward)

print(shark_module.forward())
@@ -1,45 +0,0 @@
import numpy as np
import os
import time
import tensorflow as tf

from shark.shark_trainer import SharkTrainer
from shark.parser import parser
from shark.shark_importer import shark_load

parser.add_argument(
    "--download_mlir_path",
    type=str,
    default="bert_tf_training.mlir",
    help="Specifies path to target mlir file that will be loaded.")
load_args, unknown = parser.parse_known_args()

tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Download BERT model from tank and train.
if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
    ]
    model_name = "bert_tf_training"
    bert_mlir = shark_load(model_name, load_args.download_mlir_path)
    sample_input_tensors = [
        tf.convert_to_tensor(val, dtype=tf.int32)
        for val in predict_sample_input
    ]
    num_iter = 10
    shark_module = SharkTrainer(
        bert_mlir,
        (sample_input_tensors,
         tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)),
                              dtype=tf.int32)))
    shark_module.set_frontend("mhlo")
    shark_module.compile()
    start = time.time()
    print(shark_module.train(num_iter))
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,88 +0,0 @@
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

from shark.shark_trainer import SharkTrainer


tf.random.set_seed(0)
vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def forward(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
    ]
    sample_input_tensors = [
        tf.convert_to_tensor(val, dtype=tf.int32)
        for val in predict_sample_input
    ]
    num_iter = 10
    shark_module = SharkTrainer(
        BertModule(),
        (sample_input_tensors,
         tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)),
                              dtype=tf.int32)))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    start = time.time()
    print(shark_module.train(num_iter))
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,44 +0,0 @@
import torch
from torch.nn.utils import _stateless
from shark.shark_trainer import SharkTrainer


class Foo(torch.nn.Module):

    def __init__(self):
        super(Foo, self).__init__()
        self.l1 = torch.nn.Linear(10, 16)
        self.relu = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(16, 2)

    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out


mod = Foo()
inp = (torch.randn(10, 10),)


def get_sorted_params(named_params):
    return [i[1] for i in sorted(named_params.items())]


def forward(params, buffers, args):
    params_and_buffers = {**params, **buffers}
    _stateless.functional_call(mod, params_and_buffers, args,
                               {}).sum().backward()
    optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
    optim.step()
    return params, buffers


# fx_graph = forward(dict(mod.named_parameters()), dict(mod.named_buffers()), inp)

shark_module = SharkTrainer(mod, inp)
# Pass the training function in case of torch
shark_module.compile(training_fn=forward)

shark_module.train(num_iters=10)
@@ -1,81 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, Any

import iree
import iree.runtime as ireert
import numpy as np
import torch
from iree.runtime import DeviceArray
from torch_mlir._mlir_libs._mlir.ir import Module
from torch_mlir.compiler_utils import (
    get_module_name_for_debug_dump,
    run_pipeline_with_repro_report,
)
from torch_mlir.eager_mode.torch_mlir_eager_backend import (
    TorchMLIREagerBackend,
    TensorMetaData,
)
from torch_mlir_e2e_test.eager_backends.refbackend import NUMPY_TO_TORCH_DTYPE_DICT

from shark.iree_utils import get_iree_compiled_module, IREE_DEVICE_MAP


class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
    """Main entry-point for the iree backend for torch-mlir eager mode.

    EagerModeIREELinalgOnTensorsBackend uses iree.DeviceArray representations of tensors and
    thus all of the wrapping, unwrapping, and munging here is done between torch.Tensor and
    iree.DeviceArray, with np.ndarray as an intermediary.
    """

    def __init__(self, device: str):
        self.torch_device_str = device
        self.iree_device_str = IREE_DEVICE_MAP[device]
        self.config = ireert.Config(self.iree_device_str)

    def get_torch_metadata(self, tensor: DeviceArray,
                           kwargs: Dict[str, Any]) -> TensorMetaData:
        return TensorMetaData(
            size=tensor.shape,
            dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
            device=torch.device(self.torch_device_str),
            requires_grad=tensor.dtype.type in {np.float32, np.float64} and
            kwargs.get("requires_grad", False),
        )

    def compile(self, imported_module: Module):
        fn_name = get_module_name_for_debug_dump(imported_module)
        run_pipeline_with_repro_report(
            imported_module,
            "torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
            "EagerMode",
        )
        callable, _ = get_iree_compiled_module(imported_module,
                                               self.iree_device_str,
                                               func_name=fn_name)
        return callable

    def copy_into(self, dst, src):
        """Copy output back to appropriate arg that it should alias."""
        np.copyto(dst, src)

    def transfer_from_device_to_torch(self, e):
        return torch.from_numpy(e.to_host())

    def transfer_from_torch_to_device(self,
                                      tensor: torch.Tensor) -> DeviceArray:
        return iree.runtime.asdevicearray(self.config.device, tensor.numpy())
@@ -1,359 +0,0 @@
|
||||
# Copyright 2020 The Nod Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import iree.runtime as ireert
|
||||
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
|
||||
import iree.compiler as ireec
|
||||
from shark.torch_mlir_utils import get_module_name_for_asm_dump
|
||||
from shark.cuda_utils import get_cuda_sm_cc
|
||||
from shark.model_annotation import *
|
||||
import subprocess
|
||||
import numpy as np
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
IREE_DEVICE_MAP = {
|
||||
"cpu": "local-task",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm"
|
||||
}
|
||||
|
||||
IREE_TARGET_MAP = {
|
||||
"cpu": "dylib",
|
||||
"gpu": "cuda",
|
||||
"cuda": "cuda",
|
||||
"vulkan": "vulkan",
|
||||
"metal": "vulkan",
|
||||
"rocm": "rocm"
|
||||
}
|
||||
|
||||
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
|
||||
|
||||
|
||||
def check_device_drivers(device):
|
||||
"""Checks necessary drivers present for gpu and vulkan devices"""
|
||||
if (device in ["gpu", "cuda"]):
|
||||
try:
|
||||
subprocess.check_output('nvidia-smi')
|
||||
except Exception:
|
||||
return True
|
||||
elif (device in ["metal", "vulkan"]):
|
||||
try:
|
||||
subprocess.check_output('vulkaninfo')
|
||||
except Exception:
|
||||
return True
|
||||
elif (device == "cpu"):
|
||||
return False
|
||||
# Unknown device.
|
||||
else:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_iree_cpu_args():
|
||||
find_triple_cmd = "uname -s -m"
|
||||
os_name, proc_name = subprocess.run(
|
||||
find_triple_cmd, shell=True, stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8').split()
|
||||
if os_name == "Darwin":
|
||||
find_kernel_version_cmd = "uname -r"
|
||||
kernel_version = subprocess.run(find_kernel_version_cmd,
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
check=True).stdout.decode('utf-8')
|
||||
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
|
||||
elif os_name == "Linux":
|
||||
target_triple = f"{proc_name}-linux-gnu"
|
||||
else:
|
||||
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
|
||||
raise Exception(error_message)
|
||||
print(f"Target triple found:{target_triple}")
|
||||
return [f"-iree-llvm-target-triple={target_triple}"]
|
||||
|
||||
|
||||
def get_iree_gpu_args():
|
||||
ireert.flags.FUNCTION_INPUT_VALIDATION = False
|
||||
ireert.flags.parse_flags("--cuda_allow_inline_execution")
|
||||
sm_arch = get_cuda_sm_cc()
|
||||
if sm_arch in ['sm_70', 'sm_72', 'sm_75', 'sm_80', 'sm_84', 'sm_86']:
|
||||
return [
|
||||
"--iree-hal-cuda-disable-loop-nounroll-wa",
|
||||
f"--iree-hal-cuda-llvm-target-arch={sm_arch}"
|
||||
]
|
||||
else:
|
||||
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
|
||||
|
||||
|
||||
def get_vulkan_triple_flag():
|
||||
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk \'END{{print $NF}}\'"
|
||||
vulkan_device = run_cmd(vulkan_device_cmd).strip()
|
||||
if vulkan_device == "M1":
|
||||
print("Found Apple Device. Using m1-moltenvk-macos")
|
||||
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
|
||||
elif vulkan_device == "A100-SXM4-40GB":
|
||||
print("Found Nvidia Device. Using ampere-rtx3080-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
|
||||
else:
|
||||
print(
|
||||
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] or pull up an issue."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def get_iree_vulkan_args():
|
||||
#vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
|
||||
vulkan_flag = []
|
||||
vulkan_triple_flag = get_vulkan_triple_flag()
|
||||
if vulkan_triple_flag is not None:
|
||||
vulkan_flag.append(vulkan_triple_flag)
|
||||
return vulkan_flag
|
||||
|
||||
|
||||
def get_iree_device_args(device):
|
||||
if device == "cpu":
|
||||
return get_iree_cpu_args()
|
||||
if device in ["gpu", "cuda"]:
|
||||
return get_iree_gpu_args()
|
||||
if device in ["metal", "vulkan"]:
|
||||
return get_iree_vulkan_args()
|
||||
return []
|
||||
|
||||
|
||||
def get_iree_frontend_args(frontend):
|
||||
if frontend in ["torch", "pytorch", "linalg"]:
|
||||
return ["--iree-llvm-target-cpu-features=host"]
|
||||
elif frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
return [
|
||||
"--iree-llvm-target-cpu-features=host",
|
||||
"--iree-mhlo-demote-i64-to-i32=false",
|
||||
"--iree-flow-demote-i64-to-i32"
|
||||
]
|
||||
else:
|
||||
# Frontend not found.
|
||||
return []
|
||||
|
||||
|
||||
def compile_module_to_flatbuffer(module, device, frontend, func_name,
|
||||
model_config_path):
|
||||
# Setup Compile arguments wrt to frontends.
|
||||
input_type = ""
|
||||
args = get_iree_frontend_args(frontend)
|
||||
args += get_iree_device_args(device)
|
||||
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_type = "mhlo"
|
||||
elif frontend in ["mhlo", "tosa"]:
|
||||
input_type = frontend
|
||||
elif frontend in ["tflite"]:
|
||||
input_type = "tosa"
|
||||
|
||||
# Annotate the input module with the configs
|
||||
if model_config_path != None:
|
||||
# Currently tuned model only works on tf frontend
|
||||
if frontend in ["tensorflow", "tf"]:
|
||||
input_module = module.decode('utf-8')
|
||||
elif frontend in ["pytorch", "torch"]:
|
||||
input_module = module.operation.get_asm()
|
||||
with create_context() as ctx:
|
||||
module = model_annotation(ctx,
|
||||
input_contents=input_module,
|
||||
config_path=model_config_path)
|
||||
module = str(module)
|
||||
|
||||
# Compile according to the input type, else just try compiling.
|
||||
if input_type not in ["mhlo", "tosa"]:
|
||||
module = str(module)
|
||||
if input_type != "":
|
||||
# Currently for MHLO/TOSA.
|
||||
flatbuffer_blob = ireec.compile_str(
|
||||
module,
|
||||
target_backends=[IREE_TARGET_MAP[device]],
|
||||
extra_args=args,
|
||||
input_type=input_type)
|
||||
else:
|
||||
# Currently for Torch.
|
||||
    flatbuffer_blob = ireec.compile_str(
        str(module),
        target_backends=[IREE_TARGET_MAP[device]],
        extra_args=args)
    return flatbuffer_blob


def get_iree_module(flatbuffer_blob, device, func_name):
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    config = ireert.Config(IREE_DEVICE_MAP[device])
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    ModuleCompiled = ctx.modules.module[func_name]
    return ModuleCompiled, config


def get_iree_compiled_module(module,
                             device: str,
                             frontend: str = "torch",
                             func_name: str = "forward",
                             model_config_path: str = None):
    """Given a module returns the compiled .vmfb and configs"""
    flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
                                                   func_name, model_config_path)
    return get_iree_module(flatbuffer_blob, device, func_name)


def export_iree_module_to_vmfb(module,
                               device: str,
                               directory: str,
                               frontend: str = "torch",
                               func_name: str = "forward",
                               model_config_path: str = None):
    flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
                                                   func_name, model_config_path)
    module_name = f"{frontend}_{func_name}_{device}"
    filename = os.path.join(directory, module_name + ".vmfb")
    with open(filename, 'wb') as f:
        f.write(flatbuffer_blob)
    print(f"Saved vmfb in {filename}.")
    return filename


def export_module_to_mlir_file(module, frontend, directory: str):
    mlir_str = module
    if frontend in ["tensorflow", "tf", "mhlo"]:
        mlir_str = module.decode('utf-8')
    elif frontend in ["pytorch", "torch"]:
        mlir_str = module.operation.get_asm()
    filename = os.path.join(directory, "model.mlir")
    with open(filename, 'w') as f:
        f.write(mlir_str)
    print(f"Saved mlir in {filename}.")
    return filename


def get_results(compiled_vm, input, config, frontend="torch"):
    """Runs a .vmfb file given inputs and config and returns output."""
    device_inputs = input
    if frontend in ["torch", "pytorch"]:
        device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
    if frontend in ["tensorflow", "tf", "tflite"]:
        device_inputs = []
        for a in input:
            if isinstance(a, list):
                device_inputs.append([
                    ireert.asdevicearray(config.device, val, dtype=np.int32)
                    for val in a
                ])
            else:
                device_inputs.append(ireert.asdevicearray(config.device, a))
    result = compiled_vm(*device_inputs)
    result_tensors = []
    if isinstance(result, tuple):
        for val in result:
            result_tensors.append(np.copy(np.asarray(val, val.dtype)))
        return result_tensors
    elif isinstance(result, dict):
        data = list(result.items())
        res = np.array(data, dtype=object)
        return np.copy(res)
    else:
        return np.copy(np.asarray(result, dtype=result.dtype))


######### Benchmark Related Tools ###########


def tensor_to_type_str(input_tensors: tuple, frontend: str):
    """
    Input: A tuple of input tensors, i.e. tuple(torch.tensor)
    Output: a list of strings that represent MLIR types (e.g. 1x24xf64)
    # TODO: Support more than floats and ints
    """
    list_of_type = []
    for input_tensor in input_tensors:
        type_string = "x".join([str(dim) for dim in input_tensor.shape])
        if frontend in ["torch", "pytorch"]:
            dtype_string = str(input_tensor.dtype).replace("torch.", "")
        elif frontend in ["tensorflow", "tf", "mhlo"]:
            dtype = input_tensor.dtype
            dtype_string = re.findall("'[^\"]*'",
                                      str(dtype))[0].replace("'", "")
        regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
        match = regex_split.match(dtype_string)
        mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
        type_string += f"x{mlir_type_string}"
        list_of_type.append(type_string)
    return list_of_type


def build_benchmark_args(input_file: str,
                         device: str,
                         input_tensors: tuple,
                         frontend: str,
                         training=False):
    """
    Inputs: input_file leading to the vmfb, the input_tensors to the
        function, the target device, and whether it is training or not.
    Outputs: a list of strings that, when joined, executes
        iree-benchmark-module on the target model.
    """
    path = benchmark_module.__path__[0]
    benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
    benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
    fn_name = "forward"
    if training:
        # TODO: Replace name of train with actual train fn name.
        fn_name = "train"
    benchmark_cl.append(f"--entry_function={fn_name}")
    benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
    mlir_input_types = tensor_to_type_str(input_tensors, frontend)
    for mlir_input in mlir_input_types:
        benchmark_cl.append(f"--function_input={mlir_input}")
    # Extracts the "<time> <unit>" columns of the benchmark's last output line.
    time_extractor = "| awk 'END{print $2 $3}'"
    benchmark_cl.append(time_extractor)
    return benchmark_cl


def run_cmd(cmd):
    """
    Inputs: cli command string.
    """
    try:
        result = subprocess.run(cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                check=True)
        result_str = result.stdout.decode()
        return result_str
    except Exception:
        sys.exit(f"Exiting program due to error running: {cmd}")


def run_benchmark_module(benchmark_cl):
    """
    Run the benchmark command, extract the result and return iterations/second.

    Input: benchmark command.
    """
    benchmark_path = benchmark_cl[0]
    assert os.path.exists(
        benchmark_path
    ), "Cannot find benchmark_module. Please contact a SHARK maintainer on Discord."
    bench_result = run_cmd(' '.join(benchmark_cl))
    regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
    match = regex_split.match(bench_result)
    time = float(match.group(1))
    unit = match.group(2)
    return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
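
# A minimal end-to-end sketch of the helpers above (illustrative only; it
# assumes `module` is an already-imported MLIR module, "cpu" is a valid key
# in IREE_DEVICE_MAP, and `np_inputs` is a tuple of numpy arrays matching the
# module's forward signature).
def _example_compile_and_run(module, np_inputs):
    # Compile down to a flatbuffer and load it into an IREE runtime context.
    compiled_module, config = get_iree_compiled_module(module, "cpu")
    # Push the inputs to the device and invoke the entry function.
    return get_results(compiled_module, np_inputs, config, frontend="torch")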
@@ -1,143 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import json
import os
from typing import List, Dict

from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans

MATMUL_OP_NAMES = set(
    ["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
idx = 0


def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
    if os.path.isfile(input_contents):
        with open(input_contents, "rb") as f:
            input_contents = f.read()

    module = ir.Module.parse(input_contents)

    with open(config_path, "r") as f:
        data = json.load(f)
        configs = data["options"]

    # The Python API does not expose a general walk() function, so we just
    # do it ourselves.
    walk_children(module.operation, configs)

    if not module.operation.verify():
        raise RuntimeError("Modified program does not verify!")

    # More efficient than: print(module)
    # - Disables verification (already done above)
    # - Writes as binary, avoiding costly unicode conversions
    sys.stdout.buffer.write(
        module.operation.get_asm(assume_verified=True, binary=True))
    return module


def walk_children(op: ir.Operation, configs: List[Dict]):
    for region in op.regions:
        for block in region.blocks:
            for child_op in block.operations:
                # TODO: This is dumb. Both Operation and OpView should expose
                # 'operation' and 'name' attributes.
                if isinstance(child_op, ir.OpView):
                    child_op = child_op.operation
                if child_op.name in MATMUL_OP_NAMES:
                    global idx
                    tile_sizes, pipeline, workgroup_size, \
                        split_k, pipeline_depth = parse_config(configs[idx])

                    add_compilation_info(child_op,
                                         tile_sizes=tile_sizes,
                                         pipeline=pipeline,
                                         workgroup_size=workgroup_size,
                                         pipeline_depth=pipeline_depth)

                    if split_k:
                        add_split_k(child_op, split_k)

                    idx = idx + 1
                    print(f"Updated op {child_op}", file=sys.stderr)
                walk_children(child_op, configs)


def parse_config(config: Dict):
    if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
        pipeline = ("LLVMGPUMatmulSimt" if config["pipeline"] == "GPU" else
                    "LLVMGPUMatmulTensorCore")
        tile_sizes = [config["work_group_tile_sizes"]]
        workgroup_size = config["work_group_sizes"]
        # These two keys are optional in the config.
        pipeline_depth = config.get("pipeline_depth")
        split_k = config.get("split_k")
    else:
        pipeline = config["pipeline"]
        tile_sizes = [
            config["work_group_tile_sizes"], config["l1_tile_sizes"],
            config["vector_tile_sizes"]
        ]
        workgroup_size = []
        split_k = None
        pipeline_depth = None
    return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth


def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
                         pipeline: str, workgroup_size: List[int],
                         pipeline_depth: int):
    # We don't have a Python binding for CompilationInfo, so we just parse
    # its string form.
    if pipeline_depth:
        attr = ir.Attribute.parse(
            f"#iree_codegen.compilation_info<"
            f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
            f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
            f"workgroup_size = {repr(workgroup_size)}>")
    else:
        attr = ir.Attribute.parse(
            f"#iree_codegen.compilation_info<"
            f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
            f"translation_info = <{pipeline}>, "
            f"workgroup_size = {repr(workgroup_size)}>")
    op.attributes["compilation_info"] = attr


def add_split_k(op: ir.Operation, k: int):
    attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
    op.attributes["iree_flow_split_k"] = attr


def create_context() -> ir.Context:
    context = ir.Context()
    ireec_trans.register_all_dialects(context)
    context.allow_unregistered_dialects = True
    return context


if __name__ == "__main__":
    with create_context() as ctx:
        model_annotation(ctx,
                         input_contents=sys.argv[1],
                         config_path=sys.argv[2])
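
# The tuning config consumed by model_annotation() above is a JSON file whose
# "options" list pairs up, in walk order, with the matmul ops in the module.
# A hypothetical GPU entry, using the field names read by parse_config():
#
# {
#   "options": [
#     {
#       "pipeline": "GPU_TENSORCORE",
#       "work_group_tile_sizes": [32, 32, 16],
#       "work_group_sizes": [64, 2, 1],
#       "pipeline_depth": 4,
#       "split_k": 2
#     }
#   ]
# }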
@@ -1,71 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os


def dir_path(path):
    if os.path.isdir(path):
        return path
    else:
        raise argparse.ArgumentTypeError(
            f"readable_dir:{path} is not a valid path")


def dir_file(path):
    if os.path.isfile(path):
        return path
    else:
        raise argparse.ArgumentTypeError(
            f"readable_file:{path} is not a valid file")


parser = argparse.ArgumentParser(description='SHARK runner.')
parser.add_argument(
    "--device",
    type=str,
    default="cpu",
    help="Device on which shark_runner runs. Options are cpu, gpu, and vulkan.")
parser.add_argument(
    "--repro_dir",
    help=
    "Directory to which module files will be saved for reproduction or debugging.",
    type=dir_path,
    default="/tmp/")
parser.add_argument("--save_mlir",
                    default=False,
                    action="store_true",
                    help="Saves input MLIR module to /tmp/ directory.")
parser.add_argument("--save_vmfb",
                    default=False,
                    action="store_true",
                    help="Saves iree .vmfb module to /tmp/ directory.")
parser.add_argument(
    "--model_config_path",
    help="Directory where the tuned model config file is located.",
    default=None)

parser.add_argument(
    "--num_warmup_iterations",
    type=int,
    default=2,
    help="Run the model for the specified number of warmup iterations.")
parser.add_argument(
    "--num_iterations",
    type=int,
    default=1,
    help="Run the model for the specified number of iterations.")

shark_args, unknown = parser.parse_known_args()
@@ -1,136 +0,0 @@
# Lint as: python3
"""SHARK Importer"""

import iree.compiler.tflite as iree_tflite_compile
import iree.runtime as iree_rt
import numpy as np
import os
import sys
import tensorflow.compat.v2 as tf
import urllib.request
from shark.shark_inference import SharkInference


class SharkImporter:

    def __init__(self,
                 model_path,
                 model_type: str = "tflite",
                 model_source_hub: str = "tfhub",
                 device: str = None,
                 dynamic: bool = False,
                 jit_trace: bool = False,
                 benchmark_mode: bool = False):
        self.model_path = model_path
        self.model_type = model_type
        self.model_source_hub = model_source_hub
        self.device = device
        self.dynamic = dynamic
        self.jit_trace = jit_trace
        self.benchmark_mode = benchmark_mode
        self.inputs = None
        self.input_details = None
        self.output_details = None

        # Create the tmp model file directory.
        if self.model_path is None:
            print("Error: no model_path. Please provide a model path.")
            return

        if self.model_source_hub == "tfhub":
            # Compile and run a tfhub tflite model.
            if self.model_type == "tflite":
                print("Setting up for TMP_DIR")
                exe_basename = os.path.basename(sys.argv[0])
                self.workdir = os.path.join(os.path.dirname(__file__), "tmp",
                                            exe_basename)
                print(f"TMP_DIR = {self.workdir}")
                os.makedirs(self.workdir, exist_ok=True)
                self.tflite_file = '/'.join([self.workdir, 'model.tflite'])
                print("Setting up local address for tflite model file: ",
                      self.tflite_file)
                if os.path.exists(self.model_path):
                    self.tflite_file = self.model_path
                else:
                    print("Download tflite model")
                    urllib.request.urlretrieve(self.model_path,
                                               self.tflite_file)
                print("Setting up tflite interpreter")
                self.tflite_interpreter = tf.lite.Interpreter(
                    model_path=self.tflite_file)
                self.tflite_interpreter.allocate_tensors()
                # Default input initialization.
                self.input_details, self.output_details = self.get_model_details(
                )
                inputs = self.generate_inputs(
                    self.input_details)  # device inputs
                self.setup_inputs(inputs)

    def generate_inputs(self, input_details):
        args = []
        for input in input_details:
            print(str(input["shape"]), input["dtype"].__name__)
            args.append(np.zeros(shape=input["shape"], dtype=input["dtype"]))
        return args

    def get_model_details(self):
        if self.model_type == "tflite":
            print("Get tflite input output details")
            self.input_details = self.tflite_interpreter.get_input_details()
            self.output_details = self.tflite_interpreter.get_output_details()
        return self.input_details, self.output_details

    def setup_inputs(self, inputs):
        print("Setting up inputs")
        self.inputs = inputs

    def compile(self, inputs=None):
        if inputs is not None:
            self.setup_inputs(inputs)
        # Preprocess model_path to get the model type and model source hub.
        print("Shark Importer: initialize SharkInference and compile")
        if self.model_source_hub == "tfhub":
            # Compile and run a tfhub tflite model.
            print("Inference tfhub model")
            self.shark_module = SharkInference(self.tflite_file,
                                               self.inputs,
                                               device=self.device,
                                               dynamic=self.dynamic,
                                               jit_trace=self.jit_trace)
            self.shark_module.set_frontend("tflite")
            self.shark_module.compile()
        elif self.model_source_hub == "huggingface":
            print("Inference for", self.model_source_hub, "not implemented yet")
        elif self.model_source_hub == "jaxhub":
            print("Inference for", self.model_source_hub, "not implemented yet")

    def forward(self, inputs=None):
        if inputs is not None:
            self.setup_inputs(inputs)
        # Preprocess model_path to get the model type and model source hub.
        print("Shark Importer: forward model")
        if self.model_source_hub == "tfhub":
            shark_results = self.shark_module.forward(self.inputs)
            # Fix type information for unsigned cases,
            # for test result comparison.
            shark_results = list(shark_results)
            for i in range(len(self.output_details)):
                dtype = self.output_details[i]["dtype"]
                shark_results[i] = shark_results[i].astype(dtype)
            return shark_results
        elif self.model_source_hub == "huggingface":
            print("Inference for", self.model_source_hub, "not implemented yet")
        elif self.model_source_hub == "jaxhub":
            print("Inference for", self.model_source_hub, "not implemented yet")


def shark_load(model_name, file_path):
    file_link = f"https://storage.googleapis.com/shark_tank/users/stanley/{model_name}.mlir"
    urllib.request.urlretrieve(file_link, file_path)
    if not os.path.isfile(file_path):
        raise ValueError(
            f"Tried looking for target mlir in {file_path}, but it cannot be found."
        )
    with open(file_path, "rb") as input_file:
        model_mlir = input_file.read()
    return model_mlir
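
# Minimal usage sketch for shark_load() (the model name here is hypothetical
# and only works if a matching .mlir exists in the shark_tank bucket above):
#
#   model_mlir = shark_load("minilm", "/tmp/minilm.mlir")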
@@ -1,115 +0,0 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
|
||||
import os
|
||||
from shark.parser import shark_args
|
||||
from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
# Prints to stderr.
|
||||
def print_err(*a):
|
||||
print(*a, file=sys.stderr)
|
||||
|
||||
|
||||
class SharkInference:
|
||||
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
|
||||
|
||||
def __init__(self,
|
||||
model,
|
||||
input: tuple,
|
||||
device: str = None,
|
||||
dynamic: bool = False,
|
||||
jit_trace: bool = False,
|
||||
benchmark_mode: bool = False):
|
||||
self.model = model
|
||||
self.input = input
|
||||
self.dynamic = dynamic
|
||||
self.jit_trace = jit_trace
|
||||
self.benchmark_mode = benchmark_mode
|
||||
|
||||
# By default it's torch frontend.
|
||||
self.frontend = "pytorch"
|
||||
|
||||
# Sets the device.
|
||||
self.device = device if device is not None else shark_args.device
|
||||
|
||||
self.model_config_path = shark_args.model_config_path
|
||||
|
||||
self.shark_runner = None
|
||||
|
||||
# Sets the frontend i.e `pytorch` or `tensorflow`.
|
||||
def set_frontend(self, frontend: str):
|
||||
if frontend not in [
|
||||
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg",
|
||||
"tosa", "tflite"
|
||||
]:
|
||||
print_err("frontend not supported.")
|
||||
else:
|
||||
self.frontend = frontend
|
||||
|
||||
def compile(self):
|
||||
# Inference do not use AOT.
|
||||
from_aot = False
|
||||
if (self.benchmark_mode == True):
|
||||
self.shark_runner = SharkBenchmarkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend)
|
||||
else:
|
||||
self.shark_runner = SharkRunner(self.model, self.input,
|
||||
self.dynamic, self.device,
|
||||
self.jit_trace, from_aot,
|
||||
self.frontend,
|
||||
self.model_config_path)
|
||||
|
||||
# inputs are considered to be np.array.
|
||||
def forward(self, inputs):
|
||||
input_list = inputs
|
||||
# converts the inputs to numpy.
|
||||
if self.frontend in ["pytorch", "torch"]:
|
||||
input_list = [x.detach().numpy() for x in inputs]
|
||||
elif self.frontend in ["tensorflow", "tf"]:
|
||||
input_list = [x.numpy() for x in inputs]
|
||||
return self.shark_runner.forward(input_list, self.frontend)
|
||||
|
||||
# Saves the .vmfb module.
|
||||
def save_module(self, dir=None):
|
||||
if dir is None:
|
||||
return self.shark_runner.save_module()
|
||||
return self.shark_runner.save_module(dir)
|
||||
|
||||
######### Benchmark Related Functions #########
|
||||
def benchmark_mode(func):
|
||||
|
||||
def inner(self, *args, **kwargs):
|
||||
assert self.benchmark_mode, "SharkRunner needs to be in benchmark mode to run benchmark methods."
|
||||
return func(self, *args, **kwargs)
|
||||
|
||||
return inner
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_all(self, inputs):
|
||||
self.shark_runner.benchmark_all(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_frontend(self, inputs):
|
||||
self.shark_runner.benchmark_frontend(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_python(self, inputs):
|
||||
self.shark_runner.benchmark_python(inputs)
|
||||
|
||||
@benchmark_mode
|
||||
def benchmark_c(self):
|
||||
self.shark_runner.benchmark_c()
|
||||
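
# Minimal usage sketch, mirroring how the test suite drives this class;
# assumes `model` is a torch.nn.Module and `inp` a torch tensor accepted by
# its forward():
#
#   shark_module = SharkInference(model, (inp,), device="cpu", jit_trace=True)
#   shark_module.compile()
#   results = shark_module.forward((inp,))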
@@ -1,205 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from iree.compiler import tf as tfc
import iree.compiler.tflite as ireec_tflite
from torch.utils._python_dispatch import enable_torch_dispatch_mode
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from torch_mlir_e2e_test.eager_backends.refbackend import EagerModeRefBackend

from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import (get_results, get_iree_compiled_module,
                              export_iree_module_to_vmfb,
                              export_module_to_mlir_file,
                              build_benchmark_args, run_benchmark_module)
import os
from shark.parser import shark_args
from tqdm import tqdm
import time


class SharkRunner:
    """Base class for Shark Inference and Shark Runner."""

    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
        model_config_path: str = None,
    ):
        self.model = model
        self.frontend_model = model
        self.from_aot = from_aot
        self.input = input
        self.frontend = frontend
        self.vmfb_file = None
        func_name = "forward"
        self.device = device if device is not None else shark_args.device
        if self.frontend in ["pytorch", "torch"]:
            # Get the torch-mlir dialect; self.model is a torch module.
            # TODO: assert on the model type.
            self.model = get_torch_mlir_module(self.model, input, dynamic,
                                               jit_trace, from_aot)
        elif self.frontend in ["tensorflow", "tf"]:
            # Get the mhlo dialect; self.model is a tf.Module.
            # TODO: assert on the model type.
            self.model = tfc.compile_module(self.model,
                                            exported_names=[func_name],
                                            import_only=True)
        elif self.frontend in ["tflite"]:
            print("Setting up for IREE compiler tflite")
            # Get the tosa dialect; self.model is a .tflite file path.
            # TODO: assert on the model type.
            self.model = ireec_tflite.compile_file(self.model,
                                                   input_type="tosa",
                                                   import_only=True)
            func_name = "main"

        # TODO: We can capture the .vmfb module here and later use it for
        # saving rather than recompiling it again, if used for saving.
        (
            self.iree_compilation_module,
            self.iree_config,
        ) = get_iree_compiled_module(self.model,
                                     self.device,
                                     self.frontend,
                                     func_name=func_name,
                                     model_config_path=model_config_path)

        # Debugging options:
        if shark_args.save_mlir:
            export_module_to_mlir_file(self.model, self.frontend,
                                       shark_args.repro_dir)
        if shark_args.save_vmfb:
            self.vmfb_file = self.save_module(shark_args.repro_dir)

    # All the timings and benchmarking can be done here.
    def forward(self, input, frontend):
        return get_results(self.iree_compilation_module, input,
                           self.iree_config, frontend)

    # TODO: Instead of passing a directory and having names decided by the
    # module, the user may want to save the module with manual names.
    def save_module(self, dir=os.getcwd()):
        return export_iree_module_to_vmfb(self.model, self.device, dir,
                                          self.frontend)

    # TODO: Load a module and directly use it; we will need to set the
    # frontend in this case.
    def load_module(self, name):
        pass


class SharkEagerMode:

    def __init__(self, device="cpu"):
        if device == "refbackend":
            torch_mlir_tensor.backend = EagerModeRefBackend()
        else:
            torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(
                device)
        self.guard = enable_torch_dispatch_mode(TorchMLIRTensor)
        self.guard.__enter__()

    def __del__(self):
        self.guard.__exit__(None, None, None)


class SharkBenchmarkRunner(SharkRunner):
    # SharkRunner derived class with benchmarking capabilities.
    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = False,
        frontend: str = "torch",
    ):
        SharkRunner.__init__(self, model, input, dynamic, device, jit_trace,
                             from_aot, frontend)
        if self.vmfb_file is None:
            self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
                                                        shark_args.repro_dir,
                                                        frontend)
        self.benchmark_cl = build_benchmark_args(self.vmfb_file, device, input,
                                                 frontend, from_aot)

    def benchmark_frontend(self, inputs):
        if self.frontend in ["pytorch", "torch"]:
            self.benchmark_torch(inputs)
        elif self.frontend in ["tensorflow", "tf"]:
            self.benchmark_tf(inputs)

    def benchmark_torch(self, inputs):
        inputs = self.input if self.from_aot else inputs
        inputs = inputs[0]
        for i in range(shark_args.num_warmup_iterations):
            self.frontend_model.forward(inputs)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.frontend_model.forward(inputs)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_tf(self, inputs):
        for i in range(shark_args.num_warmup_iterations):
            self.frontend_model.forward(*inputs)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.frontend_model.forward(*inputs)
            if i == shark_args.num_iterations - 1:
                end = time.time()
                break
        print(
            f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )
        return

    def benchmark_c(self):
        result = run_benchmark_module(self.benchmark_cl)
        print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")

    def benchmark_python(self, inputs):
        inputs = self.input if self.from_aot else inputs
        input_list = [x for x in inputs]
        for i in range(shark_args.num_warmup_iterations):
            self.forward(input_list, self.frontend)

        begin = time.time()
        for i in range(shark_args.num_iterations):
            out = self.forward(input_list, self.frontend)
            if i == shark_args.num_iterations - 1:
                end = time.time()
        print(
            f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
        )

    def benchmark_all(self, inputs):
        self.benchmark_frontend(inputs)
        self.benchmark_python(inputs)
        self.benchmark_c()
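
# Sketch of how SharkEagerMode is meant to be held (an assumption from the
# class above, not an official recipe): the guard routes torch dispatch
# through TorchMLIRTensor for as long as the object stays alive.
#
#   eager = SharkEagerMode("cpu")
#   # ... run torch ops here; they dispatch through the eager backend ...
#   del eager  # exits the dispatch mode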
@@ -1,139 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner
from shark.backward_makefx import MakeFxModule
import numpy as np
from tqdm import tqdm
import time
import sys


# Prints to stderr.
def print_err(*a):
    print(*a, file=sys.stderr)


class SharkTrainer:
    """Trains a pytorch or tensorflow module on the shark runtime."""

    def __init__(
        self,
        model,
        input: tuple,
        dynamic: bool = False,
        device: str = None,
        jit_trace: bool = False,
        from_aot: bool = True,
    ):
        self.model = model
        # Change tuple to list.
        self.input = [x for x in input]
        self.dynamic = dynamic
        self.jit_trace = jit_trace
        self.from_aot = from_aot

        # By default it's the torch frontend.
        self.frontend = "pytorch"
        self.device = device if device is not None else shark_args.device

        self.shark_runner = None

    # Sets the frontend, i.e. `pytorch` or `tensorflow`.
    def set_frontend(self, frontend: str):
        if frontend not in [
                "pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg", "tosa"
        ]:
            print_err("frontend not supported.")
        else:
            self.frontend = frontend

    # A training function is needed in the case of torch_fn.
    def compile(self, training_fn=None):
        if self.frontend in ["torch", "pytorch"]:
            aot_module = MakeFxModule(self.model,
                                      tuple(self.input),
                                      custom_inference_fn=training_fn)
            aot_module.generate_graph()
            # Returns the backward graph.
            training_graph = aot_module.training_graph
            weights = self.get_torch_params()
            self.shark_runner = SharkRunner(training_graph,
                                            weights + self.input, self.dynamic,
                                            self.device, self.jit_trace,
                                            self.from_aot, self.frontend)
        elif self.frontend in ["tensorflow", "tf", "mhlo"]:
            self.shark_runner = SharkRunner(self.model, self.input,
                                            self.dynamic, self.device,
                                            self.jit_trace, self.from_aot,
                                            self.frontend)
        else:
            print_err("Unknown frontend")
            return

    # The inputs to the mlir-graph are weights, buffers and inputs respectively.
    def get_torch_params(self):
        params = [i.detach() for i in self.model.parameters()]
        buffers = [i.detach() for i in self.model.buffers()]
        return params + buffers

    # Function to train a pytorch module.
    def _train_torch(self, num_iters):
        """Returns the updated weights after num_iters."""
        params = self.get_torch_params()
        params = [x.numpy() for x in params]
        print(f"Training started for {num_iters} iterations:")
        for i in tqdm(range(num_iters)):
            params = self.shark_runner.forward(params + self.input,
                                               self.frontend)

        return params

    # Function to train a tensorflow module.
    # Outputs the final loss.
    # TODO(raikonenfnu): Save updated weights/states in SHARK.
    def _train_tf(self, num_iters):
        input_list = []
        for x in self.input:
            if isinstance(x, list):
                nested_list = []
                for val in x:
                    if isinstance(val, np.ndarray):
                        nested_list.append(val)
                    else:
                        nested_list.append(val.numpy())
                input_list.append(nested_list)
            elif isinstance(x, np.ndarray):
                input_list.append(x)
            else:
                input_list.append(x.numpy())

        print(f"Training started for {num_iters} iterations:")
        for i in tqdm(range(num_iters)):
            outputs = self.shark_runner.forward(input_list, self.frontend)
        return outputs

    def train(self, num_iters=1):
        if self.frontend in ["torch", "pytorch"]:
            return self._train_torch(num_iters)
        elif self.frontend in ["tf", "tensorflow", "mhlo"]:
            return self._train_tf(num_iters)
        else:
            print_err("Unknown frontend")
            return
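
# Minimal usage sketch, assuming `model` is a torch.nn.Module whose forward
# computes a loss and `inp` a matching torch tensor; updated parameters come
# back as numpy arrays:
#
#   trainer = SharkTrainer(model, (inp,))
#   trainer.compile()
#   updated_params = trainer.train(num_iters=10)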
@@ -1,52 +0,0 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest

model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"


# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
    for input in input_details:
        print("\t%s, %s" % (str(input["shape"]), input["dtype"].__name__))

    args = []
    args.append(
        np.random.randint(low=0,
                          high=256,
                          size=input_details[0]["shape"],
                          dtype=input_details[0]["dtype"]))
    args.append(
        np.ones(shape=input_details[1]["shape"],
                dtype=input_details[1]["dtype"]))
    args.append(
        np.zeros(shape=input_details[2]["shape"],
                 dtype=input_details[2]["dtype"]))
    return args


# A specific case can be run by commenting out the other cases. Runs all the
# tests across cpu, gpu and vulkan according to available drivers.
pytest_param = pytest.mark.parametrize(
    ('dynamic', 'device'),
    [
        pytest.param(False, 'cpu'),
        # TODO: Language models are failing for the dynamic case.
        pytest.param(True, 'cpu', marks=pytest.mark.skip),
    ])


@pytest_param
def test_albert(dynamic, device):
    my_shark_importer = SharkImporter(model_path=model_path,
                                      model_type="tflite",
                                      model_source_hub="tfhub",
                                      device=device,
                                      dynamic=dynamic,
                                      jit_trace=True)
    input_details, output_details = my_shark_importer.get_model_details()
    inputs = generate_inputs(input_details)  # device inputs
    my_shark_importer.compile(inputs)
    shark_results = my_shark_importer.forward(inputs)
    # print(shark_results)
@@ -1,133 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import io
import pickle
import sys
import os

from io import StringIO
from torch_mlir.dialects.torch.importer.jit_ir import (
    ClassAnnotator,
    ModuleBuilder,
)
from torch_mlir_e2e_test.torchscript.serialization import (
    extract_serializable_annotations, apply_serializable_annotations,
    SerializableTest)

from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend

from torch_mlir.passmanager import PassManager
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
from torch_mlir.ir import StringAttr


def get_module_name_for_asm_dump(module):
    """Gets a name suitable for an assembly dump.

    The name is not guaranteed to be unique.
    """
    if "torch.debug_module_name" not in module.operation.attributes:
        return "UnnamedModule"
    return StringAttr(
        module.operation.attributes["torch.debug_module_name"]).value


def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
    """Generates the annotation list for the given inputs.

    The leading None stands for the `self` argument; each input contributes
    a (shape, dtype, has_value_semantics) tuple. Dynamic dims are -1.
    """

    annotations_list = [None]
    for i in inputs:
        temp_list = []
        if dynamic:
            temp_list.append([-1 for i in range(len(i.shape))])
        else:
            temp_list.append(list(i.shape))
        temp_list.append(i.dtype)
        temp_list.append(True)
        annotations_list.append(tuple(temp_list))
    return annotations_list


def run_on_refbackend(torch_module, inputs):
    backend = refbackend.RefBackendLinalgOnTensorsBackend()
    compiled = backend.compile(torch_module)
    jit_module = backend.load(compiled)
    np_inputs = [x.numpy() for x in inputs]
    return jit_module.forward(np_inputs[0])


def shark_jit_trace(module, input: tuple, dynamic: bool,
                    tracing_required: bool):
    """Scripts the module (tracing it first when required) and attaches the
    shape/dtype annotations that the importer needs."""

    if not tracing_required:
        return torch.jit.script(module)

    traced_module = torch.jit.trace_module(module, {"forward": input})
    actual_script = traced_module._actual_script_module
    export(actual_script.forward)
    annotate_args_decorator = annotate_args(
        get_input_annotations(input, dynamic))
    annotate_args_decorator(actual_script.forward)
    module = torch.jit.script(actual_script)

    # TODO: remove saved annotations.pickle
    torchscript_module_bytes = module.save_to_buffer({
        "annotations.pkl":
            pickle.dumps(extract_serializable_annotations(module))
    })
    serializable_test = SerializableTest(unique_name="",
                                         program=torchscript_module_bytes,
                                         trace=None)
    _extra_files = {"annotations.pkl": ""}
    module = torch.jit.load(io.BytesIO(serializable_test.program),
                            _extra_files=_extra_files)
    # Load the pickled annotations.
    annotations = pickle.loads(_extra_files["annotations.pkl"])
    apply_serializable_annotations(module, annotations)
    return module


def get_torch_mlir_module(
    module,
    input: tuple,
    dynamic: bool,
    tracing_required: bool,
    from_aot: bool = False,
):
    """Imports a torch module and lowers it through the torch backend to the
    linalg-on-tensors contract, returning the resulting MLIR module."""

    # Tracing is not required from the aot_module.
    if not from_aot:
        module = shark_jit_trace(module, input, dynamic, tracing_required)

    mb = ModuleBuilder()
    class_annotator = ClassAnnotator()
    class_annotator.exportNone(module._c._type())
    class_annotator.exportPath(module._c._type(), ["forward"])
    class_annotator.annotateArgs(
        module._c._type(),
        ["forward"],
        get_input_annotations(input, dynamic),
    )
    mb.import_module(module._c, class_annotator)

    with mb.module.context:
        pm = PassManager.parse(
            "torchscript-module-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline"
        )
        pm.run(mb.module)

    return mb.module
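
# For a single static input of shape (1, 128) and dtype torch.int64,
# get_input_annotations() above produces (the leading None stands for `self`):
#
#   [None, ([1, 128], torch.int64, True)]
#
# With dynamic=True the shape entry becomes [-1, -1] instead.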
@@ -1,74 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import torch
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib

torch.manual_seed(0)

##################### Hugging Face LM Models ###################################


class HuggingFaceLanguage(torch.nn.Module):

    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels, 2 for binary classification.
            output_attentions=False,  # Whether the model returns attention weights.
            output_hidden_states=False,  # Whether the model returns all hidden states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_model(name):
    model = HuggingFaceLanguage(name)
    # TODO: Currently the test input is set to (1,128)
    test_input = torch.randint(2, (1, 128))
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################

##################### Torch Vision Models ###################################


class VisionModule(torch.nn.Module):

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


def get_vision_model(torch_model):
    model = VisionModule(torch_model)
    # TODO: Currently the test input is set to (1, 3, 224, 224)
    test_input = torch.randn(1, 3, 224, 224)
    actual_out = model(test_input)
    return model, test_input, actual_out


################################################################################


# Utility function for comparing two tensors (torch).
def compare_tensors(torch_tensor, numpy_tensor):
    # Setting the absolute and relative tolerance.
    rtol = 1e-02
    atol = 1e-03
    torch_to_numpy = torch_tensor.detach().numpy()
    return np.allclose(torch_to_numpy, numpy_tensor, rtol, atol)
@@ -1,63 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers

import tensorflow as tf
import numpy as np
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
import importlib

##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class TFHuggingFaceLanguage(tf.Module):

    def __init__(self, hf_model_name):
        super(TFHuggingFaceLanguage, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=tf_bert_input)
    def forward(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


def get_TFhf_model(name):
    model = TFHuggingFaceLanguage(name)
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
                  encoded_input["token_type_ids"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out


# Utility function for comparing two tensors (tensorflow).
def compare_tensors_tf(tf_tensor, numpy_tensor):
    # Setting the absolute and relative tolerance.
    rtol = 1e-02
    atol = 1e-03
    tf_to_numpy = tf_tensor.pooler_output.numpy()
    return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)
@@ -1,92 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

# torch.manual_seed(0)


class AlbertModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("albert-base-v2")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class AlbertModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = AlbertModuleTester()
        self.module_tester.save_mlir = self.save_mlir

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Albert model on GPU currently fails to produce torch numbers")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Static albert model on vulkan currently fails to validate.")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class AlexnetModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.alexnet(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class AlexnetModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = AlexnetModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,91 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

# torch.manual_seed(0)


class BertModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("bert-base-uncased")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class BertModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = BertModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="BERT model on GPU currently fails to produce torch numbers")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,3 +0,0 @@
def pytest_addoption(parser):
    # Attaches SHARK command-line arguments to the pytest machinery.
    parser.addoption("--save_mlir",
                     action="store_true",
                     default=False,
                     help="Pass option to save input MLIR module to /tmp/ directory.")
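
# With the hook above in place, the flag can be passed straight to pytest
# (illustrative invocation; any test file in the suite works):
#
#   pytest <some_test_file>.py --save_mlir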
@@ -1,91 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_hf_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import pytest

torch.manual_seed(0)


class MiniLMModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(model, (input,),
                                      device=self.device,
                                      dynamic=self.dynamic,
                                      jit_trace=True)
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class MiniLMModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        self.module_tester = MiniLMModuleTester()

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="minilm inference on gpu currently returns invalid results")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.xfail(reason="language models failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,89 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet101ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet101(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet101ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet101ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet18ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet18(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet18ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet18ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class Resnet50ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.resnet50(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class Resnet50ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = Resnet50ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class SqueezenetModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.squeezenet1_0(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class SqueezenetModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = SqueezenetModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,111 +0,0 @@
import argparse
import os
from functools import partial

import clip
import torch
from torchvision import transforms
from tqdm import trange

try:
    from diffusion import get_model, sampling, utils
except ModuleNotFoundError:
    print(
        "You need to download v-diffusion source from https://github.com/crowsonkb/v-diffusion-pytorch"
    )
    raise

torch.manual_seed(0)


def parse_prompt(prompt, default_weight=3.0):
    # Split a "text:weight" prompt, keeping "http(s)://" colons intact and
    # falling back to default_weight when no weight is given.
    if prompt.startswith("http://") or prompt.startswith("https://"):
        vals = prompt.rsplit(":", 2)
        vals = [vals[0] + ":" + vals[1], *vals[2:]]
    else:
        vals = prompt.rsplit(":", 1)
    vals = vals + ["", default_weight][len(vals):]
    return vals[0], float(vals[1])


args = argparse.Namespace(
    prompts=["New York City, oil on canvas"],
    batch_size=1,
    device="cuda",
    model="cc12m_1_cfg",
    n=1,
    steps=10,
)

device = torch.device(args.device)
print("Using device:", device)

model = get_model(args.model)()
_, side_y, side_x = model.shape
checkpoint = f"{args.model}.pth"
if os.path.exists(checkpoint):
    model.load_state_dict(torch.load(checkpoint, map_location="cpu"))

model = model.to(device).eval().requires_grad_(False)
clip_model_name = model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
clip_model.eval().requires_grad_(False)
normalize = transforms.Normalize(
    mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]
)

zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
target_embeds, weights = [zero_embed], []

txt, weight = parse_prompt(args.prompts[0])
target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
weights.append(weight)

weights = torch.tensor([1 - sum(weights), *weights], device=device)


def cfg_model_fn(model, x, t):
    n = x.shape[0]
    n_conds = len(target_embeds)
    x_in = x.repeat([n_conds, 1, 1, 1])
    t_in = t.repeat([n_conds])
    clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
    vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
    v = vs.mul(weights[:, None, None, None, None]).sum(0)
    return v


x = torch.randn([args.n, 3, side_y, side_x], device=device)
t = torch.linspace(1, 0, args.steps + 1, device=device)[:-1]


def repro(model):
    if device.type == "cuda":
        model = model.half()

    steps = utils.get_spliced_ddpm_cosine_schedule(t)
    for i in trange(0, args.n, args.batch_size):
        cur_batch_size = min(args.n - i, args.batch_size)
        outs = sampling.plms_sample(
            partial(cfg_model_fn, model), x[i : i + cur_batch_size], steps, {}
        )
        for j, out in enumerate(outs):
            utils.to_pil_image(out).save(f"out_{i + j:05}.png")


def trace(model, x, t):
    n = x.shape[0]
    n_conds = len(target_embeds)
    x_in = x.repeat([n_conds, 1, 1, 1])
    t_in = t.repeat([n_conds])
    clip_embed_in = torch.cat([*target_embeds]).repeat_interleave(n, 0)
    ts_mod = torch.jit.trace(model, (x_in, t_in, clip_embed_in))
    print(ts_mod.graph)

    clip_model = clip.load(clip_model_name, jit=True, device=device)[0]
    print(clip_model.graph)


# You can't run both of these because repro will `.half()` the model.
# repro(model)
trace(model, x, t[0])
Binary file not shown (deleted image, 145 KiB).
@@ -1,90 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils import get_vision_model, compare_tensors
from shark.parser import shark_args

import torch
import unittest
import numpy as np
import torchvision.models as models
import pytest

torch.manual_seed(0)


class WideResnet50ModuleTester:

    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir

    def create_and_check_module(self):
        model, input, act_out = get_vision_model(models.wide_resnet50_2(pretrained=True))
        shark_args.save_mlir = self.save_mlir
        shark_module = SharkInference(
            model,
            (input,),
            device=self.device,
            dynamic=self.dynamic,
        )
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors(act_out, results)


class WideResnet50ModuleTest(unittest.TestCase):

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")

    def setUp(self):
        # As in the other tests: pass the flag as a keyword argument.
        self.module_tester = WideResnet50ModuleTester(save_mlir=self.save_mlir)

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    def test_module_dynamic_cpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(check_device_drivers("gpu"), reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "gpu"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()

    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        self.module_tester.dynamic = True
        self.module_tester.device = "vulkan"
        self.module_tester.create_and_check_module()


if __name__ == '__main__':
    unittest.main()
@@ -1,15 +0,0 @@
## Running SharkInference on CPUs, GPUs, and Macs

### Run the sequence_classification binary.
#### Supported models: [Hugging Face sequence classification](https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification)
```shell
./seq_classification.py --hf_model_name="hf_model" --device="cpu" # Use gpu | vulkan
```

Once the model is compiled to run on the chosen device, we can pass in text and
get the logits, as sketched below.
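A minimal sketch of that flow, assuming the `SeqClassification` wrapper and `preprocess_input` helper defined in `seq_classification.py` (shown later in this diff); the model name is just an example:

```python
# Sketch only: SeqClassification and preprocess_input come from
# seq_classification.py in this directory.
from shark.shark_inference import SharkInference

compile_inputs = preprocess_input()  # default text, used only to trace/compile
shark_module = SharkInference(
    SeqClassification("bert-base-uncased"),
    (compile_inputs["input_ids"], compile_inputs["attention_mask"]))
shark_module.set_frontend("tensorflow")
shark_module.compile()

# Any text can now be classified; forward() returns the softmaxed logits.
inputs = preprocess_input("This movie was great!")
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
```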
@@ -1,47 +0,0 @@
from transformers import TFAutoModelForMaskedLM
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of input signatures.
inputs_signature = [
    tf.TensorSpec(shape=[1, 512], dtype=tf.int32),
]


class AutoModelMaskedLM(tf.Module):

    def __init__(self, model_name):
        super(AutoModelMaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(model_name,
                                                        output_attentions=False)
        self.m.predict = lambda x: self.m(input_ids=x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids):
        return self.m.predict(input_ids)


fail_models = ["microsoft/deberta-base", "google/rembert", "google/tapas-base"]

supported_models = [
    "albert-base-v2", "bert-base-uncased", "camembert-base",
    "dbmdz/convbert-base-turkish-cased", "distilbert-base-uncased",
    "google/electra-small-discriminator",
    "hf-internal-testing/tiny-random-flaubert", "funnel-transformer/small",
    "microsoft/layoutlm-base-uncased", "allenai/longformer-base-4096",
    "google/mobilebert-uncased", "microsoft/mpnet-base", "roberta-base",
    "xlm-roberta-base"
]

if __name__ == "__main__":
    inputs = tf.random.uniform(shape=[1, 512],
                               maxval=3,
                               dtype=tf.int32,
                               seed=10)

    for model_name in supported_models:
        print(f"Running model: {model_name}")
        shark_module = SharkInference(AutoModelMaskedLM(model_name), (inputs,))
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        print(shark_module.forward((inputs,)))
@@ -1,90 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input0: input_word_ids
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input1: input_mask
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input2: segment_ids
        tf.TensorSpec([BATCH_SIZE], tf.int32)  # input3: labels
    ])
    def learn(self, input_word_ids, input_mask, segment_ids, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            inputs = [input_word_ids, input_mask, segment_ids]
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        inputs = [input_word_ids, input_mask, segment_ids]
        return self.m.predict(inputs)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer (import_only=True
    # returns the textual module rather than a compiled binary).
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,123 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false",
        "--iree-stream-resource-index-bits=64", "--iree-vm-target-index-bits=64"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    learn_sample_input = [
        predict_sample_input,
        np.random.randint(5, size=(BATCH_SIZE))
    ]
    # Benchmark: ignore the first `warmup` iterations.
    warmup = 5
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the clock only after the last warmup iteration completes.
        if i == warmup - 1:
            start = time.time()
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,85 +0,0 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=24,
                                            hidden_size=1024,
                                            num_attention_heads=16,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    bert_model = BertModule()
    # Benchmark: the clock starts after the last warmup iteration completes.
    warmup = 1
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            bert_model.learn(predict_sample_input,
                             np.random.randint(5, size=(BATCH_SIZE))))
        if i == warmup - 1:
            start = time.time()

    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,89 +0,0 @@
from iree import runtime as ireert
# from iree.tf.support import module_utils
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input0: input_word_ids
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input1: input_mask
        tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH],
                      dtype=tf.int32),  # input2: segment_ids
        tf.TensorSpec([BATCH_SIZE], tf.int32)  # input3: labels
    ])
    def learn(self, input_word_ids, input_mask, segment_ids, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            inputs = [input_word_ids, input_mask, segment_ids]
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        inputs = [input_word_ids, input_mask, segment_ids]
        return self.m.predict(inputs)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)
    print(type(compiler_module))
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,120 +0,0 @@
from iree import runtime as ireert
from iree.tf.support import module_utils
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app
import time

import numpy as np
import os
import tempfile
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["learn"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    learn_sample_input = [
        predict_sample_input,
        np.random.randint(5, size=(BATCH_SIZE))
    ]
    # Benchmark: ignore the first `warmup` iterations.
    warmup = 5
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            BertCompiled.learn(predict_sample_input,
                               np.random.randint(5, size=(BATCH_SIZE))))
        # Start the clock only after the last warmup iteration completes.
        if i == warmup - 1:
            start = time.time()
    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,83 +0,0 @@
import numpy as np
import os
import tempfile
import tensorflow as tf
import time

from official.nlp.modeling import layers
from official.nlp.modeling import networks
from official.nlp.modeling.models import bert_classifier

vocab_size = 100
NUM_CLASSES = 5
SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        dict_outputs = False
        test_network = networks.BertEncoder(vocab_size=vocab_size,
                                            num_layers=2,
                                            dict_outputs=dict_outputs)

        # Create a BERT trainer with the created network.
        bert_trainer_model = bert_classifier.BertClassifier(
            test_network, num_classes=NUM_CLASSES)
        bert_trainer_model.summary()

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m = bert_trainer_model
        self.m.predict = lambda x: self.m.call(x, training=False)
        self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
        self.m.learn = lambda x, y: self.m.call(x, training=False)
        self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)

    @tf.function(input_signature=[
        bert_input,  # inputs
        tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32)  # labels
    ])
    def learn(self, inputs, labels):
        with tf.GradientTape() as tape:
            # Capture the gradients from forward prop...
            probs = self.m(inputs, training=True)
            loss = self.loss(labels, probs)

        # ...and use them to update the model's weights.
        variables = self.m.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss


if __name__ == "__main__":
    predict_sample_input = [
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
        np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
    ]
    bert_model = BertModule()
    # Benchmark: the clock starts after the last warmup iteration completes.
    warmup = 1
    total_iter = 10
    num_iter = total_iter - warmup
    for i in range(total_iter):
        print(
            bert_model.learn(predict_sample_input,
                             np.random.randint(5, size=(BATCH_SIZE))))
        if i == warmup - 1:
            start = time.time()

    end = time.time()
    total_time = end - start
    print("time: " + str(total_time))
    print("time/iter: " + str(total_time / num_iter))
@@ -1,52 +0,0 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

from transformers import BertModel, BertTokenizer, TFBertModel

SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def predict(self, input_word_ids, input_mask, segment_ids):
        return self.m.predict(input_word_ids, input_mask, segment_ids)


if __name__ == "__main__":
    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["predict"],
                                         import_only=True)
    # Save module as MLIR file in a directory.
    ARTIFACTS_DIR = os.getcwd()
    mlir_path = os.path.join(ARTIFACTS_DIR, "model.mlir")
    with open(mlir_path, "wt") as output_file:
        output_file.write(compiler_module.decode('utf-8'))
    print(f"Wrote MLIR to path '{mlir_path}'")
@@ -1,87 +0,0 @@
from iree import runtime as ireert
from iree.compiler import tf as tfc
from iree.compiler import compile_str
import sys
from absl import app

import numpy as np
import os
import tempfile
import tensorflow as tf

import time
from transformers import BertModel, BertTokenizer, TFBertModel

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class BertModule(tf.Module):

    def __init__(self):
        super(BertModule, self).__init__()
        # Create a BERT trainer with the created network.
        self.m = TFBertModel.from_pretrained(
            "microsoft/MiniLM-L12-H384-uncased", from_pt=True)

        # Invoke the trainer model on the inputs. This causes the layer to be built.
        self.m.predict = lambda x, y, z: self.m.call(
            input_ids=x, attention_mask=y, token_type_ids=z, training=False)

    @tf.function(input_signature=bert_input)
    def predict(self, input_ids, attention_mask, token_type_ids):
        return self.m.predict(input_ids, attention_mask, token_type_ids)


if __name__ == "__main__":
    # Prep the data.
    tokenizer = BertTokenizer.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased")
    text = "Replace me by any text you'd like."
    encoded_input = tokenizer(text,
                              padding='max_length',
                              truncation=True,
                              max_length=MAX_SEQUENCE_LENGTH)
    for key in encoded_input:
        encoded_input[key] = tf.expand_dims(
            tf.convert_to_tensor(encoded_input[key]), 0)

    # Import the model to MLIR using the IREE TF importer.
    compiler_module = tfc.compile_module(BertModule(),
                                         exported_names=["predict"],
                                         import_only=True)

    # Compile the imported module using IREE.
    backend = "dylib-llvm-aot"
    args = [
        "--iree-llvm-target-cpu-features=host",
        "--iree-mhlo-demote-i64-to-i32=false", "--iree-flow-demote-i64-to-i32"
    ]
    backend_config = "dylib"
    # backend = "cuda"
    # backend_config = "cuda"
    # args = ["--iree-cuda-llvm-target-arch=sm_80", "--iree-hal-cuda-disable-loop-nounroll-wa", "--iree-enable-fusion-with-reduction-ops"]
    flatbuffer_blob = compile_str(compiler_module,
                                  target_backends=[backend],
                                  extra_args=args,
                                  input_type="mhlo")

    # Load the compiled module into the IREE runtime.
    vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
    tracer = ireert.Tracer(os.getcwd())
    config = ireert.Config(backend_config, tracer)
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
    BertCompiled = ctx.modules.module
    result = BertCompiled.predict(encoded_input["input_ids"],
                                  encoded_input["attention_mask"],
                                  encoded_input["token_type_ids"])
    print(result)
@@ -1,18 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel

tf_model = TFBertModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased",
                                       from_pt=True)
tokenizer = BertTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")

text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
                          padding='max_length',
                          truncation=True,
                          max_length=512)
for key in encoded_input:
    encoded_input[key] = tf.expand_dims(
        tf.convert_to_tensor(encoded_input[key]), 0)
output = tf_model(encoded_input)

print(output)
@@ -1,99 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from tank.model_utils_tf import get_TFhf_model, compare_tensors_tf

import tensorflow as tf
import unittest
import numpy as np
import pytest

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of 2-dimensional inputs.
tf_bert_input = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class MiniLMTFModuleTester:

    def create_and_check_module(self, dynamic, device):
        model, input, act_out = get_TFhf_model(
            "microsoft/MiniLM-L12-H384-uncased")
        shark_module = SharkInference(model, (input,),
                                      device=device,
                                      dynamic=dynamic,
                                      jit_trace=True)
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        results = shark_module.forward((input,))
        assert compare_tensors_tf(act_out, results)


class MiniLMTFModuleTest(unittest.TestCase):

    def setUp(self):
        self.module_tester = MiniLMTFModuleTester()

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    def test_module_dynamic_cpu(self):
        dynamic = True
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.skipif(check_device_drivers("gpu"),
                        reason="nvidia-smi not found")
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(check_device_drivers("gpu"),
                        reason="nvidia-smi not found")
    def test_module_dynamic_gpu(self):
        dynamic = True
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(reason="TF testing temporarily unavailable.")
    @pytest.mark.xfail(
        reason="Language models currently failing for dynamic case")
    @pytest.mark.skipif(
        check_device_drivers("vulkan"),
        reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
    )
    def test_module_dynamic_vulkan(self):
        dynamic = True
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)


if __name__ == '__main__':
    unittest.main()
@@ -1,70 +0,0 @@
#!/usr/bin/env python
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.parser import shark_args
import argparse
import os


seq_parser = argparse.ArgumentParser(description='Shark Sequence Classification.')
seq_parser.add_argument(
    "--hf_model_name",
    type=str,
    default="bert-base-uncased",
    help="Hugging Face model to run sequence classification.")

seq_args, unknown = seq_parser.parse_known_args()


BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signatures.
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForSequenceClassification


def preprocess_input(text="This is just used to compile the model"):
    tokenizer = AutoTokenizer.from_pretrained(seq_args.hf_model_name)
    inputs = tokenizer(text,
                       padding="max_length",
                       return_tensors="tf",
                       truncation=True,
                       max_length=MAX_SEQUENCE_LENGTH)
    return inputs


class SeqClassification(tf.Module):

    def __init__(self, model_name):
        super(SeqClassification, self).__init__()
        self.m = TFAutoModelForSequenceClassification.from_pretrained(
            model_name, output_attentions=False, num_labels=2)
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return tf.math.softmax(self.m.predict(input_ids, attention_mask),
                               axis=-1)


if __name__ == "__main__":
    inputs = preprocess_input()
    shark_module = SharkInference(
        SeqClassification(seq_args.hf_model_name),
        (inputs["input_ids"], inputs["attention_mask"]))
    shark_module.set_frontend("tensorflow")
    shark_module.compile()
    print(f"Model has been successfully compiled on {shark_args.device}")

    while True:
        input_text = input("Enter the text to classify (press q or nothing to exit): ")
        if not input_text or input_text == "q":
            break
        inputs = preprocess_input(input_text)
        print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
tank/tflite/.gitignore (vendored)
@@ -1,2 +0,0 @@
tmp/
.lit_test_times.txt

@@ -1,15 +0,0 @@
# Sample compile and execution of TFLite models

This directory contains test scripts to compile/run/compare various TFLite
models from TFHub. It aims for simplicity and hackability.

Follow the instructions at the repository root to install a functioning
python venv. Then you can just run individual python files.

Or, use something like the following to collect all artifacts and traces,
which can be fed to other tools:

```
export IREE_SAVE_TEMPS="/tmp/iree/models/{main}/{id}"
for i in *.py; do export IREE_SAVE_CALLS=/tmp/iree/traces/$i; python $i; done
```
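Each per-model script in this directory follows the same basic shape: a subclass of the shared `test_util.TFLiteModelTest` harness pointing at a `.tflite` URL, with `compile_and_execute()` driving compilation and the IREE-vs-TFLite comparison. A minimal sketch of that pattern is below; the class name `SampleModelTest` is hypothetical, and the model URL is borrowed from the DenseNet test further down.

```
# RUN: %PYTHON %s
# Minimal, hypothetical example of the per-model test pattern used in this
# directory; real tests may also override generate_inputs/compare_results.

import absl.testing
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"


class SampleModelTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(SampleModelTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        # Fetches the flatbuffer, compiles it, runs it, and compares the
        # compiled outputs against the TFLite interpreter's outputs.
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()
```
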
@@ -1,44 +0,0 @@
# RUN: %PYTHON %s
import numpy as np
from shark.shark_importer import SharkImporter
import pytest

model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"


# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
    for input_detail in input_details:
        print(str(input_detail["shape"]), input_detail["dtype"].__name__)

    args = []
    args.append(
        np.random.randint(low=0,
                          high=256,
                          size=input_details[0]["shape"],
                          dtype=input_details[0]["dtype"]))
    args.append(
        np.ones(shape=input_details[1]["shape"],
                dtype=input_details[1]["dtype"]))
    args.append(
        np.zeros(shape=input_details[2]["shape"],
                 dtype=input_details[2]["dtype"]))
    return args


if __name__ == '__main__':
    my_shark_importer = SharkImporter(model_path=model_path,
                                      model_type="tflite",
                                      model_source_hub="tfhub",
                                      device="cpu",
                                      dynamic=False,
                                      jit_trace=True)
    # Case 1: Use default inputs.
    my_shark_importer.compile()
    shark_results = my_shark_importer.forward()
    # Case 2: Use manually set inputs.
    input_details, output_details = my_shark_importer.get_model_details()
    inputs = generate_inputs(input_details)  # device_inputs
    my_shark_importer.compile(inputs)
    shark_results = my_shark_importer.forward(inputs)
    # print(shark_results)

@@ -1,22 +0,0 @@
# RUN: %PYTHON %s
# XFAIL: *

import absl.testing
import test_util

model_path = "https://tfhub.dev/neso613/lite-model/ASR_TFLite/pre_trained_models/English/1?lite-format=tflite"


# Failure is due to dynamic shapes:
# - Some improvements to tfl.strided_slice lowering are next steps
class AsrConformerTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(AsrConformerTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,39 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util
import urllib.request

from PIL import Image

model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"


class BirdClassifierTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(BirdClassifierTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(BirdClassifierTest, self).compare_results(iree_results,
                                                        tflite_results, details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

    def generate_inputs(self, input_details):
        img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
        local_path = "/".join([self.workdir, "bird.bmp"])
        urllib.request.urlretrieve(img_path, local_path)

        shape = input_details[0]["shape"]
        im = numpy.array(Image.open(local_path).resize((shape[1], shape[2])))
        args = [im.reshape(shape)]
        return args

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,20 +0,0 @@
# RUN: %PYTHON %s
# REQUIRES: hugetest

import absl.testing
import test_util

model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"


class CartoonGanTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(CartoonGanTest, self).__init__(model_path, *args, **kwargs)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,16 +0,0 @@
import numpy as np
import urllib.request

from PIL import Image


# Returns a sample image in the COCO 2017 dataset in uint8.
def generate_input(workdir, input_details):
    # We use an image of a bear since this is an easy example.
    img_path = "https://storage.googleapis.com/iree-model-artifacts/coco_2017_000000000285.jpg"
    local_path = "/".join([workdir, "coco_2017_000000000285.jpg"])
    urllib.request.urlretrieve(img_path, local_path)

    shape = input_details[0]["shape"]
    im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
    return im.reshape(shape)

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s
# XFAIL: *

import absl.testing
import test_util

model_path = "https://tfhub.dev/tulasiram58827/lite-model/craft-text-detector/dr/1?lite-format=tflite"


# Failure: Resize lowering does not handle inferred dynamic shapes. Furthermore,
# the entire model requires dynamic shape support.
class CraftTextTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(CraftTextTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(CraftTextTest, self).compare_results(iree_results, tflite_results,
                                                   details)

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/deeplabv3/1/metadata/2?lite-format=tflite"


class DeepLabV3Test(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(DeepLabV3Test, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(DeepLabV3Test, self).compare_results(iree_results, tflite_results,
                                                   details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,26 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"


class DenseNetTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(DenseNetTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(DenseNetTest, self).compare_results(iree_results, tflite_results,
                                                  details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-5).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,35 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import numpy
import test_util

model_path = "https://tfhub.dev/sayakpaul/lite-model/east-text-detector/dr/1?lite-format=tflite"


class EastTextDetectorTest(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(EastTextDetectorTest, self).__init__(model_path, *args, **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(EastTextDetectorTest,
              self).compare_results(iree_results, tflite_results, details)
        self.assertTrue(
            numpy.isclose(iree_results[0], tflite_results[0], atol=1e-3).all())

        # The second output is extremely noisy as it is not a binary
        # classification. To handle this we check the normalized correlation
        # with an expectation of "close enough".
        iree_norm = numpy.sqrt(iree_results[1] * iree_results[1])
        tflite_norm = numpy.sqrt(tflite_results[1] * tflite_results[1])

        correlation = numpy.average(iree_results[1] * tflite_results[1] /
                                    iree_norm / tflite_norm)
        self.assertTrue(numpy.isclose(correlation, 1.0, atol=1e-2).all())

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

@@ -1,39 +0,0 @@
# RUN: %PYTHON %s

import absl.testing
import imagenet_test_data
import numpy
import test_util

# Source: https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/int8/2
model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite"


class EfficientnetLite0Int8Test(test_util.TFLiteModelTest):

    def __init__(self, *args, **kwargs):
        super(EfficientnetLite0Int8Test, self).__init__(model_path, *args,
                                                        **kwargs)

    def compare_results(self, iree_results, tflite_results, details):
        super(EfficientnetLite0Int8Test,
              self).compare_results(iree_results, tflite_results, details)
        # Dequantize outputs.
        zero_point = details[0]['quantization_parameters']['zero_points'][0]
        scale = details[0]['quantization_parameters']['scales'][0]
        dequantized_iree_results = (iree_results - zero_point) * scale
        dequantized_tflite_results = (tflite_results - zero_point) * scale
        self.assertTrue(
            numpy.isclose(dequantized_iree_results,
                          dequantized_tflite_results,
                          atol=5e-3).all())

    def generate_inputs(self, input_details):
        return [imagenet_test_data.generate_input(self.workdir, input_details)]

    def test_compile_tflite(self):
        self.compile_and_execute()


if __name__ == '__main__':
    absl.testing.absltest.main()

Some files were not shown because too many files have changed in this diff.