Compare commits

...

130 Commits

Author SHA1 Message Date
powderluv
c9a310842d Merge branch 'main' into MI100 2022-08-03 11:41:31 -07:00
Ean Garvey
38664a4c68 Update README.md (#239) 2022-08-03 11:39:00 -07:00
Chi_Liu
abce0b1c91 Move torch tests up to /tank (#234) 2022-08-03 10:50:53 -07:00
Phaneesh Barwaria
189466bbe4 Mark XFail for M1 Vulkan Failures (#235) 2022-08-02 19:56:02 -07:00
powderluv
c6b4ad1e26 Update test-models.yml 2022-08-02 13:19:10 -07:00
Phaneesh Barwaria
198755788c Check for MacStudio Target Value (#233)
* Identify Apple M2 for Vulkan

* Check for MacOS Target
2022-08-02 13:18:36 -07:00
Ean Garvey
c1a14a8db1 Update wide_resnet50_2_test.py (#229)
Removed old xfails.
2022-08-01 20:38:53 -05:00
Chi_Liu
0e42c19f33 Test tflite quantization_info fail on cloud (#228) 2022-08-01 18:21:04 -07:00
Chi_Liu
0c31bb82cd Add hash of mlir for tf/tflite (#225) 2022-08-01 10:40:49 -07:00
Prashant Kumar
315ec72984 Add check for hash.npy in the models folder. 2022-08-01 22:40:25 +05:30
Ean Garvey
d556c0d6ef Update nightly.yml to remove IMPORTER=1 in package validation. (#226)
* Update nightly.yml to fix wheel building issues.

* Update setup_venv.sh to only install torch+cu116 if IMPORTER=1 is set.
2022-07-31 20:39:59 -07:00
Ean Garvey
0ee515a7be Fix GPU benchmarks on PyTorch model tests. (#218)
* Add CUDA_BENCHMARKS option to setup.venv to enable PyTorch benchmarks on CUDA.

* Fix PyTorch GPU benchmarks for tank models.
2022-07-29 15:17:55 -07:00
Daniel Garvey
4c0deb9899 remove gpu script (#220)
No longer needed after fixes in iree
2022-07-29 15:20:03 -05:00
Chi_Liu
94d9275515 Move tf tests to tank/ (#223) 2022-07-29 09:34:50 -07:00
powderluv
67bdfda58c Update test-models.yml 2022-07-29 09:04:36 -07:00
Chi_Liu
c1cb7bb3fd Rename tank tflite/torch model dir (#219) 2022-07-29 01:56:05 -05:00
Prashant Kumar
ff20ddeb97 Add hash of the mlir for checking upstream changes. (#217)
-- Hashes of the mlir files have been added to the shark_tank. (If there
are upstream changes, the model will be downloaded; otherwise it will not.)
-- The models are now placed in the `~/.local/shark_tank/` folder instead of
the current folder, so the files are not re-downloaded.
2022-07-28 21:54:50 -05:00
Chi_Liu
2c7d879a4e Add tf image classification auto model tests (#216) 2022-07-27 18:38:13 -07:00
Prashant Kumar
4e5c592094 Enable --devices to pass through CLI. 2022-07-27 22:13:52 +05:30
Chi_Liu
af4257d05f Add tf image classification auto model (#213) 2022-07-26 23:18:42 -07:00
Chi_Liu
dc1a283ab7 Remove tf package dep for SharkDownloader tflite tests (#212) 2022-07-26 12:41:12 -07:00
Vivek Khandelwal
cc4fa96831 Remove op decomposition from the v_diffusion.py (#210)
The PyTorch decomposition for the op `aten.upsample_bilinear2d.vec`
is merged in the upstream repo and hence removed from this file.
2022-07-25 17:36:26 +05:30
Ean Garvey
921ccdc40b Force TensorFlow to use CPU when generating model and golden values. (#199)
Split up gpu pytest runs temporarily
2022-07-22 20:21:16 -07:00
Prashant Kumar
49fc6d2f4b Add all the tf models to gs::shark_tank. (#207)
-- Tensorflow models have been imported to shark_tank.
-- TF Tests have been updated to use the downloader.
2022-07-22 11:53:07 -05:00
Prashant Kumar
e62c531170 Update the torch_model for dynamic cases. 2022-07-22 00:21:35 +05:30
Prashant Kumar
3c2f8ef243 Generate shark_tank for tensorflow models.
Updated the generate_sharktank.py script to generate tensorflow models.
2022-07-20 22:37:14 +05:30
Vivek Khandelwal
ec870f45fd Merge pull request #197 from vivekkhandelwal1/v-diffusion
Add v-diffusion model
2022-07-20 21:41:48 +05:30
Vivek Khandelwal
0e2485d85b Add v-diffusion model 2022-07-20 21:36:42 +05:30
Ean Garvey
6401687733 Update TF pytest marks and fix benchmark runner import. (#196) 2022-07-19 15:42:32 -05:00
Prashant Kumar
39c5f940c9 Add v_diffusion model into shark examples. (#195) 2022-07-19 09:51:10 -07:00
powderluv
9fc1c84cef Switch to disable multiprocessing as it fails on macOS (#194) 2022-07-19 01:25:13 -07:00
Ean Garvey
fa6f19679e Merge pull request #186 from nod-ai/benchmarks
Make PyTorch model tests importer-independent and add benchmarks.
2022-07-18 12:23:48 -05:00
Prashant Kumar
9105f5d54e Use the downloaded folder instead of re-downloading shark_tank models. 2022-07-18 17:10:54 +05:30
Prashant Kumar
54a642e76a Update the resnet50 example to use the shark_downloader.
The resnet50 example is updated to use the shark_downloader instead
of shark_importer and inference.
2022-07-18 13:41:38 +05:30
Ean Garvey
d047eff086 Update test-models.yml 2022-07-16 16:38:32 -05:00
monorimet
5da32601c0 Ensure CPU workflow job suite only collects CPU tests. 2022-07-16 13:50:36 -05:00
Ean Garvey
c5ae01311d Make PyTorch model tests importer-independent and add benchmarks. 2022-07-15 21:54:41 -05:00
Prashant Kumar
1191f53c9d Add the shark_downloader for the torch_models. (#184) 2022-07-14 13:41:43 -07:00
Chi_Liu
8434c67d96 Change tflite tests from sharkimporter -> sharkdownloader (#182)
* Change tflite test from sharkimporter -> sharkdownloader

* xfail all uint/int tflite sharkdownloader tests
2022-07-14 13:40:25 -07:00
Ean Garvey
79caf729f7 Merge pull request #169 from nod-ai/tf-tests
Refactor TF tests for importer/runner split
2022-07-14 14:58:18 -05:00
monorimet
b647764b9a Update requirements.txt 2022-07-14 13:07:46 -05:00
monorimet
08448a013f Refactor TF tests for importer split, update pytorch tests. 2022-07-14 12:59:21 -05:00
gpetters94
69f169be1b Update model status (#181) 2022-07-12 20:38:43 -07:00
Prashant Kumar
0dcf387089 Add shark_importer for torch_models. (#183)
All the torch_models are imported to gs::shark_tank.
Scripts have been updated.
2022-07-12 20:38:19 -07:00
Prashant Kumar
2e22d0b690 Bug fix: Pass the device attribute appropriately.
Previously the device attribute was not passed and the device was
hardcoded to "cpu", so every test was running on cpu.
2022-07-09 01:56:17 +05:30
Chi_Liu
b98c9ee484 torch fix in gen_shark_tank (#180) 2022-07-08 10:13:58 -07:00
Prashant Kumar
fa7ee7e099 Update pytorch tests to support vulkan and cuda.
All the model validation tests pass except distilbert, which fails
in torch-mlir lowering. Also added the mobilebert-uncased model to
the torch test suite.
2022-07-08 14:40:13 +05:30
Prashant Kumar
9cc92d0e7d Update the README.md
Update the shark_importer and shark_inference APIs.
2022-07-07 21:08:32 +05:30
Prashant Kumar
2e5cb4ba76 Add resnest model to the shark_inference examples list. 2022-07-07 19:51:53 +05:30
gpetters94
f49a2c3df4 Update README with pending models supported by groups (#176) 2022-07-07 00:01:20 -07:00
Chi_Liu
d496c66d11 Torch support of gen_shark_tank (#175) 2022-07-06 22:42:15 -07:00
Prashant Kumar
1a861acbb2 Add the torch_mlir.compile API for static cases. (#174) 2022-07-06 09:21:17 -07:00
Chi_Liu
1cad50d521 generate shark tank for tflite (#173)
* Add gen_shark_tank support tflite

* gen_shark_tank.py use SharkImporter to save model
2022-07-05 23:11:19 -07:00
powderluv
c351bb50b6 Update test-models.yml 2022-07-03 13:56:45 -07:00
powderluv
ca36d40f0a Update test-models.yml 2022-07-01 12:27:49 -07:00
Chi_Liu
4e9e35a9ff Fix mobilebert return bug (#171) 2022-07-01 05:46:18 -07:00
Chi_Liu
41a8cbb5b6 Fix deeplab&mobilebert tflite test bug (#170) 2022-06-30 21:42:14 -07:00
powderluv
cc11a71ec8 Update test-models.yml 2022-06-30 18:55:38 -07:00
powderluv
1d1a1b131f Update test-models.yml 2022-06-30 18:54:50 -07:00
powderluv
84a2969381 Update test-models.yml 2022-06-30 15:57:07 -07:00
Chi_Liu
babd3d002b Merge pull request #164 from AmosLewis/move
Rewrite & move tflite examples up to the tank dir
2022-06-30 15:47:15 -07:00
Chi Liu
c1cde2e252 Add xfail with sys.platform == "darwin" for mac 2022-06-30 15:17:59 -07:00
Chi Liu
638f982c94 Add Xfail for tflite examples
Known macOS iree-tflite install error
2022-06-30 15:02:05 -07:00
Chi Liu
84978cf3c5 Fix tflite examples fail pytest 2022-06-30 14:57:33 -07:00
Chi Liu
96dd08cca4 Fix load json input bug in SharkDownloader albert test 2022-06-30 14:57:33 -07:00
Chi Liu
06a45d9025 Rewrite tflite SharkImporter tests to be compatible with the most recent SharkImporter 2022-06-30 14:57:33 -07:00
Chi Liu
193a735865 Move tflite examples up to the tank dir 2022-06-30 14:57:33 -07:00
powderluv
8199ea15e8 Update test-models.yml 2022-06-30 14:42:41 -07:00
powderluv
958e0100bf Update test-models.yml 2022-06-30 14:41:26 -07:00
powderluv
6ca726615e Update test-models.yml 2022-06-30 14:40:25 -07:00
powderluv
d1b398cb9b Update test-models.yml 2022-06-30 14:28:37 -07:00
powderluv
6b0bd7181f Update test-models.yml 2022-06-30 14:20:30 -07:00
powderluv
b24ce861d0 Update test-models.yml 2022-06-30 14:14:49 -07:00
powderluv
79e392d85f enable pip cache 2022-06-30 14:11:00 -07:00
powderluv
e57cee21a5 Delete shark/tmp directory 2022-06-30 14:03:35 -07:00
powderluv
a064a32755 Update test-models.yml 2022-06-30 13:53:19 -07:00
powderluv
24ba2bcdc8 Update test-models.yml 2022-06-30 13:50:54 -07:00
Anush Elangovan
a7435973d9 Fix black formatting 2022-06-30 20:42:02 +00:00
powderluv
cf4d363436 Update test-models.yml 2022-06-30 13:38:31 -07:00
powderluv
b012b9a5f7 Update test-models.yml 2022-06-30 13:37:16 -07:00
powderluv
9556060c27 Update test-models.yml 2022-06-30 13:23:20 -07:00
powderluv
1841fcc088 Update test-models.yml 2022-06-30 13:11:11 -07:00
powderluv
36e2f80fdb Update test-models.yml 2022-06-30 13:07:36 -07:00
powderluv
188ec5431f update to matrix builds 2022-06-30 13:04:02 -07:00
powderluv
7532a8db0c Update test-models.yml 2022-06-30 00:44:08 -07:00
powderluv
6ed957f5fb enable fast fail 2022-06-30 00:35:00 -07:00
powderluv
b01ffb5a22 delete black builder 2022-06-30 00:33:14 -07:00
Prashant Kumar
83855e7b08 Capture input information from mlir_graph and generate random inputs. 2022-06-29 22:51:53 +05:30
Chi_Liu
2adea76b8c Enable SharkImporter Tests (#165)
rewrite some tflite code from SharkImporter to tflite_utils.py
2022-06-29 10:11:48 -07:00
Chi_Liu
210d28fce9 Add more tflite shark_importer examples (#163) 2022-06-28 08:39:56 -07:00
Prashant Kumar
b07377cbfd Refactor the shark_runner shark_inference to only support mlir_modules.
1. The shark_inference is divided into shark_importer and
   shark_inference.
2. All the tank/pytorch tests have been updated.
2022-06-28 18:46:18 +05:30
Chi_Liu
44dce561e9 SharkImporter for tflite without forward and compile (#159) 2022-06-23 22:49:35 -07:00
powderluv
4ae9331a77 Update README.md 2022-06-23 22:10:56 -07:00
Ean Garvey
0a6bc6e17f Generate test-specific repro path for each TF model test. (#158)
Set TempFileSaver path directory to shark_args.repro_dir
2022-06-23 21:58:45 -07:00
Chi_Liu
fa0aaf63c2 Fix repo_dir cannot export/write mlir file bug (#157)
-Set repro_dir as a temporary dir within current working directory.
2022-06-23 18:37:16 -07:00
Chi_Liu
b7b2dfaa8d Fix shark_importer.py iree_utils import bug (#156) 2022-06-23 16:40:11 -07:00
powderluv
a7ca9b8d68 Update parser.py 2022-06-23 15:26:17 -07:00
powderluv
2c90ddde5b Update README.md 2022-06-23 15:17:14 -07:00
Ean Garvey
71f5cfcb30 Fix package management in tank for Nightly build. (#152)
Fix formatting.
2022-06-22 15:51:57 -07:00
Chi_Liu
a635b6fbef Add tf/torch/mhlo/tosa support for SharkDownloader (#151) 2022-06-22 11:25:34 -07:00
Prashant Kumar
e8aa105b2a Divide iree_utils and do module imports on function calls. 2022-06-22 14:17:33 +05:30
Ean Garvey
08eda2ce35 Equip TF tests with save_mlir, save_vmfb, and benchmark to .csv options. (#148) 2022-06-21 21:54:44 -07:00
Chi_Liu
206ba11cc8 Add SharkDownloader for end users (#147)
* Add SharkDownloader for user

* Change tank_url to gs://shark_tank
2022-06-21 16:42:23 -07:00
powderluv
6bc9ebad24 Update test-models.yml
ignore onnx benchmarks for now
2022-06-21 13:12:59 -07:00
Prashant Kumar
334d129443 Changes to minilm to make it one-to-one with tensorflow. 2022-06-21 22:08:59 +05:30
Anush Elangovan
28a620f0e2 Remove ONNX importer from requirements for now.
Seems to cause install conflicts.
2022-06-21 16:15:08 +00:00
Chi_Liu
af582925f2 Enable tosa.mlir as input for SharkImporter inference (#145)
* Change shark_importer to use tosa.mlir as tflite model input from  local gen_shark_tank
2022-06-20 23:15:14 -07:00
Chi_Liu
f6e9f2d571 Generate Shark Tank for tflite (#142)
generate shark_tank for tflite
2022-06-20 14:35:35 -07:00
Anush Elangovan
c66e285e2b Enable 64bit for models 2022-06-20 20:25:29 +00:00
Ean Garvey
7198f0db0e Add TempFileSaver pytest option to tank TF tests. (#144)
add/remove pytest marks to TF tests
2022-06-20 10:00:28 -07:00
Eliasj42
aae8754ace Fixed bug where backend would segfault under specific circumstances (#134)
* updated README

* added debug printouts

* added debug printouts

* fixed bug where backend would segfault on models with multiple outputs of different sizes
fixed formatting issues

Co-authored-by: Elias Joseph <elias@nod-labs.com>
2022-06-20 09:56:58 -07:00
Ean Garvey
f1edf88c33 Revert "add/remove pytest marks to TF tests"
This reverts commit b522f933a9.
2022-06-20 16:06:40 +00:00
Ean Garvey
b522f933a9 add/remove pytest marks to TF tests 2022-06-20 15:23:28 +00:00
Ean Garvey
dbabb48582 Add TempFileSaver pytest option to tank TF tests. 2022-06-20 15:02:36 +00:00
Anush Elangovan
2b3fac7519 Vulkan xfails
https://github.com/google/iree/issues/9554
2022-06-18 09:39:09 +00:00
Anush Elangovan
de223e7824 Add xfail reason 2022-06-18 08:31:26 +00:00
powderluv
fd4df79573 Update README.md 2022-06-17 14:16:39 -07:00
powderluv
0093610054 Update test-models.yml
disable a100 cpu for now
2022-06-16 16:27:11 -07:00
Ean Garvey
ce7350f5f8 Mark all TF model tests as expected to fail. (#139) 2022-06-16 16:25:18 -07:00
powderluv
2bcd261a82 updates to green / broken hearts for models 2022-06-16 14:50:56 -07:00
powderluv
918b36c177 Split perf-builder into cpu/gpu/vulkan 2022-06-16 14:08:36 -07:00
Ean Garvey
c8230fe093 Update README.md
Update testing instructions.
2022-06-16 15:49:58 -05:00
powderluv
559d9be4a5 Update README.md 2022-06-16 13:25:12 -07:00
Prashant Kumar
5fbd868296 Update readme for tf models. 2022-06-16 21:55:00 +05:30
powderluv
334b50b963 Update test-models.yml 2022-06-16 07:44:20 -07:00
Prashant Kumar
4d3acff484 Add tf masked lm models (17 models - 12 pass, 5 fail). (#135) 2022-06-16 07:28:00 -07:00
Ean Garvey
f5e67e3aa4 Update README.md 2022-06-15 14:31:57 -05:00
Prashant Kumar
b43d627b20 Add distilbert-base-uncased to tank pytorch test.
distilbert-base-uncased model validation test is added.
2022-06-15 21:14:56 +05:30
powderluv
0ddf7ad5e8 Update nightly.yml 2022-06-14 23:26:37 -07:00
Ean Garvey
dc8c221467 Add option to save .vmfb in pytest + move pytorch model tests. (#131) 2022-06-14 20:39:10 -07:00
216 changed files with 13696 additions and 3513 deletions


@@ -11,7 +11,7 @@ on:
jobs:
build:
runs-on: ubuntu-latest
runs-on: a100
strategy:
fail-fast: false
matrix:
@@ -53,7 +53,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest yapf toml
python -m pip install flake8 pytest toml
if [ -f requirements.txt ]; then pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases; fi
- name: Lint with flake8
run: |
@@ -61,16 +61,15 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py
yapf -i --style .style.yapf shark/*.py
- name: Build and validate the package
run: |
cd $GITHUB_WORKSPACE
IMPORTER=1 ./setup_venv.sh
./setup_venv.sh
source shark.venv/bin/activate
package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
SHARK_PACKAGE_VERSION=${package_version} \
pip wheel -v -w wheelhouse . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://github.com/llvm/torch-mlir/releases -f https://github.com/nod-ai/SHARK-Runtime/releases
# Install the built wheel
pip install ./wheelhouse/nodai*
# Validate the Models


@@ -1,7 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Validate torch-models on Shark Runtime
name: Validate Models on Shark Runtime
on:
push:
@@ -11,92 +11,101 @@ on:
workflow_dispatch:
jobs:
build-linux:
runs-on: ubuntu-latest
build-validate:
strategy:
fail-fast: false
fail-fast: true
matrix:
os: [a100, MI100, MacStudio, ubuntu-latest]
suite: [cpu,gpu,vulkan]
python-version: ["3.10"]
include:
- os: ubuntu-latest
suite: lint
- os: MI100
suite: rocm
exclude:
- os: ubuntu-latest
suite: vulkan
- os: ubuntu-latest
suite: gpu
- os: ubuntu-latest
suite: cpu
- os: MacStudio
suite: gpu
- os: MacStudio
suite: vulkan
- os: MI100
suite: gpu
- os: MI100
suite: vulkan
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Setup pip cache
uses: actions/cache@v3
- name: Set up Python Version File ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
run: |
# See https://github.com/actions/setup-python/issues/433
echo ${{ matrix.python-version }} >> $GITHUB_WORKSPACE/.python-version
- name: Set up Python ${{ matrix.python-version }}
if: matrix.os == 'a100' || matrix.os == 'ubuntu-latest'
uses: actions/setup-python@v4
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
python-version: '${{ matrix.python-version }}'
#cache: 'pip'
#cache-dependency-path: |
# **/requirements-importer.txt
# **/requirements.txt
- name: Install dependencies
if: matrix.suite == 'lint'
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest yapf toml
python -m pip install flake8 pytest toml black
- name: Lint with flake8
if: matrix.suite == 'lint'
run: |
# black format check
black --version
black --line-length 79 --check .
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude lit.cfg.py
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
yapf -i --style .style.yapf shark/*.py
- name: Validate Models
- name: Validate CPU Models
if: matrix.suite == 'cpu'
run: |
cd $GITHUB_WORKSPACE
IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest -k 'not benchmark' --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
pytest -k 'cpu' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
- name: Validate GPU/CUDA Models
if: matrix.suite == 'gpu'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
- name: Validate Vulkan Models
if: matrix.suite == 'vulkan'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
perf-macOS:
runs-on: MacStudio
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Validate Models dependencies
- name: Validate GPU/ROCM Models
if: matrix.suite == 'rocm'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python3.10 IMPORTER=1 ./setup_venv.sh
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest -k 'not benchmark' --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py --ignore=tank/tf/ --ignore=shark/tests/test_shark_importer.py
perf-linux:
runs-on: a100
timeout-minutes: 45
continue-on-error: true
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Setup pip cache
uses: actions/cache@v3
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Validate Models
run: |
cd $GITHUB_WORKSPACE
IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --ignore=shark/tests/test_shark_importer.py --ignore=tank/tf/
pytest -k 'rocm' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py

.gitignore vendored

@@ -162,6 +162,7 @@ cython_debug/
# Shark related artefacts
*venv/
shark_tmp/
# ORT related artefacts
cache_models/

README.md

@@ -15,11 +15,11 @@ High Performance Machine Learning and Data Analytics for CPUs, GPUs, Accelerator
<details>
<summary>Installation (Linux and macOS)</summary>
### Setup a new pip Virtual Environment
This step sets up a new VirtualEnv for Python
```shell
python --version #Check you have 3.7->3.10 on Linux or 3.10 on macOS
python -m venv shark_venv
@@ -31,10 +31,10 @@ source shark_venv/bin/activate
python -m pip install --upgrade pip
```
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg
*macOS Metal* users please install https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg and enable "System wide install"
### Install SHARK
This step pip installs SHARK and related packages on Linux Python 3.7, 3.8, 3.9, 3.10 and macOS Python 3.10
```shell
@@ -43,20 +43,20 @@ pip install nodai-shark -f https://github.com/nod-ai/SHARK/releases -f https://g
If you are on an Intel macOS machine you need this [workaround](https://github.com/nod-ai/SHARK/issues/102) for an upstream issue.
### Download and run Resnet50 sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/resnet50_script.py
#Install deps for test script
pip install --pre torch torchvision torchaudio tqdm pillow --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
python ./resnet50_script.py --device="cpu" #use cuda or vulkan or metal
```
### Download and run BERT (MiniLM) sample
```shell
curl -O https://raw.githubusercontent.com/nod-ai/SHARK/main/shark/examples/shark_inference/minilm_jit.py
#Install deps for test script
pip install transformers torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
```
</details>
@@ -67,33 +67,67 @@ python ./minilm_jit.py --device="cpu" #use cuda or vulkan or metal
## Check out the code
```shell
git clone https://github.com/nod-ai/SHARK.git
git clone https://github.com/nod-ai/SHARK.git
```
## Setup your Python VirtualEnvironment and Dependencies
```shell
# Setup venv and install necessary packages (torch-mlir, nodLabs/Shark, ...).
./setup_venv.sh
# Please activate the venv after installation.
source shark.venv/bin/activate
```
For example if you want to use Python3.10 and upstream IREE with TF Import tools you can use the environment variables like:
```
# PYTHON=python3.10 VENV_DIR=0617_venv IMPORTER=1 USE_IREE=1 ./setup_venv.sh
```
If you are a Torch-mlir developer or an IREE developer and want to test local changes you can uninstall
the provided packages with `pip uninstall torch-mlir` and / or `pip uninstall iree-compiler iree-runtime` and build locally
with Python bindings and set your PYTHONPATH as mentioned [here](https://google.github.io/iree/bindings/python/)
for IREE and [here](https://github.com/llvm/torch-mlir/blob/main/development.md#setup-python-environment-to-export-the-built-python-packages)
for Torch-MLIR.
### Run a demo script
```shell
python -m shark.examples.shark_inference.resnet50_script --device="cpu" # Use gpu | vulkan
# Or a pytest
pytest tank/tf/hf_masked_lm/albert-base-v2_test.py::AlbertBaseModuleTest::test_module_static_cpu
```
</details>
<details>
<summary>Testing</summary>
### Run all model tests on CPU/GPU/VULKAN/Metal
```shell
pytest shark/tests/models
pytest tank
# If on Linux for quicker results:
pytest shark/tests/models -n auto
# If on Linux for multithreading on CPU (faster results):
pytest tank -n auto
```
### Run all model benchmark tests on CPU/GPU/VULKAN/Metal
### Running specific tests
```shell
pytest shark/tests/benchmarks
# Run tests for a specific model:
pytest tank/<MODEL_NAME> #i.e., pytest tank/bert-base-uncased
# Run tests for a specific case:
pytest tank/<MODEL_NAME> -k "keyword"
# i.e., pytest tank/bert-base-uncased/bert-base-uncased_test.py -k "static_gpu"
```
### Run benchmarks on SHARK tank pytests and generate bench_results.csv with results.
(requires source installation with `IMPORTER=1 ./setup_venv.sh`)
```shell
pytest --benchmark tank
# Just do static GPU benchmarks for PyTorch tests:
pytest --benchmark tank --ignore-glob="_tf*" -k "static_gpu"
```
</details>
@@ -104,18 +138,25 @@ pytest shark/tests/benchmarks
### Shark Inference API
```
from shark_runner import SharkInference
shark_module = SharkInference(
module = model class.
(input,) = inputs to model (must be a torch-tensor)
dynamic (boolean) = Pass the input shapes as static or dynamic.
device = `cpu`, `gpu` or `vulkan` is supported.
tracing_required = (boolean) = Jit trace the module with the given input, useful in the case where jit.script doesn't work. )
shark_module.set_frontend("pytorch") # Use tensorflow, mhlo, linalg, tosa
from shark.shark_importer import SharkImporter
# SharkImporter imports mlir file from the torch, tensorflow or tf-lite module.
mlir_importer = SharkImporter(
torch_module,
(input),
frontend="torch", #tf, #tf-lite
)
torch_mlir, func_name = mlir_importer.import_mlir(tracing_required=True)
# SharkInference accepts mlir in linalg, mhlo, and tosa dialect.
from shark.shark_inference import SharkInference
shark_module = SharkInference(torch_mlir, func_name, device="cpu", mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input))
result = shark_module.forward(inputs)
```
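The importer/inference flow above is written for a torch module. As a rough illustration of the same split for a TensorFlow module, the sketch below reuses `frontend="tf"` (as in `generate_sharktank.py` further down this diff) and the `mlir_dialect="mhlo"` path mentioned above; the toy module, the bare `import_mlir()` call, and the exact keyword arguments are assumptions for illustration, not the documented API.
```python
# Rough sketch only: a tiny TF module pushed through the importer/inference split.
# frontend="tf" and mlir_dialect="mhlo" appear elsewhere in this diff; the
# remaining details (module shape, import_mlir() arguments) are assumed.
import numpy as np
import tensorflow as tf

from shark.shark_importer import SharkImporter
from shark.shark_inference import SharkInference


class DoubleModule(tf.Module):
    @tf.function(
        input_signature=[tf.TensorSpec(shape=[1, 4], dtype=tf.float32)]
    )
    def forward(self, x):
        return x * 2.0


tf_module = DoubleModule()
inputs = (np.ones((1, 4), dtype=np.float32),)

mlir_importer = SharkImporter(tf_module, inputs, frontend="tf")
tf_mlir, func_name = mlir_importer.import_mlir()

shark_module = SharkInference(
    tf_mlir, func_name, device="cpu", mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
```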
@@ -135,11 +176,9 @@ mhlo_ir = r"""builtin.module {
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
shark_module = SharkInference(mhlo_ir, func_name="forward", device="cpu", mlir_dialect="mhlo")
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
result = shark_module.forward((arg0, arg1))
```
</details>
@@ -153,86 +192,153 @@ print(shark_module.forward((arg0, arg1)))
| Hugging Face Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| Albert | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| BigBird | :heavy_check_mark: (AOT) | | | |
| DistilBERT | :heavy_check_mark: (JIT) | :heavy_check_mark: | | |
| GPT2 | :x: (AOT) | | | |
| BERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| Albert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| BigBird | :green_heart: (AOT) | | | |
| DistilBERT | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
| GPT2 | :broken_heart: (AOT) | | | |
| MobileBert | :green_heart: (JIT) | :green_heart: | :green_heart: | :green_heart: |
### Torchvision Models
| TORCHVISION Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|--------------------|----------------------|----------|----------|-------------|
| AlexNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| DenseNet121 | :heavy_check_mark: (Script) | | | |
| MNasNet1_0 | :heavy_check_mark: (Script) | | | |
| MobileNetV2 | :heavy_check_mark: (Script) | | | |
| MobileNetV3 | :heavy_check_mark: (Script) | | | |
| Unet | :x: (Script) | | | |
| Resnet18 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet50 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnet101 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Resnext50_32x4d | :heavy_check_mark: (Script) | | | |
| ShuffleNet_v2 | :x: (Script) | | | |
| SqueezeNet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| EfficientNet | :heavy_check_mark: (Script) | | | |
| Regnet | :heavy_check_mark: (Script) | | | |
| Resnest | :x: (Script) | | | |
| Vision Transformer | :heavy_check_mark: (Script) | | | |
| VGG 16 | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| Wide Resnet | :heavy_check_mark: (Script) | :heavy_check_mark: | :heavy_check_mark: | |
| RAFT | :x: (JIT) | | | |
| AlexNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| DenseNet121 | :green_heart: (Script) | | | |
| MNasNet1_0 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| MobileNetV2 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| MobileNetV3 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Unet | :broken_heart: (Script) | | | |
| Resnet18 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnet50 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnet101 | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnext50_32x4d | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| ShuffleNet_v2 | :broken_heart: (Script) | | | |
| SqueezeNet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| EfficientNet | :green_heart: (Script) | | | |
| Regnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| Resnest | :broken_heart: (Script) | | | |
| Vision Transformer | :green_heart: (Script) | | | |
| VGG 16 | :green_heart: (Script) | :green_heart: | :green_heart: | |
| Wide Resnet | :green_heart: (Script) | :green_heart: | :green_heart: | :green_heart: |
| RAFT | :broken_heart: (JIT) | | | |
For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spreadsheets/d/15PcjKeHZIrB5LfDyuw7DGEEE8XnQEX2aX8lm8qbxV8A/edit#gid=0)
### PyTorch Training Models
### PyTorch Training Models
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
| BERT | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>JAX Models</summary>
### JAX Models
### JAX Models
| Models | JAX-MHLO lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| DALL-E | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
| DALL-E | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>TFLite Models</summary>
### TFLite Models
### TFLite Models
| Models | TOSA/LinAlg | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
| BERT | :broken_heart: | :broken_heart: | | |
| FullyConnected | :green_heart: | :green_heart: | | |
| albert | :green_heart: | :green_heart: | | |
| asr_conformer | :green_heart: | :green_heart: | | |
| bird_classifier | :green_heart: | :green_heart: | | |
| cartoon_gan | :green_heart: | :green_heart: | | |
| craft_text | :green_heart: | :green_heart: | | |
| deeplab_v3 | :green_heart: | :green_heart: | | |
| densenet | :green_heart: | :green_heart: | | |
| east_text_detector | :green_heart: | :green_heart: | | |
| efficientnet_lite0_int8 | :green_heart: | :green_heart: | | |
| efficientnet | :green_heart: | :green_heart: | | |
| gpt2 | :green_heart: | :green_heart: | | |
| image_stylization | :green_heart: | :green_heart: | | |
| inception_v4 | :green_heart: | :green_heart: | | |
| inception_v4_uint8 | :green_heart: | :green_heart: | | |
| lightning_fp16 | :green_heart: | :green_heart: | | |
| lightning_i8 | :green_heart: | :green_heart: | | |
| lightning | :green_heart: | :green_heart: | | |
| magenta | :green_heart: | :green_heart: | | |
| midas | :green_heart: | :green_heart: | | |
| mirnet | :green_heart: | :green_heart: | | |
| mnasnet | :green_heart: | :green_heart: | | |
| mobilebert_edgetpu_s_float | :green_heart: | :green_heart: | | |
| mobilebert_edgetpu_s_quant | :green_heart: | :green_heart: | | |
| mobilebert | :green_heart: | :green_heart: | | |
| mobilebert_tf2_float | :green_heart: | :green_heart: | | |
| mobilebert_tf2_quant | :green_heart: | :green_heart: | | |
| mobilenet_ssd_quant | :green_heart: | :green_heart: | | |
| mobilenet_v1 | :green_heart: | :green_heart: | | |
| mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
| mobilenet_v2 | :green_heart: | :green_heart: | | |
| mobilenet_v2_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v3-large | :green_heart: | :green_heart: | | |
| mobilenet_v3-large_uint8 | :green_heart: | :green_heart: | | |
| mobilenet_v35-int8 | :green_heart: | :green_heart: | | |
| nasnet | :green_heart: | :green_heart: | | |
| person_detect | :green_heart: | :green_heart: | | |
| posenet | :green_heart: | :green_heart: | | |
| resnet_50_int8 | :green_heart: | :green_heart: | | |
| rosetta | :green_heart: | :green_heart: | | |
| spice | :green_heart: | :green_heart: | | |
| squeezenet | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v1 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v1_uint8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_fpnlite | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_fpnlite_uint8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2_int8 | :green_heart: | :green_heart: | | |
| ssd_mobilenet_v2 | :green_heart: | :green_heart: | | |
| ssd_spaghettinet_large | :green_heart: | :green_heart: | | |
| ssd_spaghettinet_large_uint8 | :green_heart: | :green_heart: | | |
| visual_wake_words_i8 | :green_heart: | :green_heart: | | |
</details>
<details>
<summary>TF Models</summary>
### Tensorflow Models
| Models | Torch-MLIR lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
### Tensorflow Models (Inference)
| Hugging Face Models | tf-mhlo lowerable | SHARK-CPU | SHARK-CUDA | SHARK-METAL |
|---------------------|----------------------|----------|----------|-------------|
| BERT | :x: | :x: | | |
| FullyConnected | :heavy_check_mark: | :heavy_check_mark: | | |
| BERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| albert-base-v2 | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| DistilBERT | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| CamemBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| ConvBert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| Deberta | | | | |
| electra | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| funnel | | | | |
| layoutlm | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| longformer | | | | |
| mobile-bert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| remembert | | | | |
| tapas | | | | |
| flaubert | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| xlm-roberta | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
| mpnet | :green_heart: | :green_heart: | :green_heart: | :green_heart: |
</details>
## Related Projects
<details>
<summary>IREE Project Channels</summary>
@@ -243,7 +349,7 @@ For more information refer to [MODEL TRACKING SHEET](https://docs.google.com/spr
* [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
Announcements, general and low-priority discussion
</details>
<details>
<summary>MLIR and Torch-MLIR Project Channels</summary>


@@ -6,16 +6,16 @@ parser.add_argument(
"--model_name",
type=str,
required=True,
help=
"Specifies name of HF model to benchmark. (For exmaple \"microsoft/MiniLM-L12-H384-uncased\""
help='Specifies name of HF model to benchmark. (For exmaple "microsoft/MiniLM-L12-H384-uncased"',
)
load_args, unknown = parser.parse_known_args()
if __name__ == "__main__":
model_name = load_args.model_name
test_input = torch.randint(2, (1, 128))
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
jit_trace=True)
shark_module = SharkHFBenchmarkRunner(
model_name, (test_input,), jit_trace=True
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)


@@ -1,8 +1,12 @@
import torch
from shark.shark_runner import SharkBenchmarkRunner
from shark.shark_benchmark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import run_pytorch, run_tensorflow, run_onnxruntime
from onnxruntime.transformers.benchmark import (
run_pytorch,
run_tensorflow,
run_onnxruntime,
)
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
@@ -10,7 +14,6 @@ import psutil
class OnnxFusionOptions(object):
def __init__(self):
self.disable_gelu = False
self.disable_layer_norm = False
@@ -25,17 +28,13 @@ class OnnxFusionOptions(object):
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
@@ -62,8 +61,16 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
)
self.model_name = model_name
model = HuggingFaceLanguage(model_name)
SharkBenchmarkRunner.__init__(self, model, input, dynamic, self.device,
jit_trace, from_aot, frontend)
SharkBenchmarkRunner.__init__(
self,
model,
input,
dynamic,
self.device,
jit_trace,
from_aot,
frontend,
)
def benchmark_torch(self, inputs):
use_gpu = self.device == "gpu"
@@ -74,10 +81,20 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_pytorch(use_gpu, [self.model_name], None, config_modifier,
Precision.FLOAT32, num_threads, batch_sizes,
sequence_lengths, shark_args.num_iterations, False,
cache_dir, verbose)
result = run_pytorch(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
False,
cache_dir,
verbose,
)
print(
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
@@ -92,10 +109,19 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_tensorflow(use_gpu, [self.model_name], None,
config_modifier, Precision.FLOAT32, num_threads,
batch_sizes, sequence_lengths,
shark_args.num_iterations, cache_dir, verbose)
result = run_tensorflow(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
cache_dir,
verbose,
)
print(
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
@@ -105,7 +131,8 @@ class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
print(
f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX.")
for currently supported models. Exiting benchmark ONNX."
)
return
use_gpu = self.device == "gpu"
num_threads = psutil.cpu_count(logical=False)
@@ -121,17 +148,34 @@ for currently supported models. Exiting benchmark ONNX.")
use_raw_attention_mask = True
model_fusion_statistics = {}
overwrite = False
model_source = "pt" #Either "pt" or "tf"
model_source = "pt" # Either "pt" or "tf"
provider = None
config_modifier = ConfigModifier(None)
onnx_args = OnnxFusionOptions()
result = run_onnxruntime(
use_gpu, provider, [self.model_name], None, config_modifier,
Precision.FLOAT32, num_threads, batch_sizes, sequence_lengths,
shark_args.num_iterations, input_counts, optimize_onnx,
validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
disable_ort_io_binding, use_raw_attention_mask,
model_fusion_statistics, model_source, onnx_args)
use_gpu,
provider,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
input_counts,
optimize_onnx,
validate_onnx,
cache_dir,
onnx_dir,
verbose,
overwrite,
disable_ort_io_binding,
use_raw_attention_mask,
model_fusion_statistics,
model_source,
onnx_args,
)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)


@@ -1,19 +1,23 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from shark.iree_utils._common import check_device_drivers
import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
from transformers import (
AutoModelForSequenceClassification,
BertTokenizer,
TFBertModel,
)
import importlib
import pytest
import unittest
torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
tf.config.experimental.set_memory_growth(gpu, True)
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
@@ -23,12 +27,11 @@ BATCH_SIZE = 1
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
# Create a BERT trainer with the created network.
@@ -36,7 +39,8 @@ class TFHuggingFaceLanguage(tf.Module):
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
@@ -47,15 +51,21 @@ def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(name)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
actual_out = model.forward(*test_input)
return model, test_input, actual_out
@@ -64,17 +74,13 @@ def get_TFhf_model(name):
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
@@ -96,7 +102,6 @@ def get_hf_model(name):
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
@@ -117,46 +122,56 @@ def get_vision_model(torch_model):
############################# Benchmark Tests ####################################
pytest_benchmark_param = pytest.mark.parametrize(
('dynamic', 'device'),
("dynamic", "device"),
[
pytest.param(False, 'cpu'),
pytest.param(False, "cpu"),
# TODO: Language models are failing for dynamic case..
pytest.param(True, 'cpu', marks=pytest.mark.skip),
pytest.param(False,
'gpu',
marks=pytest.mark.skipif(check_device_drivers("gpu"),
reason="nvidia-smi not found")),
pytest.param(True,
'gpu',
marks=pytest.mark.skip),
pytest.param(True, "cpu", marks=pytest.mark.skip),
pytest.param(
False,
'vulkan',
"gpu",
marks=pytest.mark.skipif(
check_device_drivers("gpu"), reason="nvidia-smi not found"
),
),
pytest.param(True, "gpu", marks=pytest.mark.skip),
pytest.param(
False,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
pytest.param(
True,
'vulkan',
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
])
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
],
)
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased")
shark_module = SharkInference(model, (test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
model,
(test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If becnhmarking succesful, assert success/True.
shark_module.compile()
@@ -167,17 +182,21 @@ def test_bench_minilm_torch(dynamic, device):
assert False
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If becnhmarking succesful, assert success/True.
shark_module.set_frontend("tensorflow")
@@ -193,12 +212,14 @@ def test_bench_distilbert(dynamic, device):
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If becnhmarking succesful, assert success/True.
shark_module.set_frontend("tensorflow")


@@ -9,25 +9,31 @@ torch.manual_seed(0)
# Test running benchmark module without failing.
pytest_benchmark_param = pytest.mark.parametrize(
('dynamic', 'device'),
("dynamic", "device"),
[
pytest.param(False, 'cpu'),
pytest.param(False, "cpu"),
# TODO: Language models are failing for dynamic case..
pytest.param(True, 'cpu', marks=pytest.mark.skip),
])
pytest.param(True, "cpu", marks=pytest.mark.skip),
],
)
@pytest.mark.skipif(importlib.util.find_spec("onnxruntime") is None,
reason="Cannot find ONNXRUNTIME.")
@pytest.mark.skipif(
importlib.util.find_spec("onnxruntime") is None,
reason="Cannot find ONNXRUNTIME.",
)
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
model_name = "bert-base-uncased"
test_input = torch.randint(2, (1, 128))
try:
shark_module = SharkHFBenchmarkRunner(model_name, (test_input,),
jit_trace=True,
dynamic=dynamic,
device=device)
shark_module = SharkHFBenchmarkRunner(
model_name,
(test_input,),
jit_trace=True,
dynamic=dynamic,
device=device,
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)

conftest.py Normal file

@@ -0,0 +1,26 @@
def pytest_addoption(parser):
# Attaches SHARK command-line arguments to the pytest machinery.
parser.addoption(
"--save_mlir",
action="store_true",
default="False",
help="Pass option to save input MLIR",
)
parser.addoption(
"--save_vmfb",
action="store_true",
default="False",
help="Pass option to save IREE output .vmfb",
)
parser.addoption(
"--benchmark",
action="store_true",
default="False",
help="Pass option to benchmark and write results.csv",
)
parser.addoption(
"--save_temps",
action="store_true",
default="False",
help="Saves IREE reproduction artifacts for filing upstream issues.",
)
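As a hedged sketch (not part of this diff) of how these flags might be consumed, a fixture can read them through pytest's standard `request.config.getoption`; the fixture name and the `shark_args` attribute names are assumptions. Note that the defaults above are the string `"False"`, which is truthy, so the sketch normalizes the values to real booleans.
```python
# Hypothetical consumer of the options registered above; request.config.getoption
# is standard pytest API, while the shark_args attribute names are assumptions.
# The addoption defaults are the string "False" (truthy), hence the normalization.
import pytest

from shark.parser import shark_args


@pytest.fixture(autouse=True)
def _apply_shark_cli_options(request):
    shark_args.save_mlir = request.config.getoption("--save_mlir") is True
    shark_args.save_vmfb = request.config.getoption("--save_vmfb") is True
    shark_args.run_benchmarks = request.config.getoption("--benchmark") is True
```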

generate_sharktank.py Normal file

@@ -0,0 +1,223 @@
# Lint as: python3
"""SHARK Tank"""
# python generate_sharktank.py, you have to give a csv tile with [model_name, model_download_url]
# will generate local shark tank folder like this:
# /SHARK
# /gen_shark_tank
# /albert_lite_base
# /...model_name...
#
import os
import csv
import argparse
from shark.shark_importer import SharkImporter
import tensorflow as tf
import hashlib
import numpy as np
visible_default = tf.config.list_physical_devices("GPU")
try:
tf.config.set_visible_devices([], "GPU")
visible_devices = tf.config.get_visible_devices()
for device in visible_devices:
assert device.device_type != "GPU"
except:
# Invalid device or cannot modify virtual devices once initialized.
pass
# All generated models and metadata will be saved under this directory.
WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
def create_hash(file_name):
with open(file_name, "rb") as f:
file_hash = hashlib.blake2b()
while chunk := f.read(2**20):
file_hash.update(chunk)
return file_hash.hexdigest()
def save_torch_model(torch_model_list):
from tank.model_utils import get_hf_model
from tank.model_utils import get_vision_model
with open(torch_model_list) as csvfile:
torch_reader = csv.reader(csvfile, delimiter=",")
fields = next(torch_reader)
for row in torch_reader:
torch_model_name = row[0]
tracing_required = row[1]
model_type = row[2]
tracing_required = False if tracing_required == "False" else True
model = None
input = None
if model_type == "vision":
model, input, _ = get_vision_model(torch_model_name)
elif model_type == "hf":
model, input, _ = get_hf_model(torch_model_name)
torch_model_name = torch_model_name.replace("/", "_")
torch_model_dir = os.path.join(
WORKDIR, str(torch_model_name) + "_torch"
)
os.makedirs(torch_model_dir, exist_ok=True)
mlir_importer = SharkImporter(
model,
(input,),
frontend="torch",
)
mlir_importer.import_debug(
is_dynamic=False,
tracing_required=tracing_required,
dir=torch_model_dir,
model_name=torch_model_name,
)
mlir_hash = create_hash(
os.path.join(
torch_model_dir, torch_model_name + "_torch" + ".mlir"
)
)
np.save(os.path.join(torch_model_dir, "hash"), np.array(mlir_hash))
# Generate torch dynamic models.
mlir_importer.import_debug(
is_dynamic=True,
tracing_required=tracing_required,
dir=torch_model_dir,
model_name=torch_model_name + "_dynamic",
)
def save_tf_model(tf_model_list):
from tank.model_utils_tf import get_causal_lm_model
from tank.model_utils_tf import get_causal_image_model
with open(tf_model_list) as csvfile:
tf_reader = csv.reader(csvfile, delimiter=",")
fields = next(tf_reader)
for row in tf_reader:
tf_model_name = row[0]
model_type = row[1]
model = None
input = None
print(model_type)
if model_type == "hf":
model, input, _ = get_causal_lm_model(tf_model_name)
if model_type == "img":
model, input, _ = get_causal_image_model(tf_model_name)
tf_model_name = tf_model_name.replace("/", "_")
tf_model_dir = os.path.join(WORKDIR, str(tf_model_name) + "_tf")
os.makedirs(tf_model_dir, exist_ok=True)
mlir_importer = SharkImporter(
model,
input,
frontend="tf",
)
mlir_importer.import_debug(
dir=tf_model_dir,
model_name=tf_model_name,
)
mlir_hash = create_hash(
os.path.join(tf_model_dir, tf_model_name + "_tf" + ".mlir")
)
np.save(os.path.join(tf_model_dir, "hash"), np.array(mlir_hash))
def save_tflite_model(tflite_model_list):
from shark.tflite_utils import TFLitePreprocessor
with open(tflite_model_list) as csvfile:
tflite_reader = csv.reader(csvfile, delimiter=",")
for row in tflite_reader:
print("\n")
tflite_model_name = row[0]
tflite_model_link = row[1]
print("tflite_model_name", tflite_model_name)
print("tflite_model_link", tflite_model_link)
tflite_model_name_dir = os.path.join(
WORKDIR, str(tflite_model_name) + "_tflite"
)
os.makedirs(tflite_model_name_dir, exist_ok=True)
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
# Preprocess to get SharkImporter input args
tflite_preprocessor = TFLitePreprocessor(str(tflite_model_name))
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
inputs = tflite_preprocessor.get_inputs()
tflite_interpreter = tflite_preprocessor.get_interpreter()
# Use SharkImporter to get SharkInference input args
my_shark_importer = SharkImporter(
module=tflite_interpreter,
inputs=inputs,
frontend="tflite",
raw_model_file=raw_model_file_path,
)
my_shark_importer.import_debug(
dir=tflite_model_name_dir,
model_name=tflite_model_name,
func_name="main",
)
mlir_hash = create_hash(
os.path.join(
tflite_model_name_dir,
tflite_model_name + "_tflite" + ".mlir",
)
)
np.save(
os.path.join(tflite_model_name_dir, "hash"),
np.array(mlir_hash),
)
# Validates whether the file is present or not.
def is_valid_file(arg):
if not os.path.exists(arg):
return None
else:
return arg
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--torch_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/pytorch/torch_model_list.csv",
help="""Contains the file with torch_model name and args.
Please see: https://github.com/nod-ai/SHARK/blob/main/tank/pytorch/torch_model_list.csv""",
)
parser.add_argument(
"--tf_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tf/tf_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument(
"--tflite_model_csv",
type=lambda x: is_valid_file(x),
default="./tank/tflite/tflite_model_list.csv",
help="Contains the file with tf model name and args.",
)
parser.add_argument("--upload", type=bool, default=False)
args = parser.parse_args()
if args.torch_model_csv:
save_torch_model(args.torch_model_csv)
if args.tf_model_csv:
save_tf_model(args.tf_model_csv)
if args.tflite_model_csv:
save_tflite_model(args.tflite_model_csv)
if args.upload:
print("uploading files to gs://shark_tank/")
os.system("gsutil cp -r ./gen_shark_tank/* gs://shark_tank/")

File diff suppressed because it is too large.

View File

@@ -20,6 +20,7 @@ tensorflow-metal
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
tensorflow-probability
#jax[cpu]
# tflitehub dependencies.

View File

@@ -15,9 +15,10 @@ iree-tools-tf
# TensorFlow and JAX.
gin-config
tensorflow
tf-models-nightly
tensorflow-text-nightly
#tf-models-nightly
#tensorflow-text-nightly
transformers==4.18.0
#tensorflow-probability
#jax[cpu]
@@ -29,11 +30,11 @@ lit
pyyaml
#ONNX and ORT for benchmarking
--extra-index-url https://test.pypi.org/simple/
protobuf
coloredlogs
flatbuffers
sympy
psutil
onnx-weekly
ort-nightly
#--extra-index-url https://test.pypi.org/simple/
#protobuf
#coloredlogs
#flatbuffers
#sympy
#psutil
#onnx-weekly
#ort-nightly

View File

@@ -1,9 +1,13 @@
setuptools
wheel
#SHARK Runner
# SHARK Runner
tqdm
#Testing
# SHARK Downloader
gsutil
# Testing
pytest
pytest-xdist
Pillow

View File

@@ -26,7 +26,7 @@ setup(
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
packages=find_packages(exclude=('examples')),
packages=find_packages(exclude=("examples")),
python_requires=">=3.7",
install_requires=[
"numpy",

View File

@@ -98,7 +98,7 @@ if [[ ! -z "${IMPORTER}" ]]; then
echo "${Yellow}Installing importer tools.."
if [[ $(uname -s) = 'Linux' ]]; then
echo "${Yellow}Linux detected.. installing Linux importer tools"
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cpu
$PYTHON -m pip install --upgrade -r "$TD/requirements-importer.txt" -f https://github.com/${RUNTIME}/releases --extra-index-url https://test.pypi.org/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/cu116
elif [[ $(uname -s) = 'Darwin' ]]; then
echo "${Yellow}macOS detected.. installing macOS importer tools"
# Conda seems to have problems installing these packages; we hope they get resolved upstream.
@@ -108,6 +108,16 @@ fi
$PYTHON -m pip install -e . --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://github.com/llvm/torch-mlir/releases -f https://github.com/${RUNTIME}/releases
if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
$PYTHON -m pip uninstall -y torch torchvision
$PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu116
if [ $? -eq 0 ];then
echo "Successfully Installed torch + cu116."
else
echo "Could not install torch + cu116." >&2
fi
fi
if [[ -z "${CONDA_PREFIX}" ]]; then
echo "${Green}Before running examples activate venv with:"
echo " ${Green}source $VENV_DIR/bin/activate"

View File

@@ -18,12 +18,10 @@ from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import _stateless
from torch import fx
import copy
import tempfile
class MakeFxModule:
def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
self.model = model
self.inputs = inputs
@@ -53,20 +51,28 @@ class MakeFxModule:
return fx_g
def generate_graph(self):
fx_g = make_fx(self.custom_inference_fn,
decomposition_table=get_decompositions([
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward
]))(dict(self.model.named_parameters()),
dict(self.model.named_buffers()), self.inputs)
fx_g = make_fx(
self.custom_inference_fn,
decomposition_table=get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
]
),
)(
dict(self.model.named_parameters()),
dict(self.model.named_buffers()),
self.inputs,
)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile(suffix='_shark_ts',
prefix='temp_ts_')
temp = tempfile.NamedTemporaryFile(
suffix="_shark_ts", prefix="temp_ts_"
)
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)
self.training_graph = new_ts

View File

@@ -8,7 +8,9 @@ try:
from torchdynamo.optimizations.backends import create_backend
from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
print("Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo")
print(
"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo"
)
exit()
NUM_ITERS = 10
@@ -24,7 +26,9 @@ def __torch_mlir(fx_graph, *args, **kwargs):
for node in fx_g.graph.nodes:
if node.op == "output":
assert len(node.args) == 1, "Output node must have a single argument"
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple) and len(node_arg) == 1:
node.args = (node_arg[0],)
@@ -41,8 +45,12 @@ def __torch_mlir(fx_graph, *args, **kwargs):
if len(args) == 1 and isinstance(args[0], list):
args = args[0]
linalg_module = compile(ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS)
callable, _ = get_iree_compiled_module(linalg_module, "cuda", func_name="forward")
linalg_module = compile(
ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS
)
callable, _ = get_iree_compiled_module(
linalg_module, "cuda", func_name="forward"
)
def forward(*inputs):
return callable(*inputs)

View File

@@ -9,23 +9,24 @@ from shark.shark_inference import SharkInference
clip_vit_inputs = [
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32)
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]
class CLIPModule(tf.Module):
def __init__(self):
super(CLIPModule, self).__init__()
self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
self.m.predict = lambda x, y, z: self.m(
input_ids=x, attention_mask=y, pixel_values=z)
input_ids=x, attention_mask=y, pixel_values=z
)
@tf.function(input_signature=clip_vit_inputs)
def forward(self, input_ids, attention_mask, pixel_values):
return self.m.predict(input_ids, attention_mask,
pixel_values).logits_per_image
return self.m.predict(
input_ids, attention_mask, pixel_values
).logits_per_image
if __name__ == "__main__":
@@ -35,17 +36,30 @@ if __name__ == "__main__":
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat", "a photo of a dog"],
images=image,
return_tensors="tf",
padding=True)
inputs = processor(
text=["a photo of a cat", "a photo of a dog"],
images=image,
return_tensors="tf",
padding=True,
)
shark_module = SharkInference(
CLIPModule(),
(inputs["input_ids"], inputs["attention_mask"], inputs["pixel_values"]))
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"],
inputs["pixel_values"])))
shark_module.forward(
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
)
)
)

View File

@@ -13,7 +13,6 @@ gpt2_inputs = [
class GPT2Module(tf.Module):
def __init__(self):
super(GPT2Module, self).__init__()
self.m = TFGPT2Model.from_pretrained("distilgpt2")
@@ -30,9 +29,12 @@ if __name__ == "__main__":
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors='tf')
inputs = tokenizer(text, return_tensors="tf")
shark_module = SharkInference(
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"]))
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs["input_ids"], inputs["attention_mask"])))
print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
)

View File

@@ -12,7 +12,26 @@ mhlo_ir = r"""builtin.module {
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
shark_module = SharkInference(mhlo_ir, (arg0, arg1))
shark_module.set_frontend("mhlo")
print("Running shark on cpu backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="cpu", mlir_dialect="mhlo"
)
# Generate the random inputs and feed into the graph.
x = shark_module.generate_random_inputs()
shark_module.compile()
print(shark_module.forward((arg0, arg1)))
print(shark_module.forward(x))
print("Running shark on cuda backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="cuda", mlir_dialect="mhlo"
)
shark_module.compile()
print(shark_module.forward(x))
print("Running shark on vulkan backend")
shark_module = SharkInference(
mhlo_ir, function_name="forward", device="vulkan", mlir_dialect="mhlo"
)
shark_module.compile()
print(shark_module.forward(x))

View File

@@ -7,17 +7,13 @@ tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
@@ -27,9 +23,12 @@ class MiniLMSequenceClassification(torch.nn.Module):
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
jit_trace=True,
benchmark_mode=True)
shark_module = SharkInference(
MiniLMSequenceClassification(),
(test_input,),
jit_trace=True,
benchmark_mode=True,
)
shark_module.compile()
shark_module.forward((test_input,))

View File

@@ -9,21 +9,22 @@ BATCH_SIZE = 1
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
@@ -33,22 +34,28 @@ class BertModule(tf.Module):
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
shark_module = SharkInference(
BertModule(),
test_input,
benchmark_mode=True)
BertModule(), test_input, benchmark_mode=True
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)

View File

@@ -1,35 +1,24 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
from shark.shark_downloader import download_torch_model
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
mlir_model, func_name, inputs, golden_out = download_torch_model(
"microsoft/MiniLM-L12-H384-uncased"
)
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(MiniLMSequenceClassification(), (test_input,),
jit_trace=True)
shark_module = SharkInference(
mlir_model, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward((test_input,))
print("Obtained result", result)
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)
# Let's generate random inputs, currently supported
# for static models.
rand_inputs = shark_module.generate_random_inputs()
rand_results = shark_module.forward(rand_inputs)
print("Running shark_module with random_inputs is: ", rand_results)

View File

@@ -9,21 +9,22 @@ BATCH_SIZE = 1
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True)
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
@@ -33,24 +34,37 @@ class BertModule(tf.Module):
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased")
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
tf.convert_to_tensor(encoded_input[key]), 0
)
shark_module = SharkInference(
BertModule(),
(encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"]))
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])))
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
)
)

View File

@@ -0,0 +1,41 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
class ResnestModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"zhanghang1989/ResNeSt", "resnest50", pretrained=True
)
self.model.eval()
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter(
ResnestModule(),
(input),
frontend="torch",
)
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=True
)
print(golden_out)
shark_module = SharkInference(
vision_mlir, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward((input))
print("Obtained result", result)

View File

@@ -5,24 +5,28 @@ import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model
################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
headers = {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}
img = Image.open(requests.get(url, headers=headers,
stream=True).raw).convert("RGB")
img = Image.open(
requests.get(url, headers=headers, stream=True).raw
).convert("RGB")
# preprocessing pipeline
preprocess = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
])
preprocess = transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
)
img_preprocessed = preprocess(img)
return torch.unsqueeze(img_preprocessed, 0)
@@ -44,7 +48,6 @@ def top3_possibilities(res):
class Resnet50Module(torch.nn.Module):
def __init__(self):
super().__init__()
self.resnet = models.resnet50(pretrained=True)
@@ -61,18 +64,16 @@ labels = load_labels()
##############################################################################
input = torch.randn(1, 3, 224, 224)
print(input.shape)
## The img is passed to determine the input shape.
shark_module = SharkInference(Resnet50Module(), (img,))
shark_module.compile()
## Can pass any img or input to the forward module.
results = shark_module.forward((img,))
mlir_model, func_name, inputs, golden_out = download_torch_model("resnet50")
shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((img.detach().numpy(),))
print("The top 3 results obtained via shark_runner is:")
print(top3_possibilities(torch.from_numpy(results)))
print(top3_possibilities(torch.from_numpy(result)))
print()

View File

@@ -11,12 +11,12 @@ t5_inputs = [
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]
class T5Module(tf.Module):
def __init__(self):
super(T5Module, self).__init__()
self.m = TFT5Model.from_pretrained("t5-small")
self.m.predict = lambda x,y: self.m(input_ids=x, decoder_input_ids=y)
self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)
@tf.function(input_signature=t5_inputs)
def forward(self, input_ids, decoder_input_ids):
@@ -27,12 +27,9 @@ if __name__ == "__main__":
# Prepping Data
tokenizer = T5Tokenizer.from_pretrained("t5-small")
text = "I love the distilled version of models."
inputs = tokenizer(
text, return_tensors="tf"
).input_ids
inputs = tokenizer(text, return_tensors="tf").input_ids
shark_module = SharkInference(
T5Module(), (inputs, inputs))
shark_module = SharkInference(T5Module(), (inputs, inputs))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs,inputs)))
print(shark_module.forward((inputs, inputs)))

View File

@@ -4,7 +4,6 @@ from shark.shark_inference import SharkInference
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model

View File

@@ -4,7 +4,6 @@ from shark_runner import SharkInference
# Currently not supported aten.transpose_conv2d missing.
class UnetModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(

View File

@@ -0,0 +1,13 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_torch_model
mlir_model, func_name, inputs, golden_out = download_torch_model("v_diffusion")
shark_module = SharkInference(
mlir_model, func_name, device="vulkan", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)

View File

@@ -5,17 +5,13 @@ from shark.shark_runner import SharkTrainer
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attentions weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
@@ -37,8 +33,9 @@ inp = (torch.randint(2, (1, 128)),)
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(mod, params_and_buffers, args,
{}).sum().backward()
_stateless.functional_call(
mod, params_and_buffers, args, {}
).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
# optim.load_state_dict(optim_state)
optim.step()

View File

@@ -11,7 +11,8 @@ parser.add_argument(
"--download_mlir_path",
type=str,
default="bert_tf_training.mlir",
help="Specifies path to target mlir file that will be loaded.")
help="Specifies path to target mlir file that will be loaded.",
)
load_args, unknown = parser.parse_known_args()
tf.random.set_seed(0)
@@ -25,20 +26,30 @@ if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
]
file_link = "https://storage.googleapis.com/shark_tank/users/stanley/bert_tf_training.mlir"
response = request.urlretrieve(file_link, load_args.download_mlir_path)
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
sample_input_tensors = [
tf.convert_to_tensor(val, dtype=tf.int32)
for val in predict_sample_input
]
num_iter = 10
if not os.path.isfile(load_args.download_mlir_path):
raise ValueError(f"Tried looking for target mlir in {load_args.download_mlir_path}, but cannot be found.")
raise ValueError(
f"Tried looking for target mlir in {load_args.download_mlir_path}, but cannot be found."
)
with open(load_args.download_mlir_path, "rb") as input_file:
bert_mlir = input_file.read()
shark_module = SharkTrainer(
bert_mlir,
(sample_input_tensors,
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
(
sample_input_tensors,
tf.convert_to_tensor(
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
),
),
)
shark_module.set_frontend("mhlo")
shark_module.compile()
start = time.time()

View File

@@ -1,10 +1,7 @@
import sys
from absl import app
import time
import numpy as np
import os
import tempfile
import tensorflow as tf
from official.nlp.modeling import layers
@@ -28,31 +25,35 @@ bert_input = [
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
dict_outputs = False
test_network = networks.BertEncoder(vocab_size=vocab_size,
num_layers=2,
dict_outputs=dict_outputs)
test_network = networks.BertEncoder(
vocab_size=vocab_size, num_layers=2, dict_outputs=dict_outputs
)
# Create a BERT trainer with the created network.
bert_trainer_model = bert_classifier.BertClassifier(
test_network, num_classes=NUM_CLASSES)
test_network, num_classes=NUM_CLASSES
)
bert_trainer_model.summary()
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m = bert_trainer_model
self.m.predict = lambda x: self.m.call(x, training=False)
self.predict = tf.function(input_signature=[bert_input])(self.m.predict)
self.predict = tf.function(input_signature=[bert_input])(
self.m.predict
)
self.m.learn = lambda x, y: self.m.call(x, training=False)
self.loss = tf.keras.losses.SparseCategoricalCrossentropy()
self.optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2)
@tf.function(input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32) # labels
])
@tf.function(
input_signature=[
bert_input, # inputs
tf.TensorSpec(shape=[BATCH_SIZE], dtype=tf.int32), # labels
]
)
def forward(self, inputs, labels):
with tf.GradientTape() as tape:
# Capture the gradients from forward prop...
@@ -70,14 +71,22 @@ if __name__ == "__main__":
predict_sample_input = [
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH))
np.random.randint(5, size=(BATCH_SIZE, SEQUENCE_LENGTH)),
]
sample_input_tensors = [
tf.convert_to_tensor(val, dtype=tf.int32)
for val in predict_sample_input
]
sample_input_tensors = [tf.convert_to_tensor(val, dtype=tf.int32) for val in predict_sample_input]
num_iter = 10
shark_module = SharkTrainer(
BertModule(),
(sample_input_tensors,
tf.convert_to_tensor(np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32)))
(
sample_input_tensors,
tf.convert_to_tensor(
np.random.randint(5, size=(BATCH_SIZE)), dtype=tf.int32
),
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
start = time.time()

View File

@@ -4,7 +4,6 @@ from shark.shark_trainer import SharkTrainer
class Foo(torch.nn.Module):
def __init__(self):
super(Foo, self).__init__()
self.l1 = torch.nn.Linear(10, 16)
@@ -28,8 +27,9 @@ def get_sorted_params(named_params):
def forward(params, buffers, args):
params_and_buffers = {**params, **buffers}
_stateless.functional_call(mod, params_and_buffers, args,
{}).sum().backward()
_stateless.functional_call(
mod, params_and_buffers, args, {}
).sum().backward()
optim = torch.optim.SGD(get_sorted_params(params), lr=0.01)
optim.step()
return params, buffers

View File

@@ -28,9 +28,14 @@ from torch_mlir.eager_mode.torch_mlir_eager_backend import (
TorchMLIREagerBackend,
TensorMetaData,
)
from torch_mlir_e2e_test.eager_backends.refbackend import NUMPY_TO_TORCH_DTYPE_DICT
from torch_mlir_e2e_test.eager_backends.refbackend import (
NUMPY_TO_TORCH_DTYPE_DICT,
)
from shark.iree_utils import get_iree_compiled_module, IREE_DEVICE_MAP
from shark.iree_utils.compile_utils import (
get_iree_compiled_module,
IREE_DEVICE_MAP,
)
class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
@@ -46,15 +51,16 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
self.iree_device_str = IREE_DEVICE_MAP[device]
self.config = ireert.Config(self.iree_device_str)
def get_torch_metadata(self, tensor: DeviceArray,
kwargs: Dict[str, Any]) -> TensorMetaData:
def get_torch_metadata(
self, tensor: DeviceArray, kwargs: Dict[str, Any]
) -> TensorMetaData:
return TensorMetaData(
size=tensor.shape,
dtype=NUMPY_TO_TORCH_DTYPE_DICT[tensor.dtype.type],
device=torch.device(self.torch_device_str),
requires_grad=tensor.dtype.type
in {np.float, np.float32, np.float64} and
kwargs.get("requires_grad", False),
in {np.float, np.float32, np.float64}
and kwargs.get("requires_grad", False),
)
def compile(self, imported_module: Module):
@@ -64,9 +70,9 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
"torch-function-to-torch-backend-pipeline,torch-backend-to-linalg-on-tensors-backend-pipeline",
"EagerMode",
)
callable, _ = get_iree_compiled_module(imported_module,
self.iree_device_str,
func_name=fn_name)
callable, _ = get_iree_compiled_module(
imported_module, self.iree_device_str, func_name=fn_name
)
return callable
def copy_into(self, dst, src):
@@ -76,6 +82,7 @@ class EagerModeIREELinalgOnTensorsBackend(TorchMLIREagerBackend):
def transfer_from_device_to_torch(self, e):
return torch.from_numpy(e.to_host())
def transfer_from_torch_to_device(self,
tensor: torch.Tensor) -> DeviceArray:
def transfer_from_torch_to_device(
self, tensor: torch.Tensor
) -> DeviceArray:
return iree.runtime.asdevicearray(self.config.device, tensor.numpy())

View File

@@ -1,359 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime as ireert
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
import iree.compiler as ireec
from shark.torch_mlir_utils import get_module_name_for_asm_dump
from shark.cuda_utils import get_cuda_sm_cc
from shark.model_annotation import *
import subprocess
import numpy as np
import os
import re
import sys
IREE_DEVICE_MAP = {
"cpu": "local-task",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm"
}
IREE_TARGET_MAP = {
"cpu": "dylib",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm"
}
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
def check_device_drivers(device):
"""Checks necessary drivers present for gpu and vulkan devices"""
if (device in ["gpu", "cuda"]):
try:
subprocess.check_output('nvidia-smi')
except Exception:
return True
elif (device in ["metal", "vulkan"]):
try:
subprocess.check_output('vulkaninfo')
except Exception:
return True
elif (device == "cpu"):
return False
# Unknown device.
else:
return True
return False
def get_iree_cpu_args():
find_triple_cmd = "uname -s -m"
os_name, proc_name = subprocess.run(
find_triple_cmd, shell=True, stdout=subprocess.PIPE,
check=True).stdout.decode('utf-8').split()
if os_name == "Darwin":
find_kernel_version_cmd = "uname -r"
kernel_version = subprocess.run(find_kernel_version_cmd,
shell=True,
stdout=subprocess.PIPE,
check=True).stdout.decode('utf-8')
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
elif os_name == "Linux":
target_triple = f"{proc_name}-linux-gnu"
else:
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
raise Exception(error_message)
print(f"Target triple found:{target_triple}")
return [f"-iree-llvm-target-triple={target_triple}"]
def get_iree_gpu_args():
ireert.flags.FUNCTION_INPUT_VALIDATION = False
ireert.flags.parse_flags("--cuda_allow_inline_execution")
sm_arch = get_cuda_sm_cc()
if sm_arch in ['sm_70', 'sm_72', 'sm_75', 'sm_80', 'sm_84', 'sm_86']:
return [
"--iree-hal-cuda-disable-loop-nounroll-wa",
f"--iree-hal-cuda-llvm-target-arch={sm_arch}"
]
else:
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
def get_vulkan_triple_flag():
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk \'END{{print $NF}}\'"
vulkan_device = run_cmd(vulkan_device_cmd).strip()
if vulkan_device == "M1":
print("Found Apple Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "A100-SXM4-40GB":
print("Found Nvidia Device. Using ampere-rtx3080-linux")
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
else:
print(
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u] or pull up an issue."
)
return None
def get_iree_vulkan_args():
#vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
vulkan_flag = []
vulkan_triple_flag = get_vulkan_triple_flag()
if vulkan_triple_flag is not None:
vulkan_flag.append(vulkan_triple_flag)
return vulkan_flag
def get_iree_device_args(device):
if device == "cpu":
return get_iree_cpu_args()
if device in ["gpu", "cuda"]:
return get_iree_gpu_args()
if device in ["metal", "vulkan"]:
return get_iree_vulkan_args()
return []
def get_iree_frontend_args(frontend):
if frontend in ["torch", "pytorch", "linalg"]:
return ["--iree-llvm-target-cpu-features=host"]
elif frontend in ["tensorflow", "tf", "mhlo"]:
return [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-flow-demote-i64-to-i32"
]
else:
# Frontend not found.
return []
def compile_module_to_flatbuffer(module, device, frontend, func_name,
model_config_path):
# Setup Compile arguments wrt to frontends.
input_type = ""
args = get_iree_frontend_args(frontend)
args += get_iree_device_args(device)
if frontend in ["tensorflow", "tf"]:
input_type = "mhlo"
elif frontend in ["mhlo", "tosa"]:
input_type = frontend
elif frontend in ["tflite"]:
input_type = "tosa"
# Annotate the input module with the configs
if model_config_path != None:
# Currently tuned model only works on tf frontend
if frontend in ["tensorflow", "tf"]:
input_module = module.decode('utf-8')
elif frontend in ["pytorch", "torch"]:
input_module = module.operation.get_asm()
with create_context() as ctx:
module = model_annotation(ctx,
input_contents=input_module,
config_path=model_config_path)
module = str(module)
# Compile according to the input type, else just try compiling.
if input_type not in ["mhlo", "tosa"]:
module = str(module)
if input_type != "":
# Currently for MHLO/TOSA.
flatbuffer_blob = ireec.compile_str(
module,
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
input_type=input_type)
else:
# Currently for Torch.
flatbuffer_blob = ireec.compile_str(
str(module),
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args)
return flatbuffer_blob
def get_iree_module(flatbuffer_blob, device, func_name):
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
config = ireert.Config(IREE_DEVICE_MAP[device])
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
ModuleCompiled = ctx.modules.module[func_name]
return ModuleCompiled, config
def get_iree_compiled_module(module,
device: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
"""Given a module returns the compiled .vmfb and configs"""
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
return get_iree_module(flatbuffer_blob, device, func_name)
def export_iree_module_to_vmfb(module,
device: str,
directory: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
module_name = f"{frontend}_{func_name}_{device}"
filename = os.path.join(directory, module_name + ".vmfb")
print(f"Saved vmfb in {filename}.")
with open(filename, 'wb') as f:
f.write(flatbuffer_blob)
return filename
def export_module_to_mlir_file(module, frontend, directory: str):
mlir_str = module
if frontend in ["tensorflow", "tf", "mhlo"]:
mlir_str = module.decode('utf-8')
elif frontend in ["pytorch", "torch"]:
mlir_str = module.operation.get_asm()
filename = os.path.join(directory, "model.mlir")
with open(filename, 'w') as f:
f.write(mlir_str)
print(f"Saved mlir in {filename}.")
return filename
def get_results(compiled_vm, input, config, frontend="torch"):
"""Runs a .vmfb file given inputs and config and returns output."""
device_inputs = input
if frontend in ["torch", "pytorch"]:
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
if frontend in ["tensorflow", "tf", "tflite"]:
device_inputs = []
for a in input:
if (isinstance(a, list)):
device_inputs.append([
ireert.asdevicearray(config.device, val, dtype=np.int32)
for val in a
])
else:
device_inputs.append(ireert.asdevicearray(config.device, a))
result = compiled_vm(*device_inputs)
result_tensors = []
if (isinstance(result, tuple)):
for val in result:
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
return result_tensors
elif (isinstance(result, dict)):
data = list(result.items())
res = np.array(data, dtype=object)
return np.copy(res)
else:
return np.copy(np.asarray(result, dtype=result.dtype))
######### Benchmark Related Tools ###########
def tensor_to_type_str(input_tensors: tuple, frontend: str):
"""
Input: A tuple of input tensors i.e tuple(torch.tensor)
Output: list of string that represent mlir types (i.e 1x24xf64)
# TODO: Support more than floats, and ints
"""
list_of_type = []
for input_tensor in input_tensors:
type_string = "x".join([str(dim) for dim in input_tensor.shape])
if frontend in ["torch", "pytorch"]:
dtype_string = str(input_tensor.dtype).replace("torch.", "")
elif frontend in ["tensorflow", "tf"]:
dtype = input_tensor.dtype
dtype_string = re.findall('\'[^"]*\'',
str(dtype))[0].replace("\'", "")
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
match = regex_split.match(dtype_string)
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
type_string += f"x{mlir_type_string}"
list_of_type.append(type_string)
return list_of_type
def build_benchmark_args(input_file: str,
device: str,
input_tensors: tuple,
frontend: str,
training=False):
"""
Inputs: input_file leading to vmfb, input_tensor to function, target device, and whether it is training or not.
Outputs: string that execute benchmark-module on target model.
"""
path = benchmark_module.__path__[0]
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
fn_name = "forward"
if training == True:
# TODO: Replace name of train with actual train fn name.
fn_name = "train"
benchmark_cl.append(f"--entry_function={fn_name}")
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
mlir_input_types = tensor_to_type_str(input_tensors, frontend)
for mlir_input in mlir_input_types:
benchmark_cl.append(f"--function_input={mlir_input}")
time_extractor = "| awk \'END{{print $2 $3}}\'"
benchmark_cl.append(time_extractor)
return benchmark_cl
def run_cmd(cmd):
"""
Inputs: cli command string.
"""
try:
result = subprocess.run(cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True)
result_str = result.stdout.decode()
return result_str
except Exception:
sys.exit("Exiting program due to error running:", cmd)
def run_benchmark_module(benchmark_cl):
"""
Run benchmark command, extract result and return iteration/seconds.
Input: benchmark command.
"""
benchmark_path = benchmark_cl[0]
assert os.path.exists(
benchmark_path
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
bench_result = run_cmd(' '.join(benchmark_cl))
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
match = regex_split.match(bench_result)
time = float(match.group(1))
unit = match.group(2)
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])

View File

View File

@@ -0,0 +1,87 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Common utilities to be shared by iree utilities.
import os
import sys
import subprocess
def run_cmd(cmd):
"""
Inputs: cli command string.
"""
try:
result = subprocess.run(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True,
)
result_str = result.stdout.decode()
return result_str
except Exception:
sys.exit("Exiting program due to error running:", cmd)
IREE_DEVICE_MAP = {
"cpu": "local-task",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
}
IREE_TARGET_MAP = {
"cpu": "dylib",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
}
# Finds whether the required drivers are installed for the given device.
def check_device_drivers(device):
"""Checks necessary drivers present for gpu and vulkan devices"""
if device in ["gpu", "cuda"]:
try:
subprocess.check_output("nvidia-smi")
except Exception:
return True
elif device in ["metal", "vulkan"]:
try:
subprocess.check_output("vulkaninfo")
except Exception:
return True
elif device == "cpu":
return False
# Unknown device.
else:
return True
return False
# Installation info for the missing device drivers.
def device_driver_info(device):
if device in ["gpu", "cuda"]:
return "nvidia-smi not found, please install the required drivers from https://www.nvidia.in/Download/index.aspx?lang=en-in"
elif device in ["metal", "vulkan"]:
return "vulkaninfo not found, Install from https://vulkan.lunarg.com/sdk/home or your distribution"
else:
return f"{device} is not supported."

View File

@@ -0,0 +1,94 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime.scripts.iree_benchmark_module as benchmark_module
from shark.iree_utils._common import run_cmd, IREE_DEVICE_MAP
import numpy as np
import os
import re
UNIT_TO_SECOND_MAP = {"ms": 0.001, "s": 1}
def tensor_to_type_str(input_tensors: tuple, mlir_dialect: str):
"""
Input: a tuple of input tensors, i.e. tuple(torch.tensor)
Output: a list of strings representing MLIR types (e.g. 1x24xf64)
# TODO: Support more than floats, and ints
"""
list_of_type = []
for input_tensor in input_tensors:
type_string = "x".join([str(dim) for dim in input_tensor.shape])
if mlir_dialect in ["linalg", "tosa"]:
dtype_string = str(input_tensor.dtype).replace("torch.", "")
elif mlir_dialect in ["mhlo", "tflite"]:
dtype = input_tensor.dtype
dtype_string = re.findall("'[^\"]*'", str(dtype))[0].replace(
"'", ""
)
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
match = regex_split.match(dtype_string)
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
type_string += f"x{mlir_type_string}"
list_of_type.append(type_string)
return list_of_type
def build_benchmark_args(
input_file: str,
device: str,
input_tensors: tuple,
mlir_dialect: str,
training=False,
):
"""
Inputs: input_file leading to vmfb, input_tensor to function, target device,
and whether it is training or not.
Outputs: the command (a list of strings) that runs iree-benchmark-module on the target model.
"""
path = benchmark_module.__path__[0]
benchmarker_path = os.path.join(path, "..", "..", "iree-benchmark-module")
benchmark_cl = [benchmarker_path, f"--module_file={input_file}"]
# TODO: The function name can be passed as one of the args.
fn_name = "forward"
if training == True:
# TODO: Replace name of train with actual train fn name.
fn_name = "train"
benchmark_cl.append(f"--entry_function={fn_name}")
benchmark_cl.append(f"--device={IREE_DEVICE_MAP[device]}")
mlir_input_types = tensor_to_type_str(input_tensors, mlir_dialect)
for mlir_input in mlir_input_types:
benchmark_cl.append(f"--function_input={mlir_input}")
time_extractor = "| awk 'END{{print $2 $3}}'"
benchmark_cl.append(time_extractor)
return benchmark_cl
def run_benchmark_module(benchmark_cl):
"""
Run benchmark command, extract result and return iteration/seconds.
# TODO: Add an example of the benchmark command.
Input: benchmark command.
"""
benchmark_path = benchmark_cl[0]
assert os.path.exists(
benchmark_path
), "Cannot find benchmark_module, Please contact SHARK maintainer on discord."
bench_result = run_cmd(" ".join(benchmark_cl))
regex_split = re.compile("([0-9]+[.]*[0-9]*)([a-zA-Z]+)")
match = regex_split.match(bench_result)
time = float(match.group(1))
unit = match.group(2)
return 1.0 / (time * UNIT_TO_SECOND_MAP[unit])
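As a rough usage sketch of the two benchmark helpers above (the module path shark.iree_utils.benchmark_utils, the .vmfb path, and the input shape are assumptions for illustration):

import torch
from shark.iree_utils.benchmark_utils import (
    build_benchmark_args,
    run_benchmark_module,
)

# Build the iree-benchmark-module command line for an already-compiled module...
benchmark_cl = build_benchmark_args(
    input_file="/tmp/torch_forward_cpu.vmfb",
    device="cpu",
    input_tensors=(torch.randn(1, 3, 224, 224),),
    mlir_dialect="linalg",
)
# ...then run it and report iterations per second.
print("iter/sec:", run_benchmark_module(benchmark_cl))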

View File

@@ -0,0 +1,171 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import iree.runtime as ireert
import iree.compiler as ireec
from shark.iree_utils._common import IREE_DEVICE_MAP, IREE_TARGET_MAP
import numpy as np
import os
# Get the iree-compile arguments given device.
def get_iree_device_args(device):
if device == "cpu":
from shark.iree_utils.cpu_utils import get_iree_cpu_args
return get_iree_cpu_args()
if device in ["gpu", "cuda"]:
from shark.iree_utils.gpu_utils import get_iree_gpu_args
return get_iree_gpu_args()
if device in ["metal", "vulkan"]:
from shark.iree_utils.vulkan_utils import get_iree_vulkan_args
return get_iree_vulkan_args()
return []
# Get the iree-compiler arguments given frontend.
def get_iree_frontend_args(frontend):
if frontend in ["torch", "pytorch", "linalg"]:
return ["--iree-llvm-target-cpu-features=host"]
elif frontend in ["tensorflow", "tf", "mhlo"]:
return [
"--iree-llvm-target-cpu-features=host",
"--iree-mhlo-demote-i64-to-i32=false",
"--iree-flow-demote-i64-to-i32",
]
else:
# Frontend not found.
return []
# Common args to be used given any frontend or device.
def get_iree_common_args():
return [
"--iree-stream-resource-index-bits=64",
"--iree-vm-target-index-bits=64",
]
def compile_module_to_flatbuffer(
module, device, frontend, func_name, model_config_path
):
# Setup Compile arguments wrt to frontends.
input_type = ""
args = get_iree_frontend_args(frontend)
args += get_iree_device_args(device)
args += get_iree_common_args()
if frontend in ["tensorflow", "tf"]:
input_type = "mhlo"
elif frontend in ["mhlo", "tosa"]:
input_type = frontend
elif frontend in ["tflite", "tflite-tosa"]:
input_type = "tosa"
# TODO: make it simpler.
# Compile according to the input type, else just try compiling.
if input_type not in ["mhlo", "tosa"]:
module = str(module)
if input_type != "":
# Currently for MHLO/TOSA.
flatbuffer_blob = ireec.compile_str(
module,
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
input_type=input_type,
)
else:
# Currently for Torch.
flatbuffer_blob = ireec.compile_str(
str(module),
target_backends=[IREE_TARGET_MAP[device]],
extra_args=args,
)
return flatbuffer_blob
def get_iree_module(flatbuffer_blob, device, func_name):
# Returns the compiled module and the configs.
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
config = ireert.Config(IREE_DEVICE_MAP[device])
ctx = ireert.SystemContext(config=config)
ctx.add_vm_module(vm_module)
ModuleCompiled = ctx.modules.module[func_name]
return ModuleCompiled, config
def get_iree_compiled_module(
module,
device: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None,
):
"""Given a module returns the compiled .vmfb and configs"""
flatbuffer_blob = compile_module_to_flatbuffer(
module, device, frontend, func_name, model_config_path
)
return get_iree_module(flatbuffer_blob, device, func_name)
def export_iree_module_to_vmfb(
module,
device: str,
directory: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None,
):
# Compiles the module given specs and saves it as .vmfb file.
flatbuffer_blob = compile_module_to_flatbuffer(
module, device, frontend, func_name, model_config_path
)
module_name = f"{frontend}_{func_name}_{device}"
filename = os.path.join(directory, module_name + ".vmfb")
print(f"Saved vmfb in {filename}.")
with open(filename, "wb") as f:
f.write(flatbuffer_blob)
return filename
def export_module_to_mlir_file(module, frontend, directory: str):
# TODO: write proper documentation.
mlir_str = module
if frontend in ["tensorflow", "tf", "mhlo", "tflite"]:
mlir_str = module.decode("utf-8")
elif frontend in ["pytorch", "torch"]:
mlir_str = module.operation.get_asm()
filename = os.path.join(directory, "model.mlir")
with open(filename, "w") as f:
f.write(mlir_str)
print(f"Saved mlir in {filename}.")
return filename
def get_results(compiled_vm, input, config, frontend="torch"):
"""Runs a .vmfb file given inputs and config and returns output."""
device_inputs = [ireert.asdevicearray(config.device, a) for a in input]
result = compiled_vm(*device_inputs)
result_tensors = []
if isinstance(result, tuple):
for val in result:
result_tensors.append(np.copy(np.asarray(val, val.dtype)))
return result_tensors
elif isinstance(result, dict):
data = list(result.items())
res = np.array(data, dtype=object)
return np.copy(res)
else:
return np.copy(np.asarray(result, dtype=result.dtype))
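A condensed sketch of the compile-and-run path exposed by this file, reusing the small MHLO matmul from the example earlier in this diff (the module string is elided here; everything else follows the signatures above):

import numpy as np
from shark.iree_utils.compile_utils import (
    get_iree_compiled_module,
    get_results,
)

mhlo_ir = "..."  # the mhlo matmul module string from the earlier example
compiled_vm, config = get_iree_compiled_module(
    mhlo_ir, device="cpu", frontend="mhlo", func_name="forward"
)
inputs = (np.ones((1, 4), np.float32), np.ones((4, 1), np.float32))
# get_results() moves the inputs onto the device and copies the result back.
print(get_results(compiled_vm, inputs, config, frontend="mhlo"))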

View File

@@ -0,0 +1,44 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# All the iree_cpu related functionalities go here.
import subprocess
# Get the default cpu args.
def get_iree_cpu_args():
find_triple_cmd = "uname -s -m"
os_name, proc_name = (
subprocess.run(
find_triple_cmd, shell=True, stdout=subprocess.PIPE, check=True
)
.stdout.decode("utf-8")
.split()
)
if os_name == "Darwin":
find_kernel_version_cmd = "uname -r"
kernel_version = subprocess.run(
find_kernel_version_cmd,
shell=True,
stdout=subprocess.PIPE,
check=True,
).stdout.decode("utf-8")
target_triple = f"{proc_name}-apple-darwin{kernel_version}"
elif os_name == "Linux":
target_triple = f"{proc_name}-linux-gnu"
else:
error_message = f"OS Type f{os_name} not supported and triple can't be determined, open issue to dSHARK team please :)"
raise Exception(error_message)
print(f"Target triple found:{target_triple}")
return [f"-iree-llvm-target-triple={target_triple}"]

View File

@@ -12,10 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
# All the iree_gpu related functionalities go here.
import iree.runtime as ireert
import ctypes
#Some constants taken from cuda.h
# Get the default gpu args given the architecture.
def get_iree_gpu_args():
ireert.flags.FUNCTION_INPUT_VALIDATION = False
ireert.flags.parse_flags("--cuda_allow_inline_execution")
# TODO: Give the user_interface to pass the sm_arch.
sm_arch = get_cuda_sm_cc()
if sm_arch in ["sm_70", "sm_72", "sm_75", "sm_80", "sm_84", "sm_86"]:
return [
"--iree-hal-cuda-disable-loop-nounroll-wa",
f"--iree-hal-cuda-llvm-target-arch={sm_arch}",
]
else:
return ["--iree-hal-cuda-disable-loop-nounroll-wa"]
# Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
@@ -24,7 +41,7 @@ CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
def get_cuda_sm_cc():
libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
libnames = ("libcuda.so", "libcuda.dylib", "cuda.dll")
for libname in libnames:
try:
cuda = ctypes.CDLL(libname)
@@ -33,10 +50,10 @@ def get_cuda_sm_cc():
else:
break
else:
raise OSError("could not load any of: " + ' '.join(libnames))
raise OSError("could not load any of: " + " ".join(libnames))
nGpus = ctypes.c_int()
name = b' ' * 100
name = b" " * 100
cc_major = ctypes.c_int()
cc_minor = ctypes.c_int()
@@ -48,31 +65,44 @@ def get_cuda_sm_cc():
result = cuda.cuInit(0)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuInit failed with error code %d: %s" %
(result, error_str.value.decode()))
print(
"cuInit failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGetCount failed with error code %d: %s" %
(result, error_str.value.decode()))
print(
"cuDeviceGetCount failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
print("Found %d device(s)." % nGpus.value)
for i in range(nGpus.value):
result = cuda.cuDeviceGet(ctypes.byref(device), i)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGet failed with error code %d: %s" %
(result, error_str.value.decode()))
print(
"cuDeviceGet failed with error code %d: %s"
% (result, error_str.value.decode())
)
return 1
print("Device: %d" % i)
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
device) == CUDA_SUCCESS:
print(" Name: %s" % (name.split(b'\0', 1)[0].decode()))
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
ctypes.byref(cc_minor),
device) == CUDA_SUCCESS:
print(" Compute Capability: %d.%d" %
(cc_major.value, cc_minor.value))
if (
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device)
== CUDA_SUCCESS
):
print(" Name: %s" % (name.split(b"\0", 1)[0].decode()))
if (
cuda.cuDeviceComputeCapability(
ctypes.byref(cc_major), ctypes.byref(cc_minor), device
)
== CUDA_SUCCESS
):
print(
" Compute Capability: %d.%d"
% (cc_major.value, cc_minor.value)
)
sm = f"sm_{cc_major.value}{cc_minor.value}"
return sm

View File

@@ -0,0 +1,54 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# All the iree_vulkan related functionalities go here.
from shark.iree_utils._common import run_cmd
def get_vulkan_triple_flag():
vulkan_device_cmd = "vulkaninfo | grep deviceName | awk 'END{{print $NF}}'"
vulkan_device = run_cmd(vulkan_device_cmd).strip()
if vulkan_device == "Ultra":
print("Found MacStudio M1 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "M2":
print("Found Apple M2 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "M1":
print("Found Apple M1 Device. Using m1-moltenvk-macos")
return "-iree-vulkan-target-triple=m1-moltenvk-macos"
elif vulkan_device == "A100-SXM4-40GB":
print("Found Nvidia Device. Using ampere-rtx3080-linux")
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
elif vulkan_device == "3090":
print("Found Nvidia Device. Using ampere-rtx3090-linux")
return "-iree-vulkan-target-triple=ampere-rtx3090-linux"
else:
print(
"""Optimized kernel for your target device is not added yet.
Contact SHARK Admin on discord[https://discord.com/invite/RUqY2h2s9u]
or pull up an issue."""
)
print(f"Target : {vulkan_device}")
return None
def get_iree_vulkan_args():
# vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
vulkan_flag = []
vulkan_triple_flag = get_vulkan_triple_flag()
if vulkan_triple_flag is not None:
vulkan_flag.append(vulkan_triple_flag)
return vulkan_flag
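A minimal usage sketch (assuming the helper lives at shark.iree_utils.vulkan_utils, as the import style above suggests; the returned flag list is simply appended to whatever other IREE compile arguments are in play):

from shark.iree_utils.vulkan_utils import get_iree_vulkan_args

# Returns e.g. ["-iree-vulkan-target-triple=m1-moltenvk-macos"], or [] when no
# tuned triple is known for the local GPU.
extra_args = get_iree_vulkan_args()
print("Vulkan compile flags:", extra_args)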


@@ -21,11 +21,14 @@ from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans
MATMUL_OP_NAMES = set(
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"]
)
idx = 0
def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
def model_annotation(
ctx: ir.Context, *, input_contents: str, config_path: str
):
if os.path.isfile(input_contents):
with open(input_contents, "rb") as f:
input_contents = f.read()
@@ -47,7 +50,8 @@ def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
# - Disables verification (already done above)
# - Writes as binary, avoiding costly unicode conversions
sys.stdout.buffer.write(
module.operation.get_asm(assume_verified=True, binary=True))
module.operation.get_asm(assume_verified=True, binary=True)
)
return module
@@ -61,14 +65,21 @@ def walk_children(op: ir.Operation, configs: List[Dict]):
child_op = child_op.operation
if child_op.name in MATMUL_OP_NAMES:
global idx
tile_sizes, pipeline, workgroup_size, \
split_k, pipeline_depth = parse_config(configs[idx])
(
tile_sizes,
pipeline,
workgroup_size,
split_k,
pipeline_depth,
) = parse_config(configs[idx])
add_compilation_info(child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth)
add_compilation_info(
child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth,
)
if split_k:
add_split_k(child_op, split_k)
@@ -80,8 +91,11 @@ def walk_children(op: ir.Operation, configs: List[Dict]):
def parse_config(config: Dict):
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
pipeline = "LLVMGPUMatmulSimt" if config[
"pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
pipeline = (
"LLVMGPUMatmulSimt"
if config["pipeline"] == "GPU"
else "LLVMGPUMatmulTensorCore"
)
tile_sizes = [config["work_group_tile_sizes"]]
workgroup_size = config["work_group_sizes"]
try:
@@ -95,8 +109,9 @@ def parse_config(config: Dict):
else:
pipeline = config["pipeline"]
tile_sizes = [
config["work_group_tile_sizes"], config["l1_tile_sizes"],
config["vector_tile_sizes"]
config["work_group_tile_sizes"],
config["l1_tile_sizes"],
config["vector_tile_sizes"],
]
workgroup_size = []
split_k = None
@@ -104,9 +119,13 @@ def parse_config(config: Dict):
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
pipeline: str, workgroup_size: List[int],
pipeline_depth: int):
def add_compilation_info(
op: ir.Operation,
tile_sizes: List[List[int]],
pipeline: str,
workgroup_size: List[int],
pipeline_depth: int,
):
# We don't have a Python binding for CompilationInfo, so we just parse
# its string form.
if pipeline_depth:
@@ -114,13 +133,15 @@ def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
f"workgroup_size = {repr(workgroup_size)}>")
f"workgroup_size = {repr(workgroup_size)}>"
)
else:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline}>, "
f"workgroup_size = {repr(workgroup_size)}>")
f"workgroup_size = {repr(workgroup_size)}>"
)
op.attributes["compilation_info"] = attr
@@ -138,6 +159,6 @@ def create_context() -> ir.Context:
if __name__ == "__main__":
with create_context() as ctx:
model_annotation(ctx,
input_contents=sys.argv[1],
config_path=sys.argv[2])
model_annotation(
ctx, input_contents=sys.argv[1], config_path=sys.argv[2]
)
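For reference, a hypothetical entry from the tuned-config file, using only the keys parse_config reads above (values are invented for illustration; one entry is consumed per matmul op, in walk order):

example_configs = [
    {
        "pipeline": "GPU_TENSORCORE",          # or "GPU" for the SIMT pipeline
        "work_group_tile_sizes": [64, 64, 32],
        "work_group_sizes": [64, 2, 1],
        "pipeline_depth": 4,                   # optional
        "split_k": 2,                           # optional
    }
]
# Non-GPU pipelines instead provide "l1_tile_sizes" and "vector_tile_sizes",
# and the pipeline name is passed through verbatim.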


@@ -20,8 +20,8 @@ def dir_path(path):
if os.path.isdir(path):
return path
else:
raise argparse.ArgumentTypeError(
f"readable_dir:{path} is not a valid path")
os.mkdir(path)
return path
def dir_file(path):
@@ -29,43 +29,52 @@ def dir_file(path):
return path
else:
raise argparse.ArgumentTypeError(
f"readable_file:{path} is not a valid file")
f"readable_file:{path} is not a valid file"
)
parser = argparse.ArgumentParser(description='SHARK runner.')
parser = argparse.ArgumentParser(description="SHARK runner.")
parser.add_argument(
"--device",
type=str,
default="cpu",
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan")
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan",
)
parser.add_argument(
"--repro_dir",
help=
"Directory to which module files will be saved for reproduction or debugging.",
help="Directory to which module files will be saved for reproduction or debugging.",
type=dir_path,
default="/tmp/")
parser.add_argument("--save_mlir",
default=False,
action="store_true",
help="Saves input MLIR module to /tmp/ directory.")
parser.add_argument("--save_vmfb",
default=False,
action="store_true",
help="Saves iree .vmfb module to /tmp/ directory.")
default="./shark_tmp",
)
parser.add_argument(
"--save_mlir",
default=False,
action="store_true",
help="Saves input MLIR module to /tmp/ directory.",
)
parser.add_argument(
"--save_vmfb",
default=False,
action="store_true",
help="Saves iree .vmfb module to /tmp/ directory.",
)
parser.add_argument(
"--model_config_path",
help="Directory to where the tuned model config file is located.",
default=None)
default=None,
)
parser.add_argument(
"--num_warmup_iterations",
type=int,
default=2,
help="Run the model for the specified number of warmup iterations.")
help="Run the model for the specified number of warmup iterations.",
)
parser.add_argument(
"--num_iterations",
type=int,
default=1,
help="Run the model for the specified number of iterations.")
help="Run the model for the specified number of iterations.",
)
shark_args, unknown = parser.parse_known_args()
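Downstream modules read these flags through the shared namespace; for example, a script launched with --device vulkan --save_vmfb would observe (sketch):

from shark.parser import shark_args

# parse_known_args() runs at import time, so the values below reflect the CLI
# (or the defaults declared above).
print(shark_args.device)          # "vulkan"
print(shark_args.save_vmfb)       # True
print(shark_args.num_iterations)  # 1 unless overridden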


@@ -0,0 +1,194 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.shark_runner import SharkRunner
from shark.iree_utils.compile_utils import export_iree_module_to_vmfb
from shark.iree_utils.benchmark_utils import (
build_benchmark_args,
run_benchmark_module,
)
from shark.parser import shark_args
from tank.model_utils import get_torch_model
from datetime import datetime
import time
import csv
import os
class SharkBenchmarkRunner(SharkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
frontend: str = "torch",
):
self.device = shark_args.device if device == "none" else device
self.frontend = frontend
self.frontend_model = None
self.vmfb_file = None
SharkRunner.__init__(
self,
mlir_module,
function_name,
device,
mlir_dialect,
)
if self.vmfb_file == None:
self.vmfb_file = export_iree_module_to_vmfb(
mlir_module, device, shark_args.repro_dir, self.frontend
)
def setup_cl(self, input_tensors):
self.benchmark_cl = build_benchmark_args(
self.vmfb_file,
self.device,
input_tensors,
mlir_dialect=self.mlir_dialect,
)
def benchmark_frontend(self, inputs, modelname):
if self.frontend in ["pytorch", "torch"]:
return self.benchmark_torch(modelname)
elif self.frontend in ["tensorflow", "tf"]:
return self.benchmark_tf(inputs, modelname)
def benchmark_torch(self, modelname):
import torch
if self.device == "gpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
torch.set_default_tensor_type(torch.FloatTensor)
torch_device = torch.device(
"cuda:0" if self.device == "gpu" else "cpu"
)
HFmodel, input, act_out = get_torch_model(modelname)
frontend_model = HFmodel.model
frontend_model.to(torch_device)
input.to(torch_device)
for i in range(shark_args.num_warmup_iterations):
frontend_model.forward(input)
begin = time.time()
for i in range(shark_args.num_iterations):
out = frontend_model.forward(input)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_tf(self, frontend_model, inputs):
for i in range(shark_args.num_warmup_iterations):
frontend_model.forward(*inputs)
begin = time.time()
for i in range(shark_args.num_iterations):
out = frontend_model.forward(*inputs)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_c(self):
result = run_benchmark_module(self.benchmark_cl)
print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")
return [f"{result}", f"{1000/result}"]
def benchmark_python(self, inputs):
input_list = [x for x in inputs]
for i in range(shark_args.num_warmup_iterations):
self.run(input_list)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.run(input_list)
if i == shark_args.num_iterations - 1:
end = time.time()
print(
f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return [
f"{shark_args.num_iterations/(end-begin)}",
f"{((end-begin)/shark_args.num_iterations)*1000}",
]
def benchmark_all(self, inputs: tuple):
self.benchmark_frontend(inputs)
self.benchmark_python(inputs)
self.benchmark_c()
def benchmark_all_csv(
self, inputs: tuple, modelname, dynamic, device_str, frontend
):
self.setup_cl(inputs)
field_names = [
"platform",
"model",
"dynamic",
"device",
"iter/sec",
"ms/iter",
"datetime",
]
platforms = ["frontend", "shark_python", "shark_iree_c"]
if not os.path.exists("bench_results.csv"):
with open("bench_results.csv", mode="w", newline="") as f:
writer = csv.writer(f)
writer.writerow(field_names)
with open("bench_results.csv", mode="a", newline="") as f:
writer = csv.DictWriter(f, fieldnames=field_names)
bench_result = {}
bench_result["model"] = modelname
if dynamic == True:
bench_result["dynamic"] = "True"
else:
bench_result["dynamic"] = "False"
bench_result["device"] = device_str
for p in platforms:
if p == "frontend":
bench_result["platform"] = frontend
bench_result["iter/sec"] = self.benchmark_frontend(
inputs, modelname
)[0]
bench_result["ms/iter"] = self.benchmark_frontend(
inputs, modelname
)[1]
elif p == "shark_python":
bench_result["platform"] = "shark_python"
bench_result["iter/sec"] = self.benchmark_python(inputs)[0]
bench_result["ms/iter"] = self.benchmark_python(inputs)[1]
else:
bench_result["platform"] = "shark_iree_c"
bench_result["iter/sec"] = self.benchmark_c()[0]
bench_result["ms/iter"] = self.benchmark_c()[1]
bench_result["datetime"] = str(datetime.now())
writer.writerow(bench_result)
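A usage sketch, reusing the downloader introduced below so the MLIR module, function name, and inputs match what the CI tests feed in (availability of the model in the tank is assumed):

from shark.shark_benchmark_runner import SharkBenchmarkRunner
from shark.shark_downloader import download_torch_model

mlir, func_name, inputs, golden = download_torch_model(
    "microsoft/MiniLM-L12-H384-uncased"
)
runner = SharkBenchmarkRunner(mlir, func_name, device="cpu", frontend="torch")
# Appends one row per platform (frontend, shark_python, shark_iree_c)
# to bench_results.csv.
runner.benchmark_all_csv(
    inputs, "microsoft/MiniLM-L12-H384-uncased", False, "cpu", "torch"
)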

shark/shark_downloader.py (new file, 228 lines)

@@ -0,0 +1,228 @@
# Lint as: python3
"""SHARK Downloader"""
# Requirements : Put shark_tank in SHARK directory
# /SHARK
# /gen_shark_tank
# /tflite
# /albert_lite_base
# /...model_name...
# /tf
# /pytorch
#
#
#
import numpy as np
import os
import urllib.request
import json
import hashlib
from pathlib import Path
input_type_to_np_dtype = {
"float32": np.float32,
"float64": np.float64,
"bool": np.bool_,
"int32": np.int32,
"int64": np.int64,
"uint8": np.uint8,
"int8": np.int8,
}
# Save the model under the user's home directory so it needn't be fetched every time in the CI.
home = str(Path.home())
WORKDIR = os.path.join(home, ".local/shark_tank/")
print(WORKDIR)
# Checks whether the directory and files exists.
def check_dir_exists(model_name, frontend="torch", dynamic=""):
model_dir = os.path.join(WORKDIR, model_name)
# Remove the _tf keyword from end.
if frontend in ["tf", "tensorflow"]:
model_name = model_name[:-3]
elif frontend in ["tflite"]:
model_name = model_name[:-7]
elif frontend in ["torch", "pytorch"]:
model_name = model_name[:-6]
if os.path.isdir(model_dir):
if (
os.path.isfile(
os.path.join(
model_dir,
model_name + dynamic + "_" + str(frontend) + ".mlir",
)
)
and os.path.isfile(os.path.join(model_dir, "function_name.npy"))
and os.path.isfile(os.path.join(model_dir, "inputs.npz"))
and os.path.isfile(os.path.join(model_dir, "golden_out.npz"))
and os.path.isfile(os.path.join(model_dir, "hash.npy"))
):
print(
f"""The models are present in the {WORKDIR}. If you want a fresh
download, consider deleting the directory."""
)
return True
return False
# Downloads the torch model from gs://shark_tank dir.
def download_torch_model(model_name, dynamic=False):
model_name = model_name.replace("/", "_")
dyn_str = "_dynamic" if dynamic else ""
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_torch"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(model_dir_name, frontend="torch", dynamic=dyn_str):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(
os.path.join(model_dir, model_name + dyn_str + "_torch.mlir")
) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
# Downloads the tflite model from gs://shark_tank dir.
def download_tflite_model(model_name, dynamic=False):
dyn_str = "_dynamic" if dynamic else ""
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_tflite"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(
model_dir_name, frontend="tflite", dynamic=dyn_str
):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(
os.path.join(model_dir, model_name + dyn_str + "_tflite.mlir")
) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
def download_tf_model(model_name):
model_name = model_name.replace("/", "_")
os.makedirs(WORKDIR, exist_ok=True)
model_dir_name = model_name + "_tf"
def gs_download_model():
gs_command = (
'gsutil -o "GSUtil:parallel_process_count=1" cp -r gs://shark_tank'
+ "/"
+ model_dir_name
+ " "
+ WORKDIR
)
if os.system(gs_command) != 0:
raise Exception("model not present in the tank. Contact Nod Admin")
if not check_dir_exists(model_dir_name, frontend="tf"):
gs_download_model()
else:
model_dir = os.path.join(WORKDIR, model_dir_name)
local_hash = str(np.load(os.path.join(model_dir, "hash.npy")))
gs_hash = (
'gsutil -o "GSUtil:parallel_process_count=1" cp gs://shark_tank'
+ "/"
+ model_dir_name
+ "/hash.npy"
+ " "
+ os.path.join(model_dir, "upstream_hash.npy")
)
if os.system(gs_hash) != 0:
raise Exception("hash of the model not present in the tank.")
upstream_hash = str(
np.load(os.path.join(model_dir, "upstream_hash.npy"))
)
if local_hash != upstream_hash:
gs_download_model()
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(os.path.join(model_dir, model_name + "_tf.mlir")) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
inputs = np.load(os.path.join(model_dir, "inputs.npz"))
golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
inputs_tuple = tuple([inputs[key] for key in inputs])
golden_out_tuple = tuple([golden_out[key] for key in golden_out])
return mlir_file, function_name, inputs_tuple, golden_out_tuple
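A minimal sketch of the intended flow, mirroring the tests added later in this change:

from shark.shark_downloader import download_torch_model
from shark.shark_inference import SharkInference

mlir, func_name, inputs, golden_out = download_torch_model(
    "microsoft/MiniLM-L12-H384-uncased", dynamic=False
)
module = SharkInference(mlir, func_name, device="cpu", mlir_dialect="linalg")
module.compile()
results = module.forward(inputs)  # compare against golden_out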


@@ -1,124 +1,234 @@
# Lint as: python3
"""SHARK Importer"""
import iree.compiler.tflite as iree_tflite_compile
import iree.runtime as iree_rt
import numpy as np
import os
import sys
import tensorflow.compat.v2 as tf
import urllib.request
from shark.shark_inference import SharkInference
import tempfile
import os
# List of the supported frontends.
supported_frontends = {
"tensorflow",
"tf",
"pytorch",
"torch",
"tf-lite",
"tflite",
}
class SharkImporter:
"""
SharkImporter converts frontend modules into a
mlir_module. The supported frameworks are tensorflow,
pytorch, and tf-lite.
def __init__(self,
model_path,
model_type: str = "tflite",
model_source_hub: str = "tfhub",
device: str = None,
dynamic: bool = False,
jit_trace: bool = False,
benchmark_mode: bool = False):
self.model_path = model_path
self.model_type = model_type
self.model_source_hub = model_source_hub
self.device = device
self.dynamic = dynamic
self.jit_trace = jit_trace
self.benchmark_mode = benchmark_mode
self.inputs = None
self.input_details = None
self.output_details = None
...
# create tmp model file directory
if self.model_path is None:
print("Error. No model_path, Please input model path.")
return
Attributes
----------
module :
torch, tensorflow or tf-lite module.
inputs :
inputs to the module, may be required for the shape
information.
frontend: str
frontend to which the module belongs.
raw_model_file: str
temp tflite model path
if self.model_source_hub == "tfhub":
# compile and run tfhub tflite
if self.model_type == "tflite":
print("Setting up for TMP_DIR")
exe_basename = os.path.basename(sys.argv[0])
self.workdir = os.path.join(os.path.dirname(__file__), "tmp",
exe_basename)
print(f"TMP_DIR = {self.workdir}")
os.makedirs(self.workdir, exist_ok=True)
self.tflite_file = '/'.join([self.workdir, 'model.tflite'])
print("Setting up local address for tflite model file: ",
self.tflite_file)
if os.path.exists(self.model_path):
self.tflite_file = self.model_path
else:
print("Download tflite model")
urllib.request.urlretrieve(self.model_path,
self.tflite_file)
print("Setting up tflite interpreter")
self.tflite_interpreter = tf.lite.Interpreter(
model_path=self.tflite_file)
self.tflite_interpreter.allocate_tensors()
# default input initialization
self.input_details, self.output_details = self.get_model_details(
Methods
-------
import_mlir(is_dynamic, tracing_required, func_name):
is_dynamic: input shapes to be totally dynamic (pytorch specific).
tracing_required: whether tracing is required (pytorch specific).
func_name: The function to be traced out or imported to mlir.
import_debug(is_dynamic, tracing_required, func_name):
returns the converted (mlir_module,func_name) with inputs and golden
outputs.
The inputs and outputs are converted into np array.
"""
def __init__(
self,
module,
inputs: tuple = (),
frontend: str = "torch",
raw_model_file: str = "",
):
self.module = module
self.inputs = None if len(inputs) == 0 else inputs
self.frontend = frontend
if not self.frontend in supported_frontends:
print(
f"The frontend is not in the supported_frontends: {supported_frontends}"
)
sys.exit(1)
self.raw_model_file = raw_model_file
# NOTE: The default function for torch is "forward" and tf-lite is "main".
def _torch_mlir(self, is_dynamic, tracing_required):
from shark.torch_mlir_utils import get_torch_mlir_module
return get_torch_mlir_module(
self.module, self.inputs, is_dynamic, tracing_required
)
def _tf_mlir(self, func_name):
from iree.compiler import tf as tfc
return tfc.compile_module(
self.module, exported_names=[func_name], import_only=True
)
def _tflite_mlir(self, func_name):
from iree.compiler import tflite as tflitec
from shark.iree_utils._common import IREE_TARGET_MAP
self.mlir_model = tflitec.compile_file(
self.raw_model_file, # in tflite, it is a path to .tflite file, not a tflite interpreter
input_type="tosa",
import_only=True,
)
return self.mlir_model
# Adds the conversion of the frontend with the private function.
def import_mlir(
self,
is_dynamic=False,
tracing_required=False,
func_name="forward",
):
if self.frontend in ["torch", "pytorch"]:
if self.inputs == None:
print(
"Please pass in the inputs, the inputs are required to determine the shape of the mlir_module"
)
inputs = self.generate_inputs(
self.input_details) # device_inputs
self.setup_inputs(inputs)
sys.exit(1)
return self._torch_mlir(is_dynamic, tracing_required), func_name
if self.frontend in ["tf", "tensorflow"]:
return self._tf_mlir(func_name), func_name
if self.frontend in ["tflite", "tf-lite"]:
func_name = "main"
return self._tflite_mlir(func_name), func_name
def generate_inputs(self, input_details):
args = []
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args.append(np.zeros(shape=input["shape"], dtype=input["dtype"]))
return args
# Converts the frontend specific tensors into np array.
def convert_to_numpy(self, array_tuple: tuple):
if self.frontend in ["torch", "pytorch"]:
return [x.detach().numpy() for x in array_tuple]
if self.frontend in ["tf", "tensorflow"]:
return [x.numpy() for x in array_tuple]
def get_model_details(self):
if self.model_type == "tflite":
print("Get tflite input output details")
self.input_details = self.tflite_interpreter.get_input_details()
self.output_details = self.tflite_interpreter.get_output_details()
return self.input_details, self.output_details
# Saves `function_name.npy`, `inputs.npz`, `golden_out.npz` and `model_name.mlir` in the directory `dir`.
def save_data(
self, dir, model_name, mlir_data, func_name, inputs, outputs
):
import numpy as np
def setup_inputs(self, inputs):
print("Setting up inputs")
self.inputs = inputs
inputs_name = "inputs.npz"
outputs_name = "golden_out.npz"
func_file_name = "function_name"
model_name_mlir = model_name + "_" + self.frontend + ".mlir"
np.savez(os.path.join(dir, inputs_name), *inputs)
np.savez(os.path.join(dir, outputs_name), *outputs)
np.save(os.path.join(dir, func_file_name), np.array(func_name))
def compile(self, inputs=None):
if inputs is not None:
self.setup_inputs(inputs)
# preprocess model_path to get model_type and Model Source Hub
print("Shark Importer Intialize SharkInference and Do Compile")
if self.model_source_hub == "tfhub":
# compile and run tfhub tflite
print("Inference tfhub model")
self.shark_module = SharkInference(self.tflite_file,
self.inputs,
device=self.device,
dynamic=self.dynamic,
jit_trace=self.jit_trace)
self.shark_module.set_frontend("tflite")
self.shark_module.compile()
elif self.model_source_hub == "huggingface":
print("Inference", self.model_source_hub, " not implemented yet")
elif self.model_source_hub == "jaxhub":
print("Inference", self.model_source_hub, " not implemented yet")
mlir_str = mlir_data
if self.frontend == "torch":
mlir_str = mlir_data.operation.get_asm()
elif self.frontend == "tf":
mlir_str = mlir_data.decode("utf-8")
elif self.frontend == "tflite":
mlir_str = mlir_data.decode("utf-8")
with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
mlir_file.write(mlir_str)
def forward(self, inputs=None):
if inputs is not None:
self.setup_inputs(inputs)
# preprocess model_path to get model_type and Model Source Hub
print("Shark Importer forward Model")
if self.model_source_hub == "tfhub":
shark_results = self.shark_module.forward(self.inputs)
# Fix type information for unsigned cases.
# for test compare result
shark_results = list(shark_results)
for i in range(len(self.output_details)):
dtype = self.output_details[i]["dtype"]
shark_results[i] = shark_results[i].astype(dtype)
return shark_results
elif self.model_source_hub == "huggingface":
print("Inference", self.model_source_hub, " not implemented yet")
elif self.model_source_hub == "jaxhub":
print("Inference", self.model_source_hub, " not implemented yet")
return
def import_debug(
self,
is_dynamic=False,
tracing_required=False,
func_name="forward",
dir=tempfile.gettempdir(),
model_name="model",
):
if self.inputs == None:
print(
f"There is no input provided: {self.inputs}, please provide inputs or simply run import_mlir."
)
sys.exit(1)
imported_mlir = self.import_mlir(
is_dynamic, tracing_required, func_name
)
# TODO: Make sure that any generic function name is accepted. Currently takes in the default function names.
# TODO: Check for multiple outputs.
if self.frontend in ["torch", "pytorch"]:
import torch
golden_out = self.module(*self.inputs)
if torch.is_tensor(golden_out):
golden_out = tuple(
golden_out.detach().numpy(),
)
else:
golden_out = self.convert_to_numpy(golden_out)
# Save the artifacts in the directory dir.
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.convert_to_numpy(self.inputs),
golden_out,
)
if self.frontend in ["tf", "tensorflow"]:
import tensorflow as tf
golden_out = self.module.forward(*self.inputs)
if tf.is_tensor(golden_out):
golden_out = tuple(
golden_out.numpy(),
)
elif golden_out is tuple:
golden_out = self.convert_to_numpy(golden_out)
else:
# from transformers import TFSequenceClassifierOutput
golden_out = golden_out.logits
# Save the artifacts in the directory dir.
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.convert_to_numpy(self.inputs),
golden_out,
)
if self.frontend in ["tflite", "tf-lite"]:
# TODO(Chi): Validate it for tflite models.
golden_out = self.module.invoke_tflite(self.inputs)
self.save_data(
dir,
model_name,
imported_mlir[0],
imported_mlir[1],
self.inputs,
golden_out,
)
return (
imported_mlir,
self.inputs,
golden_out,
)
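A usage sketch for the tflite path, matching the albert test further down (the preprocessor supplies the interpreter, inputs, and raw .tflite path):

from shark.shark_importer import SharkImporter
from shark.tflite_utils import TFLitePreprocessor

pre = TFLitePreprocessor(model_name="albert_lite_base")
importer = SharkImporter(
    module=pre.get_interpreter(),
    inputs=pre.get_inputs(),
    frontend="tflite",
    raw_model_file=pre.get_raw_model_file(),
)
mlir_module, func_name = importer.import_mlir()  # func_name is "main" for tflite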


@@ -9,107 +9,129 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
import time
import sys
from shark.shark_runner import SharkRunner
import numpy as np
# Prints to stderr.
def print_err(*a):
print(*a, file=sys.stderr)
dtype_to_np_dtype = {
"f32": np.float32,
"f64": np.float64,
"i32": np.int32,
"i64": np.int64,
"i1": np.bool_,
}
class SharkInference:
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
"""
Runs prediction or inference on mlir_module.
def __init__(self,
model,
input: tuple,
device: str = None,
dynamic: bool = False,
jit_trace: bool = False,
benchmark_mode: bool = False):
self.model = model
self.input = input
self.dynamic = dynamic
self.jit_trace = jit_trace
self.benchmark_mode = benchmark_mode
...
# By default it's torch frontend.
self.frontend = "pytorch"
Attributes
----------
mlir_module : str
mlir_module represented in string.
function_name : str
function to execute in the given mlir_module.
device : str
device to execute the mlir_module on.
currently supports cpu, cuda, vulkan, and metal backends.
mlir_dialect: str
The dialect in which the given mlir_module is in.
Refer to {https://mlir.llvm.org/docs/Dialects/}
is_benchmark: bool
Whether this SharkInference module should be benchmark-enabled.
# Sets the device.
self.device = device if device is not None else shark_args.device
Methods
-------
run(inputs=None):
Runs the mlir_module with the given inputs, if the inputs are not
given it autogenerates the inputs. Also, the inputs should be a
numpy array.
input_info():
Gives the information about the inputs required by the `function_name`.
This can be expensive as it does string matching to do so.
self.model_config_path = shark_args.model_config_path
"""
def __init__(
self,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
is_benchmark: bool = False,
):
self.mlir_module = mlir_module
self.function_name = function_name
self.device = device
self.mlir_dialect = mlir_dialect
self.is_benchmark = is_benchmark
self.shark_runner = None
# Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str):
if frontend not in [
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg",
"tosa", "tflite"
]:
print_err("frontend not supported.")
else:
self.frontend = frontend
def compile(self):
# Inference do not use AOT.
from_aot = False
if (self.benchmark_mode == True):
self.shark_runner = SharkBenchmarkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, from_aot,
self.frontend)
if self.is_benchmark == True:
from shark.shark_benchmark_runner import SharkBenchmarkRunner
self.shark_runner = SharkBenchmarkRunner(
self.mlir_module,
self.function_name,
self.device,
self.mlir_dialect,
)
else:
self.shark_runner = SharkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, from_aot,
self.frontend,
self.model_config_path)
self.shark_runner = SharkRunner(
self.mlir_module,
self.function_name,
self.device,
self.mlir_dialect,
)
# inputs are considered to be np.array.
def forward(self, inputs):
input_list = inputs
# converts the inputs to numpy.
if self.frontend in ["pytorch", "torch"]:
input_list = [x.detach().numpy() for x in inputs]
elif self.frontend in ["tensorflow", "tf"]:
input_list = [x.numpy() for x in inputs]
return self.shark_runner.forward(input_list, self.frontend)
# inputs are considered to be tuple of np.array.
def forward(self, inputs: tuple):
return self.shark_runner.run(inputs)
# Saves the .vmfb module.
def save_module(self, dir = None):
if dir is None:
return self.shark_runner.save_module()
return self.shark_runner.save_module(dir)
# Captures the static input information from the mlir_module.
# TODO(pashu123): Generate the input information for dynamic shapes.
def _input_info(self):
# func_key to get the line which contains the function.
func_key = "func.func @" + self.function_name
func_header = None
for line in str(self.mlir_module).splitlines():
if func_key in line:
func_header = line
break
if func_header is None:
print(f"Function: {self.function_name} not found")
######### Benchmark Related Functions #########
def benchmark_mode(func):
import re
def inner(self, *args, **kwargs):
assert self.benchmark_mode, "SharkRunner needs to be in benchmark mode to run benchmark methods."
return func(self, *args, **kwargs)
inputs = re.findall("\(.*?\)", func_header)[0].split(",")
shapes = []
dtype = []
for inp in inputs:
shape_dtype = re.findall(r"<[^>]*>", inp)[0].split("x")
shape_dtype[0], shape_dtype[-1] = (
shape_dtype[0][1:],
shape_dtype[-1][:-1],
)
shapes.append(tuple([int(x) for x in shape_dtype[:-1]]))
dtype.append(shape_dtype[-1])
return inner
return shapes, dtype
@benchmark_mode
def benchmark_all(self, inputs):
self.shark_runner.benchmark_all(inputs)
@benchmark_mode
def benchmark_frontend(self, inputs):
self.shark_runner.benchmark_frontend(inputs)
@benchmark_mode
def benchmark_python(self, inputs):
self.shark_runner.benchmark_python(inputs)
@benchmark_mode
def benchmark_c(self):
self.shark_runner.benchmark_c()
# Generates random input to be feed into the graph.
def generate_random_inputs(self, low=0, high=1):
shapes, dtype = self._input_info()
inputs = []
for i, j in zip(shapes, dtype):
inputs.append(
np.random.uniform(low, high, size=i).astype(
dtype_to_np_dtype[j]
)
)
return tuple(inputs)
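As a toy end-to-end sketch of _input_info and generate_random_inputs (the identity MLIR below is made up purely for illustration, not something shipped in the tank):

from shark.shark_inference import SharkInference

TOY_MLIR = """
func.func @forward(%arg0: tensor<1x4xf32>) -> tensor<1x4xf32> {
  return %arg0 : tensor<1x4xf32>
}
"""

module = SharkInference(TOY_MLIR, "forward", device="cpu", mlir_dialect="linalg")
module.compile()
rand_inputs = module.generate_random_inputs()  # one (1, 4) float32 array
print(module.forward(rand_inputs))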


@@ -11,195 +11,91 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from iree.compiler import tf as tfc
import iree.compiler.tflite as ireec_tflite
from torch.utils._python_dispatch import enable_torch_dispatch_mode
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from torch_mlir_e2e_test.eager_backends.refbackend import EagerModeRefBackend
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb, export_module_to_mlir_file, build_benchmark_args, run_benchmark_module
import os
from shark.iree_utils.compile_utils import (
get_iree_compiled_module,
get_results,
export_iree_module_to_vmfb,
)
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.parser import shark_args
from tqdm import tqdm
import time
import os
import sys
# supported dialects by the shark-runtime.
supported_dialects = {"linalg", "mhlo", "tosa", "tf-lite"}
class SharkRunner:
"""Base class for Shark Inference and Shark Runner."""
"""
Base class for SharkInference and SharkTrainer
used to execute an mlir_module.
...
Attributes
----------
mlir_module : str
mlir_module represented in string.
function_name : str
function to execute in the given mlir_module.
device : str
device to execute the mlir_module on.
currently supports cpu, cuda, vulkan, and metal backends.
mlir_dialect: str
The dialect in which the given mlir_module is in.
Refer to {https://mlir.llvm.org/docs/Dialects/}
Methods
-------
run(inputs=None):
Runs the mlir_module with the given inputs, if the inputs are not
given it autogenerates the inputs. Also, the inputs should be a
numpy array.
input_info():
Gives the information about the inputs required by the `function_name`.
This can be expensive as it does string matching to do so.
"""
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
model_config_path: str = None,
mlir_module: str,
function_name: str = "forward",
device: str = "none",
mlir_dialect: str = "linalg",
):
self.model = model
self.frontend_model = model
self.from_aot = from_aot
self.input = input
self.frontend = frontend
self.vmfb_file = None
func_name = "forward"
self.device = device if device is not None else shark_args.device
if self.frontend in ["pytorch", "torch"]:
# get torch-mlir dialect
# self.model = torch.Module
# TODO assert
self.model = get_torch_mlir_module(self.model, input, dynamic,
jit_trace, from_aot)
elif self.frontend in ["tensorflow", "tf"]:
# get mhlo dialect
# self.model = tf.Module
# TODO assert
self.model = tfc.compile_module(self.model,
exported_names=[func_name],
import_only=True)
elif self.frontend in ["tflite"]:
print("Setting up for IREE compiler tflite")
# get tosa dialect
# self.model = model.tflite
# TODO assert
self.model = ireec_tflite.compile_file(self.model,
input_type="tosa",
import_only=True)
func_name = "main"
self.mlir_module = mlir_module
self.function_name = function_name
self.device = shark_args.device if device == "none" else device
self.mlir_dialect = mlir_dialect
# TODO: We can capture the .vmfb module here and later use it for saving
# rather than recompiling it again, if used for saving.
if check_device_drivers(self.device):
device_driver_info(self.device)
sys.exit(1)
# Compile the module to get the .vmfb.
(
self.iree_compilation_module,
self.iree_config,
) = get_iree_compiled_module(self.model,
self.device,
self.frontend,
func_name=func_name,
model_config_path=model_config_path)
) = get_iree_compiled_module(
self.mlir_module,
self.device,
self.mlir_dialect,
func_name=self.function_name,
)
# Debugging Options:
if shark_args.save_mlir:
export_module_to_mlir_file(self.model, self.frontend,
shark_args.repro_dir)
if shark_args.save_vmfb:
self.vmfb_file = self.save_module(shark_args.repro_dir)
# All the timings and benchmarking can be done here.
def forward(self, input, frontend):
return get_results(self.iree_compilation_module, input,
self.iree_config, frontend)
def run(self, inputs: tuple):
return get_results(
self.iree_compilation_module,
inputs,
self.iree_config,
self.mlir_dialect,
)
# TODO: Instead of passing directory and having names decided by the module
# , user may want to save the module with manual names.
def save_module(self, dir=os.getcwd()):
return export_iree_module_to_vmfb(self.model, self.device, dir,
self.frontend)
# TODO: Load a module and directly use it, we will need to set the frontend
# in this case.
def load_module(self, name):
pass
class SharkEagerMode:
def __init__(self, device="cpu"):
if device == "refbackend":
torch_mlir_tensor.backend = EagerModeRefBackend()
else:
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(
device)
self.guard = enable_torch_dispatch_mode(TorchMLIRTensor)
self.guard.__enter__()
def __del__(self):
self.guard.__exit__(None, None, None)
class SharkBenchmarkRunner(SharkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
model,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
):
SharkRunner.__init__(self, model, input, dynamic, device, jit_trace,
from_aot, frontend)
if (self.vmfb_file == None):
self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
shark_args.repro_dir,
frontend)
self.benchmark_cl = build_benchmark_args(self.vmfb_file, device, input,
frontend, from_aot)
def benchmark_frontend(self, inputs):
if self.frontend in ["pytorch", "torch"]:
self.benchmark_torch(inputs)
elif self.frontend in ["tensorflow", "tf"]:
self.benchmark_tf(inputs)
def benchmark_torch(self, inputs):
inputs = self.input if self.from_aot else inputs
inputs = inputs[0]
for i in range(shark_args.num_warmup_iterations):
self.frontend_model.forward(inputs)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.frontend_model.forward(inputs)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"Torch benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
return export_iree_module_to_vmfb(
self.model, self.device, dir, self.mlir_dialect
)
def benchmark_tf(self, inputs):
for i in range(shark_args.num_warmup_iterations):
self.frontend_model.forward(*inputs)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.frontend_model.forward(*inputs)
if i == shark_args.num_iterations - 1:
end = time.time()
break
print(
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
return
def benchmark_c(self):
result = run_benchmark_module(self.benchmark_cl)
print(f"Shark-{self.frontend} C-benchmark:{result} iter/second")
def benchmark_python(self, inputs):
inputs = self.input if self.from_aot else inputs
input_list = [x for x in inputs]
for i in range(shark_args.num_warmup_iterations):
self.forward(input_list, self.frontend)
begin = time.time()
for i in range(shark_args.num_iterations):
out = self.forward(input_list, self.frontend)
if i == shark_args.num_iterations - 1:
end = time.time()
print(
f"Shark-{self.frontend} Python-benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_all(self, inputs):
self.benchmark_frontend(inputs)
self.benchmark_python(inputs)
self.benchmark_c()
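The device-driver gate in SharkRunner.__init__ is the same helper the tests below lean on for skipping; a quick, self-contained check:

from shark.iree_utils._common import check_device_drivers, device_driver_info

for dev in ("cpu", "gpu", "vulkan"):
    # check_device_drivers returns True when the drivers for `dev` are missing.
    if check_device_drivers(dev):
        print(f"{dev}: unavailable -> {device_driver_info(dev)}")
    else:
        print(f"{dev}: drivers found")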


@@ -12,15 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from shark.torch_mlir_utils import get_torch_mlir_module, run_on_refbackend
from shark.iree_utils import get_results, get_iree_compiled_module, export_iree_module_to_vmfb
import os
from shark.parser import shark_args
from shark.shark_runner import SharkRunner
from shark.backward_makefx import MakeFxModule
import numpy as np
from tqdm import tqdm
import time
import sys
@@ -58,7 +54,13 @@ class SharkTrainer:
# Sets the frontend i.e `pytorch` or `tensorflow`.
def set_frontend(self, frontend: str):
if frontend not in [
"pytorch", "torch", "tensorflow", "tf", "mhlo", "linalg", "tosa"
"pytorch",
"torch",
"tensorflow",
"tf",
"mhlo",
"linalg",
"tosa",
]:
print_err("frontend not supported.")
else:
@@ -67,22 +69,32 @@ class SharkTrainer:
# Training function is needed in the case of torch_fn.
def compile(self, training_fn=None):
if self.frontend in ["torch", "pytorch"]:
aot_module = MakeFxModule(self.model,
tuple(self.input),
custom_inference_fn=training_fn)
aot_module = MakeFxModule(
self.model, tuple(self.input), custom_inference_fn=training_fn
)
aot_module.generate_graph()
# Returns the backward graph.
training_graph = aot_module.training_graph
weights = self.get_torch_params()
self.shark_runner = SharkRunner(training_graph,
weights + self.input, self.dynamic,
self.device, self.jit_trace,
self.from_aot, self.frontend)
self.shark_runner = SharkRunner(
training_graph,
weights + self.input,
self.dynamic,
self.device,
self.jit_trace,
self.from_aot,
self.frontend,
)
elif self.frontend in ["tensorflow", "tf", "mhlo"]:
self.shark_runner = SharkRunner(self.model, self.input,
self.dynamic, self.device,
self.jit_trace, self.from_aot,
self.frontend)
self.shark_runner = SharkRunner(
self.model,
self.input,
self.dynamic,
self.device,
self.jit_trace,
self.from_aot,
self.frontend,
)
else:
print_err("Unknown frontend")
return
@@ -100,8 +112,9 @@ class SharkTrainer:
params = [x.numpy() for x in params]
print(f"Training started for {num_iters} iterations:")
for i in tqdm(range(num_iters)):
params = self.shark_runner.forward(params + self.input,
self.frontend)
params = self.shark_runner.forward(
params + self.input, self.frontend
)
return params
@@ -111,15 +124,15 @@ class SharkTrainer:
def _train_tf(self, num_iters):
input_list = []
for x in self.input:
if (isinstance(x, list)):
if isinstance(x, list):
nested_list = []
for val in x:
if (isinstance(val, np.ndarray)):
if isinstance(val, np.ndarray):
nested_list.append(val)
else:
nested_list.append(val.numpy())
input_list.append(nested_list)
elif (isinstance(x, np.ndarray)):
elif isinstance(x, np.ndarray):
input_list.append(x)
else:
input_list.append(x.numpy())
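The branches above just normalize every input (including nested lists of tf tensors) to numpy before the values reach the runner; as a standalone sketch of the same logic:

import numpy as np

def to_numpy_inputs(raw_inputs):
    # Mirrors SharkTrainer._train_tf: keep numpy arrays as-is, call .numpy()
    # on framework tensors, and recurse one level into lists.
    out = []
    for x in raw_inputs:
        if isinstance(x, list):
            out.append(
                [v if isinstance(v, np.ndarray) else v.numpy() for v in x]
            )
        elif isinstance(x, np.ndarray):
            out.append(x)
        else:
            out.append(x.numpy())
    return out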


@@ -2,51 +2,143 @@
import numpy as np
from shark.shark_importer import SharkImporter
import pytest
from shark.parser import shark_args
from shark.shark_inference import SharkInference
from shark.tflite_utils import TFLitePreprocessor
import sys
model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
for input in input_details:
print("\t%s, %s", str(input["shape"]), input["dtype"].__name__)
print(str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"]))
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(shape=input_details[1]["shape"],
dtype=input_details[1]["dtype"]))
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(shape=input_details[2]["shape"],
dtype=input_details[2]["dtype"]))
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results, details):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(details)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f", i, max_error)
class AlbertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
raw_model_file_path = tflite_preprocessor.get_raw_model_file()
inputs = tflite_preprocessor.get_inputs()
tflite_interpreter = tflite_preprocessor.get_interpreter()
my_shark_importer = SharkImporter(
module=tflite_interpreter,
inputs=inputs,
frontend="tflite",
raw_model_file=raw_model_file_path,
)
mlir_model, func_name = my_shark_importer.import_mlir()
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
## post process results for compare
input_details, output_details = tflite_preprocessor.get_model_details()
mlir_results = list(mlir_results)
for i in range(len(output_details)):
dtype = output_details[i]["dtype"]
mlir_results[i] = mlir_results[i].astype(dtype)
tflite_results = tflite_preprocessor.get_golden_output()
compare_results(mlir_results, tflite_results, output_details)
# Case2: Use manually set inputs
input_details, output_details = tflite_preprocessor.get_model_details()
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
## post process results for compare
tflite_results = tflite_preprocessor.get_golden_output()
compare_results(mlir_results, tflite_results, output_details)
# print(mlir_results)
# A specific case can be run by commenting different cases. Runs all the test
# across cpu, gpu and vulkan according to available drivers.
pytest_param = pytest.mark.parametrize(
('dynamic', 'device'),
("dynamic", "device"),
[
pytest.param(False, 'cpu'),
pytest.param(False, "cpu"),
# TODO: Language models are failing for dynamic case..
pytest.param(True, 'cpu', marks=pytest.mark.skip),
])
pytest.param(True, "cpu", marks=pytest.mark.skip),
],
)
@pytest_param
@pytest.mark.xfail(
sys.platform == "darwin", reason="known macos tflite install issue"
)
def test_albert(dynamic, device):
my_shark_importer = SharkImporter(model_path=model_path,
model_type="tflite",
model_source_hub="tfhub",
device=device,
dynamic=dynamic,
jit_trace=True)
input_details, output_details = my_shark_importer.get_model_details()
inputs = generate_inputs(input_details) # device_inputs
my_shark_importer.compile(inputs)
shark_results = my_shark_importer.forward(inputs)
# print(shark_results)
module_tester = AlbertTfliteModuleTester(dynamic=dynamic, device=device)
module_tester.create_and_check_module()
if __name__ == "__main__":
test_albert(False, "cpu")

shark/tflite_utils.py (new file, 208 lines)

@@ -0,0 +1,208 @@
import tensorflow as tf
import numpy as np
import os
import csv
import urllib.request
class TFLiteModelUtil:
def __init__(self, raw_model_file):
self.raw_model_file = str(raw_model_file)
self.tflite_interpreter = None
self.input_details = None
self.output_details = None
self.inputs = []
def setup_tflite_interpreter(self):
self.tflite_interpreter = tf.lite.Interpreter(
model_path=self.raw_model_file
)
self.tflite_interpreter.allocate_tensors()
# default input initialization
return self.get_model_details()
def get_model_details(self):
print("Get tflite input output details")
self.input_details = self.tflite_interpreter.get_input_details()
self.output_details = self.tflite_interpreter.get_output_details()
return self.input_details, self.output_details
def invoke_tflite(self, inputs):
self.inputs = inputs
print("invoke_tflite")
for i, input in enumerate(self.inputs):
self.tflite_interpreter.set_tensor(
self.input_details[i]["index"], input
)
self.tflite_interpreter.invoke()
# post process tflite_result for compare with mlir_result,
# for tflite the output is a list of numpy.tensor
tflite_results = []
for output_detail in self.output_details:
tflite_results.append(
np.array(
self.tflite_interpreter.get_tensor(output_detail["index"])
)
)
for i in range(len(self.output_details)):
# print("output_details ", i, "shape", self.output_details[i]["shape"].__name__,
# ", dtype: ", self.output_details[i]["dtype"].__name__)
out_dtype = self.output_details[i]["dtype"]
tflite_results[i] = tflite_results[i].astype(out_dtype)
return tflite_results
class TFLitePreprocessor:
def __init__(
self,
model_name,
input_details=None,
output_details=None,
model_path=None,
):
self.model_name = model_name
self.input_details = (
input_details # used for tflite, optional for tf/pytorch
)
self.output_details = (
output_details # used for tflite, optional for tf/pytorch
)
self.inputs = []
self.model_path = model_path # url to download the model
self.raw_model_file = (
None # local address for raw tf/tflite/pytorch model
)
self.mlir_file = (
None # local address for .mlir file of tf/tflite/pytorch model
)
self.mlir_model = None # read of .mlir file
self.output_tensor = (
None # the raw tf/pytorch/tflite_output_tensor, not mlir_tensor
)
self.interpreter = (
None # could be tflite/tf/torch_interpreter in utils
)
self.input_file = None
self.output_file = None
# create tmp model file directory
if self.model_path is None and self.model_name is None:
print(
"Error. No model_path, No model name,Please input either one."
)
return
print("Setting up for TMP_WORK_DIR")
self.workdir = os.path.join(
os.path.dirname(__file__), "./../gen_shark_tank"
)
os.makedirs(self.workdir, exist_ok=True)
print(f"TMP_WORK_DIR = {self.workdir}")
# compile and run tfhub tflite
load_model_success = self.load_tflite_model()
if not load_model_success:
print("Error, load tflite model fail")
return
if (self.input_details is None) or (self.output_details is None):
# print("Setting up tflite interpreter to get model input details")
self.setup_interpreter()
inputs = self.generate_inputs(self.input_details) # device_inputs
self.setup_inputs(inputs)
def load_tflite_model(self):
# use model name get dir.
tflite_model_name_dir = os.path.join(
self.workdir, str(self.model_name)
)
os.makedirs(tflite_model_name_dir, exist_ok=True)
print(f"TMP_TFLITE_MODELNAME_DIR = {tflite_model_name_dir}")
self.raw_model_file = "/".join(
[tflite_model_name_dir, str(self.model_name) + "_tflite.tflite"]
)
self.mlir_file = "/".join(
[tflite_model_name_dir, str(self.model_name) + "_tflite.mlir"]
)
self.input_file = "/".join([tflite_model_name_dir, "inputs"])
self.output_file = "/".join([tflite_model_name_dir, "golden_out"])
# np.save("/".join([tflite_model_name_dir, "function_name"]), np.array("main"))
if os.path.exists(self.raw_model_file):
print(
"Local address for .tflite model file Exists: ",
self.raw_model_file,
)
else:
print("No local tflite file, Download tflite model")
if self.model_path is None:
# get model file from tflite_model_list.csv or download from gs://bucket
print("No model_path, get from tflite_model_list.csv")
tflite_model_list_path = os.path.join(
os.path.dirname(__file__),
"../tank/tflite/tflite_model_list.csv",
)
tflite_model_list = csv.reader(open(tflite_model_list_path))
for row in tflite_model_list:
if str(row[0]) == str(self.model_name):
self.model_path = row[1]
print("tflite_model_name", str(row[0]))
print("tflite_model_link", self.model_path)
if self.model_path is None:
print("Error, No model path find in tflite_model_list.csv")
return False
urllib.request.urlretrieve(self.model_path, self.raw_model_file)
return True
def setup_interpreter(self):
self.interpreter = TFLiteModelUtil(self.raw_model_file)
(
self.input_details,
self.output_details,
) = self.interpreter.setup_tflite_interpreter()
def generate_inputs(self, input_details):
self.inputs = []
for tmp_input in input_details:
print(
"input_details shape:",
str(tmp_input["shape"]),
" type:",
tmp_input["dtype"].__name__,
)
self.inputs.append(
np.ones(shape=tmp_input["shape"], dtype=tmp_input["dtype"])
)
return self.inputs
def setup_inputs(self, inputs):
# print("Setting up inputs")
self.inputs = inputs
def get_mlir_model(self):
return self.mlir_model
def get_mlir_file(self):
return self.mlir_file
def get_inputs(self):
return self.inputs
def get_golden_output(self):
self.output_tensor = self.interpreter.invoke_tflite(self.inputs)
np.savez(self.output_file, *self.output_tensor)
return self.output_tensor
def get_model_details(self):
return self.input_details, self.output_details
def get_raw_model_file(self):
return self.raw_model_file
def get_interpreter(self):
return self.interpreter
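In short, the preprocessor materializes its artifacts under gen_shark_tank/<model_name>/; a quick sketch of poking at them:

from shark.tflite_utils import TFLitePreprocessor

pre = TFLitePreprocessor(model_name="albert_lite_base")
print(pre.get_raw_model_file())   # .../gen_shark_tank/albert_lite_base/albert_lite_base_tflite.tflite
inputs = pre.get_inputs()         # all-ones arrays shaped from the interpreter's input details
golden = pre.get_golden_output()  # also saved as golden_out.npz next to the model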


@@ -15,23 +15,23 @@
import torch
import io
import pickle
import sys
import os
from io import StringIO
from torch_mlir.dialects.torch.importer.jit_ir import (
ClassAnnotator,
ModuleBuilder,
)
from torch_mlir_e2e_test.torchscript.serialization import (
extract_serializable_annotations, apply_serializable_annotations,
SerializableTest)
extract_serializable_annotations,
apply_serializable_annotations,
SerializableTest,
)
from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
from torch_mlir.passmanager import PassManager
from torch_mlir_e2e_test.torchscript.annotations import annotate_args, export
from torch_mlir.ir import StringAttr
import torch_mlir
def get_module_name_for_asm_dump(module):
@@ -41,7 +41,8 @@ def get_module_name_for_asm_dump(module):
if not "torch.debug_module_name" in module.operation.attributes:
return "UnnammedModule"
return StringAttr(
module.operation.attributes["torch.debug_module_name"]).value
module.operation.attributes["torch.debug_module_name"]
).value
def get_input_annotations(inputs: tuple, dynamic: bool) -> list:
@@ -68,8 +69,9 @@ def run_on_refbackend(torch_module, inputs):
return jit_module.forward(np_inputs[0])
def shark_jit_trace(module, input: tuple, dynamic: bool,
tracing_required: bool):
def shark_jit_trace(
module, input: tuple, dynamic: bool, tracing_required: bool
):
"""TODO: Include necessary documentation."""
if not tracing_required:
@@ -79,21 +81,26 @@ def shark_jit_trace(module, input: tuple, dynamic: bool,
actual_script = traced_module._actual_script_module
export(actual_script.forward)
annotate_args_decorator = annotate_args(
get_input_annotations(input, dynamic))
get_input_annotations(input, dynamic)
)
annotate_args_decorator(actual_script.forward)
module = torch.jit.script(actual_script)
# TODO: remove saved annotations.pickle
torchscript_module_bytes = module.save_to_buffer({
"annotations.pkl":
pickle.dumps(extract_serializable_annotations(module))
})
serializable_test = SerializableTest(unique_name="",
program=torchscript_module_bytes,
trace=None)
torchscript_module_bytes = module.save_to_buffer(
{
"annotations.pkl": pickle.dumps(
extract_serializable_annotations(module)
)
}
)
serializable_test = SerializableTest(
unique_name="", program=torchscript_module_bytes, trace=None
)
_extra_files = {"annotations.pkl": ""}
module = torch.jit.load(io.BytesIO(serializable_test.program),
_extra_files=_extra_files)
module = torch.jit.load(
io.BytesIO(serializable_test.program), _extra_files=_extra_files
)
# Load the pickled annotations.
annotations = pickle.loads(_extra_files["annotations.pkl"])
apply_serializable_annotations(module, annotations)
@@ -104,14 +111,26 @@ def get_torch_mlir_module(
module,
input: tuple,
dynamic: bool,
tracing_required: bool,
from_aot: bool = False,
jit_trace: bool,
from_torchscript: bool = False,
):
"""TODO: Include necessary documentation."""
# Static modules compile well with the torch_mlir.compile API.
# We will always set jit_trace = True with the API since we always
# want to propagate static shapes.
if not dynamic:
module = torch_mlir.compile(
module,
input,
output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=jit_trace,
)
return module
# Tracing is not required from the aot_module.
if not from_aot:
module = shark_jit_trace(module, input, dynamic, tracing_required)
if not from_torchscript:
module = shark_jit_trace(module, input, dynamic, jit_trace)
mb = ModuleBuilder()
class_annotator = ClassAnnotator()
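The static-shape path above hands the module directly to torch_mlir.compile with tracing enabled so that the example input's static shapes propagate into the linalg-on-tensors IR. A minimal standalone sketch of that call follows; the toy model and input shape are illustrative placeholders (not part of this change) and assume a torch-mlir build that exposes torch_mlir.compile.

import torch
import torch_mlir


class SmallModel(torch.nn.Module):
    # Hypothetical toy module; any traceable nn.Module would do here.
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(16, 4)

    def forward(self, x):
        return torch.relu(self.linear(x))


example_input = torch.randn(2, 16)
# use_tracing=True mirrors the jit_trace=True behavior described above,
# so the example input's static shapes are baked into the IR.
linalg_module = torch_mlir.compile(
    SmallModel().eval(),
    (example_input,),
    output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
    use_tracing=True,
)
print(linalg_module)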

View File

@@ -0,0 +1,104 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class MiniLMModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"microsoft/MiniLM-L12-H384-uncased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"microsoft/MiniLM-L12-H384-uncased",
dynamic,
device,
"torch",
)
class MiniLMModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MiniLMModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
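The download-then-infer flow exercised by this test can also be run outside pytest. A short sketch, assuming a working SHARK install with access to the shark_tank artifacts (argument names mirror the test above):

from shark.shark_downloader import download_torch_model
from shark.shark_inference import SharkInference
from tank.model_utils import compare_tensors

# Fetch the pre-imported linalg MLIR plus golden input/output from shark_tank.
mlir_model, func_name, inputs, golden_out = download_torch_model(
    "microsoft/MiniLM-L12-H384-uncased", False  # False => static shapes
)
shark_module = SharkInference(
    mlir_model, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
results = shark_module.forward(inputs)
assert compare_tensors(golden_out, results)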

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class AlbertBaseModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"albert-base-v2"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class AlbertBaseModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlbertBaseModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
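For the TensorFlow models the flow is the same except the artifact comes from download_tf_model and is compiled from the mhlo dialect. A standalone sketch under the same assumptions as above:

import numpy as np
from shark.shark_downloader import download_tf_model
from shark.shark_inference import SharkInference

model, func_name, inputs, golden_out = download_tf_model("albert-base-v2")
shark_module = SharkInference(
    model, func_name, device="cpu", mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)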

View File

@@ -0,0 +1,104 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class AlbertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"albert-base-v2", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"albert-base-v2",
dynamic,
device,
"torch",
)
class AlbertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlbertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,177 @@
# import numpy as np
# from shark.shark_importer import SharkImporter
# from shark.shark_inference import SharkInference
# import pytest
# import unittest
# from shark.parser import shark_args
# from shark.tflite_utils import TFLitePreprocessor
#
#
# # model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# # model_path = model_path
#
# # Inputs modified to be useful albert inputs.
# def generate_inputs(input_details):
# for input in input_details:
# print(str(input["shape"]), input["dtype"].__name__)
# # [ 1 384] int32
# # [ 1 384] int32
# # [ 1 384] int32
#
# args = []
# args.append(
# np.random.randint(
# low=0,
# high=256,
# size=input_details[0]["shape"],
# dtype=input_details[0]["dtype"],
# )
# )
# args.append(
# np.ones(
# shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
# )
# )
# args.append(
# np.zeros(
# shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
# )
# )
# return args
#
#
# def compare_results(mlir_results, tflite_results):
# print("Compare mlir_results VS tflite_results: ")
# assert len(mlir_results) == len(
# tflite_results
# ), "Number of results do not match"
# rtol = 1e-02
# atol = 1e-03
# print(
# "numpy.allclose: ",
# np.allclose(mlir_results, tflite_results, rtol, atol),
# )
# for i in range(len(mlir_results)):
# mlir_result = mlir_results[i]
# tflite_result = tflite_results[i]
# mlir_result = mlir_result.astype(np.single)
# tflite_result = tflite_result.astype(np.single)
# assert mlir_result.shape == tflite_result.shape, "shape doesnot match"
# max_error = np.max(np.abs(mlir_result - tflite_result))
# print("Max error (%d): %f", i, max_error)
#
#
# class AlbertTfliteModuleTester:
# def __init__(
# self,
# dynamic=False,
# device="cpu",
# save_mlir=False,
# save_vmfb=False,
# ):
# self.dynamic = dynamic
# self.device = device
# self.save_mlir = save_mlir
# self.save_vmfb = save_vmfb
#
# def create_and_check_module(self):
# shark_args.save_mlir = self.save_mlir
# shark_args.save_vmfb = self.save_vmfb
#
# # Preprocess to get SharkImporter input args
# tflite_preprocessor = TFLitePreprocessor(model_name="albert_lite_base")
# raw_model_file_path = tflite_preprocessor.get_raw_model_file()
# inputs = tflite_preprocessor.get_inputs()
# tflite_interpreter = tflite_preprocessor.get_interpreter()
#
# # Use SharkImporter to get SharkInference input args
# my_shark_importer = SharkImporter(
# module=tflite_interpreter,
# inputs=inputs,
# frontend="tflite",
# raw_model_file=raw_model_file_path,
# )
# mlir_model, func_name = my_shark_importer.import_mlir()
#
# # Use SharkInference to get inference result
# shark_module = SharkInference(
# mlir_module=mlir_model,
# function_name=func_name,
# device=self.device,
# mlir_dialect="tflite",
# )
#
# # Case1: Use shark_importer default generate inputs
# shark_module.compile()
# mlir_results = shark_module.forward(inputs)
# ## post process results for compare
# # input_details, output_details = tflite_preprocessor.get_model_details()
# # mlir_results = list(mlir_results)
# # for i in range(len(output_details)):
# # dtype = output_details[i]["dtype"]
# # mlir_results[i] = mlir_results[i].astype(dtype)
# tflite_results = tflite_preprocessor.get_golden_output()
# compare_results(mlir_results, tflite_results)
# # import pdb
# # pdb.set_trace()
#
# # Case2: Use manually set inputs
# # input_details, output_details = tflite_preprocessor.get_model_details()
# input_details = [
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# {
# "shape": [1, 384],
# "dtype": np.int32,
# },
# ]
# inputs = generate_inputs(input_details) # new inputs
#
# shark_module = SharkInference(
# mlir_module=mlir_model,
# function_name=func_name,
# device=self.device,
# mlir_dialect="tflite",
# )
# shark_module.compile()
# mlir_results = shark_module.forward(inputs)
# ## post process results for compare
# tflite_results = tflite_preprocessor.get_golden_output()
# compare_results(mlir_results, tflite_results)
# # print(mlir_results)
#
#
# class AlbertTfliteModuleTest(unittest.TestCase):
# @pytest.fixture(autouse=True)
# def configure(self, pytestconfig):
# self.save_mlir = pytestconfig.getoption("save_mlir")
# self.save_vmfb = pytestconfig.getoption("save_vmfb")
#
# def setUp(self):
# self.module_tester = AlbertTfliteModuleTester(self)
# self.module_tester.save_mlir = self.save_mlir
#
# import sys
#
# @pytest.mark.xfail(
# sys.platform == "darwin", reason="known macos tflite install issue"
# )
# def test_module_static_cpu(self):
# self.module_tester.dynamic = False
# self.module_tester.device = "cpu"
# self.module_tester.create_and_check_module()
# if __name__ == "__main__":
# module_tester = AlbertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
# unittest.main()

View File

@@ -0,0 +1,118 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/tensorflow/lite-model/albert_lite_base/squadv1/1?lite-format=tflite"
# model_path = model_path
# Inputs modified to be useful albert inputs.
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
# [ 1 384] int32
# [ 1 384] int32
# [ 1 384] int32
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
rtol = 1e-02
atol = 1e-03
print(
"numpy.allclose: ",
np.allclose(mlir_results, tflite_results, rtol, atol),
)
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class AlbertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="albert_lite_base")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
# print(shark_results)
compare_results(mlir_results, tflite_results)
class AlbertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = AlbertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
unittest.main()
# module_tester = AlbertTfliteModuleTester()
# module_tester.create_and_check_module()
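The TFLite variant follows the same shape with download_tflite_model and the tflite dialect. A standalone sketch (the simple max-error print stands in for the compare_results helper above):

import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference

mlir_model, function_name, inputs, tflite_results = download_tflite_model(
    model_name="albert_lite_base"
)
shark_module = SharkInference(
    mlir_module=mlir_model,
    function_name="main",
    device="cpu",
    mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
for i, (got, want) in enumerate(zip(mlir_results, tflite_results)):
    max_error = np.max(np.abs(got.astype(np.single) - want.astype(np.single)))
    print("Max error (%d): %f" % (i, max_error))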

View File

@@ -0,0 +1,107 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class AlexnetModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"alexnet", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"alexnet",
dynamic,
device,
"torch",
)
class AlexnetModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = AlexnetModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
@pytest.mark.xfail(
reason="Issue known, WIP",
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,97 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/google/lite-model/magenta/arbitrary-image-stylization-v1-256/int8/prediction/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class ArbitraryImageStylizationV1TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(
model_name="arbitrary-image-stylization-v1-256"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
# print(shark_results)
compare_results(mlir_results, tflite_results)
class ArbitraryImageStylizationV1TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = ArbitraryImageStylizationV1TfliteModuleTester(
self
)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = ArbitraryImageStylizationV1TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class BertBaseUncasedModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"bert-base-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,109 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import torch
import unittest
import numpy as np
import pytest
class BertBaseUncasedModuleTester:
def __init__(
self,
save_mlir=False,
save_vmfb=False,
benchmark=False,
):
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"bert-base-uncased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"bert-base-uncased",
dynamic,
device,
"torch",
)
class BertBaseUncasedModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = BertBaseUncasedModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,131 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
import urllib.request
from PIL import Image
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
def generate_inputs(input_details):
# input_details shape: [ 1 224 224 3] type: uint8
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
img_path = "https://github.com/google-coral/test_data/raw/master/bird.bmp"
local_path = "/".join([workdir, "bird.bmp"])
urllib.request.urlretrieve(img_path, local_path)
shape = input_details[0]["shape"]
im = np.array(Image.open(local_path).resize((shape[1], shape[2])))
args = [im.reshape(shape)]
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class BirdsV1TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="birds_V1")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.uint8,
}
]
inputs = generate_inputs(input_details) # device_inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class BirdsV1TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = BirdsV1TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="'tosa.conv2d' op attribute 'quantization_info' failed ",
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = BirdsV1TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class CamemBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"camembert-base"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class CamemBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = CamemBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,88 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/sayakpaul/lite-model/cartoongan/dr/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class CartoonganTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="cartoongan")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class CartoonganTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = CartoonganTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = CartoonganTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,63 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class ConvBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"dbmdz/convbert-base-turkish-cased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class ConvBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = ConvBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
@pytest.mark.xfail(
reason="Issue: https://github.com/iree-org/iree/issues/9971",
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,64 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
from shark.parser import shark_args
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
import tempfile
import os
class DebertaBaseModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"microsoft/deberta-base"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class DebertaBaseModuleTest(unittest.TestCase):
@pytest.skip(reason="Model can't be imported.", allow_module_level=True)
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DebertaBaseModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,90 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/google/lite-model/aiy/vision/classifier/birds_V1/3?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class DeepLabV3TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# preprocess to get SharkImporter input args
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="deeplabv3")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class DeepLabV3TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = DeepLabV3TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = DeepLabV3TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,90 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/tensorflow/lite-model/densenet/1/metadata/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class DensenetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
(
mlir_model,
function_name,
inputs,
tflite_results,
) = download_tflite_model(model_name="densenet")
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class DensenetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = DensenetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = DensenetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class DistilBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"distilbert-base-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class DistilBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DistilBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,119 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.parser import shark_args
from shark.shark_downloader import download_torch_model
import unittest
import numpy as np
import pytest
class DistilBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"distilbert-base-uncased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"distilbert-base-uncased",
dynamic,
device,
"torch",
)
class DistilBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = DistilBertModuleTester(self)
self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
@pytest.mark.skip(
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
)
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skip(reason="DistilBert needs to be uploaded to cloud.")
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,122 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# # Source https://tfhub.dev/sayannath/lite-model/image-scene/1
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_224_fp32.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class Efficientnet_224_fp32TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="efficientnet_224_fp32"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class Efficientnet_224_fp32TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = Efficientnet_224_fp32TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = Efficientnet_224_fp32TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,121 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# # Source https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/fp32/2
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_fp32_2.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class Efficientnet_lite0_fp32_2TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="efficientnet_lite0_fp32_2"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class Efficientnet_lite0_fp32_2TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = Efficientnet_lite0_fp32_2TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = Efficientnet_lite0_fp32_2TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,125 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# # Source https://tfhub.dev/tensorflow/lite-model/efficientnet/lite0/int8/2
# model_path = "https://storage.googleapis.com/iree-model-artifacts/efficientnet_lite0_int8_2.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
return [imagenet_data.generate_input(workdir, input_details)]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class Efficientnet_lite0_int8_2TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="efficientnet_lite0_int8_2"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.uint8,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class Efficientnet_lite0_int8_2TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = Efficientnet_lite0_int8_2TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = Efficientnet_lite0_int8_2TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class ElectraModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"google/electra-small-discriminator"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class ElectraModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = ElectraModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,73 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class ConvNextTinyModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"facebook/convnext-tiny-224"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
# result: array([['logits',
# <IREE DeviceArray: shape=[1, 1000], dtype=<class 'numpy.float32'>>]],
# dtype=object)
# post process of img output
ir_device_array = result[0][1]
logits = ir_device_array.astype(ir_device_array.dtype)
logits = np.squeeze(logits, axis=0)
print("logits: ", logits.shape)
print("golden_out: ", golden_out[0].shape)
print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
class ConvNextTinyModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = ConvNextTinyModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
# dynamic = False
# device = "cpu"
# module_tester = ConvNextTinyModuleTester()
# module_tester.create_and_check_module(dynamic, device)
unittest.main()
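
Image-classification TF tests like this one get back a nested [['logits', <IREE DeviceArray>]] result and currently only print the np.allclose outcome. A small sketch of the same post-processing with the check promoted to an assertion (helper name is an assumption, not part of this change):

import numpy as np


def assert_logits_close(result, golden_out, rtol=1e-02, atol=1e-03):
    # result looks like array([['logits', <IREE DeviceArray shape=[1, 1000]>]], dtype=object)
    ir_device_array = result[0][1]
    # astype to the array's own dtype materializes the DeviceArray as a host numpy array.
    logits = ir_device_array.astype(ir_device_array.dtype)
    logits = np.squeeze(logits, axis=0)
    np.testing.assert_allclose(golden_out[0], logits, rtol=rtol, atol=atol)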

View File

@@ -0,0 +1,66 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class FunnelModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"funnel-transformer/small"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class FunnelModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = FunnelModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(
reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.xfail(
reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,70 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class VitBaseModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"google/vit-base-patch16-224"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
# post process of img output
ir_device_array = result[0][1]
logits = ir_device_array.astype(ir_device_array.dtype)
logits = np.squeeze(logits, axis=0)
print("logits: ", logits.shape)
print("golden_out: ", golden_out[0].shape)
print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
class VitBaseModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = VitBaseModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
dynamic = False
device = "cpu"
module_tester = VitBaseModuleTester()
module_tester.create_and_check_module(dynamic, device)
# unittest.main()

View File

@@ -0,0 +1,119 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-64.tflite"
def generate_inputs(input_details):
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
# mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class GptTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="gpt2-64"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 64],
"dtype": np.int32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name="main",
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class GptTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = GptTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = GptTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,121 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# # Source https://tfhub.dev/tensorflow/lite-model/inception_v4/1/default/1
# model_path = "https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_fp32.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class Inception_v4_299_fp32TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="inception_v4_299_fp32"
)
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 299, 299, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class Inception_v4_299_fp32TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = Inception_v4_299_fp32TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = Inception_v4_299_fp32TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,125 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# Source https://tfhub.dev/tensorflow/lite-model/inception_v4_quant/1/default/1
# model_path = "https://storage.googleapis.com/iree-model-artifacts/inception_v4_299_uint8.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
return [imagenet_data.generate_input(workdir, input_details)]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class Inception_v4_299_uint8TfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="inception_v4_299_uint8"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 299, 299, 3],
"dtype": np.uint8,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class Inception_v4_299_uint8TfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = Inception_v4_299_uint8TfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = Inception_v4_299_uint8TfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class LayoutLMModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"microsoft/layoutlm-base-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class LayoutLMModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = LayoutLMModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
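
The static cpu/gpu/vulkan tests repeat the same body across the TF files. A rough sketch of the same coverage expressed with pytest.mark.parametrize, reusing the existing driver guards (the LayoutLMModuleTester defined above is only used as an example here):

import pytest

from shark.iree_utils._common import check_device_drivers, device_driver_info


@pytest.mark.parametrize(
    "device",
    [
        "cpu",
        pytest.param(
            "gpu",
            marks=pytest.mark.skipif(
                check_device_drivers("gpu"), reason=device_driver_info("gpu")
            ),
        ),
        pytest.param(
            "vulkan",
            marks=pytest.mark.skipif(
                check_device_drivers("vulkan"),
                reason=device_driver_info("vulkan"),
            ),
        ),
    ],
)
def test_module_static(device, pytestconfig):
    tester = LayoutLMModuleTester(benchmark=pytestconfig.getoption("benchmark"))
    tester.create_and_check_module(dynamic=False, device=device)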

View File

@@ -0,0 +1,61 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class LongformerModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"allenai/longformer-base-4096"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class LongformerModuleTest(unittest.TestCase):
@pytest.skip(reason="Model can't be imported.", allow_module_level=True)
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = LongformerModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
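
The @pytest.skip(..., allow_module_level=True) call above is evaluated at class-definition time, so the whole module is skipped during collection. A sketch of the more conventional spelling, skipping every test in the file via a module-level pytestmark:

import pytest

# Module-level skip applied to every test in this file.
pytestmark = pytest.mark.skip(reason="Model can't be imported.")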

View File

@@ -0,0 +1,88 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/intel/lite-model/midas/v2_1_small/1/lite/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MidasTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="midas"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class MidasTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MidasTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MidasTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,88 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://tfhub.dev/tensorflow/lite-model/mnasnet_1.0_224/1/metadata/1?lite-format=tflite"
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MnasnetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mnasnet_1.0_224"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class MnasnetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MnasnetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MnasnetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,134 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
from tank.tflite import squad_data
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilebert-baseline-tf2-float.tflite"
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
input_0 = np.asarray(
squad_data._INPUT_WORD_ID, dtype=input_details[0]["dtype"]
)
input_1 = np.asarray(
squad_data._INPUT_TYPE_ID, dtype=input_details[1]["dtype"]
)
input_2 = np.asarray(
squad_data._INPUT_MASK, dtype=input_details[2]["dtype"]
)
return [
input_0.reshape(input_details[0]["shape"]),
input_1.reshape(input_details[1]["shape"]),
input_2.reshape(input_details[2]["shape"]),
]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilebertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilebert-baseline-tf2-float"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilebertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilebertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilebertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,140 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
from tank.tflite import squad_data
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilebert-baseline-tf2-quant.tflite"
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
input_0 = np.asarray(
squad_data._INPUT_WORD_ID, dtype=input_details[0]["dtype"]
)
input_1 = np.asarray(
squad_data._INPUT_TYPE_ID, dtype=input_details[1]["dtype"]
)
input_2 = np.asarray(
squad_data._INPUT_MASK, dtype=input_details[2]["dtype"]
)
return [
input_0.reshape(input_details[0]["shape"]),
input_1.reshape(input_details[1]["shape"]),
input_2.reshape(input_details[2]["shape"]),
]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilebertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilebert-baseline-tf2-quant"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilebertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilebertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilebertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,141 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilebert-edgetpu-s-float.tflite"
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilebertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilebert-edgetpu-s-float"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilebertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilebertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilebertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,119 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilebert-edgetpu-s-quant.tflite"
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
args = []
args.append(
np.random.randint(
low=0,
high=256,
size=input_details[0]["shape"],
dtype=input_details[0]["dtype"],
)
)
args.append(
np.ones(
shape=input_details[1]["shape"], dtype=input_details[1]["dtype"]
)
)
args.append(
np.zeros(
shape=input_details[2]["shape"], dtype=input_details[2]["dtype"]
)
)
return args
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilebertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilebert-edgetpu-s-quant"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
class MobilebertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilebertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilebertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,60 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import iree.compiler as ireec
import unittest
import pytest
import numpy as np
class MobileBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model, func_name, inputs, golden_out = download_tf_model(
"google/mobilebert-uncased"
)
shark_module = SharkInference(
model, func_name, device=device, mlir_dialect="mhlo"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
class MobileBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MobileBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,108 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers, device_driver_info
from tank.model_utils import compare_tensors
from shark.shark_downloader import download_torch_model
import torch
import unittest
import numpy as np
import pytest
class MobileBertModuleTester:
def __init__(
self,
benchmark=False,
):
self.benchmark = benchmark
def create_and_check_module(self, dynamic, device):
model_mlir, func_name, input, act_out = download_torch_model(
"google/mobilebert-uncased", dynamic
)
# from shark.shark_importer import SharkImporter
# mlir_importer = SharkImporter(
# model,
# (input,),
# frontend="torch",
# )
# minilm_mlir, func_name = mlir_importer.import_mlir(
# is_dynamic=dynamic, tracing_required=True
# )
shark_module = SharkInference(
model_mlir,
func_name,
device=device,
mlir_dialect="linalg",
is_benchmark=self.benchmark,
)
shark_module.compile()
results = shark_module.forward(input)
assert True == compare_tensors(act_out, results)
if self.benchmark == True:
shark_module.shark_runner.benchmark_all_csv(
(input),
"google/mobilebert-uncased",
dynamic,
device,
"torch",
)
class MobileBertModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.module_tester = MobileBertModuleTester(self)
self.module_tester.benchmark = pytestconfig.getoption("benchmark")
def test_module_static_cpu(self):
dynamic = False
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
def test_module_dynamic_cpu(self):
dynamic = True
device = "cpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_static_gpu(self):
dynamic = False
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
)
def test_module_dynamic_gpu(self):
dynamic = True
device = "gpu"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
@pytest.mark.xfail(
reason="Issue known, WIP",
)
def test_module_static_vulkan(self):
dynamic = False
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
)
def test_module_dynamic_vulkan(self):
dynamic = True
device = "vulkan"
self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
unittest.main()
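
For reference, a minimal standalone sketch of the torch path this file exercises, outside the unittest harness (static shapes, CPU; assumes the same shark_tank artifacts download as in the test):

from shark.shark_downloader import download_torch_model
from shark.shark_inference import SharkInference
from tank.model_utils import compare_tensors

# Fetch the pre-imported linalg MLIR plus a sample input and reference output.
mlir_model, func_name, inp, golden_out = download_torch_model(
    "google/mobilebert-uncased", False
)

shark_module = SharkInference(
    mlir_model, func_name, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inp)
assert compare_tensors(golden_out, result)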

View File

@@ -0,0 +1,133 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
from tank.tflite import squad_data
# model_path = "https://tfhub.dev/tensorflow/lite-model/mobilebert/1/metadata/1?lite-format=tflite"
def generate_inputs(input_details):
for input in input_details:
print(str(input["shape"]), input["dtype"].__name__)
input_0 = np.asarray(
squad_data._INPUT_WORD_ID, dtype=input_details[0]["dtype"]
)
input_1 = np.asarray(
squad_data._INPUT_TYPE_ID, dtype=input_details[1]["dtype"]
)
input_2 = np.asarray(
squad_data._INPUT_MASK, dtype=input_details[2]["dtype"]
)
return [
input_0.reshape(input_details[0]["shape"]),
input_1.reshape(input_details[1]["shape"]),
input_2.reshape(input_details[2]["shape"]),
]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilebertTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilebert"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
{
"shape": [1, 384],
"dtype": np.int32,
},
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilebertTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilebertTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilebertTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,121 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilenet_v1_224_1.0_float.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilenetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilenet_v1_224_1.0_float"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilenetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilenetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilenetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,125 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilenet_v1_224_1.0_uint8.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
return [imagenet_data.generate_input(workdir, input_details)]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilenetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilenet_v1_224_1.0_uint8"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.uint8,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilenetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilenetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilenetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()
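
The float and uint8 mobilenet variants differ only in whether the imagenet sample is normalized to [-1, 1]. A sketch of one generate_inputs covering both, branching on the declared input dtype (assumes the same imagenet_data helper used above; not part of this diff):

import os
import sys

import numpy as np

from tank.tflite import imagenet_data


def generate_inputs(input_details):
    exe_basename = os.path.basename(sys.argv[0])
    workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
    os.makedirs(workdir, exist_ok=True)
    inputs = imagenet_data.generate_input(workdir, input_details)
    if np.issubdtype(np.dtype(input_details[0]["dtype"]), np.floating):
        # Float models expect [-1, 1] inputs; quantized models take the raw uint8 image.
        inputs = (inputs.astype("float32") / 127.5) - 1
    return [inputs]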

View File

@@ -0,0 +1,127 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# model_path = "https://storage.googleapis.com/tf_model_garden/vision/mobilenet/v2_1.0_int8/mobilenet_v2_1.00_224_int8.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilenetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilenet_v2_1.00_224_int8"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilenetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilenetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
import sys
@pytest.mark.xfail(
reason="known macos tflite install issue & "
"'tosa.conv2d' op attribute 'quantization_info' failed "
)
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilenetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()

View File

@@ -0,0 +1,122 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data
# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilenet_v2_1.0_224.tflite"
def generate_inputs(input_details):
exe_basename = os.path.basename(sys.argv[0])
workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
os.makedirs(workdir, exist_ok=True)
inputs = imagenet_data.generate_input(workdir, input_details)
# Normalize inputs to [-1, 1].
inputs = (inputs.astype("float32") / 127.5) - 1
return [inputs]
def compare_results(mlir_results, tflite_results):
print("Compare mlir_results VS tflite_results: ")
assert len(mlir_results) == len(
tflite_results
), "Number of results do not match"
for i in range(len(mlir_results)):
mlir_result = mlir_results[i]
tflite_result = tflite_results[i]
mlir_result = mlir_result.astype(np.single)
tflite_result = tflite_result.astype(np.single)
mlir_result = np.expand_dims(mlir_result, axis=0)
print("mlir_result.shape", mlir_result.shape)
print("tflite_result.shape", tflite_result.shape)
assert mlir_result.shape == tflite_result.shape, "shape does not match"
max_error = np.max(np.abs(mlir_result - tflite_result))
print("Max error (%d): %f" % (i, max_error))
class MobilenetTfliteModuleTester:
def __init__(
self,
dynamic=False,
device="cpu",
save_mlir=False,
save_vmfb=False,
):
self.dynamic = dynamic
self.device = device
self.save_mlir = save_mlir
self.save_vmfb = save_vmfb
def create_and_check_module(self):
shark_args.save_mlir = self.save_mlir
shark_args.save_vmfb = self.save_vmfb
# Preprocess to get SharkImporter input args
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
model_name="mobilenet_v2_1.0_224"
)
# Use SharkInference to get inference result
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
# Case1: Use shark_importer default generate inputs
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# Case2: Use manually set inputs
input_details = [
{
"shape": [1, 224, 224, 3],
"dtype": np.float32,
}
]
inputs = generate_inputs(input_details) # new inputs
shark_module = SharkInference(
mlir_module=mlir_model,
function_name=func_name,
device=self.device,
mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)
compare_results(mlir_results, tflite_results)
# print(mlir_results)
class MobilenetTfliteModuleTest(unittest.TestCase):
@pytest.fixture(autouse=True)
def configure(self, pytestconfig):
self.save_mlir = pytestconfig.getoption("save_mlir")
self.save_vmfb = pytestconfig.getoption("save_vmfb")
def setUp(self):
self.module_tester = MobilenetTfliteModuleTester(self)
self.module_tester.save_mlir = self.save_mlir
def test_module_static_cpu(self):
self.module_tester.dynamic = False
self.module_tester.device = "cpu"
self.module_tester.create_and_check_module()
if __name__ == "__main__":
# module_tester = MobilenetTfliteModuleTester()
# module_tester.save_mlir = True
# module_tester.save_vmfb = True
# module_tester.create_and_check_module()
unittest.main()


@@ -0,0 +1,125 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data

# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilenet_v2_224_1.0_uint8.tflite"


def generate_inputs(input_details):
    exe_basename = os.path.basename(sys.argv[0])
    workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
    os.makedirs(workdir, exist_ok=True)
    return [imagenet_data.generate_input(workdir, input_details)]


def compare_results(mlir_results, tflite_results):
    print("Compare mlir_results vs. tflite_results:")
    assert len(mlir_results) == len(
        tflite_results
    ), "Number of results does not match"
    for i in range(len(mlir_results)):
        mlir_result = mlir_results[i]
        tflite_result = tflite_results[i]
        mlir_result = mlir_result.astype(np.single)
        tflite_result = tflite_result.astype(np.single)
        mlir_result = np.expand_dims(mlir_result, axis=0)
        print("mlir_result.shape", mlir_result.shape)
        print("tflite_result.shape", tflite_result.shape)
        assert mlir_result.shape == tflite_result.shape, "shape does not match"
        max_error = np.max(np.abs(mlir_result - tflite_result))
        print("Max error (%d): %f" % (i, max_error))


class MobilenetTfliteModuleTester:
    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
        save_vmfb=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir
        self.save_vmfb = save_vmfb

    def create_and_check_module(self):
        shark_args.save_mlir = self.save_mlir
        shark_args.save_vmfb = self.save_vmfb
        # Preprocess to get SharkImporter input args
        mlir_model, func_name, inputs, tflite_results = download_tflite_model(
            model_name="mobilenet_v2_224_1.0_uint8"
        )
        # Use SharkInference to get inference result
        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )

        # Case 1: Use shark_importer default generated inputs
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)

        # Case 2: Use manually set inputs
        input_details = [
            {
                "shape": [1, 224, 224, 3],
                "dtype": np.uint8,
            }
        ]
        inputs = generate_inputs(input_details)  # new inputs
        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)
        # print(mlir_results)


class MobilenetTfliteModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")
        self.save_vmfb = pytestconfig.getoption("save_vmfb")

    def setUp(self):
        self.module_tester = MobilenetTfliteModuleTester(self)
        self.module_tester.save_mlir = self.save_mlir

    @pytest.mark.xfail(
        reason="known macOS tflite install issue & "
        "'tosa.conv2d' op attribute 'quantization_info' failed"
    )
    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()


if __name__ == "__main__":
    # module_tester = MobilenetTfliteModuleTester()
    # module_tester.save_mlir = True
    # module_tester.save_vmfb = True
    # module_tester.create_and_check_module()
    unittest.main()


@@ -0,0 +1,122 @@
import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference
import pytest
import unittest
from shark.parser import shark_args
import os
import sys
from tank.tflite import imagenet_data

# model_path = "https://storage.googleapis.com/iree-model-artifacts/mobilenet_v3-large_224_1.0_float.tflite"


def generate_inputs(input_details):
    exe_basename = os.path.basename(sys.argv[0])
    workdir = os.path.join(os.path.dirname(__file__), "../tmp", exe_basename)
    os.makedirs(workdir, exist_ok=True)
    inputs = imagenet_data.generate_input(workdir, input_details)
    # Normalize inputs to [-1, 1].
    inputs = (inputs.astype("float32") / 127.5) - 1
    return [inputs]


def compare_results(mlir_results, tflite_results):
    print("Compare mlir_results vs. tflite_results:")
    assert len(mlir_results) == len(
        tflite_results
    ), "Number of results does not match"
    for i in range(len(mlir_results)):
        mlir_result = mlir_results[i]
        tflite_result = tflite_results[i]
        mlir_result = mlir_result.astype(np.single)
        tflite_result = tflite_result.astype(np.single)
        mlir_result = np.expand_dims(mlir_result, axis=0)
        print("mlir_result.shape", mlir_result.shape)
        print("tflite_result.shape", tflite_result.shape)
        assert mlir_result.shape == tflite_result.shape, "shape does not match"
        max_error = np.max(np.abs(mlir_result - tflite_result))
        print("Max error (%d): %f" % (i, max_error))


class MobilenetTfliteModuleTester:
    def __init__(
        self,
        dynamic=False,
        device="cpu",
        save_mlir=False,
        save_vmfb=False,
    ):
        self.dynamic = dynamic
        self.device = device
        self.save_mlir = save_mlir
        self.save_vmfb = save_vmfb

    def create_and_check_module(self):
        shark_args.save_mlir = self.save_mlir
        shark_args.save_vmfb = self.save_vmfb
        # Preprocess to get SharkImporter input args
        mlir_model, func_name, inputs, tflite_results = download_tflite_model(
            model_name="mobilenet_v3-large_224_1.0_float"
        )
        # Use SharkInference to get inference result
        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )

        # Case 1: Use shark_importer default generated inputs
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)

        # Case 2: Use manually set inputs
        input_details = [
            {
                "shape": [1, 224, 224, 3],
                "dtype": np.float32,
            }
        ]
        inputs = generate_inputs(input_details)  # new inputs
        shark_module = SharkInference(
            mlir_module=mlir_model,
            function_name=func_name,
            device=self.device,
            mlir_dialect="tflite",
        )
        shark_module.compile()
        mlir_results = shark_module.forward(inputs)
        compare_results(mlir_results, tflite_results)
        # print(mlir_results)


class MobilenetTfliteModuleTest(unittest.TestCase):
    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        self.save_mlir = pytestconfig.getoption("save_mlir")
        self.save_vmfb = pytestconfig.getoption("save_vmfb")

    def setUp(self):
        self.module_tester = MobilenetTfliteModuleTester(self)
        self.module_tester.save_mlir = self.save_mlir

    def test_module_static_cpu(self):
        self.module_tester.dynamic = False
        self.module_tester.device = "cpu"
        self.module_tester.create_and_check_module()


if __name__ == "__main__":
    # module_tester = MobilenetTfliteModuleTester()
    # module_tester.save_mlir = True
    # module_tester.save_vmfb = True
    # module_tester.create_and_check_module()
    unittest.main()
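
For reference, the download -> compile -> forward flow these tests add can also be exercised directly, outside pytest. The following is a minimal sketch based only on the APIs used in the files above (download_tflite_model, SharkInference, compile, forward); it assumes the shark package is installed and the named artifact is available in shark_tank.

import numpy as np
from shark.shark_downloader import download_tflite_model
from shark.shark_inference import SharkInference

# Fetch the imported MLIR, entry function name, default inputs, and
# golden TFLite results for one of the models tested above.
mlir_model, func_name, inputs, tflite_results = download_tflite_model(
    model_name="mobilenet_v3-large_224_1.0_float"
)

# Compile for CPU through the tflite dialect and run inference.
shark_module = SharkInference(
    mlir_module=mlir_model,
    function_name=func_name,
    device="cpu",
    mlir_dialect="tflite",
)
shark_module.compile()
mlir_results = shark_module.forward(inputs)

# Same comparison the tests perform: max absolute error per output.
for i, (m, t) in enumerate(zip(mlir_results, tflite_results)):
    print("Max error (%d): %f" % (i, np.max(np.abs(np.asarray(m, dtype=np.single) - np.asarray(t, dtype=np.single)))))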

Some files were not shown because too many files have changed in this diff.