creates abstract test case class (#333)

2026-04-03 03:00:17 -04:00 · 2022-09-20 09:06:38 -05:00
parent 814a6f8295
commit 5df582e7e8
47 changed files with 384 additions and 2341 deletions
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -81,7 +81,7 @@ jobs:
        pip install ./wheelhouse/nodai*
        # Validate the Models
        /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
-        pytest tank |
+        pytest tank/test_models.py |
          tail -n 1 |
          tee -a pytest_results.txt
        if !(grep -Fxq " failed" pytest_results.txt) 
@@ -104,7 +104,7 @@ jobs:
        # Install the built wheel
        pip install ./wheelhouse/nodai*
        # Validate the Models
-        pytest tank |
+        pytest tank/test_models.py |
          tail -n 1 |
          tee -a pytest_results.txt
    
--- a/.github/workflows/test-models.yml
+++ b/.github/workflows/test-models.yml
@@ -90,7 +90,7 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --benchmark tank/ -k cpu
+        pytest --benchmark tank/test_models.py -k cpu
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv

@@ -100,7 +100,7 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --benchmark -k "cuda" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py 
+        pytest --benchmark tank/test_models.py -k "cuda" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py 
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv

@@ -110,4 +110,4 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
+        pytest tank/test_models.py -k 'vulkan' --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
--- a/conftest.py
+++ b/conftest.py
@@ -12,6 +12,12 @@ def pytest_addoption(parser):
        default="False",
        help="Add ONNX benchmark results to pytest benchmarks.",
    )
+    parser.addoption(
+        "--tf32",
+        action="store_true",
+        default="False",
+        help="Use TensorFloat-32 calculations.",
+    )
    # The following options are deprecated and pending removal.
    parser.addoption(
        "--save_mlir",
--- a/shark/shark_benchmark_runner.py
+++ b/shark/shark_benchmark_runner.py
@@ -237,15 +237,15 @@ for currently supported models. Exiting benchmark ONNX."
            ]

    def get_metadata(self, modelname):
-        with open("./tank/pytorch/torch_model_list.csv", mode="r") as csvfile:
+        with open("./tank/model_metadata.csv", mode="r") as csvfile:
            torch_reader = csv.reader(csvfile, delimiter=",")
            fields = next(torch_reader)
            for row in torch_reader:
                torch_model_name = row[0]
-                param_count = row[4]
-                model_tags = row[5]
-                model_notes = row[6]
                if torch_model_name == modelname:
+                    param_count = row[3]
+                    model_tags = row[4]
+                    model_notes = row[5]
                    return [param_count, model_tags, model_notes]

    def compare_bench_results(self, baseline: str, result: str):
@@ -304,6 +304,11 @@ for currently supported models. Exiting benchmark ONNX."
            bench_result["device"] = device_str
            bench_result["data_type"] = inputs[0].dtype
            for e in engines:
+                (
+                    bench_result["param_count"],
+                    bench_result["tags"],
+                    bench_result["notes"],
+                ) = ["", "", ""]
                if e == "frontend":
                    bench_result["engine"] = frontend
                    (
@@ -330,11 +335,6 @@ for currently supported models. Exiting benchmark ONNX."
                    ] = self.compare_bench_results(
                        self.frontend_result, bench_result["ms/iter"]
                    )
-                    (
-                        bench_result["param_count"],
-                        bench_result["tags"],
-                        bench_result["notes"],
-                    ) = ["", "", ""]

                elif e == "shark_iree_c":
                    bench_result["engine"] = "shark_iree_c"
@@ -348,11 +348,6 @@ for currently supported models. Exiting benchmark ONNX."
                    ] = self.compare_bench_results(
                        self.frontend_result, bench_result["ms/iter"]
                    )
-                    (
-                        bench_result["param_count"],
-                        bench_result["tags"],
-                        bench_result["notes"],
-                    ) = ["", "", ""]

                elif e == "onnxruntime":
                    bench_result["engine"] = "onnxruntime"
--- a/tank/MiniLM-L12-H384-uncased/MiniLM-L12-H384-uncased_test.py
+++ b/tank/MiniLM-L12-H384-uncased/MiniLM-L12-H384-uncased_test.py
@@ -1,80 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from shark.parser import shark_args
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class MiniLMModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-        onnx_bench=False,
-    ):
-        self.benchmark = benchmark
-        self.onnx_bench = onnx_bench
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "microsoft/MiniLM-L12-H384-uncased", device
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="mhlo",
-            is_benchmark=self.benchmark,
-        )
-        if self.benchmark == True:
-            shark_args.enable_tf32 = True
-            shark_module.compile()
-            shark_args.onnx_bench = self.onnx_bench
-            shark_module.shark_runner.benchmark_all_csv(
-                (inputs),
-                "microsoft/MiniLM-L12-H384-uncased",
-                dynamic,
-                device,
-                "tensorflow",
-            )
-            shark_args.enable_tf32 = False
-            rtol = 1e-01
-            atol = 1e-02
-
-        else:
-            shark_module.compile()
-            rtol = 1e-02
-            atol = 1e-03
-
-        # TODO: Remove catch once new MiniLM stable
-        try:
-            result = shark_module.forward(inputs)[0][1].to_host()
-
-        except:
-            result = shark_module.forward(inputs)
-
-        np.testing.assert_allclose(golden_out, result, rtol=rtol, atol=atol)
-
-
-class MiniLMModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MiniLMModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/MiniLM-L12-H384-uncased_torch/MiniLM-L12-H384-uncased_torch_test.py
+++ b/tank/MiniLM-L12-H384-uncased_torch/MiniLM-L12-H384-uncased_torch_test.py
@@ -1,72 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from shark.parser import shark_args
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class MiniLMModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-        onnx_bench=False,
-    ):
-        self.benchmark = benchmark
-        self.onnx_bench = onnx_bench
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "microsoft/MiniLM-L12-H384-uncased", dynamic
-        )
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        if self.benchmark == True:
-            shark_args.enable_tf32 = True
-            shark_module.compile()
-            shark_args.onnx_bench = self.onnx_bench
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "microsoft/MiniLM-L12-H384-uncased",
-                dynamic,
-                device,
-                "torch",
-            )
-            shark_args.enable_tf32 = False
-            rtol = 1e-01
-            atol = 1e-02
-        else:
-            shark_module.compile()
-            rtol = 1e-02
-            atol = 1e-03
-
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results, rtol, atol)
-
-
-class MiniLMModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MiniLMModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/albert-base-v2_tf/albert-base-v2_tf_test.py
+++ b/tank/albert-base-v2_tf/albert-base-v2_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class AlbertBaseModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "albert-base-v2"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class AlbertBaseModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = AlbertBaseModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/albert-base-v2_torch/albert-base-v2_torch_test.py
+++ b/tank/albert-base-v2_torch/albert-base-v2_torch_test.py
@@ -1,70 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class AlbertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "albert-base-v2", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "albert-base-v2",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class AlbertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = AlbertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/alexnet_torch/alexnet_torch_test.py
+++ b/tank/alexnet_torch/alexnet_torch_test.py
@@ -1,82 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-
-from parameterized import parameterized
-import unittest
-import numpy as np
-import pytest
-
-
-class AlexnetModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "alexnet", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "alexnet",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class AlexnetModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = AlexnetModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if dynamic == False:
-                if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                    pytest.xfail(
-                        reason="Assert Error:https://github.com/iree-org/iree/issues/10075"
-                    )
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/all_models.csv
+++ b/tank/all_models.csv
@@ -0,0 +1,34 @@
+resnet50,mhlo,tf,1e-02,1e-3,default
+albert-base-v2,mhlo,tf,1e-02,1e-3,default
+roberta-base,mhlo,tf,1e-02,1e-3,default
+bert-base-uncased,mhlo,tf,1e-2,1e-3,default
+camembert-base,mhlo,tf,1e-2,1e-3,default
+dbmdz/convbert-base-turkish-cased,mhlo,tf,1e-2,1e-3,default
+distilbert-base-uncased,mhlo,tf,1e-2,1e-3,default
+facebook/convnext-tiny-224,mhlo,tf,1e-2,1e-3,tf_vit
+funnel-transformer/small,mhlo,tf,1e-2,1e-3,default
+google/electra-small-discriminator,mhlo,tf,1e-2,1e-3,default
+google/mobilebert-uncased,mhlo,tf,1e-2,1e-3,default
+google/vit-base-patch16-224,mhlo,tf,1e-2,1e-3,tf_vit
+hf-internal-testing/tiny-random-flaubert,mhlo,tf,1e-2,1e-3,default
+microsoft/MiniLM-L12-H384-uncased,mhlo,tf,1e-2,1e-3,tf_hf
+microsoft/layoutlm-base-uncased,mhlo,tf,1e-2,1e-3,default
+microsoft/mpnet-base,mhlo,tf,1e-2,1e-3,default
+albert-base-v2,linalg,torch,1e-2,1e-3,default
+alexnet,linalg,torch,1e-2,1e-3,default
+bert-base-cased,linalg,torch,1e-2,1e-3,default
+bert-base-uncased,linalg,torch,1e-2,1e-3,default
+distilbert-base-uncased,linalg,torch,1e-2,1e-3,default
+facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default
+google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default
+microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default
+microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default
+microsoft/resnet-50,linalg,torch,1e-2,1e-3,default
+google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default
+mobilenet_v3_small,linalg,torch,1e-2,1e-3,default
+nvidia/mit-b0,linalg,torch,1e-2,1e-3,default
+resnet101,linalg,torch,1e-2,1e-3,default
+resnet18,linalg,torch,1e-2,1e-3,default
+resnet50,linalg,torch,1e-2,1e-3,default
+squeezenet1_0,linalg,torch,1e-2,1e-3,default
+wide_resnet50_2,linalg,torch,1e-2,1e-3,default
--- a/tank/bert-base-cased_torch/bert-base-cased_torch_test.py
+++ b/tank/bert-base-cased_torch/bert-base-cased_torch_test.py
@@ -1,80 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import torch
-import unittest
-import numpy as np
-import pytest
-
-
-class BertBaseUncasedModuleTester:
-    def __init__(
-        self,
-        save_mlir=False,
-        save_vmfb=False,
-        benchmark=False,
-    ):
-        self.save_mlir = save_mlir
-        self.save_vmfb = save_vmfb
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "bert-base-cased", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "bert-base-cased",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class BertBaseUncasedModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = BertBaseUncasedModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if dynamic == False:
-                if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                    pytest.xfail(reason="M1: CompilerToolError | M2: Pass")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/bert-base-uncased_tf/bert-base-uncased_tf_test.py
+++ b/tank/bert-base-uncased_tf/bert-base-uncased_tf_test.py
@@ -1,50 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from shark.parser import shark_args
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class BertBaseUncasedModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-        onnx_bench=False,
-    ):
-        self.benchmark = benchmark
-        self.onnx_bench = onnx_bench
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "bert-base-uncased"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class BertBaseUncasedModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = BertBaseUncasedModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/bert-base-uncased_torch/bert-base-uncased_torch_test.py
+++ b/tank/bert-base-uncased_torch/bert-base-uncased_torch_test.py
@@ -1,73 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from shark.parser import shark_args
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import torch
-import unittest
-import numpy as np
-import pytest
-
-
-class BertBaseUncasedModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-        onnx_bench=False,
-    ):
-        self.benchmark = benchmark
-        self.onnx_bench = onnx_bench
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "bert-base-uncased", dynamic
-        )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_args.onnx_bench = self.onnx_bench
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "bert-base-uncased",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class BertBaseUncasedModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = BertBaseUncasedModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if dynamic == True:
-                if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                    pytest.xfail(
-                        reason="Checking: Error invoking IREE compiler tool (no repro on M2)"
-                    )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/camembert-base_tf/camembert-base_tf_test.py
+++ b/tank/camembert-base_tf/camembert-base_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class CamemBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "camembert-base"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class CamemBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = CamemBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/convbert-base-turkish-cased_tf/convbert-base-turkish-cased_tf_test.py
+++ b/tank/convbert-base-turkish-cased_tf/convbert-base-turkish-cased_tf_test.py
@@ -1,51 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class ConvBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "dbmdz/convbert-base-turkish-cased"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class ConvBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = ConvBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            pytest.xfail(
-                reason="Issue: https://github.com/iree-org/iree/issues/9971"
-            )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/distilbert-base-uncased_tf/distilbert-base-uncased_tf_test.py
+++ b/tank/distilbert-base-uncased_tf/distilbert-base-uncased_tf_test.py
@@ -1,48 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class DistilBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "distilbert-base-uncased"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class DistilBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = DistilBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        pytest.xfail(reason="shark_tank hash issues -- awaiting triage")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/distilbert-base-uncased_torch/distilbert-base-uncased_torch_test.py
+++ b/tank/distilbert-base-uncased_torch/distilbert-base-uncased_torch_test.py
@@ -1,87 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.parser import shark_args
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class DistilBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "distilbert-base-uncased", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "distilbert-base-uncased",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class DistilBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = DistilBertModuleTester(self)
-        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
-        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device == "cpu":
-            pytest.skip(
-                reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
-            )
-        elif device == "cuda":
-            if dynamic == False:
-                pytest.skip(
-                    reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
-                )
-            elif dynamic == True:
-                pytest.skip(reason="DistilBert needs to be uploaded to cloud.")
-        elif device in ["vulkan", "metal"]:
-            pytest.skip(reason="DistilBert needs to be uploaded to cloud.")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/electra-small-discriminator_tf/electra-small-discriminator_tf_test.py
+++ b/tank/electra-small-discriminator_tf/electra-small-discriminator_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class ElectraModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "google/electra-small-discriminator"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class ElectraModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = ElectraModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/facebook_convnext-tiny-224_tf/facebook_convnext-tiny-224_tf_test.py
+++ b/tank/facebook_convnext-tiny-224_tf/facebook_convnext-tiny-224_tf_test.py
@@ -1,64 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class ConvNextTinyModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "facebook/convnext-tiny-224"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        #  result: array([['logits',
-        #         <IREE DeviceArray: shape=[1, 1000], dtype=<class 'numpy.float32'>>]],
-        #       dtype=object)
-
-        # post process of img output
-        ir_device_array = result[0][1]
-        logits = ir_device_array.astype(ir_device_array.dtype)
-        logits = np.squeeze(logits, axis=0)
-        print("logits: ", logits.shape)
-        print("golden_out: ", golden_out[0].shape)
-        print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
-
-
-class ConvNextTinyModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = ConvNextTinyModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-
-        if device in ["cuda"]:
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = ConvNextTinyModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/facebook_deit-small-distilled-patch16-224_torch/facebook_deit-small-distilled-patch16-224_torch_test.py
+++ b/tank/facebook_deit-small-distilled-patch16-224_torch/facebook_deit-small-distilled-patch16-224_torch_test.py
@@ -1,62 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class DeitModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_torch_model(
-            "facebook/deit-small-distilled-patch16-224", dynamic
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        print(np.allclose(golden_out[0], result[0], rtol=1e-02, atol=1e-03))
-
-
-class DeitModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = DeitModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if dynamic == True:
-            pytest.skip(
-                reason="Dynamic Test not Supported: mlir file not found"
-            )
-        if device in ["cuda"]:
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = DeiteModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/funnel-transformer_tf/funnel-transformer_tf_test.py
+++ b/tank/funnel-transformer_tf/funnel-transformer_tf_test.py
@@ -1,51 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class FunnelModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "funnel-transformer/small"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class FunnelModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = FunnelModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["cuda", "metal", "vulkan"]:
-            pytest.xfail(
-                reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
-            )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/google_vit-base-patch16-224_tf/google_vit-base-patch16-224_tf_test.py
+++ b/tank/google_vit-base-patch16-224_tf/google_vit-base-patch16-224_tf_test.py
@@ -1,61 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class VitBaseModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "google/vit-base-patch16-224"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        # post process of img output
-        ir_device_array = result[0][1]
-        logits = ir_device_array.astype(ir_device_array.dtype)
-        logits = np.squeeze(logits, axis=0)
-        print("logits: ", logits.shape)
-        print("golden_out: ", golden_out[0].shape)
-        print(np.allclose(golden_out[0], logits, rtol=1e-02, atol=1e-03))
-
-
-class VitBaseModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = VitBaseModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-
-        if device in ["cuda"]:
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = VitBaseModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/google_vit-base-patch16-224_torch/google_vit-base-patch16-224_torch_test.py
+++ b/tank/google_vit-base-patch16-224_torch/google_vit-base-patch16-224_torch_test.py
@@ -1,63 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class VitBaseModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_torch_model(
-            "google/vit-base-patch16-224", dynamic
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        print(np.allclose(golden_out[0], result[0], rtol=1e-02, atol=1e-03))
-
-
-class VitBaseModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = VitBaseModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if dynamic == True:
-            pytest.skip(
-                reason="Dynamic tests not supported. mlir file not found."
-            )
-
-        if device in ["cuda"]:
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = VitBaseModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/layoutlm-base-uncased_tf/layoutlm-base-uncased_tf_test.py
+++ b/tank/layoutlm-base-uncased_tf/layoutlm-base-uncased_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class LayoutLMModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "microsoft/layoutlm-base-uncased"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class LayoutLMModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = LayoutLMModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/longformer-base-4096_tf/longformer-base-4096_tf_test.py
+++ b/tank/longformer-base-4096_tf/longformer-base-4096_tf_test.py
@@ -1,48 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class LongformerModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "allenai/longformer-base-4096"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class LongformerModuleTest(unittest.TestCase):
-    @pytest.skip(reason="Model can't be imported.", allow_module_level=True)
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = LongformerModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/microsoft_beit-base-patch16-224-pt22k-ft22k_torch/microsoft_beit-base-patch16-224-pt22k-ft22k_torch.py
+++ b/tank/microsoft_beit-base-patch16-224-pt22k-ft22k_torch/microsoft_beit-base-patch16-224-pt22k-ft22k_torch.py
@@ -1,59 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class BeitModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_torch_model(
-            "microsoft/beit-base-patch16-224-pt22k-ft22k", dynamic
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        print(np.allclose(golden_out[0], result[0], rtol=1e-02, atol=1e-03))
-
-
-class BeitModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = BeitModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if dynamic == True:
-            pytest.skip(
-                reason="Dynamic tests not supported. mlir file not found."
-            )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = BeitModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/microsoft_resnet-50_torch/microsoft_resnet-50_torch_test.py
+++ b/tank/microsoft_resnet-50_torch/microsoft_resnet-50_torch_test.py
@@ -1,59 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class ResnetModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_torch_model(
-            "microsoft/resnet-50", dynamic
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        print(np.allclose(golden_out[0], result[0], rtol=1e-01, atol=1e-03))
-
-
-class ResnetModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = ResnetModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if dynamic == True:
-            pytest.skip(
-                reason="Dynamic tests not supported. mlir file not found."
-            )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = ResnetModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/mobilebert-uncased_tf/mobilebert-uncased_tf_test.py
+++ b/tank/mobilebert-uncased_tf/mobilebert-uncased_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class MobileBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "google/mobilebert-uncased"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class MobileBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MobileBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/mobilebert-uncased_torch/mobilebert-uncased_torch_test.py
+++ b/tank/mobilebert-uncased_torch/mobilebert-uncased_torch_test.py
@@ -1,74 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import torch
-import unittest
-import numpy as np
-import pytest
-
-
-class MobileBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "google/mobilebert-uncased", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "google/mobilebert-uncased",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class MobileBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MobileBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["vulkan", "metal"]:
-            if dynamic == False:
-                pytest.xfail(reason="Issue known, WIP")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/mobilenet_v3_small_torch/mobilenet_v3_small_torch_test.py
+++ b/tank/mobilenet_v3_small_torch/mobilenet_v3_small_torch_test.py
@@ -1,79 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class MobileNetV3ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "mobilenet_v3_small", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        np.testing.assert_allclose(act_out, results, rtol=1e-02, atol=1e-03)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "mobilenet_v3_small",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class MobileNetV3ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MobileNetV3ModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device == "cuda":
-            pytest.xfail(reason="golden results don't match.")
-        elif device in ["vulkan", "metal"]:
-            if dynamic == False:
-                pytest.xfail(reason="stuck in the pipeline.")
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/model_metadata.csv
+++ b/tank/model_metadata.csv
@@ -0,0 +1,29 @@
+model_name, use_tracing, dynamic, param_count, tags, notes
+microsoft/MiniLM-L12-H384-uncased,True,True,66M,"nlp;bert-variant;transformer-encoder","Large version has 12 layers; 384 hidden size; Smaller than BERTbase (66M params vs 109M params)"
+albert-base-v2,True,True,11M,"nlp;bert-variant;transformer-encoder","12 layers; 128 embedding dim; 768 hidden dim; 12 attention heads; Smaller than BERTbase (11M params vs 109M params); Uses weight sharing to reduce # params but computational cost is similar to BERT."
+bert-base-uncased,True,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
+bert-base-cased,True,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
+distilbert-base-uncased,True,True,66M,"nlp;bert-variant;transformer-encoder","Smaller and faster than BERT with 97percent retained accuracy."
+google/mobilebert-uncased,True,True,25M,"nlp,bert-variant,transformer-encoder,mobile","24 layers, 512 hidden size, 128 embedding"
+alexnet,False,True,61M,"cnn,parallel-layers","The CNN that revolutionized computer vision (move away from hand-crafted features to neural networks),10 years old now and probably no longer used in prod."
+resnet18,False,True,11M,"cnn,image-classification,residuals,resnet-variant","1 7x7 conv2d and the rest are 3x3 conv2d"
+resnet50,False,True,23M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
+resnet101,False,True,29M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
+squeezenet1_0,False,True,1.25M,"cnn,image-classification,mobile,parallel-layers","Parallel conv2d (1x1 conv to compress -> (3x3 expand | 1x1 expand) -> concat)"
+wide_resnet50_2,False,True,69M,"cnn,image-classification,residuals,resnet-variant","Resnet variant where model depth is decreased and width is increased."
+mobilenet_v3_small,False,True,2.5M,"image-classification,cnn,mobile",N/A
+google/vit-base-patch16-224,True,False,86M,"image-classification,vision-transformer,transformer-encoder",N/A
+microsoft/resnet-50,True,False,23M,"image-classification,cnn,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
+facebook/deit-small-distilled-patch16-224,True,False,22M,"image-classification,vision-transformer,cnn",N/A
+microsoft/beit-base-patch16-224-pt22k-ft22k,True,False,86M,"image-classification,transformer-encoder,bert-variant,vision-transformer",N/A
+nvidia/mit-b0,True,False,3.7M,"image-classification,transformer-encoder",SegFormer
+camembert-base,False,False,-,-,-
+dbmdz/convbert-base-turkish-cased,False,False,-,-,-
+google/electra-small-discriminator,False,False,-,-,-
+hf-internal-testing/tiny-random-flaubert,False,False,-,-,-
+funnel-transformer/small,False,False,-,-,-
+microsoft/layoutlm-base-uncased,False,False,-,-,-
+microsoft/mpnet-base,False,False,-,-,-
+roberta-base,False,False,-,-,-
+xlm-roberta-base,False,False,-,-,-
+facebook/convnext-tiny-224,False,False,-,-,-
--- a/tank/model_utils.py
+++ b/tank/model_utils.py
@@ -16,11 +16,20 @@ vision_models = [
    "wide_resnet50_2",
    "mobilenet_v3_small",
 ]
+hf_img_cls_models = [
+    "google/vit-base-patch16-224",
+    "microsoft/resnet-50",
+    "facebook/deit-small-distilled-patch16-224",
+    "microsoft/beit-base-patch16-224-pt22k-ft22k",
+    "nvidia/mit-b0",
+]


 def get_torch_model(modelname):
+    # Choose the loader by which name list contains `modelname`; a name found
+    # in neither list is handed to get_hf_model.
    if modelname in vision_models:
        return get_vision_model(modelname)
+    elif modelname in hf_img_cls_models:
+        return get_hf_img_cls_model(modelname)
    else:
        return get_hf_model(modelname)

@@ -68,8 +77,8 @@ class HuggingFaceImageClassification(torch.nn.Module):
 def get_hf_img_cls_model(name):
    model = HuggingFaceImageClassification(name)
    # you can use preprocess_input_image to get the test_input or just random value.
-    # test_input = preprocess_input_image(name)
-    test_input = torch.FloatTensor(1, 3, 224, 224).uniform_(-1, 1)
+    test_input = preprocess_input_image(name)
+    # test_input = torch.FloatTensor(1, 3, 224, 224).uniform_(-1, 1)
    print("test_input.shape: ", test_input.shape)
    # test_input.shape:  torch.Size([1, 3, 224, 224])
    actual_out = model(test_input)
--- a/tank/model_utils_tf.py
+++ b/tank/model_utils_tf.py
@@ -28,24 +28,28 @@ maskedlm_models = [
    "albert-base-v2",
    "bert-base-uncased",
    "camembert-base",
-    "convbert-base-turkish-cased",
+    "dbmdz/convbert-base-turkish-cased",
    "deberta-base",
    "distilbert-base-uncased",
-    "electra-small-discriminator",
-    "funnel-transformer",
-    "layoutlm-base-uncased",
+    "google/electra-small-discriminator",
+    "funnel-transformer/small",
+    "microsoft/layoutlm-base-uncased",
    "longformer-base-4096",
-    "mobilebert-uncased",
-    "mpnet-base",
-    "rembert",
+    "google/mobilebert-uncased",
+    "microsoft/mpnet-base",
+    "google/rembert",
    "roberta-base",
    "tapas-base",
-    "tiny-random-flaubert",
+    "hf-internal-testing/tiny-random-flaubert",
    "xlm-roberta",
 ]
 tfhf_models = [
    "microsoft/MiniLM-L12-H384-uncased",
 ]
+img_models = [
+    "google/vit-base-patch16-224",
+    "facebook/convnext-tiny-224",
+]


 def get_tf_model(name):
@@ -55,8 +59,12 @@ def get_tf_model(name):
        return get_causal_lm_model(name)
    elif name in tfhf_models:
        return get_TFhf_model(name)
-    else:
+    elif name in img_models:
        return get_causal_image_model(name)
+    else:
+        raise Exception(
+            "TF model not found! Please check that the modelname has been input correctly."
+        )


 ##################### Tensorflow Hugging Face LM Models ###################################
--- a/tank/mpnet-base_tf/mpnet-base_tf_test.py
+++ b/tank/mpnet-base_tf/mpnet-base_tf_test.py
@@ -1,48 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class MpNetModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "microsoft/mpnet-base"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class MpNetModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MpNetModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/203")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/nvidia_mit-b0_torch/nvidia_mit-b0_torch_test.py
+++ b/tank/nvidia_mit-b0_torch/nvidia_mit-b0_torch_test.py
@@ -1,63 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import pytest
-import numpy as np
-
-
-class MitModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_torch_model(
-            "nvidia/mit-b0", dynamic
-        )
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-
-        print(np.allclose(golden_out[0], result[0], rtol=1e-02, atol=1e-03))
-
-
-class MitModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = MitModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if dynamic == True:
-            pytest.skip(
-                reason="Dynamic tests not supported. mlir file not found."
-            )
-
-        if device in ["cuda"]:
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    # dynamic = False
-    # device = "cpu"
-    # module_tester = MitModuleTester()
-    # module_tester.create_and_check_module(dynamic, device)
-    unittest.main()
--- a/tank/pytorch/torch_model_list.csv
+++ b/tank/pytorch/torch_model_list.csv
@@ -1,8 +1,9 @@
 model_name, use_tracing, model_type, dynamic, param_count, tags, notes
-microsoft/MiniLM-L12-H384-uncased,True,hf,True,66M,"nlp,bert-variant,transformer-encoder","Large version has 12 layers, 384 hidden size,Smaller than BERTbase (66M params vs 109M params)"
-albert-base-v2,True,hf,True,11M,"nlp,bert-variant,transformer-encoder","12 layers, 128 embedding dim, 768 hidden dim, 12 attention heads,Smaller than BERTbase (11M params vs 109M params),Uses weight sharing to reduce # params but computational cost is similar to BERT."
-bert-base-uncased,True,hf,True,109M,"nlp,bert-variant,transformer-encoder","12 layers, 768 hidden, 12 attention heads"
-bert-base-cased,True,hf,True,109M,"nlp,bert-variant,transformer-encoder","12 layers, 768 hidden, 12 attention heads"
+microsoft/MiniLM-L12-H384-uncased,True,hf,True,66M,"nlp;bert-variant;transformer-encoder","Large version has 12 layers; 384 hidden size; Smaller than BERTbase (66M params vs 109M params)"
+albert-base-v2,True,hf,True,11M,"nlp;bert-variant;transformer-encoder","12 layers; 128 embedding dim; 768 hidden dim; 12 attention heads; Smaller than BERTbase (11M params vs 109M params); Uses weight sharing to reduce # params but computational cost is similar to BERT."
+bert-base-uncased,True,hf,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
+bert-base-cased,True,hf,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
+distilbert-base-uncased,True,hf,True,66M,"nlp;bert-variant;transformer-encoder","Smaller and faster than BERT with 97percent retained accuracy."
 google/mobilebert-uncased,True,hf,True,25M,"nlp,bert-variant,transformer-encoder,mobile","24 layers, 512 hidden size, 128 embedding"
 alexnet,False,vision,True,61M,"cnn,parallel-layers","The CNN that revolutionized computer vision (move away from hand-crafted features to neural networks),10 years old now and probably no longer used in prod."
 resnet18,False,vision,True,11M,"cnn,image-classification,residuals,resnet-variant","1 7x7 conv2d and the rest are 3x3 conv2d"
--- a/tank/resnet101_torch/resnet101_torch_test.py
+++ b/tank/resnet101_torch/resnet101_torch_test.py
@@ -1,78 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import compare_tensors
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class Resnet101ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "resnet101", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "resnet101",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class Resnet101ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = Resnet101ModuleTester(self)
-        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
-        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/resnet18_torch/resnet18_torch_test.py
+++ b/tank/resnet18_torch/resnet18_torch_test.py
@@ -1,78 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import get_vision_model, compare_tensors
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class Resnet18ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "resnet18", dynamic
-        )
-
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "resnet18",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class Resnet18ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = Resnet18ModuleTester(self)
-        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
-        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-
-        if device in ["metal", "vulkan"]:
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/resnet50/resnet50_test.py
+++ b/tank/resnet50/resnet50_test.py
@@ -1,64 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_downloader import download_tf_model
-from shark.parser import shark_args
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-import numpy as np
-
-
-class Resnet50ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-        onnx_bench=False,
-    ):
-        self.benchmark = benchmark
-        self.onnx_bench = onnx_bench
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model("resnet50")
-
-        shark_module = SharkInference(
-            model,
-            func_name,
-            device=device,
-            mlir_dialect="mhlo",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-        if self.benchmark == True:
-            shark_args.enable_tf32 = True
-            shark_args.onnx_bench = self.onnx_bench
-            shark_module.shark_runner.benchmark_all_csv(
-                (inputs), "resnet50", dynamic, device, "tensorflow"
-            )
-
-
-class Resnet50ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = Resnet50ModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-        self.module_tester.onnx_bench = pytestconfig.getoption("onnx_bench")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                pytest.xfail(reason="M2: Assert error & M1: CompilerToolError")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/resnet50_torch/resnet50_torch_test.py
+++ b/tank/resnet50_torch/resnet50_torch_test.py
@@ -1,81 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import get_vision_model, compare_tensors
-from shark.shark_downloader import download_torch_model
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class Resnet50ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "resnet50", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "resnet50",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class Resnet50ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = Resnet50ModuleTester(self)
-        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
-        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                pytest.xfail(reason="M1: CompilerToolError | M2: Pass")
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/roberta-base_tf/roberta-base_tf_test.py
+++ b/tank/roberta-base_tf/roberta-base_tf_test.py
@@ -1,54 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from shark.parser import shark_args
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-import tempfile
-import os
-
-
-class RobertaBaseModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "roberta-base"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(
-            result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
-        )
-
-
-class RobertaBaseModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = RobertaBaseModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device == "cuda":
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/squeezenet1_0_torch/squeezenet1_0_torch_test.py
+++ b/tank/squeezenet1_0_torch/squeezenet1_0_torch_test.py
@@ -1,70 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import get_vision_model, compare_tensors
-from shark.shark_downloader import download_torch_model
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class SqueezenetModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "squeezenet1_0", dynamic
-        )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "squeezenet1_0",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class SqueezenetModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = SqueezenetModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device in ["metal", "vulkan"]:
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-                if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                    pytest.xfail(
-                        reason="https://github.com/iree-org/iree/issues/9972"
-                    )
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/test_models.py
+++ b/tank/test_models.py
@@ -0,0 +1,268 @@
+from shark.iree_utils._common import (
+    check_device_drivers,
+    device_driver_info,
+    IREE_DEVICE_MAP,
+)
+from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
+from parameterized import parameterized
+from shark.shark_downloader import (
+    download_tf_model,
+    download_torch_model,
+    download_tflite_model,
+)
+from shark.shark_inference import SharkInference
+from shark.parser import shark_args
+import pytest
+import unittest
+import numpy as np
+import csv
+
+
+def load_csv_and_convert(filename, gen=False):
+    """
+    Read a model-config csv and return one dict per usable row, for
+    consumption by get_valid_test_params.
+
+    Columns are read by position: model_name, dialect, framework, rtol, atol,
+    out_type. rtol and atol are converted to float; the others stay strings.
+    Rows with fewer than six columns are reported on stdout and skipped.
+
+    If gen is True, the configs are additionally written to
+    tank/dict_configs.py (relative to the working directory) as a list
+    literal named ALL.
+    """
+    model_configs = []
+    # The file is only read, so open it read-only; newline="" is what the csv
+    # module asks for so that it can handle line endings itself.
+    with open(filename, "r", newline="") as f:
+        reader = csv.reader(f, delimiter=",")
+        for row in reader:
+            # Six columns are indexed below (row[0] .. row[5]).
+            if len(row) < 6:
+                # row is a list and must be stringified before concatenation.
+                print("invalid model: " + str(row))
+                continue
+            model_configs.append(
+                {
+                    "model_name": row[0],
+                    "dialect": row[1],
+                    "framework": row[2],
+                    "rtol": float(row[3]),
+                    "atol": float(row[4]),
+                    "out_type": row[5],
+                }
+            )
+    # This is a pytest workaround: get_valid_test_params imports ALL from the
+    # generated module instead of using the list returned here.
+    if gen:
+        with open("tank/dict_configs.py", "w+") as out:
+            out.write("ALL = [\n")
+            for c in model_configs:
+                out.write(str(c) + ",\n")
+            out.write("]")
+    return model_configs
+
+
+def get_valid_test_params():
+    """
+    Build the (dynamic, device, config) tuples used to parameterize the tests.
+
+    Devices are the keys of IREE_DEVICE_MAP for which check_device_drivers
+    returns a falsy value. Each one is combined with both dynamic flags and
+    with every model config listed in tank/all_models.csv.
+    """
+    usable_devices = []
+    for name in IREE_DEVICE_MAP.keys():
+        if check_device_drivers(name):
+            continue
+        usable_devices.append(name)
+
+    # TODO: This is soooo ugly, but for some reason creating the dict at runtime
+    # results in strange pytest failures.
+    # Regenerate tank/dict_configs.py from the csv, then import its contents.
+    load_csv_and_convert("tank/all_models.csv", True)
+    from tank.dict_configs import ALL
+
+    combos = []
+    for is_dynamic in (True, False):
+        for name in usable_devices:
+            for cfg in ALL:
+                combos.append((is_dynamic, name, cfg))
+    return combos
+
+
+def shark_test_name_func(testcase_func, param_num, param):
+    """
+    Generate the test name for one expanded case; this is passed to the
+    'parameterized' package as name_func to rename the pytest.
+
+    The name is the test function's name, then the model name and framework
+    from the config dict (when one is among the params), then the remaining
+    params in order with booleans shown as "dynamic" (True) or "static"
+    (False). param_num is not used.
+    """
+    param_names = []
+    for x in param.args:
+        if isinstance(x, dict) and "model_name" in x:
+            # Read the config directly rather than splitting its repr on
+            # spaces, which broke on values containing a space.
+            param_names.insert(
+                0,
+                parameterized.to_safe_name(str(x["model_name"])).strip("_"),
+            )
+            param_names.insert(
+                1,
+                parameterized.to_safe_name(str(x["framework"])).strip("_"),
+            )
+        elif isinstance(x, bool):
+            param_names.append("dynamic" if x else "static")
+        else:
+            param_names.append(x)
+    return "%s_%s" % (
+        testcase_func.__name__,
+        parameterized.to_safe_name("_".join(str(x) for x in param_names)),
+    )
+
+
+class SharkModuleTester:
+    def __init__(self, config):
+        """config should be a dict containing minimally:
+        dialect: (str) name of input dialect
+        framework: (str) one of tf, tflite, pytorch
+        model_name: (str) name of the model in the tank ("resnet50")
+        rtol/atol: (float) tolerances for golden values
+        """
+        self.config = config
+
+    def create_and_check_module(self, dynamic, device):
+        """Download a tank model, run it through SHARK and check the output.
+
+        The downloader is selected by ``self.config["framework"]``. The
+        module is compiled for ``device``, executed on the downloaded
+        inputs, and the post-processed result is compared to the golden
+        output using ``self.config["rtol"]`` and ``self.config["atol"]``.
+        When ``self.benchmark`` equals ``True`` a benchmark pass follows.
+
+        Args:
+            dynamic: static/dynamic flag of the test case; in this method
+                it is only forwarded to the benchmark writer.
+            device: device name handed to ``SharkInference``.
+
+        Raises:
+            ValueError: if the configured framework has no downloader.
+            AssertionError: if the result is outside the tolerances.
+        """
+        framework = self.config["framework"]
+        model_name = self.config["model_name"]
+
+        # NOTE(review): `dynamic` is not forwarded to any downloader, so the
+        # static and dynamic cases appear to fetch the same artifacts --
+        # confirm this is intended.
+        if framework == "tf":
+            model, func_name, inputs, golden_out = download_tf_model(
+                model_name
+            )
+        elif framework == "torch":
+            model, func_name, inputs, golden_out = download_torch_model(
+                model_name
+            )
+        elif framework == "tflite":
+            model, func_name, inputs, golden_out = download_tflite_model(
+                model_name=model_name
+            )
+        else:
+            # Fail early with a clear message instead of handing a None
+            # model to SharkInference.
+            raise ValueError(
+                "Unsupported framework in test config: {!r}".format(framework)
+            )
+
+        shark_module = SharkInference(
+            model,
+            func_name,
+            device=device,
+            mlir_dialect=self.config["dialect"],
+            is_benchmark=self.benchmark,
+        )
+        shark_module.compile()
+        result = shark_module.forward(inputs)
+        golden_out, result = self.postprocess_outputs(golden_out, result)
+
+        np.testing.assert_allclose(
+            golden_out,
+            result,
+            rtol=self.config["rtol"],
+            atol=self.config["atol"],
+        )
+
+        # NOTE(review): the explicit `== True` comparisons are kept on
+        # purpose; these values come from pytest options and may not be
+        # plain bools -- confirm before switching to truthiness checks.
+        if self.benchmark == True:
+            shark_args.enable_tf32 = self.tf32
+            if shark_args.enable_tf32 == True:
+                # Recompile with TF32 enabled for the benchmark run and
+                # always clear the global flag again, even if the compile
+                # fails, so it cannot leak into later test cases.
+                try:
+                    shark_module.compile()
+                finally:
+                    shark_args.enable_tf32 = False
+
+            shark_args.onnx_bench = self.onnx_bench
+            shark_module.shark_runner.benchmark_all_csv(
+                inputs,
+                model_name,
+                dynamic,
+                device,
+                framework,
+            )
+
+    def postprocess_outputs(self, golden_out, result):
+        """Prepare the forward-pass result and golden values for comparison.
+
+        The handling is selected by ``self.config["out_type"]``:
+
+        * ``"tf_vit"``: uses ``result[0][1]`` with axis 0 squeezed, and
+          ``golden_out[0]`` as the expected value.
+        * ``"tf_hf"``: uses ``result[0][1].to_host()`` and ``golden_out``
+          unchanged.
+        * ``"default"``: returns both values unchanged.
+
+        Returns:
+            A tuple ``(expected, logits)``.
+
+        Raises:
+            ValueError: if ``out_type`` is not one of the values above.
+        """
+        out_type = self.config["out_type"]
+        if out_type == "tf_vit":
+            ir_device_array = result[0][1]
+            # NOTE(review): casting to its own dtype presumably converts the
+            # device array into a host array -- confirm.
+            logits = ir_device_array.astype(ir_device_array.dtype)
+            logits = np.squeeze(logits, axis=0)
+            expected = golden_out[0]
+        elif out_type == "tf_hf":
+            logits = result[0][1].to_host()
+            expected = golden_out
+        elif out_type == "default":
+            logits = result
+            expected = golden_out
+        else:
+            # Without this branch an unknown out_type only surfaced as an
+            # UnboundLocalError at the return statement.
+            raise ValueError(
+                "Unsupported out_type in test config: {!r}".format(out_type)
+            )
+
+        return expected, logits
+
+
+class SharkModuleTest(unittest.TestCase):
+    """Parameterized test case covering every tank model configuration.
+
+    One ``test_module`` case is generated per entry of ``param_list``. Each
+    case builds a ``SharkModuleTester`` from its config, applies the known
+    xfail/skip rules, and then compiles and checks the model.
+    """
+
+    @pytest.fixture(autouse=True)
+    def configure(self, pytestconfig):
+        # Keep the pytest config so test_module can read command-line
+        # options from it.
+        self.pytestconfig = pytestconfig
+
+    # Evaluated once, when the class body is executed.
+    param_list = get_valid_test_params()
+
+    @parameterized.expand(param_list, name_func=shark_test_name_func)
+    def test_module(self, dynamic, device, config):
+        """Run one (dynamic, device, config) case, honouring xfail/skip rules.
+
+        Args:
+            dynamic: static/dynamic flag of the case.
+            device: device name the model is compiled for.
+            config: per-model settings; this method reads ``model_name``
+                and ``framework`` from it.
+        """
+        self.module_tester = SharkModuleTester(config)
+        self.module_tester.benchmark = self.pytestconfig.getoption("benchmark")
+        self.module_tester.onnx_bench = self.pytestconfig.getoption(
+            "onnx_bench"
+        )
+        self.module_tester.tf32 = self.pytestconfig.getoption("tf32")
+
+        model_name = config["model_name"]
+        on_vulkan = device in ["metal", "vulkan"]
+        on_gpu = device in ["cuda", "metal", "vulkan"]
+
+        # Known failures. Every rule below is keyed on a distinct model
+        # name, so at most one of them can fire for a given case.
+        if model_name == "google/vit-base-patch16-224" and device == "cuda":
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
+        if model_name == "resnet50" and on_vulkan:
+            # Query the target triple once; it may be None.
+            vulkan_triple = get_vulkan_triple_flag()
+            if (
+                vulkan_triple is not None
+                and "m1-moltenvk-macos" in vulkan_triple
+            ):
+                pytest.xfail(reason="M2: Assert Error & M1: CompilerToolError")
+        if model_name == "roberta-base" and device == "cuda":
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
+        if model_name == "google/rembert":
+            pytest.skip(reason="Model too large to convert.")
+        if model_name == "dbmdz/convbert-base-turkish-cased" and on_vulkan:
+            pytest.xfail(
+                reason="Issue: https://github.com/iree-org/iree/issues/9971"
+            )
+        # This single rule also covers the cuda case that used to be
+        # checked separately (issue 311).
+        if model_name == "facebook/convnext-tiny-224" and on_gpu:
+            pytest.xfail(
+                reason="https://github.com/nod-ai/SHARK/issues/311, https://github.com/nod-ai/SHARK/issues/342"
+            )
+        if model_name == "funnel-transformer/small" and on_gpu:
+            pytest.xfail(
+                reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
+            )
+        if model_name == "microsoft/mpnet-base":
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/203")
+        if model_name == "nvidia/mit-b0":
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/343")
+        if (
+            model_name == "google/mobilebert-uncased"
+            and on_vulkan
+            and config["framework"] == "torch"
+        ):
+            pytest.xfail(
+                reason="Numerics issues -- https://github.com/nod-ai/SHARK/issues/344"
+            )
+
+        # Framework-wide rule; checked last so the model-specific outcomes
+        # above take precedence.
+        if config["framework"] == "tf" and dynamic == True:
+            pytest.skip(
+                reason="Dynamic shapes not supported for this framework."
+            )
+
+        self.module_tester.create_and_check_module(dynamic, device)
--- a/tank/test_utils.py
+++ b/tank/test_utils.py
@@ -1,41 +0,0 @@
-from shark.iree_utils._common import (
-    check_device_drivers,
-    device_driver_info,
-    IREE_DEVICE_MAP,
-)
-from parameterized import parameterized
-
-
-def get_valid_test_params():
-    """
-    Generate a list of all combinations of available devices and static/dynamic flag.
-    """
-    device_list = [
-        device
-        for device in IREE_DEVICE_MAP.keys()
-        if not check_device_drivers(device)
-    ]
-    dynamic_list = (True, False)
-    param_list = [
-        (dynamic, device) for dynamic in dynamic_list for device in device_list
-    ]
-    return param_list
-
-
-def shark_test_name_func(testcase_func, param_num, param):
-    """
-    Generate function name string which shows dynamic/static and device name.
-    this will be ingested by 'parameterized' package to rename the pytest.
-    """
-    param_names = []
-    for x in param.args:
-        if x == True:
-            param_names.append("dynamic")
-        elif x == False:
-            param_names.append("static")
-        else:
-            param_names.append(x)
-    return "%s_%s" % (
-        testcase_func.__name__,
-        parameterized.to_safe_name("_".join(str(x) for x in param_names)),
-    )
--- a/tank/tiny-random-flaubert_tf/tiny-random-flaubert_tf_test.py
+++ b/tank/tiny-random-flaubert_tf/tiny-random-flaubert_tf_test.py
@@ -1,47 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class FlauBertModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "hf-internal-testing/tiny-random-flaubert"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
-
-
-class FlauBertModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = FlauBertModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/wide_resnet50_2_torch/wide_resnet50_2_torch_test.py
+++ b/tank/wide_resnet50_2_torch/wide_resnet50_2_torch_test.py
@@ -1,79 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from tank.model_utils import get_vision_model, compare_tensors
-from shark.shark_downloader import download_torch_model
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import unittest
-import numpy as np
-import pytest
-
-
-class WideResnet50ModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model_mlir, func_name, input, act_out = download_torch_model(
-            "wide_resnet50_2", dynamic
-        )
-
-        # from shark.shark_importer import SharkImporter
-        # mlir_importer = SharkImporter(
-        #    model,
-        #    (input,),
-        #    frontend="torch",
-        # )
-        # minilm_mlir, func_name = mlir_importer.import_mlir(
-        #    is_dynamic=dynamic, tracing_required=True
-        # )
-
-        shark_module = SharkInference(
-            model_mlir,
-            func_name,
-            device=device,
-            mlir_dialect="linalg",
-            is_benchmark=self.benchmark,
-        )
-        shark_module.compile()
-        results = shark_module.forward(input)
-        assert True == compare_tensors(act_out, results)
-
-        if self.benchmark == True:
-            shark_module.shark_runner.benchmark_all_csv(
-                (input),
-                "wide_resnet50_2",
-                dynamic,
-                device,
-                "torch",
-            )
-
-
-class WideResnet50ModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = WideResnet50ModuleTester(self)
-        self.module_tester.save_mlir = pytestconfig.getoption("save_mlir")
-        self.module_tester.save_vmfb = pytestconfig.getoption("save_vmfb")
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-
-        if device in ["metal", "vulkan"]:
-            if dynamic == True:
-                pytest.xfail(
-                    reason="https://github.com/nod-ai/SHARK/issues/309"
-                )
-
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tank/xlm-roberta-base_tf/xlm-roberta-base_tf_test.py
+++ b/tank/xlm-roberta-base_tf/xlm-roberta-base_tf_test.py
@@ -1,56 +0,0 @@
-from shark.iree_utils._common import check_device_drivers, device_driver_info
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_tf_model
-from shark.iree_utils.vulkan_utils import get_vulkan_triple_flag
-from tank.test_utils import get_valid_test_params, shark_test_name_func
-from parameterized import parameterized
-
-import iree.compiler as ireec
-import unittest
-import pytest
-import numpy as np
-
-
-class XLMRobertaModuleTester:
-    def __init__(
-        self,
-        benchmark=False,
-    ):
-        self.benchmark = benchmark
-
-    def create_and_check_module(self, dynamic, device):
-        model, func_name, inputs, golden_out = download_tf_model(
-            "xlm-roberta-base"
-        )
-
-        shark_module = SharkInference(
-            model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        result = shark_module.forward(inputs)
-        np.testing.assert_allclose(
-            result, golden_out, rtol=1e-02, atol=1e-01, verbose=True
-        )
-
-
-class XLMRobertaModuleTest(unittest.TestCase):
-    @pytest.fixture(autouse=True)
-    def configure(self, pytestconfig):
-        self.module_tester = XLMRobertaModuleTester(self)
-        self.module_tester.benchmark = pytestconfig.getoption("benchmark")
-
-    param_list = get_valid_test_params()
-
-    @parameterized.expand(param_list, name_func=shark_test_name_func)
-    def test_module(self, dynamic, device):
-        if device == "cuda":
-            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
-        elif device in ["metal", "vulkan"]:
-            if dynamic == False:
-                if "m1-moltenvk-macos" in get_vulkan_triple_flag():
-                    pytest.xfail(reason="M1: CompilerToolError | M2: Pass")
-        self.module_tester.create_and_check_module(dynamic, device)
-
-
-if __name__ == "__main__":
-    unittest.main()