Revert "Move most xfails to entries in tank/all_models.csv and temporarily remove multiprocessing and TF gpu support. (#602)" (#622)

This reverts commit fe618811ee.
Author: powderluv
Date: 2022-12-13 21:49:46 -08:00
Committed by: GitHub
Parent: fe618811ee
Commit: d946cffabc
13 changed files with 168 additions and 134 deletions
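In summary: the revert drops the NIGHTLY=1 plumbing from the nightly workflows and setup_venv.sh, deletes build_tools/shark_versions.txt and the version pins read from it (returning to unpinned nightly packages), strips the per-model xfail columns from tank/all_models.csv, restores the explicit pytest.xfail/pytest.skip guards in tank/test_models.py, and brings back multiprocessing-based test isolation and TensorFlow GPU placement in the benchmark runner.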


@@ -122,7 +122,7 @@ jobs:
 continue-on-error: true
 run: |
 cd $GITHUB_WORKSPACE
-USE_IREE=1 VENV_DIR=iree.venv NIGHTLY=1 ./setup_venv.sh
+USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
 source iree.venv/bin/activate
 package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
 SHARK_PACKAGE_VERSION=${package_version} \
@@ -146,7 +146,7 @@ jobs:
 if: ${{ matrix.backend == 'SHARK' }}
 run: |
 cd $GITHUB_WORKSPACE
-NIGHTLY=1 ./setup_venv.sh
+./setup_venv.sh
 source shark.venv/bin/activate
 package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
 SHARK_PACKAGE_VERSION=${package_version} \
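Both hunks keep the date-stamped package version. For reference, a Python equivalent of the shell `printf '%(%Y%m%d)T'` recipe (the run number is injected by GitHub Actions; the literal below is a placeholder):

```python
import time

run_number = 123  # placeholder for ${{ github.run_number }}
package_version = f"{time.strftime('%Y%m%d')}.{run_number}"
print(package_version)  # e.g. 20221213.123
```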


@@ -123,7 +123,7 @@ jobs:
 echo "VULKAN SDK PATH with setup: $VULKAN_SDK"
 echo $PATH
 pip list | grep -E "torch|iree"
-pytest -s --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
+pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
 - name: Validate Vulkan Models (a100)
 if: matrix.suite == 'vulkan' && matrix.os != 'MacStudio'

.gitignore

@@ -164,7 +164,6 @@ cython_debug/
 shark_tmp/
 *.vmfb
 .use-iree
-tank/dict_configs.py
 # ORT related artefacts
 cache_models/


@@ -1,8 +0,0 @@
-# IREE Compiler/ Runtime Version:
-20221207.350
-# SHARK Compiler/ Runtime Version:
-20221207.236
-# Torch-MLIR Version for IREE:
-20221207.680
-# Torch-MLIR Version for SHARK:
-20221207.680


@@ -38,7 +38,7 @@ def pytest_addoption(parser):
 )
 parser.addoption(
 "--update_tank",
-action="store_false",
+action="store_true",
 default="False",
 help="Update local shark tank with latest artifacts.",
 )
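The reverted `action="store_false"` inverted the flag: passing `--update_tank` stored False, so a tank update could never be requested from the command line. A minimal sketch of the restored behavior with plain argparse (pytest's parser wraps the same semantics; the boolean default here stands in for the file's string `"False"`):

```python
import argparse

parser = argparse.ArgumentParser()
# store_true: the bare flag flips the value on; omitting it keeps the default.
parser.add_argument("--update_tank", action="store_true", default=False,
                    help="Update local shark tank with latest artifacts.")

print(parser.parse_args([]).update_tank)                 # False
print(parser.parse_args(["--update_tank"]).update_tank)  # True
```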


@@ -1,3 +1,3 @@
 [pytest]
 addopts = --verbose -p no:warnings
-norecursedirs = inference tank/tflite examples benchmarks shark
+norecursedirs = inference tank/tflite


@@ -6,18 +6,12 @@ import os
 with open("README.md", "r", encoding="utf-8") as fh:
 long_description = fh.read()
-with open("build_tools/shark_versions.txt", "r") as sv:
-lines = [line.rstrip() for line in sv]
-TM_VERSION = lines[7]
-IREE_VERSION = lines[3]
 PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
 backend_deps = []
 if "NO_BACKEND" in os.environ.keys():
 backend_deps = [
-f"iree-compiler=={IREE_VERSION}",
-f"iree-runtime>={IREE_VERSION}",
+"iree-compiler>=20221022.190",
+"iree-runtime>=20221022.190",
 ]
 setup(
@@ -43,7 +37,7 @@ setup(
 install_requires=[
 "numpy",
 "PyYAML",
-f"torch-mlir=={TM_VERSION}",
+"torch-mlir>=20221021.633",
 ]
 + backend_deps,
 )
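The deleted lines indexed into the also-deleted build_tools/shark_versions.txt by fixed, 1-indexed line number (note the deleted code appears to have read its `IREE_VERSION` from line 4, the SHARK runtime line). A sketch of that pinning scheme, for the record:

```python
# Removed pinning scheme: versions live on fixed lines of a text file.
with open("build_tools/shark_versions.txt") as sv:
    lines = [line.rstrip() for line in sv]

IREE_VERSION = lines[3]  # 4th line, e.g. 20221207.236
TM_VERSION = lines[7]    # 8th line, e.g. 20221207.680
backend_deps = [
    f"iree-compiler=={IREE_VERSION}",
    f"iree-runtime>={IREE_VERSION}",
]
```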


@@ -76,15 +76,12 @@ fi
 $PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
 $PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
 if [ "$torch_mlir_bin" = true ]; then
-TM_VERSION=$(sed '8q;d' build_tools/shark_versions.txt)
 if [[ $(uname -s) = 'Darwin' ]]; then
 echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
-$PYTHON -m pip install --pre --no-cache-dir torch-mlir==${TM_VERSION} -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
-elif [[ ! -z "${NIGHTLY}" ]]; then
-$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
+$PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
 else
-$PYTHON -m pip install --pre torch-mlir==${TM_VERSION} -f https://llvm.github.io/torch-mlir/package-index/
-if [ $? -eq 0 ]; then
+$PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
+if [ $? -eq 0 ];then
 echo "Successfully Installed torch-mlir"
 else
 echo "Could not install torch-mlir" >&2
@@ -99,17 +96,13 @@ fi
 if [[ -z "${USE_IREE}" ]]; then
 rm .use-iree
 RUNTIME="https://nod-ai.github.io/SHARK-Runtime/pip-release-links.html"
-RUNTIME_VERSION=$(sed '4q;d' build_tools/shark_versions.txt)
-TM_VERSION=$(sed '8q;d' build_tools/shark_versions.txt)
 else
 touch ./.use-iree
 RUNTIME="https://iree-org.github.io/iree/pip-release-links.html"
-RUNTIME_VERSION=$(sed '2q;d' build_tools/shark_versions.txt)
-TM_VERSION=$(sed '6q;d' build_tools/shark_versions.txt)
 fi
 if [[ -z "${NO_BACKEND}" ]]; then
 echo "Installing ${RUNTIME}..."
-$PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler==${RUNTIME_VERSION} iree-runtime==${RUNTIME_VERSION}
+$PYTHON -m pip install --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
 else
 echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
 fi
@@ -129,7 +122,6 @@ fi
 $PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f https://download.pytorch.org/whl/nightly/torch/
 if [[ $(uname -s) = 'Linux' && ! -z "${BENCHMARK}" ]]; then
 $PYTHON -m pip uninstall -y torch torchvision
 $PYTHON -m pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu117
@@ -150,15 +142,6 @@ if [[ ! -z "${ONNX}" ]]; then
 fi
 fi
-if [[ ! -z "${NIGHTLY}" ]]; then
-$PYTHON -m pip install --upgrade --pre iree-compiler iree-runtime torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f $RUNTIME -f https://download.pytorch.org/whl/nightly/torch/
-if [ $? -eq 0 ];then
-echo "Successfully Installed latest packages for nightly job."
-else
-echo "Could not install latest IREE and Torch-MLIR." >&2
-fi
-fi
 if [[ -z "${CONDA_PREFIX}" ]]; then
 echo "${Green}Before running examples activate venv with:"
 echo " ${Green}source $VENV_DIR/bin/activate"


@@ -108,7 +108,7 @@ parser.add_argument(
 parser.add_argument(
 "--enable_conv_transform",
 default=False,
-action="store_false",
+action="store",
 help="Enables the --iree-flow-enable-conv-nchw-to-nhwc-transform flag.",
 )
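With `action="store"` restored, the option takes an explicit value, and argparse stores it as a string: any non-empty value, including "False", is truthy. A quick sketch of the restored semantics (plain argparse for illustration):

```python
import argparse

p = argparse.ArgumentParser()
p.add_argument("--enable_conv_transform", default=False, action="store")

args = p.parse_args(["--enable_conv_transform", "True"])
print(repr(args.enable_conv_transform))  # 'True' -- a string, not a bool
```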


@@ -100,7 +100,6 @@ class SharkBenchmarkRunner(SharkRunner):
 def benchmark_frontend(self, modelname):
 if self.mlir_dialect in ["linalg", "torch"]:
 return self.benchmark_torch(modelname)
 elif self.mlir_dialect in ["mhlo", "tf"]:
 return self.benchmark_tf(modelname)
@@ -139,21 +138,9 @@ class SharkBenchmarkRunner(SharkRunner):
 def benchmark_tf(self, modelname):
 import tensorflow as tf
-visible_default = tf.config.list_physical_devices("GPU")
-try:
-tf.config.set_visible_devices([], "GPU")
-visible_devices = tf.config.get_visible_devices()
-for device in visible_devices:
-assert device.device_type != "GPU"
-except:
-# Invalid device or cannot modify virtual devices once initialized.
-pass
 from tank.model_utils_tf import get_tf_model
-# tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
-tf_device = "/CPU:0"
+tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
 with tf.device(tf_device):
 model, input, = get_tf_model(
 modelname
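The deleted guard is TensorFlow's standard recipe for hiding GPUs before any device context exists; the revert drops it and instead places the benchmark on `/GPU:0` whenever the requested device is cuda. For reference, the guard in isolation:

```python
import tensorflow as tf

# Hide every GPU from TensorFlow; must run before any op creates a context.
try:
    tf.config.set_visible_devices([], "GPU")
    for device in tf.config.get_visible_devices():
        assert device.device_type != "GPU"
except (ValueError, RuntimeError):
    # Invalid device, or visible devices were modified after initialization.
    pass
```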


@@ -118,7 +118,9 @@ def check_dir_exists(model_name, frontend="torch", dynamic=""):
and os.path.isfile(os.path.join(model_dir, "golden_out.npz"))
and os.path.isfile(os.path.join(model_dir, "hash.npy"))
):
print(f"""Using cached models from {WORKDIR}...""")
print(
f"""Using cached models from {WORKDIR}..."""
)
return True
return False


@@ -1,34 +1,35 @@
-resnet50,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error: mostly conv"
-albert-base-v2,mhlo,tf,1e-2,1e-2,default,None,False,False,False,""
-roberta-base,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,""
-bert-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-camembert-base,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-dbmdz/convbert-base-turkish-cased,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,True,False,True,"https://github.com/iree-org/iree/issues/9971"
-distilbert-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-facebook/convnext-tiny-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,False,True,True,"https://github.com/nod-ai/SHARK/issues/311 & https://github.com/nod-ai/SHARK/issues/342"
-funnel-transformer/small,mhlo,tf,1e-2,1e-3,default,None,False,True,True,"https://github.com/nod-ai/SHARK/issues/201"
-google/electra-small-discriminator,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-google/mobilebert-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-google/vit-base-patch16-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
-microsoft/MiniLM-L12-H384-uncased,mhlo,tf,1e-2,1e-3,tf_hf,None,False,False,False,""
-microsoft/layoutlm-base-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,""
-microsoft/mpnet-base,mhlo,tf,1e-2,1e-2,default,None,False,False,False,""
-albert-base-v2,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
-alexnet,linalg,torch,1e-2,1e-3,default,None,False,False,True,"Assertion Error: Zeros Output"
-bert-base-cased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
-bert-base-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,""
-facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"Fails during iree-compile."
-google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/311"
-microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/390"
-microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,True,""
-microsoft/resnet-50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
-google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,True,"https://github.com/nod-ai/SHARK/issues/344"
-mobilenet_v3_small,linalg,torch,1e-1,1e-2,default,nhcw-nhwc,False,True,True,"https://github.com/nod-ai/SHARK/issues/388"
-nvidia/mit-b0,linalg,torch,1e-2,1e-3,default,None,True,True,True,"https://github.com/nod-ai/SHARK/issues/343"
-resnet101,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
-resnet18,linalg,torch,1e-2,1e-3,default,None,True,True,True,""
-resnet50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
-squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/388"
-wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"Vulkan Numerical Error (mostly conv)"
-efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/575"
-mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,True,"https://github.com/nod-ai/SHARK/issues/388"
+resnet50,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc
+albert-base-v2,mhlo,tf,1e-2,1e-2,default,None
+roberta-base,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc
+bert-base-uncased,mhlo,tf,1e-2,1e-3,default,None
+camembert-base,mhlo,tf,1e-2,1e-3,default,None
+dbmdz/convbert-base-turkish-cased,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc
+distilbert-base-uncased,mhlo,tf,1e-2,1e-3,default,None
+facebook/convnext-tiny-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc,
+funnel-transformer/small,mhlo,tf,1e-2,1e-3,default,None
+google/electra-small-discriminator,mhlo,tf,1e-2,1e-3,default,None
+google/mobilebert-uncased,mhlo,tf,1e-2,1e-3,default,None
+google/vit-base-patch16-224,mhlo,tf,1e-2,1e-3,tf_vit,nhcw-nhwc
+hf-internal-testing/tiny-random-flaubert,mhlo,tf,1e-2,1e-3,default,None
+microsoft/MiniLM-L12-H384-uncased,mhlo,tf,1e-2,1e-3,tf_hf,None
+microsoft/layoutlm-base-uncased,mhlo,tf,1e-2,1e-3,default,None
+microsoft/mpnet-base,mhlo,tf,1e-2,1e-2,default,None
+albert-base-v2,linalg,torch,1e-2,1e-3,default,None
+alexnet,linalg,torch,1e-2,1e-3,default,None
+bert-base-cased,linalg,torch,1e-2,1e-3,default,None
+bert-base-uncased,linalg,torch,1e-2,1e-3,default,None
+facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default,None
+microsoft/resnet-50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default,None
+mobilenet_v3_small,linalg,torch,1e-1,1e-2,default,nhcw-nhwc
+nvidia/mit-b0,linalg,torch,1e-2,1e-3,default,None
+resnet101,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+resnet18,linalg,torch,1e-2,1e-3,default,None
+resnet50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc
+efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc
+mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc


@@ -39,10 +39,6 @@ def load_csv_and_convert(filename, gen=False):
 "atol": float(row[4]),
 "out_type": row[5],
 "flags": row[6],
-"xfail_cpu": row[7],
-"xfail_cuda": row[8],
-"xfail_vkm": row[9],
-"xfail_reason": row[10],
 }
 )
 # This is a pytest workaround
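With the four xfail fields gone, each all_models.csv row is back to seven columns. A sketch of the resulting row-to-config mapping (the key names for columns 0-3 are inferred from how the configs are used elsewhere in this file, so treat them as assumptions):

```python
import csv

configs = []
with open("tank/all_models.csv") as f:
    for row in csv.reader(f):
        configs.append({
            "model_name": row[0],   # e.g. "resnet50"
            "dialect": row[1],      # assumed key; "mhlo" or "linalg"
            "framework": row[2],    # "tf" or "torch"
            "rtol": float(row[3]),  # assumed key for the first tolerance
            "atol": float(row[4]),
            "out_type": row[5],
            "flags": row[6],
        })
```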
@@ -178,25 +174,10 @@ class SharkModuleTester:
 if self.ci == True:
 self.upload_repro()
 if self.benchmark == True:
-# p = multiprocessing.Process(
-# target=self.benchmark_module,
-# args=(shark_module, inputs, dynamic, device),
-# )
-# p.start()
-# p.join()
 self.benchmark_module(shark_module, inputs, dynamic, device)
 raise
 if self.benchmark == True:
-# We must create a new process each time we benchmark a model to allow
-# for Tensorflow to release GPU resources. Using the same process to
-# benchmark multiple models leads to OOM.
-# p = multiprocessing.Process(
-# target=self.benchmark_module,
-# args=(shark_module, inputs, dynamic, device),
-# )
-# p.start()
-# p.join()
 self.benchmark_module(shark_module, inputs, dynamic, device)
 if self.save_repro == True:
@@ -252,6 +233,16 @@ class SharkModuleTester:
 return expected, logits
+def run_test(module_tester, dynamic, device):
+tempdir = tempfile.TemporaryDirectory(
+prefix=module_tester.tmp_prefix, dir="./shark_tmp/"
+)
+module_tester.temp_dir = tempdir.name
+with ireec.tools.TempFileSaver(tempdir.name):
+module_tester.create_and_check_module(dynamic, device)
 class SharkModuleTest(unittest.TestCase):
 @pytest.fixture(autouse=True)
 def configure(self, pytestconfig):
@@ -282,21 +273,15 @@ class SharkModuleTest(unittest.TestCase):
 "update_tank"
 )
 self.module_tester.tank_url = self.pytestconfig.getoption("tank_url")
-# Add pytest xfail marks based on entries to tank/all_models.csv.
-# if config["framework"] == "torch":
-# pytest.xfail(reason="roll pytorch issue")
-if config["xfail_cpu"] == "True" and device == "cpu":
-pytest.xfail(reason=config["xfail_reason"])
-if config["xfail_cuda"] == "True" and device == "cuda":
-pytest.xfail(reason=config["xfail_reason"])
-if config["xfail_vkm"] == "True" and device in ["metal", "vulkan"]:
-pytest.xfail(reason=config["xfail_reason"])
-# Special cases that need to be marked.
 if config["model_name"] == "efficientnet-v2-s" and device in [
 "metal",
 "vulkan",
 ]:
 pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/575")
+if config[
+"model_name"
+] == "google/vit-base-patch16-224" and device in ["cuda"]:
+pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
 if config["model_name"] == "resnet50" and device in [
 "metal",
 "vulkan",
@@ -306,6 +291,78 @@ class SharkModuleTest(unittest.TestCase):
 pytest.xfail(
 reason="M2: Assert Error & M1: CompilerToolError"
 )
+if config[
+"model_name"
+] == "dbmdz/convbert-base-turkish-cased" and device in [
+"metal",
+"vulkan",
+]:
+pytest.xfail(
+reason="Issue: https://github.com/iree-org/iree/issues/9971"
+)
+if config["model_name"] == "facebook/convnext-tiny-224" and device in [
+"cuda",
+"metal",
+"vulkan",
+]:
+pytest.xfail(
+reason="https://github.com/nod-ai/SHARK/issues/311, https://github.com/nod-ai/SHARK/issues/342"
+)
+if config["model_name"] == "funnel-transformer/small" and device in [
+"cuda",
+"metal",
+"vulkan",
+]:
+pytest.xfail(
+reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
+)
+if config["model_name"] == "nvidia/mit-b0":
+pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/343")
+if (
+config["model_name"] == "google/mobilebert-uncased"
+and device in ["metal", "vulkan"]
+and config["framework"] == "torch"
+):
+pytest.xfail(
+reason="Numerics issues -- https://github.com/nod-ai/SHARK/issues/344"
+)
+if (
+config["model_name"] == "facebook/deit-small-distilled-patch16-224"
+and device == "cuda"
+):
+pytest.xfail(
+reason="Fails during iree-compile without reporting diagnostics."
+)
+if (
+config["model_name"]
+== "microsoft/beit-base-patch16-224-pt22k-ft22k"
+and device == "cuda"
+):
+pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/390")
+if config["model_name"] == "squeezenet1_0" and device in [
+"metal",
+"vulkan",
+]:
+pytest.xfail(
+reason="Numerics Issues: https://github.com/nod-ai/SHARK/issues/388"
+)
+if config["model_name"] == "mobilenet_v3_small" and device not in [
+"cpu"
+]:
+pytest.xfail(
+reason="Numerics Issues: https://github.com/nod-ai/SHARK/issues/388"
+)
+if config["model_name"] == "mnasnet1_0" and device not in [
+"cpu",
+"cuda",
+]:
+pytest.xfail(
+reason="Numerics Issues: https://github.com/nod-ai/SHARK/issues/388"
+)
+if config["model_name"] == "hf-internal-testing/tiny-random-flaubert":
+pytest.xfail(reason="Transformers API mismatch")
+if config["model_name"] == "alexnet" and device in ["metal", "vulkan"]:
+pytest.xfail(reason="Assertion Error: Zeros Output")
 if (
 config["model_name"] == "camembert-base"
 and dynamic == False
@@ -322,6 +379,19 @@ class SharkModuleTest(unittest.TestCase):
 pytest.xfail(
 reason="chlo.broadcast_compare failed to satify constraint"
 )
+if config["model_name"] in [
+"microsoft/MiniLM-L12-H384-uncased",
+"wide_resnet50_2",
+"resnet50",
+"resnet18",
+"resnet101",
+"microsoft/resnet-50",
+] and device in ["metal", "vulkan"]:
+pytest.xfail(reason="Vulkan Numerical Error (mostly conv)")
+if config[
+"model_name"
+] == "dbmdz/convbert-base-turkish-cased" and device in ["cuda", "cpu"]:
+pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/463")
 if (
 config["model_name"]
 in [
@@ -344,6 +414,11 @@ class SharkModuleTest(unittest.TestCase):
 pytest.xfail(
 reason="Numerics issues: https://github.com/nod-ai/SHARK/issues/476"
 )
+if config["framework"] == "tf" and dynamic == True:
+pytest.skip(
+reason="Dynamic shapes not supported for this framework."
+)
 safe_name = (
 f"{config['model_name']}_{config['framework']}_{dynamic}_{device}"
 )
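All of the restored guards use pytest's imperative escape hatches: `pytest.xfail()` aborts the test body and records an expected failure, while `pytest.skip()` records a skip (used above for dynamic shapes on TF). A self-contained sketch:

```python
import pytest

def test_example_model():
    device = "vulkan"  # supplied by the test matrix in the real suite
    if device in ("metal", "vulkan"):
        pytest.xfail(reason="Vulkan Numerical Error (mostly conv)")
    assert 1 + 1 == 2  # the real check; never reached on xfailed devices
```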
@@ -352,10 +427,11 @@ class SharkModuleTest(unittest.TestCase):
 if not os.path.isdir("./shark_tmp/"):
 os.mkdir("./shark_tmp/")
-tempdir = tempfile.TemporaryDirectory(
-prefix=self.module_tester.tmp_prefix, dir="./shark_tmp/"
+# We must create a new process each time we benchmark a model to allow
+# for Tensorflow to release GPU resources. Using the same process to
+# benchmark multiple models leads to OOM.
+p = multiprocessing.Process(
+target=run_test, args=(self.module_tester, dynamic, device)
 )
-self.module_tester.temp_dir = tempdir.name
-with ireec.tools.TempFileSaver(tempdir.name):
-self.module_tester.create_and_check_module(dynamic, device)
+p.start()
+p.join()
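The restored comment states the design rationale: TensorFlow holds GPU memory for the life of a process, so each model is tested in a child process that releases everything on exit. A minimal, self-contained sketch of the pattern (the worker here is a stand-in for run_test):

```python
import multiprocessing

def run_one(model_name: str, device: str) -> None:
    # Stand-in for run_test(); the real worker compiles and checks a module,
    # potentially initializing TensorFlow and grabbing GPU memory.
    print(f"checking {model_name} on {device}")

if __name__ == "__main__":
    for name in ["resnet50", "albert-base-v2"]:
        p = multiprocessing.Process(target=run_one, args=(name, "cpu"))
        p.start()
        p.join()  # one child at a time; GPU memory is freed when it exits
```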