Add pytest option for updating tank and fix save_mlir function. (#413)

* Use IREE tf tools to save .mlir modules when generating shark_tank.
* Add option to pytest for enabling auto-updates to local shark tank.
* xfail mobilenet torch on cpu, cuda and fix CI macOS setup.
* Update test-models.yml to disable macOS vulkan CI.
2026-01-10 06:17:55 -05:00 · 2022-10-25 10:59:18 -05:00
parent f97b8fffed
commit 9956099516
9 changed files with 67 additions and 40 deletions
--- a/.github/workflows/test-models.yml
+++ b/.github/workflows/test-models.yml
@@ -36,6 +36,8 @@ jobs:
            suite: cuda
          - os: ubuntu-latest
            suite: cpu
+          - os: MacStudio
+            suite: vulkan
          - os: MacStudio
            suite: cuda
          - os: MacStudio
@@ -96,7 +98,7 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cpu
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cpu --update_tank
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv

@@ -106,7 +108,7 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --benchmark --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k cuda
+        pytest --benchmark --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k cuda --update_tank
        gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
        gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv

@@ -114,7 +116,7 @@ jobs:
      if: matrix.suite == 'vulkan' && matrix.os == 'MacStudio'
      run: |
        cd $GITHUB_WORKSPACE
-        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
+        PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
        echo "VULKAN SDK PATH wo setup: $VULKAN_SDK"
        cd /Users/anush/VulkanSDK/1.3.224.1/
@@ -123,13 +125,7 @@ jobs:
        echo "VULKAN SDK PATH with setup: $VULKAN_SDK"
        echo $PATH
        pip list | grep -E "torch|iree"
-        pip uninstall -y torch iree-compiler iree-runtime
-        pip install https://download.pytorch.org/whl/nightly/cpu/torch-1.14.0.dev20221010-cp310-none-macosx_11_0_arm64.whl
-        pip install https://github.com/llvm/torch-mlir/releases/download/oneshot-20221011.55/torch_mlir-20221011.55-cp310-cp310-macosx_11_0_universal2.whl
-        pip install https://github.com/nod-ai/SHARK-Runtime/releases/download/candidate-20221011.179/iree_compiler-20221011.179-cp310-cp310-macosx_11_0_universal2.whl
-        pip install https://github.com/nod-ai/SHARK-Runtime/releases/download/candidate-20221011.179/iree_runtime-20221011.179-cp310-cp310-macosx_11_0_universal2.whl
-        pip list | grep -E "torch|iree"
-        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush" tank/test_models.py -k vulkan
+        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/Volumes/builder/anush/shark_cache" tank/test_models.py -k vulkan --update_tank

    - name: Validate Vulkan Models (a100)
      if: matrix.suite == 'vulkan' && matrix.os != 'MacStudio'
@@ -137,4 +133,4 @@ jobs:
        cd $GITHUB_WORKSPACE
        PYTHON=python${{ matrix.python-version }} BENCHMARK=1 IMPORTER=1 ./setup_venv.sh
        source shark.venv/bin/activate
-        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="/data/anush" tank/test_models.py -k vulkan
+        pytest --ci --ci_sha=${SHORT_SHA} -s --local_tank_cache="/data/anush/shark_cache" tank/test_models.py -k vulkan --update_tank
--- a/conftest.py
+++ b/conftest.py
@@ -36,6 +36,12 @@ def pytest_addoption(parser):
        default="False",
        help="Enables uploading of reproduction artifacts upon test case failure during iree-compile or validation. Must be passed with --ci_sha option ",
    )
+    parser.addoption(
+        "--update_tank",
+        action="store_true",
+        default="False",
+        help="Update local shark tank with latest artifacts.",
+    )
    parser.addoption(
        "--ci_sha",
        action="store",
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,9 +4,9 @@ requires = [
    "wheel",
    "packaging",

-    "numpy==1.22.4",
-    "torch-mlir>=20220428.420",
-    "iree-compiler>=20220427.13",
-    "iree-runtime>=20220427.13",
+    "numpy>=1.22.4",
+    "torch-mlir>=20221021.633",
+    "iree-compiler>=20221022.190",
+    "iree-runtime>=20221022.190",
 ]
 build-backend = "setuptools.build_meta"
--- a/requirements-importer-macos.txt
+++ b/requirements-importer-macos.txt
@@ -1,8 +1,8 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+-f https://download.pytorch.org/whl/nightly/cpu/
 --pre

 numpy
-torch
+torch==1.14.0.dev20221021
 torchvision

 tqdm
--- a/requirements-importer.txt
+++ b/requirements-importer.txt
@@ -32,7 +32,6 @@ lit
 pyyaml
 python-dateutil
 sacremoses
-chardet

 # web dependecies.
 gradio
--- a/setup.py
+++ b/setup.py
@@ -10,8 +10,8 @@ PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.4"
 backend_deps = []
 if "NO_BACKEND" in os.environ.keys():
    backend_deps = [
-        "iree-compiler>=20220427.13",
-        "iree-runtime>=20220427.13",
+        "iree-compiler>=20221022.190",
+        "iree-runtime>=20221022.190",
    ]

 setup(
@@ -33,11 +33,11 @@ setup(
        "Operating System :: OS Independent",
    ],
    packages=find_packages(exclude=("examples")),
-    python_requires=">=3.7",
+    python_requires=">=3.9",
    install_requires=[
        "numpy",
        "PyYAML",
-        "torch-mlir>=20220428.420",
+        "torch-mlir>=20221021.633",
    ]
    + backend_deps,
 )
--- a/setup_venv.sh
+++ b/setup_venv.sh
@@ -76,11 +76,15 @@ fi
 $PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
 $PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
 if [ "$torch_mlir_bin" = true ]; then
-  $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
-  if [ $? -eq 0 ];then
-    echo "Successfully Installed torch-mlir"
+  if [[ $(uname -s) = 'Darwin' ]]; then
+    echo "MacOS detected. Please install torch-mlir from source or .whl, as dependency problems may occur otherwise."
  else
-    echo "Could not install torch-mlir" >&2
+    $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
+    if [ $? -eq 0 ];then
+      echo "Successfully Installed torch-mlir"
+    else
+      echo "Could not install torch-mlir" >&2
+    fi
  fi
 else
  echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
@@ -109,6 +113,7 @@ if [[ ! -z "${IMPORTER}" ]]; then
    echo "${Yellow}macOS detected.. installing macOS importer tools"
    #Conda seems to have some problems installing these packages and hope they get resolved upstream.
    $PYTHON -m pip install --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+    $PYTHON -m pip install https://github.com/llvm/torch-mlir/releases/download/snapshot-20221024.636/torch_mlir-20221024.636-cp310-cp310-macosx_11_0_universal2.whl
  fi
 fi

--- a/shark/shark_importer.py
+++ b/shark/shark_importer.py
@@ -75,14 +75,17 @@ class SharkImporter:
            self.module, self.inputs, is_dynamic, tracing_required
        )

-    def _tf_mlir(self, func_name):
+    def _tf_mlir(self, func_name, save_dir="./shark_tmp/"):
        from iree.compiler import tf as tfc

        return tfc.compile_module(
-            self.module, exported_names=[func_name], import_only=True
+            self.module,
+            exported_names=[func_name],
+            import_only=True,
+            output_file=save_dir,
        )

-    def _tflite_mlir(self, func_name):
+    def _tflite_mlir(self, func_name, save_dir="./shark_tmp/"):
        from iree.compiler import tflite as tflitec
        from shark.iree_utils._common import IREE_TARGET_MAP

@@ -90,6 +93,7 @@ class SharkImporter:
            self.raw_model_file,  # in tflite, it is a path to .tflite file, not a tflite interpreter
            input_type="tosa",
            import_only=True,
+            output_file=save_dir,
        )
        return self.mlir_model

@@ -99,6 +103,7 @@ class SharkImporter:
        is_dynamic=False,
        tracing_required=False,
        func_name="forward",
+        save_dir="./shark_tmp/",
    ):
        if self.frontend in ["torch", "pytorch"]:
            if self.inputs == None:
@@ -108,10 +113,10 @@ class SharkImporter:
                sys.exit(1)
            return self._torch_mlir(is_dynamic, tracing_required), func_name
        if self.frontend in ["tf", "tensorflow"]:
-            return self._tf_mlir(func_name), func_name
+            return self._tf_mlir(func_name, save_dir), func_name
        if self.frontend in ["tflite", "tf-lite"]:
            func_name = "main"
-            return self._tflite_mlir(func_name), func_name
+            return self._tflite_mlir(func_name, save_dir), func_name

    # Converts the frontend specific tensors into np array.
    def convert_to_numpy(self, array_tuple: tuple):
@@ -130,7 +135,13 @@ class SharkImporter:
        outputs_name = "golden_out.npz"
        func_file_name = "function_name"
        model_name_mlir = model_name + "_" + self.frontend + ".mlir"
-        inputs = [x.cpu().detach() for x in inputs]
+        try:
+            inputs = [x.cpu().detach() for x in inputs]
+        except AttributeError:
+            try:
+                inputs = [x.numpy() for x in inputs]
+            except AttributeError:
+                inputs = [x for x in inputs]
        np.savez(os.path.join(dir, inputs_name), *inputs)
        np.savez(os.path.join(dir, outputs_name), *outputs)
        np.save(os.path.join(dir, func_file_name), np.array(func_name))
@@ -138,12 +149,8 @@ class SharkImporter:
        mlir_str = mlir_data
        if self.frontend == "torch":
            mlir_str = mlir_data.operation.get_asm()
-        elif self.frontend == "tf":
-            mlir_str = mlir_data.decode("latin-1")
-        elif self.frontend == "tflite":
-            mlir_str = mlir_data.decode("latin-1")
-        with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
-            mlir_file.write(mlir_str)
+            with open(os.path.join(dir, model_name_mlir), "w") as mlir_file:
+                mlir_file.write(mlir_str)

        return

@@ -160,9 +167,13 @@ class SharkImporter:
                f"There is no input provided: {self.inputs}, please provide inputs or simply run import_mlir."
            )
            sys.exit(1)
-
+        model_name_mlir = model_name + "_" + self.frontend + ".mlir"
+        artifact_path = os.path.join(dir, model_name_mlir)
        imported_mlir = self.import_mlir(
-            is_dynamic, tracing_required, func_name
+            is_dynamic,
+            tracing_required,
+            func_name,
+            save_dir=artifact_path,
        )
        # TODO: Make sure that any generic function name is accepted. Currently takes in the default function names.
        # TODO: Check for multiple outputs.
--- a/tank/test_models.py
+++ b/tank/test_models.py
@@ -131,6 +131,7 @@ class SharkModuleTester:

    def create_and_check_module(self, dynamic, device):
        shark_args.local_tank_cache = self.local_tank_cache
+        shark_args.update_tank = self.update_tank
        if self.config["framework"] == "tf":
            model, func_name, inputs, golden_out = download_tf_model(
                self.config["model_name"],
@@ -266,6 +267,9 @@ class SharkModuleTest(unittest.TestCase):
        self.module_tester.local_tank_cache = self.pytestconfig.getoption(
            "local_tank_cache"
        )
+        self.module_tester.update_tank = self.pytestconfig.getoption(
+            "update_tank"
+        )
        self.module_tester.tank_url = self.pytestconfig.getoption("tank_url")
        if (
            config["model_name"] == "distilbert-base-uncased"
@@ -350,6 +354,7 @@ class SharkModuleTest(unittest.TestCase):
        ):
            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/390")
        if config["model_name"] == "squeezenet1_0" and device in [
+            "cpu",
            "metal",
            "vulkan",
        ]:
@@ -392,6 +397,11 @@ class SharkModuleTest(unittest.TestCase):
            "microsoft/resnet-50",
        ] and device in ["metal", "vulkan"]:
            pytest.xfail(reason="Vulkan Numerical Error (mostly conv)")
+        if config["model_name"] == "mobilenet_v3_small" and device in [
+            "cuda",
+            "cpu",
+        ]:
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/424")
        if config["framework"] == "tf" and dynamic == True:
            pytest.skip(
                reason="Dynamic shapes not supported for this framework."