Add tf image classification auto model tests (#216)

This commit is contained in:
Chi_Liu
2022-07-27 18:38:13 -07:00
committed by GitHub
parent 4e5c592094
commit 2c7d879a4e
8 changed files with 287 additions and 227 deletions

View File

@@ -4,15 +4,11 @@
# will generate local shark tank folder like this:
# /SHARK
# /gen_shark_tank
# /tflite
# /albert_lite_base
# /...model_name...
# /tf
# /pytorch
# /albert_lite_base
# /...model_name...
#
import os
import urllib.request
import csv
import argparse
from shark.shark_importer import SharkImporter
@@ -35,7 +31,6 @@ WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
def save_torch_model(torch_model_list):
from tank.model_utils import get_hf_model
from tank.model_utils import get_vision_model
import torch
with open(torch_model_list) as csvfile:
torch_reader = csv.reader(csvfile, delimiter=",")
@@ -79,8 +74,8 @@ def save_torch_model(torch_model_list):
def save_tf_model(tf_model_list):
from tank.masked_lm_tf import get_causal_lm_model
from tank.tf.automodelimageclassification import get_causal_image_model
from tank.model_utils_tf import get_causal_lm_model
from tank.model_utils_tf import get_causal_image_model
with open(tf_model_list) as csvfile:
tf_reader = csv.reader(csvfile, delimiter=",")
@@ -187,8 +182,8 @@ if __name__ == "__main__":
if args.tf_model_csv:
save_tf_model(args.tf_model_csv)
# if args.tflite_model_csv:
# save_tflite_model(args.tflite_model_csv)
if args.tflite_model_csv:
save_tflite_model(args.tflite_model_csv)
if args.upload:
print("uploading files to gs://shark_tank/")

View File

@@ -129,7 +129,7 @@ def download_tf_model(model_name):
raise Exception("model not present in the tank. Contact Nod Admin")
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(os.path.join(model_dir, model_name + ".mlir")) as f:
with open(os.path.join(model_dir, model_name + "_tf.mlir")) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))

View File

@@ -0,0 +1,73 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class ConvNextTinyModuleTester:
    """Compiles facebook/convnext-tiny-224 through SHARK and verifies its
    logits against the golden output shipped with the downloaded model."""

    def __init__(
        self,
        benchmark=False,
    ):
        # Flag callers may set to enable benchmarking runs.
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        # `dynamic` is accepted for interface parity with other tank tests;
        # only static shapes are exercised here.
        model, func_name, inputs, golden_out = download_tf_model(
            "facebook/convnext-tiny-224"
        )

        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        # result: array([['logits',
        #        <IREE DeviceArray: shape=[1, 1000], dtype=<class 'numpy.float32'>>]],
        #      dtype=object)
        # Post-process the image-classification output: pull the logits
        # device array out of the ('logits', value) pair; astype() with the
        # array's own dtype materializes it as a host numpy array.
        ir_device_array = result[0][1]
        logits = ir_device_array.astype(ir_device_array.dtype)
        logits = np.squeeze(logits, axis=0)
        # Fail the test on divergence. The original only printed the result
        # of np.allclose, so a numerical mismatch could never fail the test.
        np.testing.assert_allclose(
            golden_out[0], logits, rtol=1e-02, atol=1e-03
        )
class ConvNextTinyModuleTest(unittest.TestCase):
    # Drives ConvNextTinyModuleTester once per backend (cpu / gpu / vulkan).

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        # NOTE(review): `self` (the TestCase) is passed as the `benchmark`
        # constructor argument and then immediately overwritten on the next
        # line, so the argument value is effectively unused — confirm intent.
        self.module_tester = ConvNextTinyModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no GPU driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no Vulkan driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
    # Standalone entry point: run through unittest when not under pytest.
    # dynamic = False
    # device = "cpu"
    # module_tester = ConvNextTinyModuleTester()
    # module_tester.create_and_check_module(dynamic, device)
    unittest.main()

View File

@@ -0,0 +1,70 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class VitBaseModuleTester:
    """Compiles google/vit-base-patch16-224 through SHARK and verifies its
    logits against the golden output shipped with the downloaded model."""

    def __init__(
        self,
        benchmark=False,
    ):
        # Flag callers may set to enable benchmarking runs.
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        # `dynamic` is accepted for interface parity with other tank tests;
        # only static shapes are exercised here.
        model, func_name, inputs, golden_out = download_tf_model(
            "google/vit-base-patch16-224"
        )
        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        # Post-process the image output: result[0] is a ('logits', DeviceArray)
        # pair; astype() with the array's own dtype materializes it on host.
        ir_device_array = result[0][1]
        logits = ir_device_array.astype(ir_device_array.dtype)
        logits = np.squeeze(logits, axis=0)
        # Fail the test on divergence. The original only printed the result
        # of np.allclose, so a numerical mismatch could never fail the test.
        np.testing.assert_allclose(
            golden_out[0], logits, rtol=1e-02, atol=1e-03
        )
class VitBaseModuleTest(unittest.TestCase):
    # Drives VitBaseModuleTester once per backend (cpu / gpu / vulkan).

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        # NOTE(review): `self` (the TestCase) is passed as the `benchmark`
        # constructor argument and then immediately overwritten on the next
        # line, so the argument value is effectively unused — confirm intent.
        self.module_tester = VitBaseModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no GPU driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no Vulkan driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
    # Standalone entry point: runs a single CPU check directly;
    # unittest.main() is deliberately left commented out below.
    dynamic = False
    device = "cpu"
    module_tester = VitBaseModuleTester()
    module_tester.create_and_check_module(dynamic, device)
    # unittest.main()

View File

@@ -1,63 +0,0 @@
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf

# Hide all GPUs from TensorFlow so tracing/inference here is CPU-only.
visible_default = tf.config.list_physical_devices("GPU")
try:
    tf.config.set_visible_devices([], "GPU")
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != "GPU"
except:
    # Invalid device or cannot modify virtual devices once initialized.
    pass

# The max_sequence_length is set small for testing purpose.
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signature: (input_ids, attention_mask).
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForCausalLM

def preprocess_input(
    model_name, text="This is just used to compile the model"
):
    """Tokenize *text* for *model_name*, padded/truncated to MAX_SEQUENCE_LENGTH."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer(
        text,
        padding="max_length",
        return_tensors="tf",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
    )
    return inputs

class MaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(MaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False, num_labels=2
        )
        # Keep only the logits (element 0 of the HF output tuple).
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)

def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
    """Return (model, sample_inputs, reference_output) for *hf_name*."""
    # gpus = tf.config.experimental.list_physical_devices("GPU")
    # for gpu in gpus:
    #     tf.config.experimental.set_memory_growth(gpu, True)
    model = MaskedLM(hf_name)
    encoded_input = preprocess_input(hf_name, text)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

View File

@@ -1,5 +1,3 @@
from shark.shark_inference import SharkInference
import tensorflow as tf
import numpy as np
from transformers import (
@@ -81,3 +79,140 @@ def compare_tensors_tf(tf_tensor, numpy_tensor):
atol = 1e-03
tf_to_numpy = tf_tensor.numpy()
return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)
##################### Tensorflow Hugging Face Masked LM Models ###################################
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf

# Hide all GPUs from TensorFlow so tracing/inference here is CPU-only.
visible_default = tf.config.list_physical_devices("GPU")
try:
    tf.config.set_visible_devices([], "GPU")
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != "GPU"
except:
    # Invalid device or cannot modify virtual devices once initialized.
    pass

# The max_sequence_length is set small for testing purpose.
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signature: (input_ids, attention_mask).
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForCausalLM

def preprocess_input(
    model_name, text="This is just used to compile the model"
):
    """Tokenize *text* for *model_name*, padded/truncated to MAX_SEQUENCE_LENGTH."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer(
        text,
        padding="max_length",
        return_tensors="tf",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
    )
    return inputs

class MaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(MaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False, num_labels=2
        )
        # Keep only the logits (element 0 of the HF output tuple).
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)

def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
    """Return (model, sample_inputs, reference_output) for *hf_name*."""
    # gpus = tf.config.experimental.list_physical_devices("GPU")
    # for gpu in gpus:
    #     tf.config.experimental.set_memory_growth(gpu, True)
    model = MaskedLM(hf_name)
    encoded_input = preprocess_input(hf_name, text)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

##################### Tensorflow Hugging Face Image Classification Models ###################################
from transformers import TFAutoModelForImageClassification
from transformers import ConvNextFeatureExtractor, ViTFeatureExtractor
from transformers import BeitFeatureExtractor, AutoFeatureExtractor
import tensorflow as tf
from PIL import Image
import requests

# Create a set of input signature: one [1, 3, 224, 224] float32 image batch.
# NOTE(review): this rebinds the module-level `inputs_signature` used by the
# masked-LM section above; MaskedLM's @tf.function already captured the
# previous value at class-definition time, but confirm ordering if this
# file is ever reorganized.
inputs_signature = [
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]

class AutoModelImageClassfication(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF image classifier."""
    # NOTE(review): class name is missing an "i" ("Classfication"); kept as-is
    # because other modules may import it under this exact name.

    def __init__(self, model_name):
        super(AutoModelImageClassfication, self).__init__()
        self.m = TFAutoModelForImageClassification.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, inputs):
        return self.m.predict(inputs)

# Models known to fail through this path.
fail_models = [
    "facebook/data2vec-vision-base-ft1k",
    "microsoft/swin-tiny-patch4-window7-224",
]
# Models verified to work end to end.
supported_models = [
    "facebook/convnext-tiny-224",
    "google/vit-base-patch16-224",
]
# Maps each model name to its matching feature-extractor class.
img_models_fe_dict = {
    "facebook/convnext-tiny-224": ConvNextFeatureExtractor,
    "facebook/data2vec-vision-base-ft1k": BeitFeatureExtractor,
    "microsoft/swin-tiny-patch4-window7-224": AutoFeatureExtractor,
    "google/vit-base-patch16-224": ViTFeatureExtractor,
}

def preprocess_input_image(model_name):
    """Fetch a sample COCO image and run *model_name*'s feature extractor on it."""
    # from datasets import load_dataset
    # dataset = load_dataset("huggingface/cats-image")
    # image1 = dataset["test"]["image"][0]
    # # print("image1: ", image1) # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    image = Image.open(requests.get(url, stream=True).raw)
    feature_extractor = img_models_fe_dict[model_name].from_pretrained(
        model_name
    )
    # inputs: {'pixel_values': <tf.Tensor: shape=(1, 3, 224, 224), dtype=float32, numpy=array([[[[]]]], dtype=float32)>}
    inputs = feature_extractor(images=image, return_tensors="tf")
    # str(*inputs) unpacks the single dict key ("pixel_values" here).
    return [inputs[str(*inputs)]]

def get_causal_image_model(hf_name):
    """Return (model, sample_inputs, reference_output) for image classification."""
    model = AutoModelImageClassfication(hf_name)
    test_input = preprocess_input_image(hf_name)
    # TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 1000), dtype=float32, numpy=
    # array([[]], dtype=float32)>, hidden_states=None, attentions=None)
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

View File

@@ -1,97 +0,0 @@
from transformers import TFAutoModelForImageClassification
from transformers import ConvNextFeatureExtractor, ViTFeatureExtractor
from transformers import BeitFeatureExtractor, AutoFeatureExtractor
import tensorflow as tf
from PIL import Image
import requests
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model

# Create a set of input signature: one [1, 3, 224, 224] float32 image batch.
inputs_signature = [
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]

class AutoModelImageClassfication(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF image classifier."""

    def __init__(self, model_name):
        super(AutoModelImageClassfication, self).__init__()
        self.m = TFAutoModelForImageClassification.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, inputs):
        return self.m.predict(inputs)

# Models known to fail through this path.
fail_models = [
    "facebook/data2vec-vision-base-ft1k",
    "microsoft/swin-tiny-patch4-window7-224",
]
# Models verified to work end to end.
supported_models = [
    # "facebook/convnext-tiny-224",
    "google/vit-base-patch16-224",
]
# Maps each model name to its matching feature-extractor class.
img_models_fe_dict = {
    "facebook/convnext-tiny-224": ConvNextFeatureExtractor,
    "facebook/data2vec-vision-base-ft1k": BeitFeatureExtractor,
    "microsoft/swin-tiny-patch4-window7-224": AutoFeatureExtractor,
    "google/vit-base-patch16-224": ViTFeatureExtractor,
}

def preprocess_input_image(model_name):
    """Fetch a sample COCO image and run *model_name*'s feature extractor on it."""
    # from datasets import load_dataset
    # dataset = load_dataset("huggingface/cats-image")
    # image1 = dataset["test"]["image"][0]
    # # print("image1: ", image1) # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    image = Image.open(requests.get(url, stream=True).raw)
    feature_extractor = img_models_fe_dict[model_name].from_pretrained(
        model_name
    )
    # inputs: {'pixel_values': <tf.Tensor: shape=(1, 3, 224, 224), dtype=float32, numpy=array([[[[]]]], dtype=float32)>}
    inputs = feature_extractor(images=image, return_tensors="tf")
    # str(*inputs) unpacks the single dict key ("pixel_values" here).
    return [inputs[str(*inputs)]]

def get_causal_image_model(hf_name):
    """Return (model, sample_inputs, reference_output) for image classification."""
    model = AutoModelImageClassfication(hf_name)
    test_input = preprocess_input_image(hf_name)
    # TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 1000), dtype=float32, numpy=
    # array([[]], dtype=float32)>, hidden_states=None, attentions=None)
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

if __name__ == "__main__":
    # Smoke test: compile and run each supported model on CPU through SHARK.
    for model_name in supported_models:
        print(f"Running model: {model_name}")
        inputs = preprocess_input_image(model_name)
        model = AutoModelImageClassfication(model_name)
        # 1. USE SharkImporter to get the mlir
        # from shark.shark_importer import SharkImporter
        # mlir_importer = SharkImporter(
        #     model,
        #     inputs,
        #     frontend="tf",
        # )
        # imported_mlir, func_name = mlir_importer.import_mlir()
        # 2. USE SharkDownloader to get the mlir
        imported_mlir, func_name, inputs, golden_out = download_tf_model(
            model_name
        )
        shark_module = SharkInference(
            imported_mlir, func_name, device="cpu", mlir_dialect="mhlo"
        )
        shark_module.compile()
        shark_module.forward(inputs)

View File

@@ -1,53 +0,0 @@
from transformers import TFAutoModelForMaskedLM
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of input signature: a single [1, 512] int32 token-id batch.
inputs_signature = [
    tf.TensorSpec(shape=[1, 512], dtype=tf.int32),
]

class AutoModelMaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(AutoModelMaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(input_ids=x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids):
        return self.m.predict(input_ids)

# Models known to fail through this path.
fail_models = ["microsoft/deberta-base", "google/rembert", "google/tapas-base"]
# Models verified to work end to end.
supported_models = [
    "albert-base-v2",
    "bert-base-uncased",
    "camembert-base",
    "dbmdz/convbert-base-turkish-cased",
    "distilbert-base-uncased",
    "google/electra-small-discriminator",
    "hf-internal-testing/tiny-random-flaubert",
    "funnel-transformer/small",
    "microsoft/layoutlm-base-uncased",
    "allenai/longformer-base-4096",
    "google/mobilebert-uncased",
    "microsoft/mpnet-base",
    "roberta-base",
    "xlm-roberta-base",
]

if __name__ == "__main__":
    # Random token ids are sufficient to exercise compile + forward;
    # the fixed seed keeps runs repeatable.
    inputs = tf.random.uniform(
        shape=[1, 512], maxval=3, dtype=tf.int32, seed=10
    )
    for model_name in supported_models:
        print(f"Running model: {model_name}")
        shark_module = SharkInference(AutoModelMaskedLM(model_name), (inputs,))
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        print(shark_module.forward((inputs,)))