Add tf image classification auto model tests (#216)

This commit is contained in:
Chi_Liu
2022-07-27 18:38:13 -07:00
committed by GitHub
parent 4e5c592094
commit 2c7d879a4e
8 changed files with 287 additions and 227 deletions

View File

@@ -4,15 +4,11 @@
# will generate local shark tank folder like this:
# /SHARK
# /gen_shark_tank
# /tflite
# /albert_lite_base
# /...model_name...
# /tf
# /pytorch
# /albert_lite_base
# /...model_name...
#
import os
import urllib.request
import csv
import argparse
from shark.shark_importer import SharkImporter
@@ -35,7 +31,6 @@ WORKDIR = os.path.join(os.path.dirname(__file__), "gen_shark_tank")
def save_torch_model(torch_model_list):
from tank.model_utils import get_hf_model
from tank.model_utils import get_vision_model
import torch
with open(torch_model_list) as csvfile:
torch_reader = csv.reader(csvfile, delimiter=",")
@@ -79,8 +74,8 @@ def save_torch_model(torch_model_list):
def save_tf_model(tf_model_list):
from tank.masked_lm_tf import get_causal_lm_model
from tank.tf.automodelimageclassification import get_causal_image_model
from tank.model_utils_tf import get_causal_lm_model
from tank.model_utils_tf import get_causal_image_model
with open(tf_model_list) as csvfile:
tf_reader = csv.reader(csvfile, delimiter=",")
@@ -187,8 +182,8 @@ if __name__ == "__main__":
if args.tf_model_csv:
save_tf_model(args.tf_model_csv)
# if args.tflite_model_csv:
# save_tflite_model(args.tflite_model_csv)
if args.tflite_model_csv:
save_tflite_model(args.tflite_model_csv)
if args.upload:
print("uploading files to gs://shark_tank/")

View File

@@ -129,7 +129,7 @@ def download_tf_model(model_name):
raise Exception("model not present in the tank. Contact Nod Admin")
model_dir = os.path.join(WORKDIR, model_dir_name)
with open(os.path.join(model_dir, model_name + ".mlir")) as f:
with open(os.path.join(model_dir, model_name + "_tf.mlir")) as f:
mlir_file = f.read()
function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))

View File

@@ -0,0 +1,73 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class ConvNextTinyModuleTester:
    """Compiles facebook/convnext-tiny-224 through SHARK and verifies its
    logits against the golden output shipped with the downloaded model."""

    def __init__(
        self,
        benchmark=False,
    ):
        # Flag callers may set to enable benchmarking runs.
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        # `dynamic` is accepted for interface parity with other tank tests;
        # only static shapes are exercised here.
        model, func_name, inputs, golden_out = download_tf_model(
            "facebook/convnext-tiny-224"
        )

        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        # result: array([['logits',
        #        <IREE DeviceArray: shape=[1, 1000], dtype=<class 'numpy.float32'>>]],
        #      dtype=object)
        # Post-process the image-classification output: pull the logits
        # device array out of the ('logits', value) pair; astype() with the
        # array's own dtype materializes it as a host numpy array.
        ir_device_array = result[0][1]
        logits = ir_device_array.astype(ir_device_array.dtype)
        logits = np.squeeze(logits, axis=0)
        # Fail the test on divergence. The original only printed the result
        # of np.allclose, so a numerical mismatch could never fail the test.
        np.testing.assert_allclose(
            golden_out[0], logits, rtol=1e-02, atol=1e-03
        )
class ConvNextTinyModuleTest(unittest.TestCase):
    # Drives ConvNextTinyModuleTester once per backend (cpu / gpu / vulkan).

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        # NOTE(review): `self` (the TestCase) is passed as the `benchmark`
        # constructor argument and then immediately overwritten on the next
        # line, so the argument value is effectively unused — confirm intent.
        self.module_tester = ConvNextTinyModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no GPU driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no Vulkan driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
    # Standalone entry point: run through unittest when not under pytest.
    # dynamic = False
    # device = "cpu"
    # module_tester = ConvNextTinyModuleTester()
    # module_tester.create_and_check_module(dynamic, device)
    unittest.main()

View File

@@ -0,0 +1,70 @@
from shark.iree_utils._common import check_device_drivers, device_driver_info
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model
import unittest
import pytest
import numpy as np
class VitBaseModuleTester:
    """Compiles google/vit-base-patch16-224 through SHARK and verifies its
    logits against the golden output shipped with the downloaded model."""

    def __init__(
        self,
        benchmark=False,
    ):
        # Flag callers may set to enable benchmarking runs.
        self.benchmark = benchmark

    def create_and_check_module(self, dynamic, device):
        # `dynamic` is accepted for interface parity with other tank tests;
        # only static shapes are exercised here.
        model, func_name, inputs, golden_out = download_tf_model(
            "google/vit-base-patch16-224"
        )
        shark_module = SharkInference(
            model, func_name, device=device, mlir_dialect="mhlo"
        )
        shark_module.compile()
        result = shark_module.forward(inputs)
        # Post-process the image output: result[0] is a ('logits', DeviceArray)
        # pair; astype() with the array's own dtype materializes it on host.
        ir_device_array = result[0][1]
        logits = ir_device_array.astype(ir_device_array.dtype)
        logits = np.squeeze(logits, axis=0)
        # Fail the test on divergence. The original only printed the result
        # of np.allclose, so a numerical mismatch could never fail the test.
        np.testing.assert_allclose(
            golden_out[0], logits, rtol=1e-02, atol=1e-03
        )
class VitBaseModuleTest(unittest.TestCase):
    # Drives VitBaseModuleTester once per backend (cpu / gpu / vulkan).

    @pytest.fixture(autouse=True)
    def configure(self, pytestconfig):
        # NOTE(review): `self` (the TestCase) is passed as the `benchmark`
        # constructor argument and then immediately overwritten on the next
        # line, so the argument value is effectively unused — confirm intent.
        self.module_tester = VitBaseModuleTester(self)
        self.module_tester.benchmark = pytestconfig.getoption("benchmark")

    def test_module_static_cpu(self):
        dynamic = False
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no GPU driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
    def test_module_static_gpu(self):
        dynamic = False
        device = "gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    # Skipped when no Vulkan driver is available on the host.
    @pytest.mark.skipif(
        check_device_drivers("vulkan"), reason=device_driver_info("vulkan")
    )
    def test_module_static_vulkan(self):
        dynamic = False
        device = "vulkan"
        self.module_tester.create_and_check_module(dynamic, device)
if __name__ == "__main__":
    # Standalone entry point: runs a single CPU check directly;
    # unittest.main() is deliberately left commented out below.
    dynamic = False
    device = "cpu"
    module_tester = VitBaseModuleTester()
    module_tester.create_and_check_module(dynamic, device)
    # unittest.main()

View File

@@ -1,63 +0,0 @@
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf

# Hide all GPUs from TensorFlow so tracing/inference here is CPU-only.
visible_default = tf.config.list_physical_devices("GPU")
try:
    tf.config.set_visible_devices([], "GPU")
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != "GPU"
except:
    # Invalid device or cannot modify virtual devices once initialized.
    pass

# The max_sequence_length is set small for testing purpose.
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signature: (input_ids, attention_mask).
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForCausalLM

def preprocess_input(
    model_name, text="This is just used to compile the model"
):
    """Tokenize *text* for *model_name*, padded/truncated to MAX_SEQUENCE_LENGTH."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer(
        text,
        padding="max_length",
        return_tensors="tf",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
    )
    return inputs

class MaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(MaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False, num_labels=2
        )
        # Keep only the logits (element 0 of the HF output tuple).
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)

def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
    """Return (model, sample_inputs, reference_output) for *hf_name*."""
    # gpus = tf.config.experimental.list_physical_devices("GPU")
    # for gpu in gpus:
    #     tf.config.experimental.set_memory_growth(gpu, True)
    model = MaskedLM(hf_name)
    encoded_input = preprocess_input(hf_name, text)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

View File

@@ -1,5 +1,3 @@
from shark.shark_inference import SharkInference
import tensorflow as tf
import numpy as np
from transformers import (
@@ -81,3 +79,140 @@ def compare_tensors_tf(tf_tensor, numpy_tensor):
atol = 1e-03
tf_to_numpy = tf_tensor.numpy()
return np.allclose(tf_to_numpy, numpy_tensor, rtol, atol)
##################### Tensorflow Hugging Face Masked LM Models ###################################
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf

# Hide all GPUs from TensorFlow so tracing/inference here is CPU-only.
visible_default = tf.config.list_physical_devices("GPU")
try:
    tf.config.set_visible_devices([], "GPU")
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != "GPU"
except:
    # Invalid device or cannot modify virtual devices once initialized.
    pass

# The max_sequence_length is set small for testing purpose.
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 16

# Create a set of input signature: (input_ids, attention_mask).
inputs_signature = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]

# For supported models please see here:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.TFAutoModelForCausalLM

def preprocess_input(
    model_name, text="This is just used to compile the model"
):
    """Tokenize *text* for *model_name*, padded/truncated to MAX_SEQUENCE_LENGTH."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    inputs = tokenizer(
        text,
        padding="max_length",
        return_tensors="tf",
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
    )
    return inputs

class MaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(MaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False, num_labels=2
        )
        # Keep only the logits (element 0 of the HF output tuple).
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)[0]

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)

def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
    """Return (model, sample_inputs, reference_output) for *hf_name*."""
    # gpus = tf.config.experimental.list_physical_devices("GPU")
    # for gpu in gpus:
    #     tf.config.experimental.set_memory_growth(gpu, True)
    model = MaskedLM(hf_name)
    encoded_input = preprocess_input(hf_name, text)
    test_input = (encoded_input["input_ids"], encoded_input["attention_mask"])
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

##################### Tensorflow Hugging Face Image Classification Models ###################################
from transformers import TFAutoModelForImageClassification
from transformers import ConvNextFeatureExtractor, ViTFeatureExtractor
from transformers import BeitFeatureExtractor, AutoFeatureExtractor
import tensorflow as tf
from PIL import Image
import requests

# Create a set of input signature: one [1, 3, 224, 224] float32 image batch.
# NOTE(review): this rebinds the module-level `inputs_signature` used by the
# masked-LM section above; MaskedLM's @tf.function already captured the
# previous value at class-definition time, but confirm ordering if this
# file is ever reorganized.
inputs_signature = [
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]

class AutoModelImageClassfication(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF image classifier."""
    # NOTE(review): class name is missing an "i" ("Classfication"); kept as-is
    # because other modules may import it under this exact name.

    def __init__(self, model_name):
        super(AutoModelImageClassfication, self).__init__()
        self.m = TFAutoModelForImageClassification.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, inputs):
        return self.m.predict(inputs)

# Models known to fail through this path.
fail_models = [
    "facebook/data2vec-vision-base-ft1k",
    "microsoft/swin-tiny-patch4-window7-224",
]
# Models verified to work end to end.
supported_models = [
    "facebook/convnext-tiny-224",
    "google/vit-base-patch16-224",
]
# Maps each model name to its matching feature-extractor class.
img_models_fe_dict = {
    "facebook/convnext-tiny-224": ConvNextFeatureExtractor,
    "facebook/data2vec-vision-base-ft1k": BeitFeatureExtractor,
    "microsoft/swin-tiny-patch4-window7-224": AutoFeatureExtractor,
    "google/vit-base-patch16-224": ViTFeatureExtractor,
}

def preprocess_input_image(model_name):
    """Fetch a sample COCO image and run *model_name*'s feature extractor on it."""
    # from datasets import load_dataset
    # dataset = load_dataset("huggingface/cats-image")
    # image1 = dataset["test"]["image"][0]
    # # print("image1: ", image1) # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    image = Image.open(requests.get(url, stream=True).raw)
    feature_extractor = img_models_fe_dict[model_name].from_pretrained(
        model_name
    )
    # inputs: {'pixel_values': <tf.Tensor: shape=(1, 3, 224, 224), dtype=float32, numpy=array([[[[]]]], dtype=float32)>}
    inputs = feature_extractor(images=image, return_tensors="tf")
    # str(*inputs) unpacks the single dict key ("pixel_values" here).
    return [inputs[str(*inputs)]]

def get_causal_image_model(hf_name):
    """Return (model, sample_inputs, reference_output) for image classification."""
    model = AutoModelImageClassfication(hf_name)
    test_input = preprocess_input_image(hf_name)
    # TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 1000), dtype=float32, numpy=
    # array([[]], dtype=float32)>, hidden_states=None, attentions=None)
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

View File

@@ -1,97 +0,0 @@
from transformers import TFAutoModelForImageClassification
from transformers import ConvNextFeatureExtractor, ViTFeatureExtractor
from transformers import BeitFeatureExtractor, AutoFeatureExtractor
import tensorflow as tf
from PIL import Image
import requests
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_tf_model

# Create a set of input signature: one [1, 3, 224, 224] float32 image batch.
inputs_signature = [
    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]

class AutoModelImageClassfication(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF image classifier."""

    def __init__(self, model_name):
        super(AutoModelImageClassfication, self).__init__()
        self.m = TFAutoModelForImageClassification.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, inputs):
        return self.m.predict(inputs)

# Models known to fail through this path.
fail_models = [
    "facebook/data2vec-vision-base-ft1k",
    "microsoft/swin-tiny-patch4-window7-224",
]
# Models verified to work end to end.
supported_models = [
    # "facebook/convnext-tiny-224",
    "google/vit-base-patch16-224",
]
# Maps each model name to its matching feature-extractor class.
img_models_fe_dict = {
    "facebook/convnext-tiny-224": ConvNextFeatureExtractor,
    "facebook/data2vec-vision-base-ft1k": BeitFeatureExtractor,
    "microsoft/swin-tiny-patch4-window7-224": AutoFeatureExtractor,
    "google/vit-base-patch16-224": ViTFeatureExtractor,
}

def preprocess_input_image(model_name):
    """Fetch a sample COCO image and run *model_name*'s feature extractor on it."""
    # from datasets import load_dataset
    # dataset = load_dataset("huggingface/cats-image")
    # image1 = dataset["test"]["image"][0]
    # # print("image1: ", image1) # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    image = Image.open(requests.get(url, stream=True).raw)
    feature_extractor = img_models_fe_dict[model_name].from_pretrained(
        model_name
    )
    # inputs: {'pixel_values': <tf.Tensor: shape=(1, 3, 224, 224), dtype=float32, numpy=array([[[[]]]], dtype=float32)>}
    inputs = feature_extractor(images=image, return_tensors="tf")
    # str(*inputs) unpacks the single dict key ("pixel_values" here).
    return [inputs[str(*inputs)]]

def get_causal_image_model(hf_name):
    """Return (model, sample_inputs, reference_output) for image classification."""
    model = AutoModelImageClassfication(hf_name)
    test_input = preprocess_input_image(hf_name)
    # TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 1000), dtype=float32, numpy=
    # array([[]], dtype=float32)>, hidden_states=None, attentions=None)
    actual_out = model.forward(*test_input)
    return model, test_input, actual_out

if __name__ == "__main__":
    # Smoke test: compile and run each supported model on CPU through SHARK.
    for model_name in supported_models:
        print(f"Running model: {model_name}")
        inputs = preprocess_input_image(model_name)
        model = AutoModelImageClassfication(model_name)
        # 1. USE SharkImporter to get the mlir
        # from shark.shark_importer import SharkImporter
        # mlir_importer = SharkImporter(
        #     model,
        #     inputs,
        #     frontend="tf",
        # )
        # imported_mlir, func_name = mlir_importer.import_mlir()
        # 2. USE SharkDownloader to get the mlir
        imported_mlir, func_name, inputs, golden_out = download_tf_model(
            model_name
        )
        shark_module = SharkInference(
            imported_mlir, func_name, device="cpu", mlir_dialect="mhlo"
        )
        shark_module.compile()
        shark_module.forward(inputs)

View File

@@ -1,53 +0,0 @@
from transformers import TFAutoModelForMaskedLM
import tensorflow as tf
from shark.shark_inference import SharkInference

# Create a set of input signature: a single [1, 512] int32 token-id batch.
inputs_signature = [
    tf.TensorSpec(shape=[1, 512], dtype=tf.int32),
]

class AutoModelMaskedLM(tf.Module):
    """tf.Module wrapper exposing a traceable forward() over a HF masked-LM model."""

    def __init__(self, model_name):
        super(AutoModelMaskedLM, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained(
            model_name, output_attentions=False
        )
        self.m.predict = lambda x: self.m(input_ids=x)

    @tf.function(input_signature=inputs_signature)
    def forward(self, input_ids):
        return self.m.predict(input_ids)

# Models known to fail through this path.
fail_models = ["microsoft/deberta-base", "google/rembert", "google/tapas-base"]
# Models verified to work end to end.
supported_models = [
    "albert-base-v2",
    "bert-base-uncased",
    "camembert-base",
    "dbmdz/convbert-base-turkish-cased",
    "distilbert-base-uncased",
    "google/electra-small-discriminator",
    "hf-internal-testing/tiny-random-flaubert",
    "funnel-transformer/small",
    "microsoft/layoutlm-base-uncased",
    "allenai/longformer-base-4096",
    "google/mobilebert-uncased",
    "microsoft/mpnet-base",
    "roberta-base",
    "xlm-roberta-base",
]

if __name__ == "__main__":
    # Random token ids are sufficient to exercise compile + forward;
    # the fixed seed keeps runs repeatable.
    inputs = tf.random.uniform(
        shape=[1, 512], maxval=3, dtype=tf.int32, seed=10
    )
    for model_name in supported_models:
        print(f"Running model: {model_name}")
        shark_module = SharkInference(AutoModelMaskedLM(model_name), (inputs,))
        shark_module.set_frontend("tensorflow")
        shark_module.compile()
        print(shark_module.forward((inputs,)))