Split iree_utils into submodules and move module imports inside function calls.

Author: Prashant Kumar
Date: 2022-06-22 11:13:17 +05:30
parent 08eda2ce35
commit e8aa105b2a
147 changed files with 4705 additions and 2662 deletions
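The title's "module imports inside function calls" refers to deferring imports until a function actually runs, instead of importing at module scope. A minimal sketch of that pattern, assuming nothing about this commit's internals (numpy stands in for a heavy, optional backend):

    import importlib

    def run_on_device(data, device="cpu"):
        # Deferred import: the backend is loaded only on first call, so
        # importing the package that defines this function stays cheap and
        # the backend remains an optional dependency.
        backend = importlib.import_module("numpy")  # stand-in for a real backend
        return backend.asarray(data).sum()

    print(run_on_device([1, 2, 3]))  # -> 6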


@@ -1,19 +1,23 @@
from shark.shark_inference import SharkInference
from shark.iree_utils import check_device_drivers
from shark.iree_utils._common import check_device_drivers
import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import AutoModelForSequenceClassification, BertTokenizer, TFBertModel
from transformers import (
AutoModelForSequenceClassification,
BertTokenizer,
TFBertModel,
)
import importlib
import pytest
import unittest
torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
tf.config.experimental.set_memory_growth(gpu, True)
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
@@ -23,12 +27,11 @@ BATCH_SIZE = 1
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
# Create a BERT trainer with the created network.
@@ -36,7 +39,8 @@ class TFHuggingFaceLanguage(tf.Module):
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False)
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=tf_bert_input)
def forward(self, input_ids, attention_mask, token_type_ids):
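For context on the @tf.function(input_signature=tf_bert_input) decorator above: a fixed input_signature pins shapes and dtypes so TensorFlow traces forward into a single concrete graph, which is what downstream import/compile tooling consumes. A toy sketch of the same mechanism, independent of this commit:

    import tensorflow as tf

    class Doubler(tf.Module):
        # One fixed signature means one trace: a [1, 4] int32 tensor in,
        # a concrete, exportable graph out.
        @tf.function(input_signature=[tf.TensorSpec(shape=[1, 4], dtype=tf.int32)])
        def forward(self, x):
            return x * 2

    m = Doubler()
    print(m.forward(tf.constant([[1, 2, 3, 4]], dtype=tf.int32)))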
@@ -47,15 +51,21 @@ def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(name)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text,
padding='max_length',
truncation=True,
max_length=MAX_SEQUENCE_LENGTH)
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0)
test_input = (encoded_input["input_ids"], encoded_input["attention_mask"],
encoded_input["token_type_ids"])
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
actual_out = model.forward(*test_input)
return model, test_input, actual_out
@@ -64,17 +74,13 @@ def get_TFhf_model(name):
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=
2, # The number of output labels--2 for binary classification.
output_attentions=
False, # Whether the model returns attention weights.
output_hidden_states=
False, # Whether the model returns all hidden-states.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attention weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
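As an aside, the torchscript=True flag passed above makes the Hugging Face model return plain tuples instead of output objects so it can be traced. A minimal standalone sketch (the model name is taken from the tests below; everything else is illustrative):

    import torch
    from transformers import AutoModelForSequenceClassification

    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/MiniLM-L12-H384-uncased",
        num_labels=2,
        torchscript=True,  # tuple outputs, safe for torch.jit.trace
    )
    model.eval()
    input_ids = torch.randint(0, 100, (1, 16))
    traced = torch.jit.trace(model, (input_ids,))  # a traced, serializable module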
@@ -96,7 +102,6 @@ def get_hf_model(name):
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
@@ -117,46 +122,56 @@ def get_vision_model(torch_model):
############################# Benchmark Tests ####################################
pytest_benchmark_param = pytest.mark.parametrize(
('dynamic', 'device'),
("dynamic", "device"),
[
pytest.param(False, 'cpu'),
pytest.param(False, "cpu"),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, 'cpu', marks=pytest.mark.skip),
pytest.param(False,
'gpu',
marks=pytest.mark.skipif(check_device_drivers("gpu"),
reason="nvidia-smi not found")),
pytest.param(True,
'gpu',
marks=pytest.mark.skip),
pytest.param(True, "cpu", marks=pytest.mark.skip),
pytest.param(
False,
'vulkan',
"gpu",
marks=pytest.mark.skipif(
check_device_drivers("gpu"), reason="nvidia-smi not found"
),
),
pytest.param(True, "gpu", marks=pytest.mark.skip),
pytest.param(
False,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
pytest.param(
True,
'vulkan',
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases"
)),
])
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
],
)
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased")
shark_module = SharkInference(model, (test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
model,
(test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert success/True.
shark_module.compile()
@@ -167,17 +182,21 @@ def test_bench_minilm_torch(dynamic, device):
assert False
@pytest.mark.skipif(importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF")
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert success/True.
shark_module.set_frontend("tensorflow")
@@ -193,12 +212,14 @@ def test_bench_distilbert(dynamic, device):
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
shark_module = SharkInference(model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True)
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking is successful, assert success/True.
shark_module.set_frontend("tensorflow")
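The pytest_benchmark_param rework above is the standard pytest idiom of attaching per-parameter skip marks inside parametrize. A standalone sketch of the same pattern, with gpu_missing as a hypothetical stand-in for check_device_drivers("gpu"):

    import shutil

    import pytest

    def gpu_missing():
        # Mirrors the nvidia-smi probe: skip GPU cases when the driver
        # tooling is not on PATH.
        return shutil.which("nvidia-smi") is None

    device_param = pytest.mark.parametrize(
        ("dynamic", "device"),
        [
            pytest.param(False, "cpu"),
            pytest.param(
                False,
                "gpu",
                marks=pytest.mark.skipif(gpu_missing(), reason="nvidia-smi not found"),
            ),
        ],
    )

    @device_param
    def test_device_smoke(dynamic, device):
        assert device in ("cpu", "gpu")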