Add T5-base and T5-large Torch and TF Models (#1116)

This commit is contained in:
mariecwhite
2023-03-21 09:32:50 +11:00
committed by GitHub
parent f99903e023
commit 90c958bca2
8 changed files with 139 additions and 4 deletions

View File

@@ -26,6 +26,7 @@ safetensors
opencv-python
scikit-image
pytorch_lightning # for runwayml models
sentencepiece
# Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
pefile

View File

@@ -35,8 +35,14 @@ squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"","mac
wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc/img2col,False,False,False,"","macos"
efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,True,True,True,"","macos"
t5-base,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
t5-base,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
t5-large,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
t5-large,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
efficientnet_b0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b7,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b0,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b7,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b0,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b7,mhlo,tf,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
gpt2,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
1 resnet50 mhlo tf 1e-2 1e-3 default nhcw-nhwc False False False macos
35 wide_resnet50_2 linalg torch 1e-2 1e-3 default nhcw-nhwc/img2col False False False macos
36 efficientnet-v2-s mhlo tf 1e-02 1e-3 default nhcw-nhwc False False False macos
37 mnasnet1_0 linalg torch 1e-2 1e-3 default nhcw-nhwc True True True macos
38 t5-base linalg torch 1e-2 1e-3 default None False False False
39 t5-base mhlo tf 1e-2 1e-3 default None False False False
40 t5-large linalg torch 1e-2 1e-3 default None False False False
41 t5-large mhlo tf 1e-2 1e-3 default None False False False
42 efficientnet_b0 linalg torch 1e-2 1e-3 default nhcw-nhwc False False False
43 efficientnet_b7 linalg torch 1e-2 1e-3 default nhcw-nhwc False False False
44 efficientnet_b0 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False False
45 efficientnet_b7 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False False
46 efficientnet_b0 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False
47 efficientnet_b7 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False
48 gpt2 mhlo tf 1e-2 1e-3 default None False False False

View File

@@ -36,6 +36,7 @@ def create_hash(file_name):
def save_torch_model(torch_model_list, local_tank_cache):
from tank.model_utils import (
get_hf_model,
get_hf_seq2seq_model,
get_vision_model,
get_hf_img_cls_model,
get_fp16_model,
@@ -84,6 +85,8 @@ def save_torch_model(torch_model_list, local_tank_cache):
model, input, _ = get_vision_model(torch_model_name)
elif model_type == "hf":
model, input, _ = get_hf_model(torch_model_name)
elif model_type == "hf_seq2seq":
model, input, _ = get_hf_seq2seq_model(torch_model_name)
elif model_type == "hf_img_cls":
model, input, _ = get_hf_img_cls_model(torch_model_name)
elif model_type == "fp16":
@@ -122,6 +125,7 @@ def save_tf_model(tf_model_list, local_tank_cache):
get_causal_lm_model,
get_keras_model,
get_TFhf_model,
get_tfhf_seq2seq_model,
)
import tensorflow as tf
@@ -147,13 +151,15 @@ def save_tf_model(tf_model_list, local_tank_cache):
print(f"Generating artifacts for model {tf_model_name}")
if model_type == "hf":
model, input, _ = get_masked_lm_model(tf_model_name)
if model_type == "img":
elif model_type == "img":
model, input, _ = get_causal_image_model(tf_model_name)
if model_type == "keras":
elif model_type == "keras":
model, input, _ = get_keras_model(tf_model_name)
if model_type == "TFhf":
elif model_type == "TFhf":
model, input, _ = get_TFhf_model(tf_model_name)
if model_type == "hf_causallm":
elif model_type == "tfhf_seq2seq":
model, input, _ = get_tfhf_seq2seq_model(tf_model_name)
elif model_type == "hf_causallm":
model, input, _ = get_causal_lm_model(tf_model_name)
tf_model_name = tf_model_name.replace("/", "_")

View File

@@ -31,6 +31,9 @@ xlm-roberta-base,False,False,-,-,-
facebook/convnext-tiny-224,False,False,-,-,-
efficientnet-v2-s,False,False,22M,"image-classification,cnn","Includes MBConv and Fused-MBConv"
mnasnet1_0,False,True,-,"cnn, torchvision, mobile, architecture-search","Outperforms other mobile CNNs on Accuracy vs. Latency"
bert-large-uncased,True,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
t5-base,True,False,220M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
t5-large,True,False,770M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
bert-large-uncased,True,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
efficientnet_b0,True,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
efficientnet_b7,True,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"
1 model_name use_tracing dynamic param_count tags notes
31 facebook/convnext-tiny-224 False False - - -
32 efficientnet-v2-s False False 22M image-classification,cnn Includes MBConv and Fused-MBConv
33 mnasnet1_0 False True - cnn, torchvision, mobile, architecture-search Outperforms other mobile CNNs on Accuracy vs. Latency
34 bert-large-uncased True True 330M nlp;bert-variant;transformer-encoder 24 layers, 1024 hidden units, 16 attention heads
35 t5-base True False 220M nlp;transformer-encoder;transformer-decoder Text-to-Text Transfer Transformer
36 t5-large True False 770M nlp;transformer-encoder;transformer-decoder Text-to-Text Transfer Transformer
37 bert-large-uncased True hf True 330M nlp;bert-variant;transformer-encoder
38 efficientnet_b0 True False 5.3M image-classification;cnn;conv2d;depthwise-conv Smallest EfficientNet variant with 224x224 input
39 efficientnet_b7 True False 66M image-classification;cnn;conv2d;depthwise-conv Largest EfficientNet variant with 600x600 input

View File

@@ -29,6 +29,10 @@ hf_img_cls_models = [
"microsoft/beit-base-patch16-224-pt22k-ft22k",
"nvidia/mit-b0",
]
hf_seq2seq_models = [
"t5-base",
"t5-large",
]
def get_torch_model(modelname):
@@ -36,6 +40,8 @@ def get_torch_model(modelname):
return get_vision_model(modelname)
elif modelname in hf_img_cls_models:
return get_hf_img_cls_model(modelname)
elif modelname in hf_seq2seq_models:
return get_hf_seq2seq_model(modelname)
elif "fp16" in modelname:
return get_fp16_model(modelname)
else:
@@ -131,6 +137,47 @@ def get_hf_model(name):
return model, test_input, actual_out
##################### Hugging Face Seq2SeqLM Models ###################################
# We use a maximum sequence length of 512 since this is the default used in the T5 config.
T5_MAX_SEQUENCE_LENGTH: int = 512
class HFSeq2SeqLanguageModel(torch.nn.Module):
    """Torch wrapper around a HuggingFace T5 encoder-decoder model.

    Tokenizes input text padded to the T5 default max length so traced
    shapes stay static, and exposes a forward() that returns the first
    output tensor of the underlying model.
    """

    def __init__(self, model_name):
        super().__init__()
        # Imported lazily so transformers is only required when this wrapper is used.
        from transformers import AutoTokenizer, T5Model

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Pad every batch up to a multiple of the max length for static shapes.
        self.tokenization_kwargs = dict(
            pad_to_multiple_of=T5_MAX_SEQUENCE_LENGTH,
            padding=True,
            return_tensors="pt",
        )
        self.model = T5Model.from_pretrained(model_name, return_dict=True)

    def preprocess_input(self, text):
        """Tokenize *text* using the padding settings configured in __init__."""
        return self.tokenizer(text, **self.tokenization_kwargs)

    def forward(self, input_ids, decoder_input_ids):
        """Run the full encoder-decoder stack; return its first output tensor."""
        outputs = self.model.forward(input_ids, decoder_input_ids=decoder_input_ids)
        return outputs[0]
def get_hf_seq2seq_model(name):
    """Build a T5 wrapper plus a sample (input_ids, decoder_input_ids) pair.

    Returns (model, test_input, actual_out) where actual_out is the model's
    output on test_input, for use as a numerical reference.
    """
    model = HFSeq2SeqLanguageModel(name)
    encoder_ids = model.preprocess_input(
        "Studies have been shown that owning a dog is good for you"
    ).input_ids
    # T5 expects decoder inputs shifted right by one position.
    decoder_ids = model.model._shift_right(
        model.preprocess_input("Studies show that").input_ids
    )
    test_input = (encoder_ids, decoder_ids)
    return model, test_input, model.forward(*test_input)
################################################################################
##################### Torch Vision Models ###################################

View File

@@ -42,6 +42,10 @@ causallm_models = [
tfhf_models = [
"microsoft/MiniLM-L12-H384-uncased",
]
tfhf_seq2seq_models = [
"t5-base",
"t5-large",
]
img_models = [
"google/vit-base-patch16-224",
"facebook/convnext-tiny-224",
@@ -59,6 +63,8 @@ def get_tf_model(name):
return get_TFhf_model(name)
elif name in img_models:
return get_causal_image_model(name)
elif name in tfhf_seq2seq_models:
return get_tfhf_seq2seq_model(name)
else:
raise Exception(
"TF model not found! Please check that the modelname has been input correctly."
@@ -254,6 +260,68 @@ def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
return model, test_input, actual_out
##################### Tensorflow Hugging Face Seq2SeqLM Models ###################################
# We use a maximum sequence length of 512 since this is the default used in the T5 config.
T5_MAX_SEQUENCE_LENGTH = 512

# Fixed-shape tracing specs for T5: both tensors are [BATCH_SIZE, 512] int32.
# NOTE(review): the second spec is named "attention_mask" but is passed as
# decoder_input_ids by TFHFSeq2SeqLanguageModel.forward — confirm the name.
input_signature_t5 = [
    tf.TensorSpec(
        shape=[BATCH_SIZE, T5_MAX_SEQUENCE_LENGTH],
        dtype=tf.int32,
        name="input_ids",
    ),
    tf.TensorSpec(
        shape=[BATCH_SIZE, T5_MAX_SEQUENCE_LENGTH],
        dtype=tf.int32,
        name="attention_mask",
    ),
]
class TFHFSeq2SeqLanguageModel(tf.Module):
    """TF wrapper around HuggingFace TFT5Model exposing a jit-compiled forward()."""

    def __init__(self, model_name):
        super(TFHFSeq2SeqLanguageModel, self).__init__()
        # Imported lazily so transformers is only needed when this wrapper is built.
        from transformers import (
            AutoTokenizer,
            AutoConfig,
            TFAutoModelForSeq2SeqLM,
            TFT5Model,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Pad batches up to the T5 default max length so the fixed
        # [BATCH_SIZE, 512] shapes in input_signature_t5 are satisfied.
        self.tokenization_kwargs = {
            "pad_to_multiple_of": T5_MAX_SEQUENCE_LENGTH,
            "padding": True,
            "return_tensors": "tf",
        }
        self.model = TFT5Model.from_pretrained(model_name, return_dict=True)
        # NOTE(review): this shadows the keras Model.predict method with a
        # callable that returns only the first output tensor of a forward
        # pass — presumably intentional for export; confirm nothing relies
        # on the original predict().
        self.model.predict = lambda x, y: self.model(x, decoder_input_ids=y)[0]

    def preprocess_input(self, text):
        # Tokenize text using the padding settings configured in __init__.
        return self.tokenizer(text, **self.tokenization_kwargs)

    @tf.function(input_signature=input_signature_t5, jit_compile=True)
    def forward(self, input_ids, decoder_input_ids):
        # Traced, XLA-compiled entry point; returns the first model output.
        return self.model.predict(input_ids, decoder_input_ids)
def get_tfhf_seq2seq_model(name):
    """Build the TF T5 wrapper plus batched sample inputs and a reference output.

    Returns (model, test_input, actual_out) where test_input is a
    (input_ids, decoder_input_ids) pair batched to BATCH_SIZE.
    """
    model = TFHFSeq2SeqLanguageModel(name)
    encoder_ids = model.preprocess_input(
        ["Studies have been shown that owning a dog is good for you"] * BATCH_SIZE
    ).input_ids
    # T5 expects decoder inputs shifted right by one position.
    decoder_ids = model.model._shift_right(
        model.preprocess_input(["Studies show that"] * BATCH_SIZE).input_ids
    )
    test_input = (encoder_ids, decoder_ids)
    return model, test_input, model.forward(*test_input)
##################### TensorFlow Keras Resnet Models #########################################################
# Static shape, including batch size (1).
# Can be dynamic once dynamic shape support is ready.

View File

@@ -19,6 +19,8 @@ facebook/convnext-tiny-224,img
google/vit-base-patch16-224,img
efficientnet-v2-s,keras
bert-large-uncased,hf
t5-base,tfhf_seq2seq
t5-large,tfhf_seq2seq
efficientnet_b0,keras
efficientnet_b7,keras
gpt2,hf_causallm
1 model_name model_type
19 google/vit-base-patch16-224 img
20 efficientnet-v2-s keras
21 bert-large-uncased hf
22 t5-base tfhf_seq2seq
23 t5-large tfhf_seq2seq
24 efficientnet_b0 keras
25 efficientnet_b7 keras
26 gpt2 hf_causallm

View File

@@ -19,5 +19,7 @@ mnasnet1_0,False,vision,True,-,"cnn, torchvision, mobile, architecture-search","
resnet50_fp16,False,vision,True,23M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
bert-base-uncased_fp16,True,fp16,False,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
t5-base,True,hf_seq2seq,True,220M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
t5-large,True,hf_seq2seq,True,770M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
efficientnet_b0,True,vision,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
efficientnet_b7,True,vision,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"
1 model_name use_tracing model_type dynamic param_count tags notes
19 resnet50_fp16 False vision True 23M cnn,image-classification,residuals,resnet-variant Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)
20 bert-base-uncased_fp16 True fp16 False 109M nlp;bert-variant;transformer-encoder 12 layers; 768 hidden; 12 attention heads
21 bert-large-uncased True hf True 330M nlp;bert-variant;transformer-encoder 24 layers, 1024 hidden units, 16 attention heads
22 t5-base True hf_seq2seq True 220M nlp;transformer-encoder;transformer-decoder Text-to-Text Transfer Transformer
23 t5-large True hf_seq2seq True 770M nlp;transformer-encoder;transformer-decoder Text-to-Text Transfer Transformer
24 efficientnet_b0 True vision False 5.3M image-classification;cnn;conv2d;depthwise-conv Smallest EfficientNet variant with 224x224 input
25 efficientnet_b7 True vision False 66M image-classification;cnn;conv2d;depthwise-conv Largest EfficientNet variant with 600x600 input