Add T5-base and T5-large Torch and TF Models (#1116)

2026-04-03 03:00:17 -04:00 · 2023-03-21 09:32:50 +11:00
parent f99903e023
commit 90c958bca2
8 changed files with 139 additions and 4 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,6 +26,7 @@ safetensors
 opencv-python
 scikit-image
 pytorch_lightning # for runwayml models
+sentencepiece

 # Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
 pefile
--- a/tank/all_models.csv
+++ b/tank/all_models.csv
@@ -35,8 +35,14 @@ squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"","mac
 wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc/img2col,False,False,False,"","macos"
 efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
 mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,True,True,True,"","macos"
+t5-base,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
+t5-base,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
+t5-large,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
+t5-large,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
 efficientnet_b0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
 efficientnet_b7,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
+efficientnet_b0,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,False,"",""
+efficientnet_b7,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,False,"",""
 efficientnet_b0,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,"",""
 efficientnet_b7,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,"",""
 gpt2,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
--- a/tank/generate_sharktank.py
+++ b/tank/generate_sharktank.py
@@ -36,6 +36,7 @@ def create_hash(file_name):
 def save_torch_model(torch_model_list, local_tank_cache):
    from tank.model_utils import (
        get_hf_model,
+        get_hf_seq2seq_model,
        get_vision_model,
        get_hf_img_cls_model,
        get_fp16_model,
@@ -84,6 +85,8 @@ def save_torch_model(torch_model_list, local_tank_cache):
                model, input, _ = get_vision_model(torch_model_name)
            elif model_type == "hf":
                model, input, _ = get_hf_model(torch_model_name)
+            elif model_type == "hf_seq2seq":
+                model, input, _ = get_hf_seq2seq_model(torch_model_name)
            elif model_type == "hf_img_cls":
                model, input, _ = get_hf_img_cls_model(torch_model_name)
            elif model_type == "fp16":
@@ -122,6 +125,7 @@ def save_tf_model(tf_model_list, local_tank_cache):
        get_causal_lm_model,
        get_keras_model,
        get_TFhf_model,
+        get_tfhf_seq2seq_model,
    )
    import tensorflow as tf

@@ -147,13 +151,15 @@ def save_tf_model(tf_model_list, local_tank_cache):
            print(f"Generating artifacts for model {tf_model_name}")
            if model_type == "hf":
                model, input, _ = get_masked_lm_model(tf_model_name)
-            if model_type == "img":
+            elif model_type == "img":
                model, input, _ = get_causal_image_model(tf_model_name)
-            if model_type == "keras":
+            elif model_type == "keras":
                model, input, _ = get_keras_model(tf_model_name)
-            if model_type == "TFhf":
+            elif model_type == "TFhf":
                model, input, _ = get_TFhf_model(tf_model_name)
-            if model_type == "hf_causallm":
+            elif model_type == "tfhf_seq2seq":
+                model, input, _ = get_tfhf_seq2seq_model(tf_model_name)
+            elif model_type == "hf_causallm":
                model, input, _ = get_causal_lm_model(tf_model_name)

            tf_model_name = tf_model_name.replace("/", "_")
--- a/tank/model_metadata.csv
+++ b/tank/model_metadata.csv
@@ -31,6 +31,9 @@ xlm-roberta-base,False,False,-,-,-
 facebook/convnext-tiny-224,False,False,-,-,-
 efficientnet-v2-s,False,False,22M,"image-classification,cnn","Includes MBConv and Fused-MBConv"
 mnasnet1_0,False,True,-,"cnn, torchvision, mobile, architecture-search","Outperforms other mobile CNNs on Accuracy vs. Latency"
+bert-large-uncased,True,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
+t5-base,True,False,220M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
+t5-large,True,False,770M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
 bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
 efficientnet_b0,True,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
 efficientnet_b7,True,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"
--- a/tank/model_utils.py
+++ b/tank/model_utils.py
@@ -29,6 +29,10 @@ hf_img_cls_models = [
    "microsoft/beit-base-patch16-224-pt22k-ft22k",
    "nvidia/mit-b0",
 ]
+hf_seq2seq_models = [
+    "t5-base",
+    "t5-large",
+]


 def get_torch_model(modelname):
@@ -36,6 +40,8 @@ def get_torch_model(modelname):
        return get_vision_model(modelname)
    elif modelname in hf_img_cls_models:
        return get_hf_img_cls_model(modelname)
+    elif modelname in hf_seq2seq_models:
+        return get_hf_seq2seq_model(modelname)
    elif "fp16" in modelname:
        return get_fp16_model(modelname)
    else:
@@ -131,6 +137,47 @@ def get_hf_model(name):
    return model, test_input, actual_out


+##################### Hugging Face Seq2SeqLM Models ###################################
+
+# We use a maximum sequence length of 512 since this is the default used in the T5 config.
+T5_MAX_SEQUENCE_LENGTH = 512
+
+
+class HFSeq2SeqLanguageModel(torch.nn.Module):
+    def __init__(self, model_name):
+        super().__init__()
+        from transformers import AutoTokenizer, T5Model
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenization_kwargs = {
+            "pad_to_multiple_of": T5_MAX_SEQUENCE_LENGTH,
+            "padding": True,
+            "return_tensors": "pt",
+        }
+        self.model = T5Model.from_pretrained(model_name, return_dict=True)
+
+    def preprocess_input(self, text):
+        return self.tokenizer(text, **self.tokenization_kwargs)
+
+    def forward(self, input_ids, decoder_input_ids):
+        return self.model.forward(
+            input_ids, decoder_input_ids=decoder_input_ids
+        )[0]
+
+
+def get_hf_seq2seq_model(name):
+    m = HFSeq2SeqLanguageModel(name)
+    encoded_input_ids = m.preprocess_input(
+        "Studies have been shown that owning a dog is good for you"
+    ).input_ids
+    decoder_input_ids = m.preprocess_input("Studies show that").input_ids
+    decoder_input_ids = m.model._shift_right(decoder_input_ids)
+
+    test_input = (encoded_input_ids, decoder_input_ids)
+    actual_out = m.forward(*test_input)
+    return m, test_input, actual_out
+
+
 ################################################################################

 ##################### Torch Vision Models    ###################################
--- a/tank/model_utils_tf.py
+++ b/tank/model_utils_tf.py
@@ -42,6 +42,10 @@ causallm_models = [
 tfhf_models = [
    "microsoft/MiniLM-L12-H384-uncased",
 ]
+tfhf_seq2seq_models = [
+    "t5-base",
+    "t5-large",
+]
 img_models = [
    "google/vit-base-patch16-224",
    "facebook/convnext-tiny-224",
@@ -59,6 +63,8 @@ def get_tf_model(name):
        return get_TFhf_model(name)
    elif name in img_models:
        return get_causal_image_model(name)
+    elif name in tfhf_seq2seq_models:
+        return get_tfhf_seq2seq_model(name)
    else:
        raise Exception(
            "TF model not found! Please check that the modelname has been input correctly."
@@ -254,6 +260,68 @@ def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
    return model, test_input, actual_out


+##################### TensorFlow Hugging Face Seq2SeqLM Models ###################################
+
+# We use a maximum sequence length of 512 since this is the default used in the T5 config.
+T5_MAX_SEQUENCE_LENGTH = 512
+
+input_signature_t5 = [
+    tf.TensorSpec(
+        shape=[BATCH_SIZE, T5_MAX_SEQUENCE_LENGTH],
+        dtype=tf.int32,
+        name="input_ids",
+    ),
+    tf.TensorSpec(
+        shape=[BATCH_SIZE, T5_MAX_SEQUENCE_LENGTH],
+        dtype=tf.int32,
+        name="attention_mask",
+    ),
+]
+
+
+class TFHFSeq2SeqLanguageModel(tf.Module):
+    def __init__(self, model_name):
+        super(TFHFSeq2SeqLanguageModel, self).__init__()
+        from transformers import (
+            AutoTokenizer,
+            AutoConfig,
+            TFAutoModelForSeq2SeqLM,
+            TFT5Model,
+        )
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenization_kwargs = {
+            "pad_to_multiple_of": T5_MAX_SEQUENCE_LENGTH,
+            "padding": True,
+            "return_tensors": "tf",
+        }
+        self.model = TFT5Model.from_pretrained(model_name, return_dict=True)
+        self.model.predict = lambda x, y: self.model(x, decoder_input_ids=y)[0]
+
+    def preprocess_input(self, text):
+        return self.tokenizer(text, **self.tokenization_kwargs)
+
+    @tf.function(input_signature=input_signature_t5, jit_compile=True)
+    def forward(self, input_ids, decoder_input_ids):
+        return self.model.predict(input_ids, decoder_input_ids)
+
+
+def get_tfhf_seq2seq_model(name):
+    m = TFHFSeq2SeqLanguageModel(name)
+    text = "Studies have been shown that owning a dog is good for you"
+    batched_text = [text] * BATCH_SIZE
+    encoded_input_ids = m.preprocess_input(batched_text).input_ids
+
+    text = "Studies show that"
+    batched_text = [text] * BATCH_SIZE
+    decoder_input_ids = m.preprocess_input(batched_text).input_ids
+    decoder_input_ids = m.model._shift_right(decoder_input_ids)
+
+    test_input = (encoded_input_ids, decoder_input_ids)
+    actual_out = m.forward(*test_input)
+    return m, test_input, actual_out
+
+
 ##################### TensorFlow Keras Resnet Models #########################################################
 # Static shape, including batch size (1).
 # Can be dynamic once dynamic shape support is ready.
--- a/tank/tf_model_list.csv
+++ b/tank/tf_model_list.csv
@@ -19,6 +19,8 @@ facebook/convnext-tiny-224,img
 google/vit-base-patch16-224,img
 efficientnet-v2-s,keras
 bert-large-uncased,hf
+t5-base,tfhf_seq2seq
+t5-large,tfhf_seq2seq
 efficientnet_b0,keras
 efficientnet_b7,keras
 gpt2,hf_causallm
--- a/tank/torch_model_list.csv
+++ b/tank/torch_model_list.csv
@@ -19,5 +19,7 @@ mnasnet1_0,False,vision,True,-,"cnn, torchvision, mobile, architecture-search","
 resnet50_fp16,False,vision,True,23M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
 bert-base-uncased_fp16,True,fp16,False,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
 bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
+t5-base,True,hf_seq2seq,True,220M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
+t5-large,True,hf_seq2seq,True,770M,"nlp;transformer-encoder;transformer-decoder","Text-to-Text Transfer Transformer"
 efficientnet_b0,True,vision,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
 efficientnet_b7,True,vision,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"