Add EfficientNet B0 and B7 Torch and TF models

This commit is contained in:
mariecwhite
2023-02-20 04:45:23 -08:00
parent 8dcd4d5aeb
commit c6f44ef1b3
6 changed files with 123 additions and 20 deletions

View File

@@ -35,3 +35,7 @@ squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"","mac
wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc/img2col,False,False,False,"","macos"
efficientnet-v2-s,mhlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
mnasnet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,True,True,True,"","macos"
efficientnet_b0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b7,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"",""
efficientnet_b0,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,"",""
efficientnet_b7,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,"",""
1 resnet50 mhlo tf 1e-2 1e-3 default nhcw-nhwc False False False macos
35 wide_resnet50_2 linalg torch 1e-2 1e-3 default nhcw-nhwc/img2col False False False macos
36 efficientnet-v2-s mhlo tf 1e-02 1e-3 default nhcw-nhwc False False False macos
37 mnasnet1_0 linalg torch 1e-2 1e-3 default nhcw-nhwc True True True macos
38 efficientnet_b0 linalg torch 1e-2 1e-3 default nhcw-nhwc False False False
39 efficientnet_b7 linalg torch 1e-2 1e-3 default nhcw-nhwc False False False
40 efficientnet_b0 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False
41 efficientnet_b7 mhlo tf 1e-2 1e-3 default None nhcw-nhwc False False

View File

@@ -32,3 +32,5 @@ facebook/convnext-tiny-224,False,False,-,-,-
efficientnet-v2-s,False,False,22M,"image-classification,cnn","Includes MBConv and Fused-MBConv"
mnasnet1_0,False,True,-,"cnn, torchvision, mobile, architecture-search","Outperforms other mobile CNNs on Accuracy vs. Latency"
bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
efficientnet_b0,True,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
efficientnet_b7,True,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"
1 model_name use_tracing dynamic param_count tags notes
32 efficientnet-v2-s False False 22M image-classification,cnn Includes MBConv and Fused-MBConv
33 mnasnet1_0 False True - cnn, torchvision, mobile, architecture-search Outperforms other mobile CNNs on Accuracy vs. Latency
34 bert-large-uncased True hf True 330M nlp;bert-variant;transformer-encoder
35 efficientnet_b0 True False 5.3M image-classification;cnn;conv2d;depthwise-conv Smallest EfficientNet variant with 224x224 input
36 efficientnet_b7 True False 66M image-classification;cnn;conv2d;depthwise-conv Largest EfficientNet variant with 600x600 input

View File

@@ -19,6 +19,8 @@ vision_models = [
"wide_resnet50_2",
"mobilenet_v3_small",
"mnasnet1_0",
"efficientnet_b0",
"efficientnet_b7",
]
hf_img_cls_models = [
"google/vit-base-patch16-224",
@@ -147,22 +149,48 @@ class VisionModule(torch.nn.Module):
def get_vision_model(torch_model):
import torchvision.models as models
default_image_size = (224, 224)
vision_models_dict = {
"alexnet": models.alexnet(weights="DEFAULT"),
"resnet18": models.resnet18(weights="DEFAULT"),
"resnet50": models.resnet50(weights="DEFAULT"),
"resnet50_fp16": models.resnet50(weights="DEFAULT"),
"resnet101": models.resnet101(weights="DEFAULT"),
"squeezenet1_0": models.squeezenet1_0(weights="DEFAULT"),
"wide_resnet50_2": models.wide_resnet50_2(weights="DEFAULT"),
"mobilenet_v3_small": models.mobilenet_v3_small(weights="DEFAULT"),
"mnasnet1_0": models.mnasnet1_0(weights="DEFAULT"),
"alexnet": (models.alexnet(weights="DEFAULT"), default_image_size),
"resnet18": (models.resnet18(weights="DEFAULT"), default_image_size),
"resnet50": (models.resnet50(weights="DEFAULT"), default_image_size),
"resnet50_fp16": (
models.resnet50(weights="DEFAULT"),
default_image_size,
),
"resnet101": (models.resnet101(weights="DEFAULT"), default_image_size),
"squeezenet1_0": (
models.squeezenet1_0(weights="DEFAULT"),
default_image_size,
),
"wide_resnet50_2": (
models.wide_resnet50_2(weights="DEFAULT"),
default_image_size,
),
"mobilenet_v3_small": (
models.mobilenet_v3_small(weights="DEFAULT"),
default_image_size,
),
"mnasnet1_0": (
models.mnasnet1_0(weights="DEFAULT"),
default_image_size,
),
# EfficientNet input image size varies with the size of the model.
"efficientnet_b0": (
models.efficientnet_b0(weights="DEFAULT"),
(224, 224),
),
"efficientnet_b7": (
models.efficientnet_b7(weights="DEFAULT"),
(600, 600),
),
}
if isinstance(torch_model, str):
fp16_model = None
if "fp16" in torch_model:
fp16_model = True
torch_model = vision_models_dict[torch_model]
torch_model, input_image_size = vision_models_dict[torch_model]
model = VisionModule(torch_model)
test_input = torch.randn(BATCH_SIZE, 3, 224, 224)
actual_out = model(test_input)

View File

@@ -11,7 +11,12 @@ MAX_SEQUENCE_LENGTH = 128
################################## MHLO/TF models #########################################
# TODO : Generate these lists or fetch model source from tank/tf/tf_model_list.csv
keras_models = ["resnet50", "efficientnet-v2-s"]
keras_models = [
"resnet50",
"efficientnet_b0",
"efficientnet_b7",
"efficientnet-v2-s",
]
maskedlm_models = [
"albert-base-v2",
"bert-base-uncased",
@@ -178,7 +183,9 @@ def get_causal_lm_model(hf_name, text="Hello, this is the default text."):
# Static shape, including batch size (1).
# Can be dynamic once dynamic shape support is ready.
RESNET_INPUT_SHAPE = [BATCH_SIZE, 224, 224, 3]
EFFICIENTNET_INPUT_SHAPE = [BATCH_SIZE, 384, 384, 3]
EFFICIENTNET_V2_S_INPUT_SHAPE = [BATCH_SIZE, 384, 384, 3]
EFFICIENTNET_B0_INPUT_SHAPE = [BATCH_SIZE, 224, 224, 3]
EFFICIENTNET_B7_INPUT_SHAPE = [BATCH_SIZE, 600, 600, 3]
class ResNetModule(tf.Module):
@@ -205,25 +212,79 @@ class ResNetModule(tf.Module):
return tf.keras.applications.resnet50.preprocess_input(image)
class EfficientNetModule(tf.Module):
class EfficientNetB0Module(tf.Module):
def __init__(self):
super(EfficientNetModule, self).__init__()
self.m = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
super(EfficientNetB0Module, self).__init__()
self.m = tf.keras.applications.efficientnet.EfficientNetB0(
weights="imagenet",
include_top=True,
input_shape=tuple(EFFICIENTNET_INPUT_SHAPE[1:]),
input_shape=tuple(EFFICIENTNET_B0_INPUT_SHAPE[1:]),
)
self.m.predict = lambda x: self.m.call(x, training=False)
@tf.function(
input_signature=[tf.TensorSpec(EFFICIENTNET_INPUT_SHAPE, tf.float32)],
input_signature=[
tf.TensorSpec(EFFICIENTNET_B0_INPUT_SHAPE, tf.float32)
],
jit_compile=True,
)
def forward(self, inputs):
return self.m.predict(inputs)
def input_shape(self):
return EFFICIENTNET_INPUT_SHAPE
return EFFICIENTNET_B0_INPUT_SHAPE
def preprocess_input(self, image):
return tf.keras.applications.efficientnet.preprocess_input(image)
class EfficientNetB7Module(tf.Module):
    """tf.Module wrapper around the Keras EfficientNetB7 application.

    The wrapped model is built with ImageNet weights and its classification
    head (``include_top=True``), using the static shape declared in
    ``EFFICIENTNET_B7_INPUT_SHAPE`` (minus the leading batch dimension).
    """

    def __init__(self):
        super().__init__()
        # Drop the batch dimension; Keras expects the per-example shape.
        per_example_shape = tuple(EFFICIENTNET_B7_INPUT_SHAPE[1:])
        self.m = tf.keras.applications.efficientnet.EfficientNetB7(
            weights="imagenet",
            include_top=True,
            input_shape=per_example_shape,
        )

        def _inference_call(x):
            # Route predict() through call() with training disabled.
            return self.m.call(x, training=False)

        self.m.predict = _inference_call

    @tf.function(
        input_signature=[
            tf.TensorSpec(EFFICIENTNET_B7_INPUT_SHAPE, tf.float32)
        ],
        jit_compile=True,
    )
    def forward(self, inputs):
        """Run the wrapped model on a float32 batch of the declared shape."""
        outputs = self.m.predict(inputs)
        return outputs

    def input_shape(self):
        """Return the full static input shape, batch dimension included."""
        return EFFICIENTNET_B7_INPUT_SHAPE

    def preprocess_input(self, image):
        """Delegate to the Keras EfficientNet ``preprocess_input`` helper."""
        preprocess = tf.keras.applications.efficientnet.preprocess_input
        return preprocess(image)
class EfficientNetV2SModule(tf.Module):
    """tf.Module wrapper around the Keras EfficientNetV2S application.

    The wrapped model is built with ImageNet weights and its classification
    head (``include_top=True``), using the static shape declared in
    ``EFFICIENTNET_V2_S_INPUT_SHAPE`` (minus the leading batch dimension).
    """

    def __init__(self):
        super().__init__()
        # Drop the batch dimension; Keras expects the per-example shape.
        per_example_shape = tuple(EFFICIENTNET_V2_S_INPUT_SHAPE[1:])
        self.m = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
            weights="imagenet",
            include_top=True,
            input_shape=per_example_shape,
        )

        def _inference_call(x):
            # Route predict() through call() with training disabled.
            return self.m.call(x, training=False)

        self.m.predict = _inference_call

    @tf.function(
        input_signature=[
            tf.TensorSpec(EFFICIENTNET_V2_S_INPUT_SHAPE, tf.float32)
        ],
        jit_compile=True,
    )
    def forward(self, inputs):
        """Run the wrapped model on a float32 batch of the declared shape."""
        outputs = self.m.predict(inputs)
        return outputs

    def input_shape(self):
        """Return the full static input shape, batch dimension included."""
        return EFFICIENTNET_V2_S_INPUT_SHAPE

    def preprocess_input(self, image):
        """Delegate to the Keras EfficientNetV2 ``preprocess_input`` helper."""
        preprocess = tf.keras.applications.efficientnet_v2.preprocess_input
        return preprocess(image)
@@ -240,7 +301,11 @@ def load_image(path_to_image, width, height, channels):
def get_keras_model(modelname):
if modelname == "efficientnet-v2-s":
model = EfficientNetModule()
model = EfficientNetV2SModule()
elif modelname == "efficientnet_b0":
model = EfficientNetB0Module()
elif modelname == "efficientnet_b7":
model = EfficientNetB7Module()
else:
model = ResNetModule()

View File

@@ -19,3 +19,5 @@ facebook/convnext-tiny-224,img
google/vit-base-patch16-224,img
efficientnet-v2-s,keras
bert-large-uncased,hf
efficientnet_b0,keras
efficientnet_b7,keras
1 model_name model_type
19 google/vit-base-patch16-224 img
20 efficientnet-v2-s keras
21 bert-large-uncased hf
22 efficientnet_b0 keras
23 efficientnet_b7 keras

View File

@@ -18,4 +18,6 @@ nvidia/mit-b0,True,hf_img_cls,False,3.7M,"image-classification,transformer-encod
mnasnet1_0,False,vision,True,-,"cnn, torchvision, mobile, architecture-search","Outperforms other mobile CNNs on Accuracy vs. Latency"
resnet50_fp16,False,vision,True,23M,"cnn,image-classification,residuals,resnet-variant","Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)"
bert-base-uncased_fp16,True,fp16,False,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
bert-large-uncased,True,hf,True,330M,"nlp;bert-variant;transformer-encoder","24 layers, 1024 hidden units, 16 attention heads"
efficientnet_b0,True,vision,False,5.3M,"image-classification;cnn;conv2d;depthwise-conv","Smallest EfficientNet variant with 224x224 input"
efficientnet_b7,True,vision,False,66M,"image-classification;cnn;conv2d;depthwise-conv","Largest EfficientNet variant with 600x600 input"
1 model_name use_tracing model_type dynamic param_count tags notes
18 mnasnet1_0 False vision True - cnn, torchvision, mobile, architecture-search Outperforms other mobile CNNs on Accuracy vs. Latency
19 resnet50_fp16 False vision True 23M cnn,image-classification,residuals,resnet-variant Bottlenecks with only conv2d (1x1 conv -> 3x3 conv -> 1x1 conv blocks)
20 bert-base-uncased_fp16 True fp16 False 109M nlp;bert-variant;transformer-encoder 12 layers; 768 hidden; 12 attention heads
21 bert-large-uncased True hf True 330M nlp;bert-variant;transformer-encoder 24 layers, 1024 hidden units, 16 attention heads
22 efficientnet_b0 True vision False 5.3M image-classification;cnn;conv2d;depthwise-conv Smallest EfficientNet variant with 224x224 input
23 efficientnet_b7 True vision False 66M image-classification;cnn;conv2d;depthwise-conv Largest EfficientNet variant with 600x600 input