from amdshark.amdshark_inference import AMDSharkInference
import torch
import numpy as np
import sys

torch.manual_seed(0)

BATCH_SIZE = 1

vision_models = [
    "alexnet",
    "resnet101",
    "resnet18",
    "resnet50",
    "resnet50_fp16",
    "squeezenet1_0",
    "wide_resnet50_2",
    "mobilenet_v3_small",
    "mnasnet1_0",
    "efficientnet_b0",
    "efficientnet_b7",
]
hf_img_cls_models = [
    "google/vit-base-patch16-224",
    "microsoft/resnet-50",
    "facebook/deit-small-distilled-patch16-224",
    "microsoft/beit-base-patch16-224-pt22k-ft22k",
    "nvidia/mit-b0",
]
hf_seq2seq_models = [
    "t5-base",
    "t5-large",
]


def get_torch_model(modelname, import_args):
    if modelname in vision_models:
        return get_vision_model(modelname, import_args)
    elif modelname in hf_img_cls_models:
        return get_hf_img_cls_model(modelname, import_args)
    elif modelname in hf_seq2seq_models:
        return get_hf_seq2seq_model(modelname, import_args)
    elif "fp16" in modelname:
        return get_fp16_model(modelname, import_args)
    else:
        return get_hf_model(modelname, import_args)
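

# Illustrative usage (a sketch, not part of the original file): every getter
# dispatched above returns a (model, test_input, golden_output) triple and
# only reads the "batch_size" key from `import_args`, so a minimal call
# looks like this.
def _example_get_torch_model():
    model, test_input, golden_out = get_torch_model(
        "resnet50", {"batch_size": BATCH_SIZE}
    )
    return model, test_input, golden_out

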
##################### Hugging Face Image Classification Models ###################################
from transformers import AutoModelForImageClassification
from transformers import AutoFeatureExtractor
from PIL import Image
import requests


def preprocess_input_image(model_name):
    # from datasets import load_dataset
    # dataset = load_dataset("huggingface/cats-image")
    # image1 = dataset["test"]["image"][0]
    # # print("image1: ", image1) # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    # <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=640x480 at 0x7FA0B86BB6D0>
    image = Image.open(requests.get(url, stream=True).raw)
    # feature_extractor = img_models_fe_dict[model_name].from_pretrained(
    #     model_name
    # )
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
    inputs = feature_extractor(images=image, return_tensors="pt")
    # inputs = {'pixel_values': tensor([[[[ 0.1137..., -0.2000, -0.4275, -0.5294]]]])}
    # torch.Size([1, 3, 224, 224]), torch.FloatTensor

    # `inputs` holds a single key (usually "pixel_values"); str(*inputs)
    # unpacks that lone key, so this returns the only tensor in the dict.
    return inputs[str(*inputs)]


class HuggingFaceImageClassification(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        self.model = AutoModelForImageClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            output_attentions=False,  # Whether the model returns attentions weights.
            return_dict=False,  # https://github.com/huggingface/transformers/issues/9095
            torchscript=True,
        )

    def forward(self, inputs):
        return self.model.forward(inputs)[0]


def get_hf_img_cls_model(name, import_args):
    model = HuggingFaceImageClassification(name)
    # You can use preprocess_input_image to get the test_input, or just a random value.
    test_input = preprocess_input_image(name)
    # test_input = torch.FloatTensor(1, 3, 224, 224).uniform_(-1, 1)
    # print("test_input.shape: ", test_input.shape)
    # test_input.shape: torch.Size([1, 3, 224, 224])
    test_input = test_input.repeat(int(import_args["batch_size"]), 1, 1, 1)
    actual_out = model(test_input)
    # print("actual_out.shape: ", actual_out.shape)
    # actual_out.shape: torch.Size([1, 1000])
    return model, test_input, actual_out
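

# Illustrative sketch (an assumption, not in the original file): fetching one
# of the listed image classifiers and checking the golden output shape. This
# downloads weights and the sample COCO image, so it needs network access.
def _example_hf_img_cls():
    model, test_input, golden_out = get_hf_img_cls_model(
        "google/vit-base-patch16-224", {"batch_size": 1}
    )
    # This checkpoint is fine-tuned on ImageNet-1k, so the head emits 1000 logits.
    assert golden_out.shape == (1, 1000)

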
##################### Hugging Face LM Models ###################################


class HuggingFaceLanguage(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        from transformers import AutoModelForSequenceClassification

        self.model = AutoModelForSequenceClassification.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
        )

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_model(name, import_args):
    model = HuggingFaceLanguage(name)
    test_input = torch.randint(2, (int(import_args["batch_size"]), 128))
    actual_out = model(test_input)
    return model, test_input, actual_out
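

# Illustrative sketch (an assumption, not in the original file): this fallback
# path feeds random token ids to any HF sequence-classification checkpoint;
# "bert-base-uncased" is just one plausible choice.
def _example_hf_seq_cls():
    model, test_input, golden_out = get_hf_model(
        "bert-base-uncased", {"batch_size": 1}
    )
    # num_labels=2 in HuggingFaceLanguage, hence two logits per example.
    assert golden_out.shape == (1, 2)

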
##################### Hugging Face Seq2SeqLM Models ###################################

# We use a maximum sequence length of 512, since this is the default used in the T5 config.
T5_MAX_SEQUENCE_LENGTH = 512


class HFSeq2SeqLanguageModel(torch.nn.Module):
    def __init__(self, model_name):
        super().__init__()
        from transformers import AutoTokenizer, T5Model

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.tokenization_kwargs = {
            "pad_to_multiple_of": T5_MAX_SEQUENCE_LENGTH,
            "padding": True,
            "return_tensors": "pt",
        }
        self.model = T5Model.from_pretrained(model_name, return_dict=True)

    def preprocess_input(self, text):
        return self.tokenizer(text, **self.tokenization_kwargs)

    def forward(self, input_ids, decoder_input_ids):
        return self.model.forward(
            input_ids, decoder_input_ids=decoder_input_ids
        )[0]


def get_hf_seq2seq_model(name, import_args):
    m = HFSeq2SeqLanguageModel(name)
    encoded_input_ids = m.preprocess_input(
        "Studies have been shown that owning a dog is good for you"
    ).input_ids
    decoder_input_ids = m.preprocess_input("Studies show that").input_ids
    # _shift_right prepends the decoder start token, as T5 expects for
    # teacher-forced decoding (note: it is a private transformers helper).
    decoder_input_ids = m.model._shift_right(decoder_input_ids)

    test_input = (encoded_input_ids, decoder_input_ids)
    actual_out = m.forward(*test_input)
    return m, test_input, actual_out
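

# Illustrative sketch (an assumption, not in the original file): unlike the
# other getters, test_input here is a tuple of encoder and decoder ids, both
# padded to a multiple of T5_MAX_SEQUENCE_LENGTH by tokenization_kwargs.
def _example_t5():
    m, test_input, golden_out = get_hf_seq2seq_model(
        "t5-base", {"batch_size": 1}
    )
    input_ids, decoder_input_ids = test_input
    assert input_ids.shape[1] % T5_MAX_SEQUENCE_LENGTH == 0

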
##################### Hugging Face CausalLM Models ###################################
from transformers import AutoTokenizer, AutoModelForCausalLM


def prepare_sentence_tokens(hf_model: str, sentence: str):
    tokenizer = AutoTokenizer.from_pretrained(hf_model)
    return torch.tensor([tokenizer.encode(sentence)])


class HFCausalLM(torch.nn.Module):
    def __init__(self, model_name: str):
        super().__init__()
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,  # The pretrained model name.
            # The number of output labels--2 for binary classification.
            num_labels=2,
            # Whether the model returns attentions weights.
            output_attentions=False,
            # Whether the model returns all hidden-states.
            output_hidden_states=False,
            torchscript=True,
        )
        self.model.eval()

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_hf_causallm_model(name, import_args):
    m = HFCausalLM(name)
    test_input = prepare_sentence_tokens(
        name, "this project is very interesting"
    )
    # test_input is a (1, seq_len) tensor, so it is passed whole rather than
    # star-unpacked (unpacking would strip the batch dimension).
    actual_out = m.forward(test_input)
    return m, test_input, actual_out
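

# Illustrative sketch (an assumption, not in the original file): "gpt2" is a
# typical causal LM to feed through this path; the golden output is the
# logits tensor of shape (batch, sequence_length, vocab_size).
def _example_causallm():
    m, test_input, golden_out = get_hf_causallm_model(
        "gpt2", {"batch_size": 1}
    )
    assert golden_out.dim() == 3

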
################################################################################

##################### Torch Vision Models ###################################


class VisionModule(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.train(False)

    def forward(self, input):
        return self.model.forward(input)


def get_vision_model(torch_model, import_args):
    import torchvision.models as models

    default_image_size = (224, 224)
    modelname = torch_model
    if modelname == "alexnet":
        torch_model = models.alexnet(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "resnet18":
        torch_model = models.resnet18(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "resnet50":
        torch_model = models.resnet50(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "resnet50_fp16":
        torch_model = models.resnet50(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "resnet101":
        torch_model = models.resnet101(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "squeezenet1_0":
        torch_model = models.squeezenet1_0(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "wide_resnet50_2":
        torch_model = models.wide_resnet50_2(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "mobilenet_v3_small":
        torch_model = models.mobilenet_v3_small(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "mnasnet1_0":
        torch_model = models.mnasnet1_0(weights="DEFAULT")
        input_image_size = default_image_size
    if modelname == "efficientnet_b0":
        torch_model = models.efficientnet_b0(weights="DEFAULT")
        input_image_size = (224, 224)
    if modelname == "efficientnet_b7":
        torch_model = models.efficientnet_b7(weights="DEFAULT")
        input_image_size = (600, 600)

    fp16_model = False
    if "fp16" in modelname:
        fp16_model = True
    model = VisionModule(torch_model)
    test_input = torch.randn(
        int(import_args["batch_size"]), 3, *input_image_size
    )
    actual_out = model(test_input)
    if fp16_model:
        # The half-precision path moves both model and input to CUDA.
        test_input_fp16 = test_input.to(
            device=torch.device("cuda"), dtype=torch.half
        )
        model_fp16 = model.half()
        model_fp16.eval()
        model_fp16.to("cuda")
        actual_out_fp16 = model_fp16(test_input_fp16)
        model, test_input, actual_out = (
            model_fp16,
            test_input_fp16,
            actual_out_fp16,
        )
    return model, test_input, actual_out
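

# Illustrative sketch (an assumption, not in the original file): the fp32
# path runs on CPU; only "_fp16" model names take the half-precision branch
# above, which requires a CUDA device.
def _example_vision():
    model, test_input, golden_out = get_vision_model(
        "resnet18", {"batch_size": 4}
    )
    # All listed torchvision classifiers have 1000 ImageNet classes.
    assert golden_out.shape == (4, 1000)

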
################################################################################

####################### Other PyTorch HF Models ###############################


class BertHalfPrecisionModel(torch.nn.Module):
    def __init__(self, hf_model_name):
        super().__init__()
        from transformers import AutoModelForMaskedLM

        self.model = AutoModelForMaskedLM.from_pretrained(
            hf_model_name,  # The pretrained model.
            num_labels=2,  # The number of output labels--2 for binary classification.
            output_attentions=False,  # Whether the model returns attentions weights.
            output_hidden_states=False,  # Whether the model returns all hidden-states.
            torchscript=True,
            torch_dtype=torch.float16,
        ).to("cuda")

    def forward(self, tokens):
        return self.model.forward(tokens)[0]


def get_fp16_model(torch_model, import_args):
    from transformers import AutoTokenizer

    modelname = torch_model.replace("_fp16", "")
    model = BertHalfPrecisionModel(modelname)
    tokenizer = AutoTokenizer.from_pretrained(modelname)
    text = "Replace me by any text you like."
    text = [text] * int(import_args["batch_size"])
    test_input_fp16 = tokenizer(
        text,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).input_ids.to("cuda")
    # test_input = torch.randint(2, (1, 128))
    # test_input_fp16 = test_input.to(
    #     device=torch.device("cuda")
    # )
    model_fp16 = model.half()
    model_fp16.eval()
    with torch.no_grad():
        actual_out_fp16 = model_fp16(test_input_fp16)
    return model_fp16, test_input_fp16, actual_out_fp16
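

# Illustrative sketch (an assumption, not in the original file): the "_fp16"
# suffix is stripped before the checkpoint is loaded, so any masked-LM name
# plus "_fp16" works; a CUDA device is mandatory because the model is built
# directly on "cuda".
def _example_fp16_lm():
    model, test_input, golden_out = get_fp16_model(
        "bert-base-uncased_fp16", {"batch_size": 1}
    )
    assert golden_out.dtype == torch.float16

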
# Utility function for comparing two tensors (torch vs. numpy).
def compare_tensors(torch_tensor, numpy_tensor, rtol=1e-02, atol=1e-03):
    # Detach and move to host so CUDA / grad-tracking outputs compare cleanly.
    if isinstance(torch_tensor, torch.Tensor):
        torch_tensor = torch_tensor.detach().cpu().numpy()
    return np.allclose(torch_tensor, numpy_tensor, rtol, atol)
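

# Minimal smoke test (a sketch, not in the original file): fetch one model
# and compare its golden output against itself through compare_tensors.
# Guarded so importing this module stays side-effect free.
if __name__ == "__main__":
    model, test_input, golden_out = get_torch_model(
        "squeezenet1_0", {"batch_size": BATCH_SIZE}
    )
    reference = golden_out.detach().cpu().numpy()
    print("self-comparison passes:", compare_tensors(golden_out, reference))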