update xfails for PyTorch DistilBERT (#355)

Ean Garvey
2022-09-24 14:53:20 -05:00
committed by GitHub
parent 6438bce023
commit de23e5d9d7
2 changed files with 5 additions and 1 deletion


@@ -3,7 +3,6 @@ microsoft/MiniLM-L12-H384-uncased,True,hf,True,66M,"nlp;bert-variant;transformer
 albert-base-v2,True,hf,True,11M,"nlp;bert-variant;transformer-encoder","12 layers; 128 embedding dim; 768 hidden dim; 12 attention heads; Smaller than BERTbase (11M params vs 109M params); Uses weight sharing to reduce # params but computational cost is similar to BERT."
 bert-base-uncased,True,hf,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
 bert-base-cased,True,hf,True,109M,"nlp;bert-variant;transformer-encoder","12 layers; 768 hidden; 12 attention heads"
-distilbert-base-uncased,True,hf,True,66M,"nlp;bert-variant;transformer-encoder","Smaller and faster than BERT with 97% retained accuracy."
 google/mobilebert-uncased,True,hf,True,25M,"nlp,bert-variant,transformer-encoder,mobile","24 layers, 512 hidden size, 128 embedding"
 alexnet,False,vision,True,61M,"cnn,parallel-layers","The CNN that revolutionized computer vision (move away from hand-crafted features to neural networks), 10 years old now and probably no longer used in prod."
 resnet18,False,vision,True,11M,"cnn,image-classification,residuals,resnet-variant","1 7x7 conv2d and the rest are 3x3 conv2d"
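For reference, the file above is a plain CSV model list whose header names the columns model_name, use_tracing, model_type, dynamic, param_count, tags, and notes. The snippet below is an illustrative sketch only, not part of this commit; the filename is a placeholder, and it simply shows how such a list could be loaded and filtered with Python's standard csv module.

# Illustrative sketch (not from this commit): read a model-list CSV like the one above.
# "torch_model_list.csv" is a placeholder filename, not confirmed by this diff.
import csv

def load_model_list(path="torch_model_list.csv"):
    with open(path, newline="") as f:
        # Columns, per the file header: model_name, use_tracing, model_type,
        # dynamic, param_count, tags, notes
        return list(csv.DictReader(f))

if __name__ == "__main__":
    rows = load_model_list()
    hf_models = [r["model_name"] for r in rows if r["model_type"] == "hf"]
    print(hf_models)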

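The second changed file adds a conditional pytest.xfail to the test suite so the PyTorch DistilBERT case is reported as an expected failure (tracked at https://github.com/nod-ai/SHARK/issues/354) instead of breaking CI. As a minimal, self-contained sketch of that pattern, assuming a hypothetical parametrized test rather than the actual SHARK test harness:

# Minimal sketch of the imperative-xfail pattern used in the diff below; the
# CONFIGS list and test body are hypothetical stand-ins, not SHARK code.
import pytest

CONFIGS = [
    {"model_name": "distilbert-base-uncased", "framework": "torch"},
    {"model_name": "bert-base-uncased", "framework": "torch"},
]

@pytest.mark.parametrize("config", CONFIGS)
def test_module(config):
    if (
        config["model_name"] == "distilbert-base-uncased"
        and config["framework"] == "torch"
    ):
        # Stops the test immediately and reports it as xfailed, not failed.
        pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/354")
    # ... real compile/run checks would go here ...
    assert config["framework"] == "torch"

Unlike the @pytest.mark.xfail decorator, the imperative form can consult runtime configuration (here, the model name and framework) before deciding to mark the test as an expected failure, which is why it fits a parametrized suite like this one.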

@@ -259,6 +259,11 @@ class SharkModuleTest(unittest.TestCase):
         self.module_tester.tf32 = self.pytestconfig.getoption("tf32")
         self.module_tester.ci = self.pytestconfig.getoption("ci")
         self.module_tester.ci_sha = self.pytestconfig.getoption("ci_sha")
+        if (
+            config["model_name"] == "distilbert-base-uncased"
+            and config["framework"] == "torch"
+        ):
+            pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/354")
         if (
             config["model_name"] == "facebook/convnext-tiny-224"
             and device == "cuda"