Allow pytest to retry getting model artifacts + disable autotuning for PyTorch benchmarks (#1257)

* Add a few xfails to enable the macOS builder.
* Convert string batch sizes to ints where needed.
* Allow pytest to retry getting model artifacts.
* Reduce attempts and add an assert message.
2026-04-03 03:00:17 -04:00 · 2023-03-28 23:38:45 -05:00
parent 594c6b8ea2
commit d6f740b998
5 changed files with 44 additions and 32 deletions
--- a/shark/shark_benchmark_runner.py
+++ b/shark/shark_benchmark_runner.py
@@ -132,12 +132,13 @@ class SharkBenchmarkRunner(SharkRunner):
        frontend_model.to(torch_device)
        input.to(torch_device)

-        try:
-            frontend_model = torch.compile(
-                frontend_model, mode="max-autotune", backend="inductor"
-            )
-        except RuntimeError:
-            frontend_model = HFmodel.model
+        # TODO: re-enable as soon as pytorch CUDA context issues are resolved
+        # try:
+        #    frontend_model = torch.compile(
+        #        frontend_model, mode="max-autotune", backend="inductor"
+        #    )
+        # except RuntimeError:
+        #    frontend_model = HFmodel.model

        for i in range(shark_args.num_warmup_iterations):
            frontend_model.forward(input)
--- a/shark/shark_downloader.py
+++ b/shark/shark_downloader.py
@@ -204,17 +204,17 @@ def download_model(
    suffix = f"{dyn_str}_{frontend}{tuned_str}.mlir"
    filename = os.path.join(model_dir, model_name + suffix)

-    try:
-        with open(filename, mode="rb") as f:
-            mlir_file = f.read()
-    except FileNotFoundError:
+    if not os.path.exists(filename):
        from tank.generate_sharktank import gen_shark_files

-        tank_dir = WORKDIR
-        gen_shark_files(model_name, frontend, tank_dir, import_args)
-        with open(filename, mode="rb") as f:
-            mlir_file = f.read()
+        print(
+            "The model data was not found. Trying to generate artifacts locally."
+        )
+        gen_shark_files(model_name, frontend, WORKDIR, import_args)

+    assert os.path.exists(filename), f"MLIR not found at {filename}"
+    with open(filename, mode="rb") as f:
+        mlir_file = f.read()
    function_name = str(np.load(os.path.join(model_dir, "function_name.npy")))
    inputs = np.load(os.path.join(model_dir, "inputs.npz"))
    golden_out = np.load(os.path.join(model_dir, "golden_out.npz"))
--- a/tank/all_models.csv
+++ b/tank/all_models.csv
@@ -22,7 +22,7 @@ bert-large-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
 bert-large-uncased,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
 facebook/deit-small-distilled-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"Fails during iree-compile.",""
 google/vit-base-patch16-224,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/311",""
-microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/390",""
+microsoft/beit-base-patch16-224-pt22k-ft22k,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/390","macos"
 microsoft/MiniLM-L12-H384-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,"",""
 google/mobilebert-uncased,linalg,torch,1e-2,1e-3,default,None,False,False,False,"https://github.com/nod-ai/SHARK/issues/344",""
 mobilenet_v3_small,linalg,torch,1e-1,1e-2,default,nhcw-nhwc,False,True,False,"https://github.com/nod-ai/SHARK/issues/388","macos"
@@ -39,7 +39,7 @@ efficientnet_b0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,True,True,False,"https:
 efficientnet_b7,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,True,False,False,"Torchvision imports issue",""
 efficientnet_b0,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,False,"",""
 efficientnet_b7,mhlo,tf,1e-2,1e-3,default,None,nhcw-nhwc,False,False,False,"",""
-gpt2,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
+gpt2,mhlo,tf,1e-2,1e-3,default,None,True,False,False,"",""
 t5-base,linalg,torch,1e-2,1e-3,default,None,True,True,True,"Inputs for seq2seq models in torch currently unsupported.",""
 t5-base,mhlo,tf,1e-2,1e-3,default,None,False,False,False,"",""
 t5-large,linalg,torch,1e-2,1e-3,default,None,True,True,True,"Inputs for seq2seq models in torch currently unsupported",""
--- a/tank/model_utils.py
+++ b/tank/model_utils.py
@@ -94,7 +94,7 @@ def get_hf_img_cls_model(name, import_args):
    # test_input = torch.FloatTensor(1, 3, 224, 224).uniform_(-1, 1)
    # print("test_input.shape: ", test_input.shape)
    # test_input.shape:  torch.Size([1, 3, 224, 224])
-    test_input = test_input.repeat(import_args["batch_size"], 1, 1, 1)
+    test_input = test_input.repeat(int(import_args["batch_size"]), 1, 1, 1)
    actual_out = model(test_input)
    # print("actual_out.shape： ", actual_out.shape)
    # actual_out.shape：  torch.Size([1, 1000])
@@ -130,7 +130,7 @@ def get_hf_model(name, import_args):
    )

    model = HuggingFaceLanguage(name)
-    test_input = torch.randint(2, (import_args["batch_size"], 128))
+    test_input = torch.randint(2, (int(import_args["batch_size"]), 128))
    actual_out = model(test_input)
    return model, test_input, actual_out

@@ -237,7 +237,9 @@ def get_vision_model(torch_model, import_args):
            fp16_model = True
        torch_model, input_image_size = vision_models_dict[torch_model]
    model = VisionModule(torch_model)
-    test_input = torch.randn(import_args["batch_size"], 3, *input_image_size)
+    test_input = torch.randn(
+        int(import_args["batch_size"]), 3, *input_image_size
+    )
    actual_out = model(test_input)
    if fp16_model is not None:
        test_input_fp16 = test_input.to(
@@ -285,7 +287,7 @@ def get_fp16_model(torch_model, import_args):
    model = BertHalfPrecisionModel(modelname)
    tokenizer = AutoTokenizer.from_pretrained(modelname)
    text = "Replace me by any text you like."
-    text = [text] * import_args["batch_size"]
+    text = [text] * int(import_args["batch_size"])
    test_input_fp16 = tokenizer(
        text,
        truncation=True,
--- a/tank/test_models.py
+++ b/tank/test_models.py
@@ -169,18 +169,27 @@ class SharkModuleTester:
        if "winograd" in self.config["flags"]:
            shark_args.use_winograd = True

-        try:
-            model, func_name, inputs, golden_out = download_model(
-                self.config["model_name"],
-                tank_url=self.tank_url,
-                frontend=self.config["framework"],
-                import_args=import_config,
-            )
-        except NoImportException:
-            pytest.xfail(
-                reason=f"Artifacts for this model/config must be generated locally. Please make sure {self.config['framework']} is installed."
-            )
-
+        dl_gen_attempts = 2
+        for i in range(dl_gen_attempts):
+            try:
+                model, func_name, inputs, golden_out = download_model(
+                    self.config["model_name"],
+                    tank_url=self.tank_url,
+                    frontend=self.config["framework"],
+                    import_args=import_config,
+                )
+            except NoImportException as err:
+                pytest.xfail(
+                    reason=f"Artifacts for this model/config must be generated locally. Please make sure {self.config['framework']} is installed."
+                )
+            except AssertionError as err:
+                if i < dl_gen_attempts - 1:
+                    continue
+                else:
+                    pytest.xfail(
+                        "Generating OTF may require exiting the subprocess for files to be available."
+                    )
+            break
        shark_module = SharkInference(
            model,
            device=device,