prevents recompiles for cuda benchmarks + update benchmark_module path (#1759)

* xfail resnet50_fp16 * Fix cuda benchmarks and prevent recompilation.
2026-01-10 06:17:55 -05:00 · 2023-08-14 15:30:32 -05:00
parent 4f61d69d86
commit c96571855a
4 changed files with 34 additions and 25 deletions
--- a/shark/iree_utils/benchmark_utils.py
+++ b/shark/iree_utils/benchmark_utils.py
@@ -62,16 +62,12 @@ def build_benchmark_args(
    and whether it is training or not.
    Outputs: string that execute benchmark-module on target model.
    """
-    path = benchmark_module.__path__[0]
+    path = os.path.join(os.environ["VIRTUAL_ENV"], "bin")
    if platform.system() == "Windows":
-        benchmarker_path = os.path.join(
-            path, "..", "..", "iree-benchmark-module.exe"
-        )
+        benchmarker_path = os.path.join(path, "iree-benchmark-module.exe")
        time_extractor = None
    else:
-        benchmarker_path = os.path.join(
-            path, "..", "..", "iree-benchmark-module"
-        )
+        benchmarker_path = os.path.join(path, "iree-benchmark-module")
        time_extractor = "| awk 'END{{print $2 $3}}'"
    benchmark_cl = [benchmarker_path, f"--module={input_file}"]
    # TODO: The function named can be passed as one of the args.
--- a/shark/shark_benchmark_runner.py
+++ b/shark/shark_benchmark_runner.py
@@ -13,7 +13,11 @@
 # limitations under the License.

 from shark.shark_runner import SharkRunner
-from shark.iree_utils.compile_utils import export_iree_module_to_vmfb
+from shark.iree_utils.compile_utils import (
+    export_iree_module_to_vmfb,
+    load_flatbuffer,
+    get_iree_runtime_config,
+)
 from shark.iree_utils.benchmark_utils import (
    build_benchmark_args,
    run_benchmark_module,
@@ -79,22 +83,31 @@ class SharkBenchmarkRunner(SharkRunner):
        self.mlir_dialect = mlir_dialect
        self.extra_args = extra_args
        self.import_args = {}
+        self.temp_file_to_unlink = None
        SharkRunner.__init__(
            self,
            mlir_module,
            device,
            self.mlir_dialect,
            self.extra_args,
-            compile_vmfb=True,
+            compile_vmfb=False,
        )
-        if self.vmfb_file == None:
-            self.vmfb_file = export_iree_module_to_vmfb(
-                mlir_module,
-                device,
-                ".",
-                self.mlir_dialect,
-                extra_args=self.extra_args,
-            )
+        self.vmfb_file = export_iree_module_to_vmfb(
+            mlir_module,
+            device,
+            ".",
+            self.mlir_dialect,
+            extra_args=self.extra_args,
+        )
+        params = load_flatbuffer(
+            self.vmfb_file,
+            device,
+            mmap=True,
+        )
+        self.iree_compilation_module = params["vmfb"]
+        self.iree_config = params["config"]
+        self.temp_file_to_unlink = params["temp_file_to_unlink"]
+        del params

    def setup_cl(self, input_tensors):
        self.benchmark_cl = build_benchmark_args(
--- a/tank/all_models.csv
+++ b/tank/all_models.csv
@@ -30,7 +30,7 @@ nvidia/mit-b0,linalg,torch,1e-2,1e-3,default,None,True,True,True,"https://github
 resnet101,linalg,torch,1e-2,1e-3,default,nhcw-nhwc/img2col,True,False,False,"","macos"
 resnet18,linalg,torch,1e-2,1e-3,default,None,True,True,False,"","macos"
 resnet50,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
-resnet50_fp16,linalg,torch,1e-2,1e-2,default,nhcw-nhwc/img2col,True,False,True,"",""
+resnet50_fp16,linalg,torch,1e-2,1e-2,default,nhcw-nhwc/img2col,True,True,True,"Numerics issues, awaiting cuda-independent fp16 integration",""
 squeezenet1_0,linalg,torch,1e-2,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
 wide_resnet50_2,linalg,torch,1e-2,1e-3,default,nhcw-nhwc/img2col,True,False,False,"","macos"
 efficientnet-v2-s,stablehlo,tf,1e-02,1e-3,default,nhcw-nhwc,False,False,False,"","macos"
--- a/tank/test_models.py
+++ b/tank/test_models.py
@@ -145,6 +145,7 @@ class SharkModuleTester:
        shark_args.shark_prefix = self.shark_tank_prefix
        shark_args.local_tank_cache = self.local_tank_cache
        shark_args.dispatch_benchmarks = self.benchmark_dispatches
+        shark_args.enable_tf32 = self.tf32

        if self.benchmark_dispatches is not None:
            _m = self.config["model_name"].split("/")
@@ -216,10 +217,12 @@ class SharkModuleTester:

        result = shark_module(func_name, inputs)
        golden_out, result = self.postprocess_outputs(golden_out, result)
-        if self.tf32 == "true":
-            print("Validating with relaxed tolerances.")
-            atol = 1e-02
-            rtol = 1e-03
+        if self.tf32 == True:
+            print(
+                "Validating with relaxed tolerances for TensorFloat32 calculations."
+            )
+            self.config["atol"] = 1e-01
+            self.config["rtol"] = 1e-02
        try:
            np.testing.assert_allclose(
                golden_out,
@@ -254,9 +257,6 @@ class SharkModuleTester:
        model_config = {
            "batch_size": self.batch_size,
        }
-        shark_args.enable_tf32 = self.tf32
-        if shark_args.enable_tf32 == True:
-            shark_module.compile()

        shark_args.onnx_bench = self.onnx_bench
        shark_module.shark_runner.benchmark_all_csv(