Use correct TF device depending on configuration (#492)

Author: mariecwhite
Date: 2022-12-02 09:33:56 -08:00
Committed by: GitHub
Parent: 8c158f2452
Commit: 6393dc5d14

3 changed files with 39 additions and 35 deletions


@@ -125,27 +125,29 @@ class SharkBenchmarkRunner(SharkRunner):
         import tensorflow as tf
         from tank.model_utils_tf import get_tf_model
 
-        model, input, = get_tf_model(
-            modelname
-        )[:2]
-        frontend_model = model
+        tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
+        with tf.device(tf_device):
+            model, input, = get_tf_model(
+                modelname
+            )[:2]
+            frontend_model = model
 
-        for i in range(shark_args.num_warmup_iterations):
-            frontend_model.forward(*input)
+            for i in range(shark_args.num_warmup_iterations):
+                frontend_model.forward(*input)
 
-        begin = time.time()
-        for i in range(shark_args.num_iterations):
-            out = frontend_model.forward(*input)
-            if i == shark_args.num_iterations - 1:
-                end = time.time()
-                break
-        print(
-            f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-        return [
-            f"{shark_args.num_iterations/(end-begin)}",
-            f"{((end-begin)/shark_args.num_iterations)*1000}",
-        ]
+            begin = time.time()
+            for i in range(shark_args.num_iterations):
+                out = frontend_model.forward(*input)
+                if i == shark_args.num_iterations - 1:
+                    end = time.time()
+                    break
+            print(
+                f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+            )
+            return [
+                f"{shark_args.num_iterations/(end-begin)}",
+                f"{((end-begin)/shark_args.num_iterations)*1000}",
+            ]
 
     def benchmark_c(self):
        print(self.benchmark_cl)
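
After this change, both model loading and the timed loop execute inside an explicit tf.device scope, so TensorFlow places ops on the GPU only when SHARK's device is "cuda". A minimal standalone sketch of the same pattern follows; benchmark_tf and the Keras-style model(*inputs) call are illustrative stand-ins, not SHARK's API:

import time

import tensorflow as tf


def benchmark_tf(model, inputs, device="cuda", num_warmup=5, num_iters=10):
    # Map the configured device name onto a TensorFlow device string.
    tf_device = "/GPU:0" if device == "cuda" else "/CPU:0"
    with tf.device(tf_device):
        # Warm-up runs are excluded from the timed window.
        for _ in range(num_warmup):
            model(*inputs)
        begin = time.time()
        for _ in range(num_iters):
            model(*inputs)
        end = time.time()
    # Throughput in iterations/second and latency in milliseconds/iteration,
    # matching the two strings the benchmark above returns.
    return num_iters / (end - begin), (end - begin) / num_iters * 1000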


@@ -6,16 +6,6 @@ from transformers import (
     TFBertModel,
 )
 
 
-visible_default = tf.config.list_physical_devices("GPU")
-try:
-    tf.config.set_visible_devices([], "GPU")
-    visible_devices = tf.config.get_visible_devices()
-    for device in visible_devices:
-        assert device.device_type != "GPU"
-except:
-    # Invalid device or cannot modify virtual devices once initialized.
-    pass
-
 BATCH_SIZE = 1
 MAX_SEQUENCE_LENGTH = 128
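
The deleted block hid every GPU from TensorFlow at import time, which pinned these models to the CPU regardless of configuration; with it gone, placement is decided by the tf.device scope in the benchmark runner. To double-check where ops actually land after a change like this, TensorFlow can log per-op placement (a quick illustrative check, not part of this commit):

import tensorflow as tf

# Print the device each op is assigned to as it runs.
tf.debugging.set_log_device_placement(True)

with tf.device("/CPU:0"):
    x = tf.constant([[1.0, 2.0]])
    y = tf.matmul(x, tf.transpose(x))  # logged as placed on CPU:0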


@@ -20,6 +20,7 @@ import csv
 import tempfile
 import os
 import shutil
+import multiprocessing
 
 
 def load_csv_and_convert(filename, gen=False):

@@ -241,6 +242,16 @@ class SharkModuleTester:
         return expected, logits
 
 
+def run_test(module_tester, dynamic, device):
+    tempdir = tempfile.TemporaryDirectory(
+        prefix=module_tester.tmp_prefix, dir="./shark_tmp/"
+    )
+    module_tester.temp_dir = tempdir.name
+
+    with ireec.tools.TempFileSaver(tempdir.name):
+        module_tester.create_and_check_module(dynamic, device)
+
+
 class SharkModuleTest(unittest.TestCase):
     @pytest.fixture(autouse=True)
     def configure(self, pytestconfig):

@@ -485,10 +496,11 @@ class SharkModuleTest(unittest.TestCase):
         if not os.path.isdir("./shark_tmp/"):
             os.mkdir("./shark_tmp/")
-        tempdir = tempfile.TemporaryDirectory(
-            prefix=self.module_tester.tmp_prefix, dir="./shark_tmp/"
+        # We must create a new process each time we benchmark a model to allow
+        # for Tensorflow to release GPU resources. Using the same process to
+        # benchmark multiple models leads to OOM.
+        p = multiprocessing.Process(
+            target=run_test, args=(self.module_tester, dynamic, device)
         )
-        self.module_tester.temp_dir = tempdir.name
-
-        with ireec.tools.TempFileSaver(tempdir.name):
-            self.module_tester.create_and_check_module(dynamic, device)
+        p.start()
+        p.join()
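
TensorFlow does not hand GPU memory back to the system while a process is alive, so the test harness now gives each benchmarked model its own multiprocessing.Process: everything the child allocates is reclaimed when it exits, and p.join() keeps runs sequential. A self-contained sketch of the pattern, with a hypothetical run_one worker standing in for run_test:

import multiprocessing


def run_one(model_name):
    # Hypothetical worker standing in for run_test; any GPU memory it
    # allocates is released when this child process exits.
    print(f"benchmarking {model_name}")


if __name__ == "__main__":
    for name in ["model_a", "model_b"]:
        p = multiprocessing.Process(target=run_one, args=(name,))
        p.start()
        p.join()  # serialize so only one model holds the GPU at a time

One caveat of this design: arguments handed to the child (here self.module_tester) must be picklable under the spawn start method, and attributes the worker sets, such as temp_dir, change the child's copy rather than the parent's.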