Use correct TF device depending on configuration (#492)

Author: mariecwhite
Date: 2022-12-02 09:33:56 -08:00
Committed by: GitHub
Parent: 8c158f2452
Commit: 6393dc5d14

3 changed files with 39 additions and 35 deletions


@@ -125,27 +125,29 @@ class SharkBenchmarkRunner(SharkRunner):
         import tensorflow as tf
         from tank.model_utils_tf import get_tf_model
 
-        model, input, = get_tf_model(
-            modelname
-        )[:2]
-        frontend_model = model
+        tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
+        with tf.device(tf_device):
+            model, input, = get_tf_model(
+                modelname
+            )[:2]
+            frontend_model = model
 
-        for i in range(shark_args.num_warmup_iterations):
-            frontend_model.forward(*input)
+            for i in range(shark_args.num_warmup_iterations):
+                frontend_model.forward(*input)
 
-        begin = time.time()
-        for i in range(shark_args.num_iterations):
-            out = frontend_model.forward(*input)
-            if i == shark_args.num_iterations - 1:
-                end = time.time()
-                break
-        print(
-            f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-        return [
-            f"{shark_args.num_iterations/(end-begin)}",
-            f"{((end-begin)/shark_args.num_iterations)*1000}",
-        ]
+            begin = time.time()
+            for i in range(shark_args.num_iterations):
+                out = frontend_model.forward(*input)
+                if i == shark_args.num_iterations - 1:
+                    end = time.time()
+                    break
+            print(
+                f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
+            )
+            return [
+                f"{shark_args.num_iterations/(end-begin)}",
+                f"{((end-begin)/shark_args.num_iterations)*1000}",
+            ]
 
     def benchmark_c(self):
        print(self.benchmark_cl)
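
After this change, both model loading and the timed loop execute inside an explicit tf.device scope, so TensorFlow places ops on the GPU only when SHARK's device is "cuda". A minimal standalone sketch of the same pattern follows; benchmark_tf and the Keras-style model(*inputs) call are illustrative stand-ins, not SHARK's API:

import time

import tensorflow as tf


def benchmark_tf(model, inputs, device="cuda", num_warmup=5, num_iters=10):
    # Map the configured device name onto a TensorFlow device string.
    tf_device = "/GPU:0" if device == "cuda" else "/CPU:0"
    with tf.device(tf_device):
        # Warm-up runs are excluded from the timed window.
        for _ in range(num_warmup):
            model(*inputs)
        begin = time.time()
        for _ in range(num_iters):
            model(*inputs)
        end = time.time()
    # Throughput in iterations/second and latency in milliseconds/iteration,
    # matching the two strings the benchmark above returns.
    return num_iters / (end - begin), (end - begin) / num_iters * 1000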


@@ -6,16 +6,6 @@ from transformers import (
     TFBertModel,
 )
 
 
-visible_default = tf.config.list_physical_devices("GPU")
-try:
-    tf.config.set_visible_devices([], "GPU")
-    visible_devices = tf.config.get_visible_devices()
-    for device in visible_devices:
-        assert device.device_type != "GPU"
-except:
-    # Invalid device or cannot modify virtual devices once initialized.
-    pass
-
 BATCH_SIZE = 1
 MAX_SEQUENCE_LENGTH = 128
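
The deleted block hid every GPU from TensorFlow at import time, which pinned these models to the CPU regardless of configuration; with it gone, placement is decided by the tf.device scope in the benchmark runner. To double-check where ops actually land after a change like this, TensorFlow can log per-op placement (a quick illustrative check, not part of this commit):

import tensorflow as tf

# Print the device each op is assigned to as it runs.
tf.debugging.set_log_device_placement(True)

with tf.device("/CPU:0"):
    x = tf.constant([[1.0, 2.0]])
    y = tf.matmul(x, tf.transpose(x))  # logged as placed on CPU:0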


@@ -20,6 +20,7 @@ import csv
 import tempfile
 import os
 import shutil
+import multiprocessing
 
 
 def load_csv_and_convert(filename, gen=False):

@@ -241,6 +242,16 @@ class SharkModuleTester:
         return expected, logits
 
 
+def run_test(module_tester, dynamic, device):
+    tempdir = tempfile.TemporaryDirectory(
+        prefix=module_tester.tmp_prefix, dir="./shark_tmp/"
+    )
+    module_tester.temp_dir = tempdir.name
+
+    with ireec.tools.TempFileSaver(tempdir.name):
+        module_tester.create_and_check_module(dynamic, device)
+
+
 class SharkModuleTest(unittest.TestCase):
     @pytest.fixture(autouse=True)
     def configure(self, pytestconfig):

@@ -485,10 +496,11 @@ class SharkModuleTest(unittest.TestCase):
         if not os.path.isdir("./shark_tmp/"):
             os.mkdir("./shark_tmp/")
-        tempdir = tempfile.TemporaryDirectory(
-            prefix=self.module_tester.tmp_prefix, dir="./shark_tmp/"
+        # We must create a new process each time we benchmark a model to allow
+        # for Tensorflow to release GPU resources. Using the same process to
+        # benchmark multiple models leads to OOM.
+        p = multiprocessing.Process(
+            target=run_test, args=(self.module_tester, dynamic, device)
         )
-        self.module_tester.temp_dir = tempdir.name
-
-        with ireec.tools.TempFileSaver(tempdir.name):
-            self.module_tester.create_and_check_module(dynamic, device)
+        p.start()
+        p.join()
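
TensorFlow does not hand GPU memory back to the system while a process is alive, so the test harness now gives each benchmarked model its own multiprocessing.Process: everything the child allocates is reclaimed when it exits, and p.join() keeps runs sequential. A self-contained sketch of the pattern, with a hypothetical run_one worker standing in for run_test:

import multiprocessing


def run_one(model_name):
    # Hypothetical worker standing in for run_test; any GPU memory it
    # allocates is released when this child process exits.
    print(f"benchmarking {model_name}")


if __name__ == "__main__":
    for name in ["model_a", "model_b"]:
        p = multiprocessing.Process(target=run_one, args=(name,))
        p.start()
        p.join()  # serialize so only one model holds the GPU at a time

One caveat of this design: arguments handed to the child (here self.module_tester) must be picklable under the spawn start method, and attributes the worker sets, such as temp_dir, change the child's copy rather than the parent's.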