mirror of
https://github.com/nod-ai/AMD-SHARK-Studio.git
synced 2026-04-03 03:00:17 -04:00
Use correct TF device depending on configuration (#492)
This commit is contained in:
@@ -125,27 +125,29 @@ class SharkBenchmarkRunner(SharkRunner):
|
||||
import tensorflow as tf
|
||||
from tank.model_utils_tf import get_tf_model
|
||||
|
||||
model, input, = get_tf_model(
|
||||
modelname
|
||||
)[:2]
|
||||
frontend_model = model
|
||||
tf_device = "/GPU:0" if self.device == "cuda" else "/CPU:0"
|
||||
with tf.device(tf_device):
|
||||
model, input, = get_tf_model(
|
||||
modelname
|
||||
)[:2]
|
||||
frontend_model = model
|
||||
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
frontend_model.forward(*input)
|
||||
for i in range(shark_args.num_warmup_iterations):
|
||||
frontend_model.forward(*input)
|
||||
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = frontend_model.forward(*input)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
break
|
||||
print(
|
||||
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
return [
|
||||
f"{shark_args.num_iterations/(end-begin)}",
|
||||
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
||||
]
|
||||
begin = time.time()
|
||||
for i in range(shark_args.num_iterations):
|
||||
out = frontend_model.forward(*input)
|
||||
if i == shark_args.num_iterations - 1:
|
||||
end = time.time()
|
||||
break
|
||||
print(
|
||||
f"TF benchmark:{shark_args.num_iterations/(end-begin)} iter/second, Total Iterations:{shark_args.num_iterations}"
|
||||
)
|
||||
return [
|
||||
f"{shark_args.num_iterations/(end-begin)}",
|
||||
f"{((end-begin)/shark_args.num_iterations)*1000}",
|
||||
]
|
||||
|
||||
def benchmark_c(self):
    """Emit the collected C-level benchmark results to stdout."""
    results = self.benchmark_cl
    print(results)
|
||||
|
||||
@@ -6,16 +6,6 @@ from transformers import (
|
||||
TFBertModel,
|
||||
)
|
||||
|
||||
# Hide all GPUs from TensorFlow so the models in this module run on CPU.
visible_default = tf.config.list_physical_devices("GPU")
try:
    tf.config.set_visible_devices([], "GPU")
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != "GPU"
except (ValueError, RuntimeError, AssertionError):
    # ValueError/RuntimeError: invalid device, or visible devices cannot
    # be modified once the TF runtime has been initialized.
    # AssertionError: a GPU stayed visible despite the request.
    # This is best-effort GPU hiding, so swallow these specific failures
    # instead of the original bare `except:` (which also caught
    # SystemExit/KeyboardInterrupt).
    pass

# Fixed input dimensions used by the bundled models.
BATCH_SIZE = 1
MAX_SEQUENCE_LENGTH = 128
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import csv
|
||||
import tempfile
|
||||
import os
|
||||
import shutil
|
||||
import multiprocessing
|
||||
|
||||
|
||||
def load_csv_and_convert(filename, gen=False):
|
||||
@@ -241,6 +242,16 @@ class SharkModuleTester:
|
||||
return expected, logits
|
||||
|
||||
|
||||
def run_test(module_tester, dynamic, device):
    """Run one module test inside its own temp directory under ./shark_tmp/.

    Intended to be the target of a fresh process per model (see the
    ``multiprocessing.Process`` call sites) so TensorFlow can release GPU
    resources between benchmarks.
    """
    scratch = tempfile.TemporaryDirectory(
        prefix=module_tester.tmp_prefix, dir="./shark_tmp/"
    )
    module_tester.temp_dir = scratch.name

    saver = ireec.tools.TempFileSaver(scratch.name)
    with saver:
        module_tester.create_and_check_module(dynamic, device)
|
||||
|
||||
|
||||
class SharkModuleTest(unittest.TestCase):
|
||||
@pytest.fixture(autouse=True)
|
||||
def configure(self, pytestconfig):
|
||||
@@ -485,10 +496,11 @@ class SharkModuleTest(unittest.TestCase):
|
||||
if not os.path.isdir("./shark_tmp/"):
|
||||
os.mkdir("./shark_tmp/")
|
||||
|
||||
tempdir = tempfile.TemporaryDirectory(
|
||||
prefix=self.module_tester.tmp_prefix, dir="./shark_tmp/"
|
||||
# We must create a new process each time we benchmark a model to allow
|
||||
# for Tensorflow to release GPU resources. Using the same process to
|
||||
# benchmark multiple models leads to OOM.
|
||||
p = multiprocessing.Process(
|
||||
target=run_test, args=(self.module_tester, dynamic, device)
|
||||
)
|
||||
self.module_tester.temp_dir = tempdir.name
|
||||
|
||||
with ireec.tools.TempFileSaver(tempdir.name):
|
||||
self.module_tester.create_and_check_module(dynamic, device)
|
||||
p.start()
|
||||
p.join()
|
||||
|
||||
Reference in New Issue
Block a user