Remove "gpu" device alias and migrate to using "cuda" for NVIDIA GPU. (#325)

* Replace instances of "gpu" alias for devices with "cuda"
This commit is contained in:
Ean Garvey
2022-09-13 01:16:56 -05:00
committed by GitHub
parent 7143902a90
commit 6cf5564c84
19 changed files with 34 additions and 36 deletions

View File

@@ -16,7 +16,7 @@ jobs:
fail-fast: true
matrix:
os: [icelake, a100, MacStudio, ubuntu-latest]
suite: [cpu,gpu,vulkan]
suite: [cpu,cuda,vulkan]
python-version: ["3.10"]
include:
- os: ubuntu-latest
@@ -25,11 +25,11 @@ jobs:
- os: ubuntu-latest
suite: vulkan
- os: ubuntu-latest
suite: gpu
suite: cuda
- os: ubuntu-latest
suite: cpu
- os: MacStudio
suite: gpu
suite: cuda
- os: MacStudio
suite: cpu
- os: MacStudio
@@ -37,7 +37,7 @@ jobs:
- os: icelake
suite: vulkan
- os: icelake
suite: gpu
suite: cuda
- os: a100
suite: cpu
@@ -84,7 +84,7 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude lit.cfg.py
- name: Validate CPU Models
- name: Validate Models on CPU
if: matrix.suite == 'cpu'
run: |
cd $GITHUB_WORKSPACE
@@ -94,15 +94,15 @@ jobs:
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
- name: Validate GPU Models
if: matrix.suite == 'gpu'
- name: Validate Models on NVIDIA GPU
if: matrix.suite == 'cuda'
run: |
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark -k "gpu" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_gpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_gpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_gpu_latest.csv
pytest --benchmark -k "cuda" --ignore=shark/tests/test_shark_importer.py --ignore=benchmarks/tests/test_hf_benchmark.py --ignore=benchmarks/tests/test_benchmark.py
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
- name: Validate Vulkan Models
if: matrix.suite == 'vulkan'

View File

@@ -39,7 +39,6 @@ def run_cmd(cmd):
IREE_DEVICE_MAP = {
"cpu": "local-task",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
@@ -49,7 +48,6 @@ IREE_DEVICE_MAP = {
IREE_TARGET_MAP = {
"cpu": "llvm-cpu",
"gpu": "cuda",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "vulkan",
@@ -60,7 +58,7 @@ IREE_TARGET_MAP = {
# Finds whether the required drivers are installed for the given device.
def check_device_drivers(device):
"""Checks necessary drivers present for gpu and vulkan devices"""
if device in ["gpu", "cuda"]:
if device == "cuda":
try:
subprocess.check_output("nvidia-smi")
except Exception:
@@ -87,7 +85,7 @@ def check_device_drivers(device):
# Installation info for the missing device drivers.
def device_driver_info(device):
if device in ["gpu", "cuda"]:
if device == "cuda":
return "nvidia-smi not found, please install the required drivers from https://www.nvidia.in/Download/index.aspx?lang=en-in"
elif device in ["metal", "vulkan"]:
return "vulkaninfo not found, Install from https://vulkan.lunarg.com/sdk/home or your distribution"

View File

@@ -23,7 +23,7 @@ def get_iree_device_args(device):
from shark.iree_utils.cpu_utils import get_iree_cpu_args
return get_iree_cpu_args()
if device in ["gpu", "cuda"]:
if device == "cuda":
from shark.iree_utils.gpu_utils import get_iree_gpu_args
return get_iree_gpu_args()

View File

@@ -38,7 +38,7 @@ parser.add_argument(
"--device",
type=str,
default="cpu",
help="Device on which shark_runner runs. options are cpu, gpu, and vulkan",
help="Device on which shark_runner runs. options are cpu, cuda, and vulkan",
)
parser.add_argument(
"--repro_dir",

View File

@@ -83,12 +83,12 @@ class SharkBenchmarkRunner(SharkRunner):
import torch
from tank.model_utils import get_torch_model
if self.device == "gpu":
if self.device == "cuda":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:
torch.set_default_tensor_type(torch.FloatTensor)
torch_device = torch.device(
"cuda:0" if self.device == "gpu" else "cpu"
"cuda:0" if self.device == "cuda" else "cpu"
)
HFmodel, input = get_torch_model(modelname)[:2]
frontend_model = HFmodel.model
@@ -163,7 +163,7 @@ class SharkBenchmarkRunner(SharkRunner):
]
def benchmark_onnx(self, modelname, inputs):
if self.device == "gpu":
if self.device == "cuda":
print(
"Currently GPU benchmarking on ONNX is not supported in SHARK."
)
@@ -186,7 +186,7 @@ https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/tr
for currently supported models. Exiting benchmark ONNX."
)
return ["N/A", "N/A"]
use_gpu = self.device == "gpu"
use_gpu = self.device == "cuda"
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [1]
sequence_lengths = [128]

View File

@@ -70,7 +70,7 @@ class DistilBertModuleTest(unittest.TestCase):
pytest.skip(
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
)
elif device == "gpu":
elif device == "cuda":
if dynamic == False:
pytest.skip(
reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"

View File

@@ -50,7 +50,7 @@ class ConvNextTinyModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device in ["gpu", "cuda"]:
if device in ["cuda"]:
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -48,7 +48,7 @@ class DeitModuleTest(unittest.TestCase):
pytest.skip(
reason="Dynamic Test not Supported: mlir file not found"
)
if device in ["gpu", "cuda"]:
if device in ["cuda"]:
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -40,7 +40,7 @@ class FunnelModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device in ["gpu", "metal", "vulkan"]:
if device in ["cuda", "metal", "vulkan"]:
pytest.xfail(
reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
)

View File

@@ -47,7 +47,7 @@ class VitBaseModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device in ["gpu", "cuda"]:
if device in ["cuda"]:
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -49,7 +49,7 @@ class VitBaseModuleTest(unittest.TestCase):
reason="Dynamic tests not supported. mlir file not found."
)
if device in ["gpu", "cuda"]:
if device in ["cuda"]:
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -62,7 +62,7 @@ class MobileNetV3ModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device == "gpu":
if device == "cuda":
pytest.xfail(reason="golden results don't match.")
elif device in ["vulkan", "metal"]:
if dynamic == False:

View File

@@ -49,7 +49,7 @@ class MitModuleTest(unittest.TestCase):
reason="Dynamic tests not supported. mlir file not found."
)
if device in ["gpu", "cuda"]:
if device in ["cuda"]:
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/311")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -235,7 +235,7 @@ mlir_model = module
func_name = "forward"
shark_module = SharkInference(
mlir_model, func_name, device="gpu", mlir_dialect="linalg"
mlir_model, func_name, device="cuda", mlir_dialect="linalg"
)
shark_module.compile()

View File

@@ -236,7 +236,7 @@ ts_g = torch.jit.script(fx_g)
# func_name = "forward"
#
# shark_module = SharkInference(
# mlir_model, func_name, device="gpu", mlir_dialect="linalg"
# mlir_model, func_name, device="cuda", mlir_dialect="linalg"
# )
# shark_module.compile()

View File

@@ -45,7 +45,7 @@ class RobertaBaseModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device == "gpu":
if device == "cuda":
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
self.module_tester.create_and_check_module(dynamic, device)

View File

@@ -43,11 +43,11 @@ class TapasBaseModuleTest(unittest.TestCase):
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(
check_device_drivers("gpu"), reason=device_driver_info("gpu")
check_device_drivers("cuda"), reason=device_driver_info("cuda")
)
def test_module_static_gpu(self):
def test_module_static_cuda(self):
dynamic = False
device = "gpu"
device = "cuda"
self.module_tester.create_and_check_module(dynamic, device)
@pytest.mark.skipif(

View File

@@ -43,7 +43,7 @@ class XLMRobertaModuleTest(unittest.TestCase):
@parameterized.expand(param_list, name_func=shark_test_name_func)
def test_module(self, dynamic, device):
if device == "gpu":
if device == "cuda":
pytest.xfail(reason="https://github.com/nod-ai/SHARK/issues/274")
elif device in ["metal", "vulkan"]:
if dynamic == False:

View File

@@ -254,7 +254,7 @@ def vdiff_inf(prompts: str, n, bs, steps):
mlir_model = module
func_name = "forward"
shark_module = SharkInference(
mlir_model, func_name, device="gpu", mlir_dialect="linalg"
mlir_model, func_name, device="cuda", mlir_dialect="linalg"
)
shark_module.compile()
return run_all(