diff --git a/pytest.ini b/pytest.ini
index 157f2c5f..e1ee8b79 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,4 +1,4 @@
 [pytest]
-addopts = --verbose -p no:warnings
+addopts = --workers auto --verbose -p no:warnings
 norecursedirs = inference
diff --git a/requirements.txt b/requirements.txt
index dd500bb2..253006d8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,4 +10,5 @@
 pybind11
 wheel
 ninja
 pytest
+pytest-parallel
diff --git a/shark/examples/test_cpu.py b/shark/examples/test_models.py
similarity index 54%
rename from shark/examples/test_cpu.py
rename to shark/examples/test_models.py
index 4d23ae65..ca5017f3 100644
--- a/shark/examples/test_cpu.py
+++ b/shark/examples/test_models.py
@@ -4,39 +4,47 @@ import torch
 import numpy as np
 import torchvision.models as models
 from transformers import AutoModelForSequenceClassification
+import pytest

 torch.manual_seed(0)

 ##################### Hugging Face LM Models ###################################

+
 class HuggingFaceLanguage(torch.nn.Module):
+
     def __init__(self, hf_model_name):
         super().__init__()
         self.model = AutoModelForSequenceClassification.from_pretrained(
             hf_model_name,  # The pretrained model.
-            num_labels=2,  # The number of output labels--2 for binary classification.
-            output_attentions=False,  # Whether the model returns attentions weights.
-            output_hidden_states=False,  # Whether the model returns all hidden-states.
+            num_labels=
+            2,  # The number of output labels--2 for binary classification.
+            output_attentions=
+            False,  # Whether the model returns attention weights.
+            output_hidden_states=
+            False,  # Whether the model returns all hidden states.
             torchscript=True,
         )

     def forward(self, tokens):
         return self.model.forward(tokens)[0]

+
 def get_hf_model(name):
     model = HuggingFaceLanguage(name)
     # TODO: Currently the test input is set to (1,128)
-    test_input = torch.randint(2, (1,128))
+    test_input = torch.randint(2, (1, 128))
     actual_out = model(test_input)
     return model, test_input, actual_out

 ################################################################################
-
 ##################### Torch Vision Models ###################################

+
 class VisionModule(torch.nn.Module):
+
     def __init__(self, model):
         super().__init__()
         self.model = model
@@ -56,6 +64,7 @@ def get_vision_model(torch_model):

 ################################################################################

+
 # Utility function for comparing two tensors.
 def compare_tensors(torch_tensor, numpy_tensor):
     # setting the absolute and relative tolerance
@@ -65,63 +74,118 @@ def compare_tensors(torch_tensor, numpy_tensor):
     return np.allclose(torch_to_numpy, numpy_tensor, rtol, atol)


-def test_bert():
+################################################################################
+
+############################# Model Tests ####################################
+
+# Remove the skip marks below to run a test with the corresponding parameters.
+pytest_param = pytest.mark.parametrize(('dynamic', 'device'), [
+    pytest.param(False, 'cpu'),
+    pytest.param(
+        True,
+        'cpu',
+        marks=pytest.mark.skip(reason="dynamic-shape is not supported.")),
+    pytest.param(
+        False,
+        'gpu',
+        marks=pytest.mark.skip(reason="device not supported in the CI.")),
+    pytest.param(
+        True,
+        'gpu',
+        marks=pytest.mark.skip(reason="device not supported in the CI.")),
+    pytest.param(
+        False,
+        'vulkan',
+        marks=pytest.mark.skip(reason="device not supported in the CI.")),
+    pytest.param(
+        True,
+        'vulkan',
+        marks=pytest.mark.skip(reason="device not supported in the CI.")),
+])
+
+
+@pytest_param
+def test_bert(dynamic, device):
     model, input, act_out = get_hf_model("bert-base-uncased")
-    shark_module = SharkInference(
-        model, (input,), device="cpu", jit_trace=True
-    )
+    shark_module = SharkInference(model, (input,),
+                                  device=device,
+                                  dynamic=dynamic,
+                                  jit_trace=True)
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_albert():
+@pytest_param
+def test_albert(dynamic, device):
     model, input, act_out = get_hf_model("albert-base-v2")
-    shark_module = SharkInference(
-        model, (input,), device="cpu", jit_trace=True
-    )
+    shark_module = SharkInference(model, (input,),
+                                  device=device,
+                                  dynamic=dynamic,
+                                  jit_trace=True)
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_resnet18():
-    model, input, act_out = get_vision_model(models.resnet18(pretrained = True))
+
+@pytest_param
+def test_resnet18(dynamic, device):
+    model, input, act_out = get_vision_model(models.resnet18(pretrained=True))
     shark_module = SharkInference(
         model,
         (input,),
+        device=device,
+        dynamic=dynamic,
     )
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_resnet50():
-    model, input, act_out = get_vision_model(models.resnet50(pretrained = True))
+
+@pytest_param
+def test_resnet50(dynamic, device):
+    model, input, act_out = get_vision_model(models.resnet50(pretrained=True))
     shark_module = SharkInference(
         model,
         (input,),
+        device=device,
+        dynamic=dynamic,
     )
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_wide_resnet50():
-    model, input, act_out = get_vision_model(models.wide_resnet50_2(pretrained = True))
+
+@pytest_param
+def test_wide_resnet50(dynamic, device):
+    model, input, act_out = get_vision_model(
+        models.wide_resnet50_2(pretrained=True))
     shark_module = SharkInference(
         model,
         (input,),
+        device=device,
+        dynamic=dynamic,
     )
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_minilm():
+
+@pytest_param
+def test_minilm(dynamic, device):
     model, input, act_out = get_hf_model("microsoft/MiniLM-L12-H384-uncased")
-    shark_module = SharkInference(
-        model, (input,), device="cpu", jit_trace=True
-    )
+    shark_module = SharkInference(model, (input,),
+                                  device=device,
+                                  dynamic=dynamic,
+                                  jit_trace=True)
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)


-def test_squeezenet():
-    model, input, act_out = get_vision_model(models.squeezenet1_0(pretrained = True))
+
+@pytest_param
+def test_squeezenet(dynamic, device):
+    model, input, act_out = get_vision_model(
+        models.squeezenet1_0(pretrained=True))
     shark_module = SharkInference(
         model,
         (input,),
+        device=device,
+        dynamic=dynamic,
     )
     results = shark_module.forward((input,))
     assert True == compare_tensors(act_out, results)
diff --git a/shark/shark_runner.py b/shark/shark_runner.py
index ab1ebbb2..6f2e86e2 100644
--- a/shark/shark_runner.py
+++ b/shark/shark_runner.py
@@ -146,7 +146,7 @@ class SharkTrainer:
                                               dynamic, self.device, jit_trace,
                                               from_aot)

-    def train(self, num_iters = 1):
+    def train(self, num_iters=1):
         """Returns the updated weights after num_iters"""
         weights = [x.detach().numpy() for x in self.weights]
         inputs = [x.detach().numpy() for x in self.input]
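
Usage note (a minimal sketch, not part of the diff): pytest.mark.parametrize generates test ids such as test_bert[False-cpu], so once a skip mark is removed, individual configurations can be selected with -k; the --workers auto option that pytest-parallel enables is applied automatically through the addopts line in pytest.ini. The path below assumes the renamed file location from this diff.

    # Hypothetical driver script; runs only the cpu configurations of the
    # parametrized tests in the renamed test file.
    import sys
    import pytest

    if __name__ == "__main__":
        # pytest.ini's addopts contributes `--workers auto --verbose
        # -p no:warnings`, so pytest-parallel fans the selected tests
        # out across worker threads.
        sys.exit(pytest.main(["shark/examples/test_models.py", "-k", "cpu"]))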