WIP: Intel-GPU integration

commit c47218c972
parent c6b4ad1e26
Author: Stanley Winata
Date:   2022-08-02 14:42:51 -07:00

27 changed files with 418 additions and 3 deletions

View File

@@ -0,0 +1,59 @@
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1


class AlbertModule(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
        self.model.eval()

    def forward(self, input_ids, attention_mask):
        return self.model(input_ids=input_ids, attention_mask=attention_mask).logits


if __name__ == "__main__":
    # Prepping Data
    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
    text = "This [MASK] is very tasty."
    encoded_inputs = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="pt")
    inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
    mlir_importer = SharkImporter(
        AlbertModule(),
        inputs,
        frontend="torch",
    )
    minilm_mlir, func_name = mlir_importer.import_mlir(
        is_dynamic=False, tracing_required=True
    )
    shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="linalg")
    shark_module.compile()
    token_logits = torch.tensor(shark_module.forward(inputs))
    mask_id = torch.where(encoded_inputs["input_ids"] == tokenizer.mask_token_id)[1]
    mask_token_logits = token_logits[0, mask_id, :]
    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
    for token in top_5_tokens:
        print(f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'")
    while True:
        try:
            new_text = input("Give me a sentence with [MASK] to fill: ")
            encoded_inputs = tokenizer(new_text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="pt")
            inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
            token_logits = torch.tensor(shark_module.forward(inputs))
            mask_id = torch.where(encoded_inputs["input_ids"] == tokenizer.mask_token_id)[1]
            mask_token_logits = token_logits[0, mask_id, :]
            top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
            for token in top_5_tokens:
                print(f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'")
        except KeyboardInterrupt:
            print("Exiting program.")
            break
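Since this commit wires up an intel-gpu device, the example above can presumably target it by passing the same device keyword the other SharkInference call sites in this diff use; a minimal sketch, assuming the Level Zero / OpenCL-SPIR-V path compiles this model (untested WIP):

# Sketch (assumption): point the compiled module at the new intel-gpu device.
# The `device` keyword mirrors the device="cpu"/"vulkan" usage elsewhere in this
# commit; whether the Level Zero backend runs ALBERT end to end is still WIP.
shark_module = SharkInference(
    minilm_mlir, func_name, device="intel-gpu", mlir_dialect="linalg"
)
shark_module.compile()
token_logits = torch.tensor(shark_module.forward(inputs))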

View File

@@ -0,0 +1,72 @@
from PIL import Image
import requests
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np
import sys

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of inputs
t5_inputs = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class AlbertModule(tf.Module):
    def __init__(self):
        super(AlbertModule, self).__init__()
        self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)

    @tf.function(input_signature=t5_inputs)
    def forward(self, input_ids, attention_mask):
        return self.m.predict(input_ids, attention_mask)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
    # text = "This is a great [MASK]."
    text = "This [MASK] is very tasty."
    encoded_inputs = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="tf")
    inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
    mlir_importer = SharkImporter(
        AlbertModule(),
        inputs,
        frontend="tf",
    )
    minilm_mlir, func_name = mlir_importer.import_mlir(
        is_dynamic=False, tracing_required=False
    )
    shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="mhlo")
    shark_module.compile()
    output_idx = 0
    data_idx = 1
    token_logits = shark_module.forward(inputs)[output_idx][data_idx]
    mask_id = np.where(tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id)
    mask_token_logits = token_logits[0, mask_id, :]
    top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
    for token in top_5_tokens:
        print(f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'")
    while True:
        try:
            new_text = input("Give me a sentence with [MASK] to fill: ")
            encoded_inputs = tokenizer(new_text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="tf")
            inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
            token_logits = shark_module.forward(inputs)[output_idx][data_idx]
            mask_id = np.where(tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id)
            mask_token_logits = token_logits[0, mask_id, :]
            top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
            for token in top_5_tokens:
                print(f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'")
        except KeyboardInterrupt:
            print("Exiting program.")
            sys.exit()

View File

@@ -0,0 +1,35 @@
from PIL import Image
import requests
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

if __name__ == "__main__":
    # Prepping Data
    model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
    text = "This [MASK] is very tasty."
    inputs = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="pt")
    token_logits = model(**inputs).logits
    print(token_logits)
    # Find the location of [MASK] and extract its logits
    mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
    mask_token_logits = token_logits[0, mask_token_index, :]
    # print(mask_token_logits)
    # Pick the [MASK] candidates with the highest logits
    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
    print(np.argsort(mask_token_logits.detach().numpy()))
    # print(top_5_tokens)
    for token in top_5_tokens:
        print(f"'>>> {text.replace(tokenizer.mask_token, tokenizer.decode([token]))}'")

View File

@@ -8,7 +8,7 @@ mlir_model, func_name, inputs, golden_out = download_torch_model(
shark_module = SharkInference(
-    mlir_model, func_name, device="cpu", mlir_dialect="linalg"
+    mlir_model, func_name, mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)

View File

@@ -0,0 +1,47 @@
from PIL import Image
import requests
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Model
import torch
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1


class T5Module(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained("t5-small")
        self.model.eval()

    def forward(self, input_ids):
        return self.model.generate(input_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    text = "I love the distilled version of models."
    task_prefix = "translate English to German: "
    encoded_input = tokenizer(task_prefix + text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="pt").input_ids
    inputs = (encoded_input)
    mlir_importer = SharkImporter(
        T5Module(),
        inputs,
        frontend="torch",
    )
    import pdb; pdb.set_trace()
    minilm_mlir, func_name = mlir_importer.import_mlir(
        is_dynamic=True, tracing_required=True
    )
    shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="linalg")
    shark_module.compile()
    import pdb; pdb.set_trace()
    output = shark_module.forward(inputs)
    print(tokenizer.batch_decode(output, skip_special_tokens=True))

View File

@@ -0,0 +1,51 @@
from PIL import Image
import requests
from transformers import T5Tokenizer, TFT5Model, TFT5ForConditionalGeneration
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os

MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1

# Create a set of inputs
t5_inputs = [
    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32)
]


class T5Module(tf.Module):
    def __init__(self):
        super(T5Module, self).__init__()
        self.m = TFT5ForConditionalGeneration.from_pretrained("t5-small")
        self.m.predict = lambda x: self.m.generate(input_ids=x)

    @tf.function(input_signature=t5_inputs)
    def forward(self, input_ids):
        return self.m.predict(input_ids)


if __name__ == "__main__":
    # Prepping Data
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    text = "I love the distilled version of models."
    task_prefix = "translate English to German: "
    encoded_input = tokenizer(task_prefix + text, padding='max_length', truncation=True, max_length=MAX_SEQUENCE_LENGTH, return_tensors="tf").input_ids
    inputs = (encoded_input)
    mlir_importer = SharkImporter(
        T5Module(),
        inputs,
        frontend="tf",
    )
    minilm_mlir, func_name = mlir_importer.import_mlir(
        is_dynamic=False, tracing_required=False
    )
    shark_module = SharkInference(minilm_mlir, func_name, mlir_dialect="mhlo")
    shark_module.compile()
    import pdb; pdb.set_trace()
    output = shark_module.forward(inputs)
    print(tokenizer.batch_decode(output, skip_special_tokens=True))

View File

@@ -5,7 +5,7 @@ from shark.shark_downloader import download_torch_model
mlir_model, func_name, inputs, golden_out = download_torch_model("v_diffusion")

shark_module = SharkInference(
-    mlir_model, func_name, device="vulkan", mlir_dialect="linalg"
+    mlir_model, func_name, mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)

View File

@@ -44,6 +44,7 @@ IREE_DEVICE_MAP = {
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
"intel-gpu" : "level_zero",
}
IREE_TARGET_MAP = {
@@ -53,6 +54,7 @@ IREE_TARGET_MAP = {
"vulkan": "vulkan",
"metal": "vulkan",
"rocm": "rocm",
"intel-gpu" : "opencl-spirv",
}
# Finds whether the required drivers are installed for the given device.
@@ -68,6 +70,9 @@ def check_device_drivers(device):
subprocess.check_output("vulkaninfo")
except Exception:
return True
elif device in ["intel-gpu"]:
# TODO: Add intel gpu check.
return False
elif device == "cpu":
return False
# Unknown device.
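One possible shape for the TODO above, mirroring the vulkaninfo probe used for the vulkan/metal branch; the helper name and the use of clinfo are assumptions, since clinfo only proves an OpenCL runtime is installed, not specifically an Intel GPU or Level Zero driver:

import subprocess

def _intel_gpu_driver_missing() -> bool:
    # Hypothetical sketch (assumption): probe for an OpenCL runtime the same way the
    # vulkan branch probes vulkaninfo. A real check would look for an Intel Level Zero
    # or OpenCL GPU device rather than any OpenCL installation.
    try:
        subprocess.check_output("clinfo")
    except Exception:
        return True  # drivers look missing; callers would skip intel-gpu tests
    return False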

View File

@@ -64,7 +64,7 @@ def compile_module_to_flatbuffer(
    input_type = ""
    args = get_iree_frontend_args(frontend)
    args += get_iree_device_args(device)
-    args += get_iree_common_args()
+    # args += get_iree_common_args()

    if frontend in ["tensorflow", "tf"]:
        input_type = "mhlo"
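For context, get_iree_device_args presumably consumes the IREE_DEVICE_MAP / IREE_TARGET_MAP entries added in iree_utils above. A rough, hedged sketch of what the intel-gpu entries would amount to; the compile flag is the standard iree-compile target option, and the level_zero runtime driver only exists in an IREE build with Level Zero support, so this is an assumption rather than this function's actual body:

# Sketch (assumption): what the new intel-gpu map entries translate to.
from iree import runtime as ireert

target = "opencl-spirv"   # IREE_TARGET_MAP["intel-gpu"]
driver = "level_zero"     # IREE_DEVICE_MAP["intel-gpu"]
compile_args = [f"--iree-hal-target-backends={target}"]
config = ireert.Config(driver)  # requires an IREE runtime built with Level Zero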

View File

@@ -39,6 +39,14 @@ class DistilBertModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )
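The same intel-gpu test pattern is repeated across the model test suites below. To exercise only the new cases once drivers are available, something like the following should work; the exact test file paths are elided in this view, so collection from the repository root is assumed:

# Sketch (assumption): run only the new intel-gpu tests via pytest's -k filter.
import pytest

raise SystemExit(pytest.main(["-k", "test_module_static_intelgpu"]))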

View File

@@ -39,6 +39,14 @@ class ElectraModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -48,6 +48,14 @@ class ConvNextTinyModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -39,6 +39,14 @@ class FunnelModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(
        reason="failing in the iree-compiler passes, see https://github.com/nod-ai/SHARK/issues/201"
    )

View File

@@ -45,6 +45,14 @@ class VitBaseModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -67,6 +67,14 @@ class MiniLMModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -67,6 +67,14 @@ class AlbertModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -67,6 +67,14 @@ class AlexnetModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -72,6 +72,14 @@ class BertBaseUncasedModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -76,6 +76,14 @@ class DistilBertModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skip(
        reason="Fails to lower in torch-mlir. See https://github.com/nod-ai/SHARK/issues/222"
    )

View File

@@ -68,6 +68,14 @@ class MobileBertModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -69,6 +69,14 @@ class Resnet101ModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -68,6 +68,14 @@ class Resnet18ModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -69,6 +69,14 @@ class Resnet50ModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -69,6 +69,14 @@ class SqueezenetModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -69,6 +69,14 @@ class WideResnet50ModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -42,6 +42,15 @@ class RobertaBaseModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="expected numerical inconsistency for roberta on OCL-SPIRV/IntelGPU")
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )

View File

@@ -39,6 +39,15 @@ class XLMRobertaModuleTest(unittest.TestCase):
        device = "cpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.xfail(reason="expected numerical inconsistency for roberta on OCL-SPIRV/IntelGPU")
    @pytest.mark.skipif(
        check_device_drivers("intel-gpu"), reason=device_driver_info("intel-gpu")
    )
    def test_module_static_intelgpu(self):
        dynamic = False
        device = "intel-gpu"
        self.module_tester.create_and_check_module(dynamic, device)

    @pytest.mark.skipif(
        check_device_drivers("gpu"), reason=device_driver_info("gpu")
    )