yapf format python files.

Please use `yapf -i --style .style.yapf shark/*.py` to format python
files.
This commit is contained in:
Prashant Kumar
2022-06-06 08:51:02 +00:00
parent 49c932d742
commit 9d2ce33bd1
6 changed files with 144 additions and 117 deletions

View File

@@ -15,7 +15,6 @@
import sys
import ctypes
#Some constants taken from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
@@ -23,6 +22,7 @@ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
def get_cuda_sm_cc():
libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
for libname in libnames:
@@ -48,24 +48,31 @@ def get_cuda_sm_cc():
result = cuda.cuInit(0)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuInit failed with error code %d: %s" % (result, error_str.value.decode()))
print("cuInit failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGetCount failed with error code %d: %s" % (result, error_str.value.decode()))
print("cuDeviceGetCount failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
print("Found %d device(s)." % nGpus.value)
for i in range(nGpus.value):
result = cuda.cuDeviceGet(ctypes.byref(device),i)
result = cuda.cuDeviceGet(ctypes.byref(device), i)
if result != CUDA_SUCCESS:
cuda.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGet failed with error code %d: %s" % (result, error_str.value.decode()))
print("cuDeviceGet failed with error code %d: %s" %
(result, error_str.value.decode()))
return 1
print("Device: %d" % i)
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) == CUDA_SUCCESS:
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
device) == CUDA_SUCCESS:
print(" Name: %s" % (name.split(b'\0', 1)[0].decode()))
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS:
print(" Compute Capability: %d.%d" % (cc_major.value, cc_minor.value))
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
ctypes.byref(cc_minor),
device) == CUDA_SUCCESS:
print(" Compute Capability: %d.%d" %
(cc_major.value, cc_minor.value))
sm = f"sm_{cc_major.value}{cc_minor.value}"
return sm

View File

@@ -100,7 +100,9 @@ def get_vulkan_triple_flag():
print("Found Nvidia Device. Using ampere-rtx3080-linux")
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
else:
print("Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord or pull up an issue.")
print(
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord or pull up an issue."
)
return None
@@ -111,6 +113,7 @@ def get_iree_vulkan_args():
vulkan_flag.append(vulkan_triple_flag)
return vulkan_flag
def get_iree_device_args(device):
if device == "cpu":
return get_iree_cpu_args()
@@ -135,7 +138,8 @@ def get_iree_frontend_args(frontend):
return []
def compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path):
def compile_module_to_flatbuffer(module, device, frontend, func_name,
model_config_path):
# Setup Compile arguments wrt to frontends.
input_type = ""
args = get_iree_frontend_args(frontend)
@@ -177,6 +181,7 @@ def compile_module_to_flatbuffer(module, device, frontend, func_name, model_conf
extra_args=args)
return flatbuffer_blob
def get_iree_module(flatbuffer_blob, device, func_name):
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
config = ireert.Config(IREE_DEVICE_MAP[device])
@@ -192,17 +197,19 @@ def get_iree_compiled_module(module,
func_name: str = "forward",
model_config_path: str = None):
"""Given a module returns the compiled .vmfb and configs"""
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path)
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
return get_iree_module(flatbuffer_blob, device, func_name)
def export_iree_module_to_vmfb(module,
device: str,
directory: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path)
device: str,
directory: str,
frontend: str = "torch",
func_name: str = "forward",
model_config_path: str = None):
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
func_name, model_config_path)
module_name = f"{frontend}_{func_name}_{device}"
filename = os.path.join(directory, module_name + ".vmfb")
print(f"Saved vmfb in {filename}.")
@@ -210,6 +217,7 @@ def export_iree_module_to_vmfb(module,
f.write(flatbuffer_blob)
return filename
def export_module_to_mlir_file(module, frontend, directory: str):
mlir_str = module
if frontend in ["tensorflow", "tf", "mhlo"]:
@@ -222,6 +230,7 @@ def export_module_to_mlir_file(module, frontend, directory: str):
print(f"Saved mlir in {filename}.")
return filename
def get_results(compiled_vm, input, config, frontend="torch"):
"""Runs a .vmfb file given inputs and config and returns output."""
device_inputs = input
@@ -231,7 +240,10 @@ def get_results(compiled_vm, input, config, frontend="torch"):
device_inputs = []
for a in input:
if (isinstance(a, list)):
device_inputs.append([ireert.asdevicearray(config.device, val, dtype=np.int32) for val in a])
device_inputs.append([
ireert.asdevicearray(config.device, val, dtype=np.int32)
for val in a
])
else:
device_inputs.append(ireert.asdevicearray(config.device, a))
result = compiled_vm(*device_inputs)
@@ -257,15 +269,16 @@ def tensor_to_type_str(input_tensors: tuple, frontend: str):
Output: list of string that represent mlir types (i.e 1x24xf64)
# TODO: Support more than floats, and ints
"""
print("front:",frontend)
print("front:", frontend)
list_of_type = []
for input_tensor in input_tensors:
type_string = "x".join([str(dim) for dim in input_tensor.shape])
if frontend in ["torch", "pytorch"]:
dtype_string = str(input_tensor.dtype).replace("torch.", "")
elif frontend in ["tensorflow","tf"]:
elif frontend in ["tensorflow", "tf"]:
dtype = input_tensor.dtype
dtype_string = re.findall('\'[^"]*\'',str(dtype))[0].replace("\'","")
dtype_string = re.findall('\'[^"]*\'',
str(dtype))[0].replace("\'", "")
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
match = regex_split.match(dtype_string)
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))

View File

@@ -20,122 +20,124 @@ from typing import List, Dict
from iree.compiler import ir
from iree.compiler.transforms import ireec as ireec_trans
MATMUL_OP_NAMES = set([
"linalg.matmul",
"linalg.batch_matmul",
"mhlo.dot",
"mhlo.dot_general"
])
MATMUL_OP_NAMES = set(
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
idx = 0
def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
if os.path.isfile(input_contents):
with open(input_contents, "rb") as f:
input_contents = f.read()
if os.path.isfile(input_contents):
with open(input_contents, "rb") as f:
input_contents = f.read()
module = ir.Module.parse(input_contents)
module = ir.Module.parse(input_contents)
with open(config_path, "r") as f:
data = json.load(f)
configs = data["options"]
with open(config_path, "r") as f:
data = json.load(f)
configs = data["options"]
# The Python API does not expose a general walk() function, so we just
# do it ourselves.
walk_children(module.operation, configs)
# The Python API does not expose a general walk() function, so we just
# do it ourselves.
walk_children(module.operation, configs)
if not module.operation.verify():
raise RuntimeError("Modified program does not verify!")
if not module.operation.verify():
raise RuntimeError("Modified program does not verify!")
# More efficient than: print(module)
# - Disables verification (already done above)
# - Writes as binary, avoiding costly unicode conversions
sys.stdout.buffer.write(
module.operation.get_asm(assume_verified=True, binary=True))
return module
# More efficient than: print(module)
# - Disables verification (already done above)
# - Writes as binary, avoiding costly unicode conversions
sys.stdout.buffer.write(
module.operation.get_asm(assume_verified=True, binary=True))
return module
def walk_children(op: ir.Operation, configs: List[Dict]):
for region in op.regions:
for block in region.blocks:
for child_op in block.operations:
# TODO: This is dumb. Both Operation and OpView should expose
# 'operation' and 'name' attributes.
if isinstance(child_op, ir.OpView):
child_op = child_op.operation
if child_op.name in MATMUL_OP_NAMES:
global idx
tile_sizes, pipeline, workgroup_size, \
split_k, pipeline_depth = parse_config(configs[idx])
for region in op.regions:
for block in region.blocks:
for child_op in block.operations:
# TODO: This is dumb. Both Operation and OpView should expose
# 'operation' and 'name' attributes.
if isinstance(child_op, ir.OpView):
child_op = child_op.operation
if child_op.name in MATMUL_OP_NAMES:
global idx
tile_sizes, pipeline, workgroup_size, \
split_k, pipeline_depth = parse_config(configs[idx])
add_compilation_info(child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth)
add_compilation_info(child_op,
tile_sizes=tile_sizes,
pipeline=pipeline,
workgroup_size=workgroup_size,
pipeline_depth=pipeline_depth)
if split_k:
add_split_k(child_op, split_k)
if split_k:
add_split_k(child_op, split_k)
idx = idx+1
print(f"Updated op {child_op}", file=sys.stderr)
walk_children(child_op, configs)
idx = idx + 1
print(f"Updated op {child_op}", file=sys.stderr)
walk_children(child_op, configs)
def parse_config(config: Dict):
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
pipeline = "LLVMGPUMatmulSimt" if config["pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
tile_sizes = [config["work_group_tile_sizes"]]
workgroup_size = config["work_group_sizes"]
try:
pipeline_depth = config["pipeline_depth"]
except:
pipeline_depth = None
try:
split_k = config["split_k"]
except:
split_k = None
else:
pipeline = config["pipeline"]
tile_sizes = [config["work_group_tile_sizes"], config["l1_tile_sizes"],
config["vector_tile_sizes"]]
workgroup_size = []
split_k = None
pipeline_depth = None
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
pipeline = "LLVMGPUMatmulSimt" if config[
"pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
tile_sizes = [config["work_group_tile_sizes"]]
workgroup_size = config["work_group_sizes"]
try:
pipeline_depth = config["pipeline_depth"]
except:
pipeline_depth = None
try:
split_k = config["split_k"]
except:
split_k = None
else:
pipeline = config["pipeline"]
tile_sizes = [
config["work_group_tile_sizes"], config["l1_tile_sizes"],
config["vector_tile_sizes"]
]
workgroup_size = []
split_k = None
pipeline_depth = None
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
pipeline: str, workgroup_size: List[int],
pipeline_depth: int):
# We don't have a Python binding for CompilationInfo, so we just parse
# its string form.
if pipeline_depth:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
f"workgroup_size = {repr(workgroup_size)}>")
else:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline}>, "
f"workgroup_size = {repr(workgroup_size)}>")
op.attributes["compilation_info"] = attr
# We don't have a Python binding for CompilationInfo, so we just parse
# its string form.
if pipeline_depth:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
f"workgroup_size = {repr(workgroup_size)}>")
else:
attr = ir.Attribute.parse(
f"#iree_codegen.compilation_info<"
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
f"translation_info = <{pipeline}>, "
f"workgroup_size = {repr(workgroup_size)}>")
op.attributes["compilation_info"] = attr
def add_split_k(op: ir.Operation, k: int):
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
op.attributes["iree_flow_split_k"] = attr
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
op.attributes["iree_flow_split_k"] = attr
def create_context() -> ir.Context:
context = ir.Context()
ireec_trans.register_all_dialects(context)
context.allow_unregistered_dialects = True
return context
context = ir.Context()
ireec_trans.register_all_dialects(context)
context.allow_unregistered_dialects = True
return context
if __name__ == "__main__":
with create_context() as ctx:
model_annotation(ctx, input_contents=sys.argv[1], config_path=sys.argv[2])
with create_context() as ctx:
model_annotation(ctx,
input_contents=sys.argv[1],
config_path=sys.argv[2])

View File

@@ -23,6 +23,7 @@ def dir_path(path):
raise argparse.ArgumentTypeError(
f"readable_dir:{path} is not a valid path")
def dir_file(path):
if os.path.isfile(path):
return path
@@ -53,8 +54,7 @@ parser.add_argument("--save_vmfb",
help="Saves iree .vmfb module to /tmp/ directory.")
parser.add_argument(
"--model_config_path",
help=
"Directory to where the tuned model config file is located.",
help="Directory to where the tuned model config file is located.",
default=None)
parser.add_argument(

View File

@@ -16,10 +16,12 @@ from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
import time
import sys
# Prints to stderr.
def print_err(*a):
print(*a, file=sys.stderr)
class SharkInference:
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""

View File

@@ -53,17 +53,20 @@ class SharkRunner:
jit_trace, from_aot)
elif frontend in ["tensorflow", "tf"]:
self.model = tfc.compile_module(self.model,
exported_names=[func_name],
import_only=True)
exported_names=[func_name],
import_only=True)
(
self.iree_compilation_module,
self.iree_config,
) = get_iree_compiled_module(self.model, device, self.frontend,
self.iree_compilation_module,
self.iree_config,
) = get_iree_compiled_module(self.model,
device,
self.frontend,
model_config_path=model_config_path)
# Debugging Options:
if shark_args.save_mlir:
export_module_to_mlir_file(self.model, self.frontend, shark_args.repro_dir)
export_module_to_mlir_file(self.model, self.frontend,
shark_args.repro_dir)
if shark_args.save_vmfb:
self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
shark_args.repro_dir,