mirror of
https://github.com/nod-ai/AMD-SHARK-Studio.git
synced 2026-04-03 03:00:17 -04:00
yapf format python files.
Please use `yapf -i --style .style.yapf shark/*.py` to format python files.
This commit is contained in:
@@ -15,7 +15,6 @@
|
||||
import sys
|
||||
import ctypes
|
||||
|
||||
|
||||
#Some constants taken from cuda.h
|
||||
CUDA_SUCCESS = 0
|
||||
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
|
||||
@@ -23,6 +22,7 @@ CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
|
||||
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
|
||||
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
|
||||
|
||||
|
||||
def get_cuda_sm_cc():
|
||||
libnames = ('libcuda.so', 'libcuda.dylib', 'cuda.dll')
|
||||
for libname in libnames:
|
||||
@@ -48,24 +48,31 @@ def get_cuda_sm_cc():
|
||||
result = cuda.cuInit(0)
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print("cuInit failed with error code %d: %s" % (result, error_str.value.decode()))
|
||||
print("cuInit failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
result = cuda.cuDeviceGetCount(ctypes.byref(nGpus))
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print("cuDeviceGetCount failed with error code %d: %s" % (result, error_str.value.decode()))
|
||||
print("cuDeviceGetCount failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
print("Found %d device(s)." % nGpus.value)
|
||||
for i in range(nGpus.value):
|
||||
result = cuda.cuDeviceGet(ctypes.byref(device),i)
|
||||
result = cuda.cuDeviceGet(ctypes.byref(device), i)
|
||||
if result != CUDA_SUCCESS:
|
||||
cuda.cuGetErrorString(result, ctypes.byref(error_str))
|
||||
print("cuDeviceGet failed with error code %d: %s" % (result, error_str.value.decode()))
|
||||
print("cuDeviceGet failed with error code %d: %s" %
|
||||
(result, error_str.value.decode()))
|
||||
return 1
|
||||
print("Device: %d" % i)
|
||||
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) == CUDA_SUCCESS:
|
||||
if cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name),
|
||||
device) == CUDA_SUCCESS:
|
||||
print(" Name: %s" % (name.split(b'\0', 1)[0].decode()))
|
||||
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) == CUDA_SUCCESS:
|
||||
print(" Compute Capability: %d.%d" % (cc_major.value, cc_minor.value))
|
||||
if cuda.cuDeviceComputeCapability(ctypes.byref(cc_major),
|
||||
ctypes.byref(cc_minor),
|
||||
device) == CUDA_SUCCESS:
|
||||
print(" Compute Capability: %d.%d" %
|
||||
(cc_major.value, cc_minor.value))
|
||||
sm = f"sm_{cc_major.value}{cc_minor.value}"
|
||||
return sm
|
||||
|
||||
@@ -100,7 +100,9 @@ def get_vulkan_triple_flag():
|
||||
print("Found Nvidia Device. Using ampere-rtx3080-linux")
|
||||
return "-iree-vulkan-target-triple=ampere-rtx3080-linux"
|
||||
else:
|
||||
print("Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord or pull up an issue.")
|
||||
print(
|
||||
"Optimized kernel for your target device is not added yet. Contact SHARK Admin on discord or pull up an issue."
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@@ -111,6 +113,7 @@ def get_iree_vulkan_args():
|
||||
vulkan_flag.append(vulkan_triple_flag)
|
||||
return vulkan_flag
|
||||
|
||||
|
||||
def get_iree_device_args(device):
|
||||
if device == "cpu":
|
||||
return get_iree_cpu_args()
|
||||
@@ -135,7 +138,8 @@ def get_iree_frontend_args(frontend):
|
||||
return []
|
||||
|
||||
|
||||
def compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path):
|
||||
def compile_module_to_flatbuffer(module, device, frontend, func_name,
|
||||
model_config_path):
|
||||
# Setup Compile arguments wrt to frontends.
|
||||
input_type = ""
|
||||
args = get_iree_frontend_args(frontend)
|
||||
@@ -177,6 +181,7 @@ def compile_module_to_flatbuffer(module, device, frontend, func_name, model_conf
|
||||
extra_args=args)
|
||||
return flatbuffer_blob
|
||||
|
||||
|
||||
def get_iree_module(flatbuffer_blob, device, func_name):
|
||||
vm_module = ireert.VmModule.from_flatbuffer(flatbuffer_blob)
|
||||
config = ireert.Config(IREE_DEVICE_MAP[device])
|
||||
@@ -192,17 +197,19 @@ def get_iree_compiled_module(module,
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None):
|
||||
"""Given a module returns the compiled .vmfb and configs"""
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path)
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
|
||||
func_name, model_config_path)
|
||||
return get_iree_module(flatbuffer_blob, device, func_name)
|
||||
|
||||
|
||||
def export_iree_module_to_vmfb(module,
|
||||
device: str,
|
||||
directory: str,
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None):
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend, func_name, model_config_path)
|
||||
device: str,
|
||||
directory: str,
|
||||
frontend: str = "torch",
|
||||
func_name: str = "forward",
|
||||
model_config_path: str = None):
|
||||
flatbuffer_blob = compile_module_to_flatbuffer(module, device, frontend,
|
||||
func_name, model_config_path)
|
||||
module_name = f"{frontend}_{func_name}_{device}"
|
||||
filename = os.path.join(directory, module_name + ".vmfb")
|
||||
print(f"Saved vmfb in {filename}.")
|
||||
@@ -210,6 +217,7 @@ def export_iree_module_to_vmfb(module,
|
||||
f.write(flatbuffer_blob)
|
||||
return filename
|
||||
|
||||
|
||||
def export_module_to_mlir_file(module, frontend, directory: str):
|
||||
mlir_str = module
|
||||
if frontend in ["tensorflow", "tf", "mhlo"]:
|
||||
@@ -222,6 +230,7 @@ def export_module_to_mlir_file(module, frontend, directory: str):
|
||||
print(f"Saved mlir in {filename}.")
|
||||
return filename
|
||||
|
||||
|
||||
def get_results(compiled_vm, input, config, frontend="torch"):
|
||||
"""Runs a .vmfb file given inputs and config and returns output."""
|
||||
device_inputs = input
|
||||
@@ -231,7 +240,10 @@ def get_results(compiled_vm, input, config, frontend="torch"):
|
||||
device_inputs = []
|
||||
for a in input:
|
||||
if (isinstance(a, list)):
|
||||
device_inputs.append([ireert.asdevicearray(config.device, val, dtype=np.int32) for val in a])
|
||||
device_inputs.append([
|
||||
ireert.asdevicearray(config.device, val, dtype=np.int32)
|
||||
for val in a
|
||||
])
|
||||
else:
|
||||
device_inputs.append(ireert.asdevicearray(config.device, a))
|
||||
result = compiled_vm(*device_inputs)
|
||||
@@ -257,15 +269,16 @@ def tensor_to_type_str(input_tensors: tuple, frontend: str):
|
||||
Output: list of string that represent mlir types (i.e 1x24xf64)
|
||||
# TODO: Support more than floats, and ints
|
||||
"""
|
||||
print("front:",frontend)
|
||||
print("front:", frontend)
|
||||
list_of_type = []
|
||||
for input_tensor in input_tensors:
|
||||
type_string = "x".join([str(dim) for dim in input_tensor.shape])
|
||||
if frontend in ["torch", "pytorch"]:
|
||||
dtype_string = str(input_tensor.dtype).replace("torch.", "")
|
||||
elif frontend in ["tensorflow","tf"]:
|
||||
elif frontend in ["tensorflow", "tf"]:
|
||||
dtype = input_tensor.dtype
|
||||
dtype_string = re.findall('\'[^"]*\'',str(dtype))[0].replace("\'","")
|
||||
dtype_string = re.findall('\'[^"]*\'',
|
||||
str(dtype))[0].replace("\'", "")
|
||||
regex_split = re.compile("([a-zA-Z]+)([0-9]+)")
|
||||
match = regex_split.match(dtype_string)
|
||||
mlir_type_string = str(match.group(1)[0]) + str(match.group(2))
|
||||
|
||||
@@ -20,122 +20,124 @@ from typing import List, Dict
|
||||
from iree.compiler import ir
|
||||
from iree.compiler.transforms import ireec as ireec_trans
|
||||
|
||||
MATMUL_OP_NAMES = set([
|
||||
"linalg.matmul",
|
||||
"linalg.batch_matmul",
|
||||
"mhlo.dot",
|
||||
"mhlo.dot_general"
|
||||
])
|
||||
MATMUL_OP_NAMES = set(
|
||||
["linalg.matmul", "linalg.batch_matmul", "mhlo.dot", "mhlo.dot_general"])
|
||||
idx = 0
|
||||
|
||||
|
||||
def model_annotation(ctx: ir.Context, *, input_contents: str, config_path: str):
|
||||
if os.path.isfile(input_contents):
|
||||
with open(input_contents, "rb") as f:
|
||||
input_contents = f.read()
|
||||
if os.path.isfile(input_contents):
|
||||
with open(input_contents, "rb") as f:
|
||||
input_contents = f.read()
|
||||
|
||||
module = ir.Module.parse(input_contents)
|
||||
module = ir.Module.parse(input_contents)
|
||||
|
||||
with open(config_path, "r") as f:
|
||||
data = json.load(f)
|
||||
configs = data["options"]
|
||||
with open(config_path, "r") as f:
|
||||
data = json.load(f)
|
||||
configs = data["options"]
|
||||
|
||||
# The Python API does not expose a general walk() function, so we just
|
||||
# do it ourselves.
|
||||
walk_children(module.operation, configs)
|
||||
# The Python API does not expose a general walk() function, so we just
|
||||
# do it ourselves.
|
||||
walk_children(module.operation, configs)
|
||||
|
||||
if not module.operation.verify():
|
||||
raise RuntimeError("Modified program does not verify!")
|
||||
if not module.operation.verify():
|
||||
raise RuntimeError("Modified program does not verify!")
|
||||
|
||||
# More efficient than: print(module)
|
||||
# - Disables verification (already done above)
|
||||
# - Writes as binary, avoiding costly unicode conversions
|
||||
sys.stdout.buffer.write(
|
||||
module.operation.get_asm(assume_verified=True, binary=True))
|
||||
return module
|
||||
# More efficient than: print(module)
|
||||
# - Disables verification (already done above)
|
||||
# - Writes as binary, avoiding costly unicode conversions
|
||||
sys.stdout.buffer.write(
|
||||
module.operation.get_asm(assume_verified=True, binary=True))
|
||||
return module
|
||||
|
||||
|
||||
def walk_children(op: ir.Operation, configs: List[Dict]):
|
||||
for region in op.regions:
|
||||
for block in region.blocks:
|
||||
for child_op in block.operations:
|
||||
# TODO: This is dumb. Both Operation and OpView should expose
|
||||
# 'operation' and 'name' attributes.
|
||||
if isinstance(child_op, ir.OpView):
|
||||
child_op = child_op.operation
|
||||
if child_op.name in MATMUL_OP_NAMES:
|
||||
global idx
|
||||
tile_sizes, pipeline, workgroup_size, \
|
||||
split_k, pipeline_depth = parse_config(configs[idx])
|
||||
for region in op.regions:
|
||||
for block in region.blocks:
|
||||
for child_op in block.operations:
|
||||
# TODO: This is dumb. Both Operation and OpView should expose
|
||||
# 'operation' and 'name' attributes.
|
||||
if isinstance(child_op, ir.OpView):
|
||||
child_op = child_op.operation
|
||||
if child_op.name in MATMUL_OP_NAMES:
|
||||
global idx
|
||||
tile_sizes, pipeline, workgroup_size, \
|
||||
split_k, pipeline_depth = parse_config(configs[idx])
|
||||
|
||||
add_compilation_info(child_op,
|
||||
tile_sizes=tile_sizes,
|
||||
pipeline=pipeline,
|
||||
workgroup_size=workgroup_size,
|
||||
pipeline_depth=pipeline_depth)
|
||||
add_compilation_info(child_op,
|
||||
tile_sizes=tile_sizes,
|
||||
pipeline=pipeline,
|
||||
workgroup_size=workgroup_size,
|
||||
pipeline_depth=pipeline_depth)
|
||||
|
||||
if split_k:
|
||||
add_split_k(child_op, split_k)
|
||||
if split_k:
|
||||
add_split_k(child_op, split_k)
|
||||
|
||||
idx = idx+1
|
||||
print(f"Updated op {child_op}", file=sys.stderr)
|
||||
walk_children(child_op, configs)
|
||||
idx = idx + 1
|
||||
print(f"Updated op {child_op}", file=sys.stderr)
|
||||
walk_children(child_op, configs)
|
||||
|
||||
|
||||
def parse_config(config: Dict):
|
||||
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
|
||||
pipeline = "LLVMGPUMatmulSimt" if config["pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
|
||||
tile_sizes = [config["work_group_tile_sizes"]]
|
||||
workgroup_size = config["work_group_sizes"]
|
||||
try:
|
||||
pipeline_depth = config["pipeline_depth"]
|
||||
except:
|
||||
pipeline_depth = None
|
||||
try:
|
||||
split_k = config["split_k"]
|
||||
except:
|
||||
split_k = None
|
||||
else:
|
||||
pipeline = config["pipeline"]
|
||||
tile_sizes = [config["work_group_tile_sizes"], config["l1_tile_sizes"],
|
||||
config["vector_tile_sizes"]]
|
||||
workgroup_size = []
|
||||
split_k = None
|
||||
pipeline_depth = None
|
||||
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
|
||||
if config["pipeline"] == "GPU" or config["pipeline"] == "GPU_TENSORCORE":
|
||||
pipeline = "LLVMGPUMatmulSimt" if config[
|
||||
"pipeline"] == "GPU" else "LLVMGPUMatmulTensorCore"
|
||||
tile_sizes = [config["work_group_tile_sizes"]]
|
||||
workgroup_size = config["work_group_sizes"]
|
||||
try:
|
||||
pipeline_depth = config["pipeline_depth"]
|
||||
except:
|
||||
pipeline_depth = None
|
||||
try:
|
||||
split_k = config["split_k"]
|
||||
except:
|
||||
split_k = None
|
||||
else:
|
||||
pipeline = config["pipeline"]
|
||||
tile_sizes = [
|
||||
config["work_group_tile_sizes"], config["l1_tile_sizes"],
|
||||
config["vector_tile_sizes"]
|
||||
]
|
||||
workgroup_size = []
|
||||
split_k = None
|
||||
pipeline_depth = None
|
||||
return tile_sizes, pipeline, workgroup_size, split_k, pipeline_depth
|
||||
|
||||
|
||||
def add_compilation_info(op: ir.Operation, tile_sizes: List[List[int]],
|
||||
pipeline: str, workgroup_size: List[int],
|
||||
pipeline_depth: int):
|
||||
# We don't have a Python binding for CompilationInfo, so we just parse
|
||||
# its string form.
|
||||
if pipeline_depth:
|
||||
attr = ir.Attribute.parse(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
else:
|
||||
attr = ir.Attribute.parse(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
op.attributes["compilation_info"] = attr
|
||||
# We don't have a Python binding for CompilationInfo, so we just parse
|
||||
# its string form.
|
||||
if pipeline_depth:
|
||||
attr = ir.Attribute.parse(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline} pipeline_depth = {pipeline_depth}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
else:
|
||||
attr = ir.Attribute.parse(
|
||||
f"#iree_codegen.compilation_info<"
|
||||
f"lowering_config = <tile_sizes = {repr(tile_sizes)}>, "
|
||||
f"translation_info = <{pipeline}>, "
|
||||
f"workgroup_size = {repr(workgroup_size)}>")
|
||||
op.attributes["compilation_info"] = attr
|
||||
|
||||
|
||||
def add_split_k(op: ir.Operation, k: int):
|
||||
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
|
||||
op.attributes["iree_flow_split_k"] = attr
|
||||
attr = ir.IntegerAttr.get(ir.IntegerType.get_signless(64), k)
|
||||
op.attributes["iree_flow_split_k"] = attr
|
||||
|
||||
|
||||
def create_context() -> ir.Context:
|
||||
context = ir.Context()
|
||||
ireec_trans.register_all_dialects(context)
|
||||
context.allow_unregistered_dialects = True
|
||||
return context
|
||||
context = ir.Context()
|
||||
ireec_trans.register_all_dialects(context)
|
||||
context.allow_unregistered_dialects = True
|
||||
return context
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
with create_context() as ctx:
|
||||
model_annotation(ctx, input_contents=sys.argv[1], config_path=sys.argv[2])
|
||||
with create_context() as ctx:
|
||||
model_annotation(ctx,
|
||||
input_contents=sys.argv[1],
|
||||
config_path=sys.argv[2])
|
||||
|
||||
@@ -23,6 +23,7 @@ def dir_path(path):
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"readable_dir:{path} is not a valid path")
|
||||
|
||||
|
||||
def dir_file(path):
|
||||
if os.path.isfile(path):
|
||||
return path
|
||||
@@ -53,8 +54,7 @@ parser.add_argument("--save_vmfb",
|
||||
help="Saves iree .vmfb module to /tmp/ directory.")
|
||||
parser.add_argument(
|
||||
"--model_config_path",
|
||||
help=
|
||||
"Directory to where the tuned model config file is located.",
|
||||
help="Directory to where the tuned model config file is located.",
|
||||
default=None)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
@@ -16,10 +16,12 @@ from shark.shark_runner import SharkRunner, SharkBenchmarkRunner
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
# Prints to stderr.
|
||||
def print_err(*a):
|
||||
print(*a, file=sys.stderr)
|
||||
|
||||
|
||||
class SharkInference:
|
||||
"""Inference API targeting pytorch, tensorflow, linalg, mhlo and tosa frontend."""
|
||||
|
||||
|
||||
@@ -53,17 +53,20 @@ class SharkRunner:
|
||||
jit_trace, from_aot)
|
||||
elif frontend in ["tensorflow", "tf"]:
|
||||
self.model = tfc.compile_module(self.model,
|
||||
exported_names=[func_name],
|
||||
import_only=True)
|
||||
exported_names=[func_name],
|
||||
import_only=True)
|
||||
(
|
||||
self.iree_compilation_module,
|
||||
self.iree_config,
|
||||
) = get_iree_compiled_module(self.model, device, self.frontend,
|
||||
self.iree_compilation_module,
|
||||
self.iree_config,
|
||||
) = get_iree_compiled_module(self.model,
|
||||
device,
|
||||
self.frontend,
|
||||
model_config_path=model_config_path)
|
||||
|
||||
# Debugging Options:
|
||||
if shark_args.save_mlir:
|
||||
export_module_to_mlir_file(self.model, self.frontend, shark_args.repro_dir)
|
||||
export_module_to_mlir_file(self.model, self.frontend,
|
||||
shark_args.repro_dir)
|
||||
if shark_args.save_vmfb:
|
||||
self.vmfb_file = export_iree_module_to_vmfb(self.model, device,
|
||||
shark_args.repro_dir,
|
||||
|
||||
Reference in New Issue
Block a user