mirror of
https://github.com/nod-ai/AMD-SHARK-Studio.git
synced 2026-02-19 11:56:43 -05:00
* Optimize device enumeration overhead and log details on long operations. * Various fixes to add `@functools.cache` to what should be one time, expensive, device enumeration and setup activities. Cuts several seconds off of initialization on my machine. * Add detailed tracing to actual invocations if they exceed a certain timeout or have an exception. * Add detailed tracing to loading status. * By default detail logging is only printed if an operation takes an excessive amount of time. All logging/timing can be printed by setting the variable `$env:SHARK_DETAIL_TRACE = "1"` * Remove cache from unhashable functions
66 lines
2.0 KiB
Python
66 lines
2.0 KiB
Python
# Copyright 2020 The Nod Team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# All iree_cpu-related functionality goes here.
|
|
|
|
import functools
|
|
import subprocess
|
|
import platform
|
|
from shark.parser import shark_args
|
|
|
|
|
|
def get_cpu_count():
    """Return the CPU count reported by ``multiprocessing``.

    Returns:
        The value of ``multiprocessing.cpu_count()``, or ``None`` when that
        call raises ``NotImplementedError``.
    """
    from multiprocessing import cpu_count as _query_cpu_count

    try:
        detected = _query_cpu_count()
    except NotImplementedError:
        # The count cannot be determined; signal that with None instead of
        # propagating the error.
        return None
    return detected
|
|
|
|
|
|
# Get the default cpu args.
|
|
@functools.cache
def get_iree_cpu_args():
    """Build the default IREE compiler flags for the host CPU.

    The LLVM target triple is derived from ``platform.uname()``:

    * Darwin:  ``{machine}-apple-darwin{release}``
    * Linux:   ``{machine}-linux-gnu``
    * Windows: always ``x86_64-pc-windows-msvc`` (the machine field is not
      consulted on this branch)

    The result is memoized with ``functools.cache``: detection and the
    informational print run only on the first call, and every call returns
    the same list object, so callers should copy it before mutating.

    Returns:
        A one-element list holding the
        ``--iree-llvmcpu-target-triple=<triple>`` flag.

    Raises:
        Exception: If the host OS is not Darwin, Linux or Windows.
    """
    uname = platform.uname()
    os_name, proc_name = uname.system, uname.machine

    if os_name == "Darwin":
        kernel_version = uname.release
        target_triple = f"{proc_name}-apple-darwin{kernel_version}"
    elif os_name == "Linux":
        target_triple = f"{proc_name}-linux-gnu"
    elif os_name == "Windows":
        target_triple = "x86_64-pc-windows-msvc"
    else:
        # The original message had a stray "f" before the placeholder, which
        # rendered as e.g. "OS Type fFreeBSD"; report the real OS name.
        error_message = (
            f"OS Type {os_name} not supported and triple can't be "
            "determined, open issue to dSHARK team please :)"
        )
        raise Exception(error_message)

    print(f"Target triple found:{target_triple}")
    return [
        f"--iree-llvmcpu-target-triple={target_triple}",
    ]
|
|
|
|
|
|
# Get iree runtime flags for cpu
|
|
@functools.cache
def get_iree_cpu_rt_args():
    """Build the IREE runtime flag that sets the task topology group count.

    If ``shark_args.task_topology_max_group_count`` is set (not ``None``) it
    is used verbatim. Otherwise the count is derived from ``get_cpu_count()``:
    hosts reporting 8 or fewer CPUs use all of them, larger hosts use two
    fewer than reported. If the CPU count cannot be determined
    (``get_cpu_count()`` returns ``None``), a conservative count of 1 is used
    rather than failing on the comparison.

    The result is memoized with ``functools.cache``, so ``shark_args`` is
    read only on the first call and every call returns the same list object.

    Returns:
        A one-element list holding the
        ``--task_topology_max_group_count=<n>`` flag.
    """
    override = shark_args.task_topology_max_group_count
    if override is not None:
        # An explicit setting wins; no CPU detection is needed.
        return [f"--task_topology_max_group_count={override}"]

    detected = get_cpu_count()
    if detected is None:
        # Detection failed; the original code raised TypeError on
        # `None <= 8`. Fall back to a single group instead.
        cpu_count = 1
    elif detected <= 8:
        cpu_count = detected
    else:
        cpu_count = detected - 2
    return [f"--task_topology_max_group_count={cpu_count}"]
|