Enable --device_allocator=caching for CUDA and Vulkan; strip source maps from VM bytecode modules

2026-04-03 03:00:17 -04:00 · 2023-02-09 22:58:46 -08:00
parent 4ca84ee4ee
commit 83fe477066
4 changed files with 4 additions and 3 deletions
--- a/apps/stable_diffusion/src/utils/utils.py
+++ b/apps/stable_diffusion/src/utils/utils.py
@@ -117,6 +117,7 @@ def compile_through_fx(
 def set_iree_runtime_flags():
    vulkan_runtime_flags = [
        f"--vulkan_large_heap_block_size={args.vulkan_large_heap_block_size}",
+        f"--device_allocator=caching",
        f"--vulkan_validation_layers={'true' if args.vulkan_validation_layers else 'false'}",
    ]
    if args.enable_rgp:
--- a/shark/iree_utils/compile_utils.py
+++ b/shark/iree_utils/compile_utils.py
@@ -70,6 +70,7 @@ def get_iree_common_args():
    return [
        "--iree-stream-resource-index-bits=64",
        "--iree-vm-target-index-bits=64",
+        "--iree-vm-bytecode-module-strip-source-map=true",
        "--iree-util-zero-fill-elided-attrs",
    ]

--- a/shark/iree_utils/gpu_utils.py
+++ b/shark/iree_utils/gpu_utils.py
@@ -22,7 +22,7 @@ from shark.parser import shark_args
 # Get the default gpu args given the architecture.
 def get_iree_gpu_args():
    ireert.flags.FUNCTION_INPUT_VALIDATION = False
-    ireert.flags.parse_flags("--cuda_allow_inline_execution")
+    ireert.flags.parse_flags("--cuda_allow_inline_execution", "--device_allocator=caching")
    # TODO: Give the user_interface to pass the sm_arch.
    sm_arch = get_cuda_sm_cc()
    if (
--- a/shark/iree_utils/vulkan_utils.py
+++ b/shark/iree_utils/vulkan_utils.py
@@ -139,9 +139,8 @@ def get_vulkan_triple_flag(device_name="", extra_args=[]):


 def get_iree_vulkan_args(extra_args=[]):
-    # vulkan_flag = ["--iree-flow-demote-i64-to-i32"]
+    res_vulkan_flag = ["--device_allocator=caching"]

-    res_vulkan_flag = []
    vulkan_triple_flag = None
    for arg in extra_args:
        if "-iree-vulkan-target-triple=" in arg: