[SharkInference-SharkRuntime] Adds capability to mmap vmfbs

-- This commit is based on [VmModule.mmap() API](https://github.com/openxla/iree/pull/14124). -- It thereby adds capability to mmap vmfbs in SHARK. Signed-off-by: Abhishek Varma <abhishek@nod-labs.com>
2026-04-03 03:00:17 -04:00 · 2023-06-15 13:01:58 +00:00
parent 1b0f39107c
commit cdd505e2dd
3 changed files with 116 additions and 18 deletions
--- a/shark/iree_utils/compile_utils.py
+++ b/shark/iree_utils/compile_utils.py
@@ -19,6 +19,7 @@ from shark.parser import shark_args
 import numpy as np
 import os
 import re
+import tempfile


 # Get the iree-compile arguments given device.
@@ -181,8 +182,10 @@ def compile_benchmark_dirs(bench_dir, device, dispatch_benchmarks):
                        vmfb_file.close()

                        config = get_iree_runtime_config(device)
-                        vm_module = ireert.VmModule.from_flatbuffer(
-                            config.vm_instance, flatbuffer_blob
+                        vm_module = ireert.VmModule.from_buffer(
+                            config.vm_instance,
+                            flatbuffer_blob,
+                            warn_if_copy=False,
                        )

                        benchmark_cl = build_benchmark_args_non_tensor_input(
@@ -313,8 +316,8 @@ def get_iree_module(flatbuffer_blob, device, device_idx=None):
        config = ireert.Config(device=haldevice)
    else:
        config = get_iree_runtime_config(device)
-    vm_module = ireert.VmModule.from_flatbuffer(
-        config.vm_instance, flatbuffer_blob
+    vm_module = ireert.VmModule.from_buffer(
+        config.vm_instance, flatbuffer_blob, warn_if_copy=False
    )
    ctx = ireert.SystemContext(config=config)
    ctx.add_vm_module(vm_module)
@@ -322,6 +325,55 @@ def get_iree_module(flatbuffer_blob, device, device_idx=None):
    return ModuleCompiled, config


+def load_vmfb_using_mmap(
+    flatbuffer_blob_or_path, device: str, device_idx: int = None
+):
+    instance = ireert.VmInstance()
+    device = iree_device_map(device)
+    haldriver = ireert.get_driver(device)
+    haldevice = haldriver.create_device_by_uri(
+        device,
+        allocators=[],
+    )
+    # First get configs.
+    if device_idx is not None:
+        device = iree_device_map(device)
+        print("registering device id: ", device_idx)
+        haldriver = ireert.get_driver(device)
+
+        haldevice = haldriver.create_device(
+            haldriver.query_available_devices()[device_idx]["device_id"],
+            allocators=shark_args.device_allocator,
+        )
+        config = ireert.Config(device=haldevice)
+    else:
+        config = get_iree_runtime_config(device)
+    # Now load vmfb.
+    # Two scenarios we have here :-
+    #      1. We either have the vmfb already saved and therefore pass the path of it.
+    #         (This would arise if we're invoking `load_module` from a SharkInference obj)
+    #   OR 2. We are compiling on the fly, therefore we have the flatbuffer blob to play with.
+    #         (This would arise if we're invoking `compile` from a SharkInference obj)
+    temp_file_to_unlink = None
+    if (
+        isinstance(flatbuffer_blob_or_path, str)
+        and ".vmfb" in flatbuffer_blob_or_path
+    ):
+        vmfb_file_path = flatbuffer_blob_or_path
+        mmaped_vmfb = ireert.VmModule.mmap(instance, flatbuffer_blob_or_path)
+        ctx = ireert.SystemContext(config=config)
+        ctx.add_vm_module(mmaped_vmfb)
+        mmaped_vmfb = getattr(ctx.modules, mmaped_vmfb.name)
+    else:
+        with tempfile.NamedTemporaryFile(delete=False) as tf:
+            tf.write(flatbuffer_blob_or_path)
+            tf.flush()
+            vmfb_file_path = tf.name
+        temp_file_to_unlink = vmfb_file_path
+        mmaped_vmfb = ireert.VmModule.mmap(instance, vmfb_file_path)
+    return mmaped_vmfb, config, temp_file_to_unlink
+
+
 def get_iree_compiled_module(
    module,
    device: str,
@@ -329,19 +381,58 @@ def get_iree_compiled_module(
    model_config_path: str = None,
    extra_args: list = [],
    device_idx: int = None,
+    mmap: bool = False,
 ):
    """Given a module returns the compiled .vmfb and configs"""
    flatbuffer_blob = compile_module_to_flatbuffer(
        module, device, frontend, model_config_path, extra_args
    )
-    return get_iree_module(flatbuffer_blob, device, device_idx=device_idx)
+    temp_file_to_unlink = None
+    # TODO: Currently mmap=True control flow path has been switched off for mmap.
+    #       Got to find a cleaner way to unlink/delete the temporary file since
+    #       we're setting delete=False when creating NamedTemporaryFile. That's why
+    #       I'm getting hold of the name of the temporary file in `temp_file_to_unlink`.
+    if mmap:
+        print(f"Will load the compiled module as a mmapped temporary file")
+        vmfb, config, temp_file_to_unlink = load_vmfb_using_mmap(
+            flatbuffer_blob, device, device_idx
+        )
+    else:
+        vmfb, config = get_iree_module(
+            flatbuffer_blob, device, device_idx=device_idx
+        )
+    ret_params = {
+        "vmfb": vmfb,
+        "config": config,
+        "temp_file_to_unlink": temp_file_to_unlink,
+    }
+    return ret_params


-def load_flatbuffer(flatbuffer_path: str, device: str, device_idx: int = None):
-    with open(os.path.join(flatbuffer_path), "rb") as f:
-        flatbuffer_blob = f.read()
-
-    return get_iree_module(flatbuffer_blob, device, device_idx=device_idx)
+def load_flatbuffer(
+    flatbuffer_path: str,
+    device: str,
+    device_idx: int = None,
+    mmap: bool = False,
+):
+    temp_file_to_unlink = None
+    if mmap:
+        print(f"Loading flatbuffer at {flatbuffer_path} as a mmapped file")
+        vmfb, config, temp_file_to_unlink = load_vmfb_using_mmap(
+            flatbuffer_path, device, device_idx
+        )
+    else:
+        with open(os.path.join(flatbuffer_path), "rb") as f:
+            flatbuffer_blob = f.read()
+        vmfb, config = get_iree_module(
+            flatbuffer_blob, device, device_idx=device_idx
+        )
+    ret_params = {
+        "vmfb": vmfb,
+        "config": config,
+        "temp_file_to_unlink": temp_file_to_unlink,
+    }
+    return ret_params


 def export_iree_module_to_vmfb(
--- a/shark/shark_inference.py
+++ b/shark/shark_inference.py
@@ -48,6 +48,8 @@ class SharkInference:
        Refer to {https://mlir.llvm.org/docs/Dialects/}
    is_benchmark: bool
        Whether this SharkInference module should be benchmark-enabled.
+    mmap: bool
+        Whether to load/run vmfb using mmap. It's `True` by default.

    Methods
    -------
@@ -70,6 +72,7 @@ class SharkInference:
        dispatch_benchmark: str = None,
        dispatch_benchmark_dir: str = "temp_dispatch_benchmarks",
        device_idx: int = None,
+        mmap: bool = True,
    ):
        self.mlir_module = mlir_module
        self.device = shark_args.device if device == "none" else device
@@ -88,6 +91,7 @@ class SharkInference:
        )

        self.shark_runner = None
+        self.mmap = mmap

    def compile(self, extra_args=[]):
        if self.dispatch_benchmarks is not None:
@@ -201,12 +205,14 @@ class SharkInference:
            compile_vmfb=False,
            extra_args=extra_args,
        )
-        (
-            self.shark_runner.iree_compilation_module,
-            self.shark_runner.iree_config,
-        ) = load_flatbuffer(
+        params = load_flatbuffer(
            path,
            self.device,
            self.device_idx,
+            mmap=self.mmap,
        )
+        self.shark_runner.iree_compilation_module = params["vmfb"]
+        self.shark_runner.iree_config = params["config"]
+        self.shark_runner.temp_file_to_unlink = params["temp_file_to_unlink"]
+        del params
        return
--- a/shark/shark_runner.py
+++ b/shark/shark_runner.py
@@ -85,16 +85,17 @@ class SharkRunner:

        if compile_vmfb == True:
            # Compile the module to get the .vmfb.
-            (
-                self.iree_compilation_module,
-                self.iree_config,
-            ) = get_iree_compiled_module(
+            params = get_iree_compiled_module(
                self.mlir_module,
                self.device,
                self.mlir_dialect,
                extra_args=self.extra_args,
                device_idx=self.device_idx,
            )
+            self.iree_compilation_module = params["vmfb"]
+            self.iree_config = params["config"]
+            self.temp_file_to_unlink = params["temp_file_to_unlink"]
+            del params

    def run(self, function_name, inputs: tuple, send_to_host=False):
        return get_results(