Compare commits

...

49 Commits

Author SHA1 Message Date
Ean Garvey
dea405285e Revert python version change and update actions/setup-python to v5 2024-08-08 16:50:15 -05:00
Ean Garvey
b1d2cb3bad Change python version in nightly .yml to 3.11.9 2024-08-08 16:48:41 -05:00
Ean Garvey
4759e808f2 Rest API support and cleanup 2024-08-08 11:37:53 -05:00
Ean Garvey
d5f37eaf20 Bump requirements 2024-06-17 18:16:44 -05:00
Ean Garvey
84bc1437a7 Strip sdxl-turbo options 2024-06-17 17:57:40 -05:00
Ean Garvey
83f424a83e Close advanced setting by default. 2024-06-06 13:22:57 -05:00
Ean Garvey
5b3b262359 Simplify ui further, add CLI option to load a default config 2024-06-06 13:21:25 -05:00
Ean Garvey
67b438eb9f take all ireert calls out of studio flow 2024-06-04 01:46:48 -05:00
Ean Garvey
4aa2d8b2a2 Purge shark/ directory, minimal ireert api usage for dynamically loaded plugins 2024-06-04 00:53:07 -05:00
Ean Garvey
dac7a29eef Purge unused code and patch out iree runtime handling from init 2024-06-03 18:00:05 -05:00
Ean Garvey
59600456be seed fixes 2024-06-02 16:25:16 -05:00
Ean Garvey
e514910202 Remove sdxl 30step config 2024-06-02 14:28:58 -05:00
Ean Garvey
33f6c312d9 limit steps to 2 (gives best results) 2024-06-02 14:25:29 -05:00
Ean Garvey
ab06047108 set a default 2024-06-02 14:23:40 -05:00
Ean Garvey
ac48b843e7 actually reduce steps 2024-06-02 14:00:45 -05:00
Ean Garvey
5f1b5e58d6 igpu dont error on device parse fail 2024-06-02 12:56:44 -05:00
Ean Garvey
6adae49d9b igpu restrictions 2024-06-02 12:51:53 -05:00
Ean Garvey
6abd9ff5cf Reduce available step options for turbo. 2024-06-02 11:41:23 -05:00
Ean Garvey
9957c96014 More noticeable seed changes 2024-06-02 11:39:21 -05:00
Ean Garvey
36b8c2fd6d disable pndm 2024-06-02 11:30:18 -05:00
Ean Garvey
9163c1fc50 small fixes 2024-06-02 11:28:37 -05:00
Ean Garvey
349e9f70fb Progress indicators 2024-06-02 10:18:09 -05:00
Ean Garvey
64e63e7130 znver4 device handling 2024-06-02 10:08:00 -05:00
Ean Garvey
ea8738fb1a Update SRT links 2024-06-02 09:50:09 -05:00
Ean Garvey
2a5bec3c4f Fixes for seed. 2024-06-02 09:46:22 -05:00
Ean Garvey
bb58b01d75 Switch to fixed steps, tweak config loading to prevent race condition 2024-06-01 20:15:53 -05:00
Ean Garvey
02285b33a4 More fixes for demo. 2024-06-01 19:46:52 -05:00
Ean Garvey
f9a1d35b59 Hide chatbot. 2024-06-01 14:24:37 -05:00
Ean Garvey
b1ca19a6e6 Cleanup for demo. 2024-06-01 13:42:51 -05:00
Ean Garvey
b5dea85808 Reduce UI for demos. 2024-06-01 12:00:22 -05:00
Ean Garvey
e75f96f2d7 fixup conditional 2024-06-01 12:00:11 -05:00
Ean Garvey
bf67e2aa3b Formatting 2024-06-01 11:59:10 -05:00
Ean Garvey
c088247aa1 Fix default configs, config loading, and add warnings/early returns for bad configs. 2024-06-01 11:58:51 -05:00
Ean Garvey
42abc6787d Small tweaks to ckpt processing, add tool to prefix params keys 2024-06-01 11:53:40 -05:00
Ean Garvey
26f80ccbbb Fixes to UI config defaults, config loading, and warnings. (#2153) 2024-05-31 18:14:27 -04:00
Ean Garvey
d2c3752dc7 Fix batch count and tweaks to chatbot. (#2151)
* Fix batch count

* Add button to unload models manually.

* Add compiled pipeline option

* Add brevitas to requirements

* Tweaks to chatbot

* Change script loading trigger
2024-05-31 18:48:28 +05:30
Ean Garvey
4505c4549f Force inlined weights on igpu for now, small fixes to chatbot (#2149)
* Add igpu and custom triple support.

* Small fixes to igpu, SDXL-turbo

* custom pipe loading

* formatting

* Remove old nodlogo import.
2024-05-30 11:40:42 -05:00
Gaurav Shukla
793495c9c6 [ui] Add AMD logo in shark studio
Signed-Off-by: Gaurav Shukla <gaurav.shukla@amd.com>
2024-05-30 21:43:15 +05:30
Ean Garvey
13e1d8d98a Add igpu and custom triple support. (#2148) 2024-05-29 17:39:36 -05:00
Ean Garvey
2074df40ad Point to nod fork of diffusers. (#2146) 2024-05-29 00:56:21 -05:00
Ean Garvey
7b30582408 Point to SRT links for windows. (#2145) 2024-05-29 01:20:30 -04:00
Ean Garvey
151195ab74 Add a few requirements for ensured parity with turbine-models requirements. (#2142)
* Add scipy to requirements.

Adds diffusers req and a note for torchsde.
2024-05-28 15:37:31 -05:00
Ean Garvey
8146f0bd2f Remove leftover merge conflict line from setup script. (#2141) 2024-05-28 11:04:45 -07:00
Ean Garvey
68e9281778 (Studio2) Refactors SD pipeline to rely on turbine-models pipeline, fixes to LLM, gitignore (#2129)
* Shark Studio SDXL support, HIP driver support, simpler device info, small fixes

* Fixups to llm API/UI and ignore user config files.

* Small fixes for unifying pipelines.

* Update requirements.txt for iree-turbine (#2130)

* Fix Llama2 on CPU (#2133)

* Filesystem cleanup and custom model fixes (#2127)

* Fix some formatting issues

* Remove IREE pin (fixes exe issue) (#2126)

* Update find links for IREE packages (#2136)

* Shark Studio SDXL support, HIP driver support, simpler device info, small fixes

* Abstract out SD pipelines from Studio Webui (WIP)

* Switch from pin to minimum torch version and fix index url

* Fix device parsing.

* Fix linux setup

* Fix custom weights.

---------

Co-authored-by: saienduri <77521230+saienduri@users.noreply.github.com>
Co-authored-by: gpetters-amd <159576198+gpetters-amd@users.noreply.github.com>
Co-authored-by: gpetters94 <gpetters@protonmail.com>
2024-05-28 13:18:31 -04:00
Ean Garvey
fd07cae991 Update find links for IREE packages (#2136) 2024-05-13 11:43:17 -05:00
gpetters94
6cb86a843e Remove IREE pin (fixes exe issue) (#2126)
* Diagnose a build issue

* Remove IREE pin

* Revert the build on pull request change
2024-04-30 12:27:30 -05:00
gpetters-amd
7db1612a5c Filesystem cleanup and custom model fixes (#2127)
* Initial filesystem cleanup

* More filesystem cleanup

* Fix some formatting issues

* Address comments
2024-04-30 11:18:33 -05:00
gpetters-amd
81d6e059ac Fix Llama2 on CPU (#2133) 2024-04-29 12:18:16 -05:00
saienduri
e003d0abe8 Update requirements.txt for iree-turbine (#2130)
* Update requirements.txt to iree-turbine creation

* Update requirements.txt

* Update requirements.txt

* Update requirements.txt
2024-04-29 12:28:14 -04:00
145 changed files with 1649 additions and 27046 deletions

View File

@@ -19,7 +19,7 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
@@ -46,17 +46,18 @@ jobs:
         draft: true
         prerelease: true
-    - name: Build Package
+    - name: Build Package (api only)
       shell: powershell
       run: |
         ./setup_venv.ps1
         python process_skipfiles.py
         $env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
         pip install -e .
-        pyinstaller .\apps\shark_studio\shark_studio.spec
+        pip freeze -l
+        pyinstaller .\apps\shark_studio\shark_studio_apionly.spec
         mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
         signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
     - name: Upload Release Assets
       id: upload-release-assets
      uses: dwenegar/upload-release-assets@v1

View File

@@ -81,6 +81,5 @@ jobs:
     source shark.venv/bin/activate
     pip install -r requirements.txt --no-cache-dir
     pip install -e .
-    pip uninstall -y torch
-    pip install torch==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-    python apps/shark_studio/tests/api_test.py
+    # Disabled due to hang when exporting test llama2
+    # python apps/shark_studio/tests/api_test.py

.gitignore (vendored): 8 changed lines

View File

@@ -164,7 +164,7 @@ cython_debug/
 # vscode related
 .vscode
 
-# Shark related artefacts
+# Shark related artifacts
 *venv/
 shark_tmp/
 *.vmfb
@@ -172,6 +172,7 @@ shark_tmp/
 tank/dict_configs.py
 *.csv
 reproducers/
+apps/shark_studio/web/configs
 
 # ORT related artefacts
 cache_models/
@@ -188,6 +189,11 @@ variants.json
 # models folder
 apps/stable_diffusion/web/models/
 
+# model artifacts (SHARK)
+*.tempfile
+*.mlir
+*.vmfb
+
 # Stencil annotators.
 stencil_annotator/

View File

@@ -25,10 +25,18 @@ def imports():
     )
     warnings.filterwarnings(action="ignore", category=UserWarning, module="torchvision")
     warnings.filterwarnings(action="ignore", category=UserWarning, module="torch")
+    warnings.filterwarnings(action="ignore", category=UserWarning, module="diffusers")
+    warnings.filterwarnings(action="ignore", category=FutureWarning, module="diffusers")
+    warnings.filterwarnings(
+        action="ignore", category=FutureWarning, module="huggingface-hub"
+    )
+    warnings.filterwarnings(
+        action="ignore", category=UserWarning, module="huggingface-hub"
+    )
 
-    import gradio  # noqa: F401
+    # import gradio  # noqa: F401
 
-    startup_timer.record("import gradio")
+    # startup_timer.record("import gradio")
 
     import apps.shark_studio.web.utils.globals as global_obj
@@ -48,18 +56,15 @@ def initialize():
     # existing temporary images there if they exist. Then we can import gradio.
     # It has to be in this order or gradio ignores what we've set up.
-    config_tmp()
-    # clear_tmp_mlir()
-    clear_tmp_imgs()
+    # config_tmp()
+    # clear_tmp_imgs()
 
     from apps.shark_studio.web.utils.file_utils import (
-        create_checkpoint_folders,
+        create_model_folders,
     )
 
     # Create custom models folders if they don't exist
-    create_checkpoint_folders()
-    import gradio as gr
+    create_model_folders()
 
     # initialize_rest(reload_script_modules=False)
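
One caveat with the new warnings filters above: the module argument to warnings.filterwarnings is a regular expression matched against the start of the originating module's name, and the package imports as huggingface_hub (underscore), so the hyphenated "huggingface-hub" pattern most likely never matches anything. A minimal sketch of the probable intent (not part of the diff):

import warnings

# `module` is a start-anchored regex against the module name, so the plain
# underscore form also covers submodules like huggingface_hub.file_download.
warnings.filterwarnings(
    action="ignore", category=FutureWarning, module="huggingface_hub"
)
warnings.filterwarnings(
    action="ignore", category=UserWarning, module="huggingface_hub"
)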

View File

@@ -3,8 +3,13 @@ from turbine_models.model_runner import vmfbRunner
 from turbine_models.gen_external_params.gen_external_params import gen_external_params
 import time
 from shark.iree_utils.compile_utils import compile_module_to_flatbuffer
-from apps.shark_studio.web.utils.file_utils import get_resource_path
+from apps.shark_studio.web.utils.file_utils import (
+    get_resource_path,
+    get_checkpoints_path,
+)
 from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.shark_studio.api.utils import parse_device
+from urllib.request import urlopen
 import iree.runtime as ireert
 from itertools import chain
 import gc
@@ -13,7 +18,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 llm_model_map = {
-    "llama2_7b": {
+    "meta-llama/Llama-2-7b-chat-hf": {
         "initializer": stateless_llama.export_transformer_model,
         "hf_model_name": "meta-llama/Llama-2-7b-chat-hf",
         "compile_flags": ["--iree-opt-const-expr-hoisting=False"],
@@ -65,6 +70,7 @@ class LanguageModel:
         use_system_prompt=True,
         streaming_llm=False,
     ):
+        _, _, self.triple = parse_device(device)
         self.hf_model_name = llm_model_map[model_name]["hf_model_name"]
         self.device = device.split("=>")[-1].strip()
         self.backend = self.device.split("://")[0]
@@ -155,7 +161,9 @@ class LanguageModel:
                 use_auth_token=hf_auth_token,
             )
         elif not os.path.exists(self.tempfile_name):
-            self.torch_ir, self.tokenizer = llm_model_map[model_name]["initializer"](
+            self.torch_ir, self.tokenizer = llm_model_map[self.hf_model_name][
+                "initializer"
+            ](
                 self.hf_model_name,
                 hf_auth_token,
                 compile_to="torch",
@@ -163,6 +171,7 @@ class LanguageModel:
                 precision=self.precision,
                 quantization=self.quantization,
                 streaming_llm=self.streaming_llm,
+                decomp_attn=True,
             )
             with open(self.tempfile_name, "w+") as f:
                 f.write(self.torch_ir)
@@ -192,11 +201,27 @@ class LanguageModel:
             )
         elif self.backend == "vulkan":
             flags.extend(["--iree-stream-resource-max-allocation-size=4294967296"])
+        elif self.backend == "rocm":
+            flags.extend(
+                [
+                    "--iree-codegen-llvmgpu-enable-transform-dialect-jit=false",
+                    "--iree-llvmgpu-enable-prefetch=true",
+                    "--iree-opt-outer-dim-concat=true",
+                    "--iree-flow-enable-aggressive-fusion",
+                ]
+            )
+            if "gfx9" in self.triple:
+                flags.extend(
+                    [
+                        f"--iree-codegen-transform-dialect-library={get_mfma_spec_path(self.triple, get_checkpoints_path())}",
+                        "--iree-codegen-llvmgpu-use-vector-distribution=true",
+                    ]
+                )
+        flags.extend(llm_model_map[self.hf_model_name]["compile_flags"])
         flatbuffer_blob = compile_module_to_flatbuffer(
             self.tempfile_name,
             device=self.device,
-            frontend="torch",
+            frontend="auto",
             model_config_path=None,
             extra_args=flags,
             write_to=self.vmfb_name,
@@ -258,7 +283,7 @@ class LanguageModel:
         history.append(format_out(token))
         while (
-            format_out(token) != llm_model_map["llama2_7b"]["stop_token"]
+            format_out(token) != llm_model_map[self.hf_model_name]["stop_token"]
             and len(history) < self.max_tokens
         ):
             dec_time = time.time()
@@ -272,7 +297,7 @@ class LanguageModel:
             self.prev_token_len = token_len + len(history)
 
-            if format_out(token) == llm_model_map["llama2_7b"]["stop_token"]:
+            if format_out(token) == llm_model_map[self.hf_model_name]["stop_token"]:
                 break
 
         for i in range(len(history)):
@@ -306,7 +331,7 @@ class LanguageModel:
         self.first_input = False
 
         history.append(int(token))
-        while token != llm_model_map["llama2_7b"]["stop_token"]:
+        while token != llm_model_map[self.hf_model_name]["stop_token"]:
             dec_time = time.time()
             result = self.hf_mod(token.reshape([1, 1]), past_key_values=pkv)
             history.append(int(token))
@@ -317,7 +342,7 @@ class LanguageModel:
             self.prev_token_len = token_len + len(history)
 
-            if token == llm_model_map["llama2_7b"]["stop_token"]:
+            if token == llm_model_map[self.hf_model_name]["stop_token"]:
                 break
 
         for i in range(len(history)):
             if type(history[i]) != int:
@@ -327,6 +352,17 @@ class LanguageModel:
         return result_output, total_time
 
+def get_mfma_spec_path(target_chip, save_dir):
+    url = "https://raw.githubusercontent.com/iree-org/iree/main/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir"
+    attn_spec = urlopen(url).read().decode("utf-8")
+    spec_path = os.path.join(save_dir, "attention_and_matmul_spec_mfma.mlir")
+    if os.path.exists(spec_path):
+        return spec_path
+    with open(spec_path, "w") as f:
+        f.write(attn_spec)
+    return spec_path
+
 def llm_chat_api(InputData: dict):
     from datetime import datetime as dt
@@ -347,7 +383,11 @@ def llm_chat_api(InputData: dict):
     else:
         print(f"prompt : {InputData['prompt']}")
 
-    model_name = InputData["model"] if "model" in InputData.keys() else "llama2_7b"
+    model_name = (
+        InputData["model"]
+        if "model" in InputData.keys()
+        else "meta-llama/Llama-2-7b-chat-hf"
+    )
     model_path = llm_model_map[model_name]
     device = InputData["device"] if "device" in InputData.keys() else "cpu"
     precision = "fp16"
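
A side note on get_mfma_spec_path as added above: the spec file is downloaded before the cache check, so the early return still pays for the network round trip. A minimal reordered sketch (hypothetical name, otherwise the same behavior) that consults the on-disk cache first:

import os
from urllib.request import urlopen

def get_mfma_spec_path_cached(target_chip, save_dir):
    # Check the on-disk cache first so the URL is only fetched once.
    spec_path = os.path.join(save_dir, "attention_and_matmul_spec_mfma.mlir")
    if os.path.exists(spec_path):
        return spec_path
    url = (
        "https://raw.githubusercontent.com/iree-org/iree/main/"
        "build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir"
    )
    attn_spec = urlopen(url).read().decode("utf-8")
    with open(spec_path, "w") as f:
        f.write(attn_spec)
    return spec_path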

View File

@@ -1,54 +1,75 @@
import gc
import torch
import gradio as gr
import time
import os
import json
import numpy as np
import copy
import importlib.util
import sys
from tqdm.auto import tqdm
from pathlib import Path
from random import randint
from turbine_models.custom_models.sd_inference import clip, unet, vae
from apps.shark_studio.api.controlnet import control_adapter_map
from apps.shark_studio.api.utils import parse_device
from apps.shark_studio.web.utils.state import status_label
from apps.shark_studio.web.utils.file_utils import (
safe_name,
get_resource_path,
get_checkpoints_path,
)
from apps.shark_studio.modules.pipeline import SharkPipelineBase
from apps.shark_studio.modules.schedulers import get_schedulers
from apps.shark_studio.modules.prompt_encoding import (
get_weighted_text_embeddings,
)
from apps.shark_studio.modules.img_processing import (
resize_stencil,
save_output_img,
resamplers,
resampler_list,
)
from apps.shark_studio.modules.ckpt_processing import (
preprocessCKPT,
process_custom_pipe_weights,
)
from transformers import CLIPTokenizer
from diffusers.image_processor import VaeImageProcessor
sd_model_map = {
"clip": {
"initializer": clip.export_clip_model,
},
"unet": {
"initializer": unet.export_unet_model,
},
"vae_decode": {
"initializer": vae.export_vae_model,
},
from subprocess import check_output
EMPTY_SD_MAP = {
"clip": None,
"scheduler": None,
"unet": None,
"vae_decode": None,
}
EMPTY_SDXL_MAP = {
"prompt_encoder": None,
"scheduled_unet": None,
"vae_decode": None,
"pipeline": None,
"full_pipeline": None,
}
EMPTY_FLAGS = {
"clip": None,
"unet": None,
"vae": None,
"pipeline": None,
}
class StableDiffusion(SharkPipelineBase):
def load_script(source, module_name):
"""
reads file source and loads it as a module
:param source: file to load
:param module_name: name of module to register in sys.modules
:return: loaded module
"""
spec = importlib.util.spec_from_file_location(module_name, source)
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
class StableDiffusion:
# This class is responsible for executing image generation and creating
# /managing a set of compiled modules to run Stable Diffusion. The init
# aims to be as general as possible, and the class will infer and compile
@@ -61,66 +82,59 @@ class StableDiffusion(SharkPipelineBase):
height: int,
width: int,
batch_size: int,
steps: int,
scheduler: str,
precision: str,
device: str,
target_triple: str = None,
custom_vae: str = None,
num_loras: int = 0,
import_ir: bool = True,
is_controlled: bool = False,
hf_auth_token=None,
external_weights: str = "safetensors",
progress=gr.Progress(),
):
self.model_max_length = 77
self.batch_size = batch_size
progress(0, desc="Initializing pipeline...")
self.ui_device = device
self.precision = precision
self.dtype = torch.float16 if precision == "fp16" else torch.float32
self.height = height
self.width = width
self.scheduler_obj = {}
static_kwargs = {
"pipe": {
"external_weights": "safetensors",
},
"clip": {"hf_model_name": base_model_id},
"unet": {
"hf_model_name": base_model_id,
"unet_model": unet.UnetModel(hf_model_name=base_model_id),
"batch_size": batch_size,
# "is_controlled": is_controlled,
# "num_loras": num_loras,
"height": height,
"width": width,
"precision": precision,
"max_length": self.model_max_length,
},
"vae_encode": {
"hf_model_name": base_model_id,
"vae_model": vae.VaeModel(
hf_model_name=custom_vae if custom_vae else base_model_id,
),
"batch_size": batch_size,
"height": height,
"width": width,
"precision": precision,
},
"vae_decode": {
"hf_model_name": base_model_id,
"vae_model": vae.VaeModel(
hf_model_name=custom_vae if custom_vae else base_model_id,
),
"batch_size": batch_size,
"height": height,
"width": width,
"precision": precision,
},
}
super().__init__(sd_model_map, base_model_id, static_kwargs, device, import_ir)
self.compiled_pipeline = False
self.base_model_id = base_model_id
self.custom_vae = custom_vae
self.is_sdxl = "xl" in self.base_model_id.lower()
self.is_custom = ".py" in self.base_model_id.lower()
if self.is_custom:
custom_module = load_script(
os.path.join(get_checkpoints_path("scripts"), self.base_model_id),
"custom_pipeline",
)
self.turbine_pipe = custom_module.StudioPipeline
self.dynamic_steps = False
self.model_map = custom_module.MODEL_MAP
elif self.is_sdxl:
from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
SharkSDXLPipeline,
)
self.turbine_pipe = SharkSDXLPipeline
self.dynamic_steps = False
self.model_map = EMPTY_SDXL_MAP
else:
from turbine_models.custom_models.sd_inference.sd_pipeline import (
SharkSDPipeline,
)
self.turbine_pipe = SharkSDPipeline
self.dynamic_steps = True
self.model_map = EMPTY_SD_MAP
max_length = 64
target_backend, self.rt_device, triple = parse_device(device, target_triple)
pipe_id_list = [
safe_name(base_model_id),
str(batch_size),
str(self.model_max_length),
str(max_length),
f"{str(height)}x{str(width)}",
precision,
self.device,
triple,
]
if num_loras > 0:
pipe_id_list.append(str(num_loras) + "lora")
@@ -129,317 +143,183 @@ class StableDiffusion(SharkPipelineBase):
if custom_vae:
pipe_id_list.append(custom_vae)
self.pipe_id = "_".join(pipe_id_list)
print(f"\n[LOG] Pipeline initialized with pipe_id: {self.pipe_id}.")
del static_kwargs
gc.collect()
def prepare_pipe(self, custom_weights, adapters, embeddings, is_img2img):
print(f"\n[LOG] Preparing pipeline...")
self.is_img2img = is_img2img
self.schedulers = get_schedulers(self.base_model_id)
self.weights_path = os.path.join(
get_checkpoints_path(), self.safe_name(self.base_model_id)
self.pipeline_dir = Path(os.path.join(get_checkpoints_path(), self.pipe_id))
self.weights_path = Path(
os.path.join(
get_checkpoints_path(), safe_name(self.base_model_id + "_" + precision)
)
)
if not os.path.exists(self.weights_path):
os.mkdir(self.weights_path)
for model in adapters:
self.model_map[model] = adapters[model]
decomp_attn = True
attn_spec = None
if triple in ["gfx940", "gfx942", "gfx90a"]:
decomp_attn = False
attn_spec = "mfma"
elif triple in ["gfx1100", "gfx1103", "gfx1150"]:
decomp_attn = False
attn_spec = "wmma"
if triple in ["gfx1103", "gfx1150"]:
# external weights have issues on igpu
external_weights = None
elif target_backend == "llvm-cpu":
decomp_attn = False
progress(0.5, desc="Initializing pipeline...")
self.sd_pipe = self.turbine_pipe(
hf_model_name=base_model_id,
scheduler_id=scheduler,
height=height,
width=width,
precision=precision,
max_length=max_length,
batch_size=batch_size,
num_inference_steps=steps,
device=target_backend,
iree_target_triple=triple,
ireec_flags=EMPTY_FLAGS,
attn_spec=attn_spec,
decomp_attn=decomp_attn,
pipeline_dir=self.pipeline_dir,
external_weights_dir=self.weights_path,
external_weights=external_weights,
custom_vae=custom_vae,
)
progress(1, desc="Pipeline initialized!...")
gc.collect()
for submodel in self.static_kwargs:
if custom_weights:
custom_weights_params, _ = process_custom_pipe_weights(custom_weights)
if submodel not in ["clip", "clip2"]:
self.static_kwargs[submodel][
"external_weights"
] = custom_weights_params
else:
self.static_kwargs[submodel]["external_weight_path"] = os.path.join(
self.weights_path, submodel + ".safetensors"
def prepare_pipe(
self,
custom_weights,
adapters,
embeddings,
is_img2img,
compiled_pipeline,
progress=gr.Progress(),
):
progress(0, desc="Preparing models...")
self.is_img2img = False
mlirs = copy.deepcopy(self.model_map)
vmfbs = copy.deepcopy(self.model_map)
weights = copy.deepcopy(self.model_map)
if not self.is_sdxl:
compiled_pipeline = False
self.compiled_pipeline = compiled_pipeline
if custom_weights:
from apps.shark_studio.modules.ckpt_processing import (
preprocessCKPT,
save_irpa,
)
custom_weights = os.path.join(
get_checkpoints_path("checkpoints"),
safe_name(self.base_model_id.split("/")[-1]),
custom_weights,
)
diffusers_weights_path = preprocessCKPT(custom_weights, self.precision)
for key in weights:
if key in ["scheduled_unet", "unet"]:
unet_weights_path = os.path.join(
diffusers_weights_path,
"unet",
"diffusion_pytorch_model.safetensors",
)
else:
self.static_kwargs[submodel]["external_weight_path"] = os.path.join(
self.weights_path, submodel + ".safetensors"
)
weights[key] = save_irpa(unet_weights_path, "unet.")
if key in ["mmdit"]:
mmdit_weights_path = os.path.join(
diffusers_weights_path,
"mmdit",
"diffusion_pytorch_model_fp16.safetensors",
)
weights[key] = save_irpa(mmdit_weights_path, "mmdit.")
elif key in ["clip", "prompt_encoder", "text_encoder"]:
if not self.is_sdxl and not self.is_custom:
sd1_path = os.path.join(
diffusers_weights_path, "text_encoder", "model.safetensors"
)
weights[key] = save_irpa(sd1_path, "text_encoder_model.")
elif self.is_sdxl:
clip_1_path = os.path.join(
diffusers_weights_path, "text_encoder", "model.safetensors"
)
clip_2_path = os.path.join(
diffusers_weights_path,
"text_encoder_2",
"model.safetensors",
)
weights[key] = [
save_irpa(clip_1_path, "text_encoder_model_1."),
save_irpa(clip_2_path, "text_encoder_model_2."),
]
elif self.is_custom:
clip_g_path = os.path.join(
diffusers_weights_path,
"text_encoder",
"model.fp16.safetensors",
)
clip_l_path = os.path.join(
diffusers_weights_path,
"text_encoder_2",
"model.fp16.safetensors",
)
t5xxl_path = os.path.join(
diffusers_weights_path,
"text_encoder_3",
"model.fp16.safetensors",
)
weights[key] = [
save_irpa(clip_g_path, "clip_g.transformer."),
save_irpa(clip_l_path, "clip_l.transformer."),
save_irpa(t5xxl_path, "t5xxl.transformer."),
]
elif key in ["vae_decode"] and weights[key] is None:
vae_weights_path = os.path.join(
diffusers_weights_path,
"vae",
"diffusion_pytorch_model.safetensors",
)
weights[key] = save_irpa(vae_weights_path, "vae.")
self.get_compiled_map(pipe_id=self.pipe_id)
print("\n[LOG] Pipeline successfully prepared for runtime.")
progress(0.25, desc=f"Preparing pipeline for {self.ui_device}...")
vmfbs, weights = self.sd_pipe.check_prepared(
mlirs, vmfbs, weights, interactive=False
)
progress(0.5, desc=f"Artifacts ready!")
progress(0.75, desc=f"Loading models and weights...")
self.sd_pipe.load_pipeline(
vmfbs, weights, self.rt_device, self.compiled_pipeline
)
progress(1, desc="Pipeline loaded! Generating images...")
return
def encode_prompts_weight(
self,
prompt,
negative_prompt,
do_classifier_free_guidance=True,
):
# Encodes the prompt into text encoder hidden states.
self.load_submodels(["clip"])
self.tokenizer = CLIPTokenizer.from_pretrained(
self.base_model_id,
subfolder="tokenizer",
)
clip_inf_start = time.time()
text_embeddings, uncond_embeddings = get_weighted_text_embeddings(
pipe=self,
prompt=prompt,
uncond_prompt=negative_prompt if do_classifier_free_guidance else None,
)
if do_classifier_free_guidance:
text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
pad = (0, 0) * (len(text_embeddings.shape) - 2)
pad = pad + (
0,
self.static_kwargs["unet"]["max_length"] - text_embeddings.shape[1],
)
text_embeddings = torch.nn.functional.pad(text_embeddings, pad)
# SHARK: Report clip inference time
clip_inf_time = (time.time() - clip_inf_start) * 1000
if self.ondemand:
self.unload_submodels(["clip"])
gc.collect()
print(f"\n[LOG] Clip Inference time (ms) = {clip_inf_time:.3f}")
return text_embeddings.numpy().astype(np.float16)
def prepare_latents(
self,
generator,
num_inference_steps,
image,
strength,
):
noise = torch.randn(
(
self.batch_size,
4,
self.height // 8,
self.width // 8,
),
generator=generator,
dtype=self.dtype,
).to("cpu")
self.scheduler.set_timesteps(num_inference_steps)
if self.is_img2img:
init_timestep = min(
int(num_inference_steps * strength), num_inference_steps
)
t_start = max(num_inference_steps - init_timestep, 0)
timesteps = self.scheduler.timesteps[t_start:]
latents = self.encode_image(image)
latents = self.scheduler.add_noise(latents, noise, timesteps[0].repeat(1))
return latents, [timesteps]
else:
self.scheduler.is_scale_input_called = True
latents = noise * self.scheduler.init_noise_sigma
return latents, self.scheduler.timesteps
def encode_image(self, input_image):
self.load_submodels(["vae_encode"])
vae_encode_start = time.time()
latents = self.run("vae_encode", input_image)
vae_inf_time = (time.time() - vae_encode_start) * 1000
if self.ondemand:
self.unload_submodels(["vae_encode"])
print(f"\n[LOG] VAE Encode Inference time (ms): {vae_inf_time:.3f}")
return latents
def produce_img_latents(
self,
latents,
text_embeddings,
guidance_scale,
total_timesteps,
cpu_scheduling,
mask=None,
masked_image_latents=None,
return_all_latents=False,
):
# self.status = SD_STATE_IDLE
step_time_sum = 0
latent_history = [latents]
text_embeddings = torch.from_numpy(text_embeddings).to(self.dtype)
text_embeddings_numpy = text_embeddings.detach().numpy()
guidance_scale = torch.Tensor([guidance_scale]).to(self.dtype)
self.load_submodels(["unet"])
for i, t in tqdm(enumerate(total_timesteps)):
step_start_time = time.time()
timestep = torch.tensor([t]).to(self.dtype).detach().numpy()
latent_model_input = self.scheduler.scale_model_input(latents, t).to(
self.dtype
)
if mask is not None and masked_image_latents is not None:
latent_model_input = torch.cat(
[
torch.from_numpy(np.asarray(latent_model_input)).to(self.dtype),
mask,
masked_image_latents,
],
dim=1,
).to(self.dtype)
if cpu_scheduling:
latent_model_input = latent_model_input.detach().numpy()
# Profiling Unet.
# profile_device = start_profiling(file_path="unet.rdc")
noise_pred = self.run(
"unet",
[
latent_model_input,
timestep,
text_embeddings_numpy,
guidance_scale,
],
)
# end_profiling(profile_device)
if cpu_scheduling:
noise_pred = torch.from_numpy(noise_pred.to_host())
latents = self.scheduler.step(noise_pred, t, latents).prev_sample
else:
latents = self.run("scheduler_step", (noise_pred, t, latents))
latent_history.append(latents)
step_time = (time.time() - step_start_time) * 1000
# print(
# f"\n [LOG] step = {i} | timestep = {t} | time = {step_time:.2f}ms"
# )
step_time_sum += step_time
# if self.status == SD_STATE_CANCEL:
# break
if self.ondemand:
self.unload_submodels(["unet"])
gc.collect()
avg_step_time = step_time_sum / len(total_timesteps)
print(f"\n[LOG] Average step time: {avg_step_time}ms/it")
if not return_all_latents:
return latents
all_latents = torch.cat(latent_history, dim=0)
return all_latents
def decode_latents(self, latents, cpu_scheduling=True):
latents_numpy = latents.to(self.dtype)
if cpu_scheduling:
latents_numpy = latents.detach().numpy()
# profile_device = start_profiling(file_path="vae.rdc")
vae_start = time.time()
images = self.run("vae_decode", latents_numpy).to_host()
vae_inf_time = (time.time() - vae_start) * 1000
# end_profiling(profile_device)
print(f"\n[LOG] VAE Inference time (ms): {vae_inf_time:.3f}")
images = torch.from_numpy(images).permute(0, 2, 3, 1).float().numpy()
pil_images = self.image_processor.numpy_to_pil(images)
return pil_images
def generate_images(
self,
prompt,
negative_prompt,
image,
scheduler,
steps,
strength,
guidance_scale,
seed,
ondemand,
repeatable_seeds,
resample_type,
control_mode,
hints,
progress=gr.Progress(),
):
# TODO: Batched args
self.image_processor = VaeImageProcessor(do_convert_rgb=True)
self.scheduler = self.schedulers[scheduler]
self.ondemand = ondemand
if self.is_img2img:
image, _ = self.image_processor.preprocess(image, resample_type)
else:
image = None
print("\n[LOG] Generating images...")
batched_args = [
prompt,
negative_prompt,
image,
]
for arg in batched_args:
if not isinstance(arg, list):
arg = [arg] * self.batch_size
if len(arg) < self.batch_size:
arg = arg * self.batch_size
else:
arg = [arg[i] for i in range(self.batch_size)]
text_embeddings = self.encode_prompts_weight(
img = self.sd_pipe.generate_images(
prompt,
negative_prompt,
1,
guidance_scale,
seed,
return_imgs=True,
)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
generator = torch.manual_seed(seed)
init_latents, final_timesteps = self.prepare_latents(
generator=generator,
num_inference_steps=steps,
image=image,
strength=strength,
)
latents = self.produce_img_latents(
latents=init_latents,
text_embeddings=text_embeddings,
guidance_scale=guidance_scale,
total_timesteps=final_timesteps,
cpu_scheduling=True, # until we have schedulers through Turbine
)
# Img latents -> PIL images
all_imgs = []
self.load_submodels(["vae_decode"])
for i in tqdm(range(0, latents.shape[0], self.batch_size)):
imgs = self.decode_latents(
latents=latents[i : i + self.batch_size],
cpu_scheduling=True,
)
all_imgs.extend(imgs)
if self.ondemand:
self.unload_submodels(["vae_decode"])
return all_imgs
def shark_sd_fn_dict_input(
sd_kwargs: dict,
):
print("[LOG] Submitting Request...")
for key in sd_kwargs:
if sd_kwargs[key] in [None, []]:
sd_kwargs[key] = None
if sd_kwargs[key] in ["None"]:
sd_kwargs[key] = ""
if key == "seed":
sd_kwargs[key] = int(sd_kwargs[key])
for i in range(1):
generated_imgs = yield from shark_sd_fn(**sd_kwargs)
yield generated_imgs
return img
def shark_sd_fn(
@@ -460,19 +340,21 @@ def shark_sd_fn(
custom_vae: str,
precision: str,
device: str,
target_triple: str,
ondemand: bool,
repeatable_seeds: bool,
compiled_pipeline: bool,
resample_type: str,
controlnets: dict,
embeddings: dict,
seed_increment: str | int = 1,
output_type: str = "png",
# progress=gr.Progress(),
):
sd_kwargs = locals()
if not isinstance(sd_init_image, list):
sd_init_image = [sd_init_image]
is_img2img = True if sd_init_image[0] is not None else False
print("\n[LOG] Performing Stable Diffusion Pipeline setup...")
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
@@ -481,6 +363,7 @@ def shark_sd_fn(
control_mode = None
hints = []
num_loras = 0
import_ir = True
for i in embeddings:
num_loras += 1 if embeddings[i] else 0
if "model" in controlnets:
@@ -512,32 +395,36 @@ def shark_sd_fn(
"batch_size": batch_size,
"precision": precision,
"device": device,
"target_triple": target_triple,
"custom_vae": custom_vae,
"num_loras": num_loras,
"import_ir": cmd_opts.import_mlir,
"import_ir": import_ir,
"is_controlled": is_controlled,
"steps": steps,
"scheduler": scheduler,
}
submit_prep_kwargs = {
"custom_weights": custom_weights,
"adapters": adapters,
"embeddings": embeddings,
"is_img2img": is_img2img,
"compiled_pipeline": compiled_pipeline,
}
submit_run_kwargs = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"image": sd_init_image,
"steps": steps,
"scheduler": scheduler,
"strength": strength,
"guidance_scale": guidance_scale,
"seed": seed,
"ondemand": ondemand,
"repeatable_seeds": repeatable_seeds,
"resample_type": resample_type,
"control_mode": control_mode,
"hints": hints,
}
if global_obj.get_sd_obj() and global_obj.get_sd_obj().dynamic_steps:
submit_run_kwargs["steps"] = submit_pipe_kwargs["steps"]
submit_pipe_kwargs.pop("steps")
if (
not global_obj.get_sd_obj()
or global_obj.get_pipe_kwargs() != submit_pipe_kwargs
@@ -563,25 +450,100 @@ def shark_sd_fn(
global_obj.get_sd_obj().prepare_pipe(**submit_prep_kwargs)
generated_imgs = []
if submit_run_kwargs["seed"] in [-1, "-1"]:
submit_run_kwargs["seed"] = randint(0, 4294967295)
seed_increment = "random"
# print(f"\n[LOG] Random seed: {seed}")
# progress(None, desc=f"Generating...")
for current_batch in range(batch_count):
start_time = time.time()
out_imgs = global_obj.get_sd_obj().generate_images(**submit_run_kwargs)
total_time = time.time() - start_time
text_output = f"Total image(s) generation time: {total_time:.4f}sec"
print(f"\n[LOG] {text_output}")
if not isinstance(out_imgs, list):
out_imgs = [out_imgs]
# total_time = time.time() - start_time
# text_output = f"Total image(s) generation time: {total_time:.4f}sec"
# print(f"\n[LOG] {text_output}")
# if global_obj.get_sd_status() == SD_STATE_CANCEL:
# break
# else:
save_output_img(
out_imgs[current_batch],
seed,
sd_kwargs,
)
for batch in range(batch_size):
if output_type == "png":
save_output_img(
out_imgs[batch],
seed,
sd_kwargs,
)
generated_imgs.extend(out_imgs)
yield generated_imgs, status_label(
"Stable Diffusion", current_batch + 1, batch_count, batch_size
)
return generated_imgs, ""
if batch_count > 1:
submit_run_kwargs["seed"] = get_next_seed(seed, seed_increment)
return (generated_imgs, "")
def shark_sd_fn_dict_input(sd_kwargs: dict, *, progress=gr.Progress()):
print("\n[LOG] Submitting Request...")
for key in sd_kwargs:
if sd_kwargs[key] in [None, []]:
sd_kwargs[key] = None
if sd_kwargs[key] in ["None"]:
sd_kwargs[key] = ""
if key in ["steps", "height", "width", "batch_count", "batch_size"]:
sd_kwargs[key] = int(sd_kwargs[key])
if key == "seed":
sd_kwargs[key] = int(sd_kwargs[key])
# TODO: move these checks into the UI code so we don't have gradio warnings in a generalized dict input function.
if not sd_kwargs["device"]:
gr.Warning("No device specified. Please specify a device.")
return None, ""
if sd_kwargs["height"] not in [512, 1024]:
gr.Warning("Height must be 512 or 1024. This is a temporary limitation.")
return None, ""
if sd_kwargs["height"] != sd_kwargs["width"]:
gr.Warning("Height and width must be the same. This is a temporary limitation.")
return None, ""
if sd_kwargs["base_model_id"] == "stabilityai/sdxl-turbo":
if sd_kwargs["steps"] > 10:
gr.Warning("Max steps for sdxl-turbo is 10. 1 to 4 steps are recommended.")
return None, ""
if sd_kwargs["guidance_scale"] > 3:
gr.Warning(
"sdxl-turbo CFG scale should be less than 2.0 if using negative prompt, 0 otherwise."
)
return None, ""
if sd_kwargs["target_triple"] == "":
if not parse_device(sd_kwargs["device"], sd_kwargs["target_triple"])[2]:
gr.Warning(
"Target device architecture could not be inferred. Please specify a target triple, e.g. 'gfx1100' for a Radeon 7900xtx."
)
return None, ""
generated_imgs = yield from shark_sd_fn(**sd_kwargs)
return generated_imgs
def get_next_seed(seed, seed_increment: str | int = 10):
if isinstance(seed_increment, int):
# print(f"\n[LOG] Seed after batch increment: {seed + seed_increment}")
return int(seed + seed_increment)
elif seed_increment == "random":
seed = randint(0, 4294967295)
# print(f"\n[LOG] Random seed: {seed}")
return seed
def unload_sd():
print("Unloading models.")
import apps.shark_studio.web.utils.globals as global_obj
global_obj.clear_cache()
gc.collect()
def cancel_sd():
@@ -596,16 +558,22 @@ def view_json_file(file_path):
return content
def safe_name(name):
return name.replace("/", "_").replace("\\", "_").replace(".", "_")
if __name__ == "__main__":
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
global_obj._init()
sd_json = view_json_file(get_resource_path("../configs/default_sd_config.json"))
sd_json = view_json_file(
get_resource_path(os.path.join(cmd_opts.config_dir, cmd_opts.default_config))
)
sd_kwargs = json.loads(sd_json)
for arg in vars(cmd_opts):
if arg in sd_kwargs:
sd_kwargs[arg] = getattr(cmd_opts, arg)
# for arg in vars(cmd_opts):
# if arg in sd_kwargs:
# sd_kwargs[arg] = getattr(cmd_opts, arg)
for i in shark_sd_fn_dict_input(sd_kwargs):
print(i)
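
To make the new seed plumbing in this file easier to follow, here is a condensed sketch of how a multi-batch run advances seeds. run_batches is a hypothetical driver mirroring the loop in shark_sd_fn; the get_next_seed logic is copied from the diff above:

from random import randint

def get_next_seed(seed, seed_increment: str | int = 10):
    # From the diff: fixed integer stride, or a fresh random seed per batch.
    if isinstance(seed_increment, int):
        return int(seed + seed_increment)
    elif seed_increment == "random":
        return randint(0, 4294967295)

def run_batches(seed, batch_count, seed_increment=1):
    if seed in [-1, "-1"]:
        seed = randint(0, 4294967295)  # -1 requests a random starting seed
        seed_increment = "random"      # and every subsequent batch re-rolls
    for current_batch in range(batch_count):
        print(f"[LOG] batch {current_batch + 1}/{batch_count} uses seed {seed}")
        if batch_count > 1:
            seed = get_next_seed(seed, seed_increment)

run_batches(seed=-1, batch_count=3)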

View File

@@ -11,17 +11,62 @@ from pathlib import Path
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from cpuinfo import get_cpu_info
# TODO: migrate these utils to studio
from shark.iree_utils.vulkan_utils import (
set_iree_vulkan_runtime_flags,
get_vulkan_target_triple,
get_iree_vulkan_runtime_flags,
)
def iree_device_map(device):
uri_parts = device.split("://", 2)
iree_driver = (
_IREE_DEVICE_MAP[uri_parts[0]]
if uri_parts[0] in _IREE_DEVICE_MAP
else uri_parts[0]
)
if len(uri_parts) == 1:
return iree_driver
elif "rocm" in uri_parts:
return "rocm"
else:
return f"{iree_driver}://{uri_parts[1]}"
def get_supported_device_list():
return list(_IREE_DEVICE_MAP.keys())
_IREE_DEVICE_MAP = {
"cpu": "local-task",
"cpu-task": "local-task",
"cpu-sync": "local-sync",
"cuda": "cuda",
"vulkan": "vulkan",
"metal": "metal",
"rocm": "rocm",
"hip": "hip",
"intel-gpu": "level_zero",
}
def iree_target_map(device):
if "://" in device:
device = device.split("://")[0]
return _IREE_TARGET_MAP[device] if device in _IREE_TARGET_MAP else device
_IREE_TARGET_MAP = {
"cpu": "llvm-cpu",
"cpu-task": "llvm-cpu",
"cpu-sync": "llvm-cpu",
"cuda": "cuda",
"vulkan": "vulkan-spirv",
"metal": "metal",
"rocm": "rocm",
"hip": "rocm",
"intel-gpu": "opencl-spirv",
}
def get_available_devices():
return ["rocm", "cpu"]
def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map
device_list = []
try:
@@ -49,82 +94,119 @@ def get_available_devices():
device_list.append(f"{device_name} => {driver_name}://{i}")
return device_list
set_iree_runtime_flags()
# set_iree_runtime_flags()
available_devices = []
from shark.iree_utils.vulkan_utils import (
get_all_vulkan_devices,
)
vulkaninfo_list = get_all_vulkan_devices()
vulkan_devices = []
id = 0
for device in vulkaninfo_list:
vulkan_devices.append(f"{device.strip()} => vulkan://{id}")
id += 1
if id != 0:
print(f"vulkan devices are available.")
available_devices.extend(vulkan_devices)
metal_devices = get_devices_by_name("metal")
available_devices.extend(metal_devices)
cuda_devices = get_devices_by_name("cuda")
available_devices.extend(cuda_devices)
rocm_devices = get_devices_by_name("rocm")
available_devices.extend(rocm_devices)
cpu_device = get_devices_by_name("cpu-sync")
available_devices.extend(cpu_device)
# cpu_device = get_devices_by_name("cpu-sync")
# available_devices.extend(cpu_device)
cpu_device = get_devices_by_name("cpu-task")
available_devices.extend(cpu_device)
# from shark.iree_utils.vulkan_utils import (
# get_all_vulkan_devices,
# )
# vulkaninfo_list = get_all_vulkan_devices()
# vulkan_devices = []
# id = 0
# for device in vulkaninfo_list:
# vulkan_devices.append(f"{device.strip()} => vulkan://{id}")
# id += 1
# if id != 0:
# print(f"vulkan devices are available.")
# available_devices.extend(vulkan_devices)
# metal_devices = get_devices_by_name("metal")
# available_devices.extend(metal_devices)
# cuda_devices = get_devices_by_name("cuda")
# available_devices.extend(cuda_devices)
# hip_devices = get_devices_by_name("hip")
# available_devices.extend(hip_devices)
for idx, device_str in enumerate(available_devices):
if "AMD Radeon(TM) Graphics =>" in device_str:
igpu_id_candidates = [
x.split("w/")[-1].split("=>")[0]
for x in available_devices
if "M Graphics" in x
]
for igpu_name in igpu_id_candidates:
if igpu_name:
available_devices[idx] = device_str.replace(
"AMD Radeon(TM) Graphics", igpu_name
)
break
return available_devices
def set_init_device_flags():
if "vulkan" in cmd_opts.device:
# set runtime flags for vulkan.
set_iree_runtime_flags()
def clean_device_info(raw_device):
# return appropriate device and device_id for consumption by Studio pipeline
# Multiple devices only supported for vulkan and rocm (as of now).
# default device must be selected for all others
# set triple flag to avoid multiple calls to get_vulkan_triple_flag
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
if not cmd_opts.iree_vulkan_target_triple:
triple = get_vulkan_target_triple(device_name)
if triple is not None:
cmd_opts.iree_vulkan_target_triple = triple
print(
f"Found device {device_name}. Using target triple "
f"{cmd_opts.iree_vulkan_target_triple}."
)
elif "cuda" in cmd_opts.device:
cmd_opts.device = "cuda"
elif "metal" in cmd_opts.device:
device_name, cmd_opts.device = map_device_to_name_path(cmd_opts.device)
if not cmd_opts.iree_metal_target_platform:
from shark.iree_utils.metal_utils import get_metal_target_triple
device_id = None
device = raw_device if "=>" not in raw_device else raw_device.split("=>")[1].strip()
if "://" in device:
device, device_id = device.split("://")
if len(device_id) <= 2:
device_id = int(device_id)
triple = get_metal_target_triple(device_name)
if triple is not None:
cmd_opts.iree_metal_target_platform = triple.split("-")[-1]
print(
f"Found device {device_name}. Using target triple "
f"{cmd_opts.iree_metal_target_platform}."
)
elif "cpu" in cmd_opts.device:
cmd_opts.device = "cpu"
if device not in ["hip", "rocm", "vulkan"]:
device_id = None
if device in ["hip", "rocm", "vulkan"] and device_id == None:
device_id = 0
return device, device_id
def set_iree_runtime_flags():
# TODO: This function should be device-agnostic and piped properly
# to general runtime driver init.
vulkan_runtime_flags = get_iree_vulkan_runtime_flags()
if cmd_opts.enable_rgp:
vulkan_runtime_flags += [
f"--enable_rgp=true",
f"--vulkan_debug_utils=true",
]
if cmd_opts.device_allocator_heap_key:
vulkan_runtime_flags += [
f"--device_allocator=caching:device_local={cmd_opts.device_allocator_heap_key}",
]
set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
def parse_device(device_str, target_override=""):
rt_driver, device_id = clean_device_info(device_str)
target_backend = iree_target_map(rt_driver)
if device_id:
rt_device = f"{rt_driver}://{device_id}"
else:
rt_device = rt_driver
if target_override:
if "cpu" in device_str:
rt_device = "local-task"
return target_backend, rt_device, target_override
match target_backend:
case "vulkan-spirv":
triple = get_iree_target_triple(device_str)
return target_backend, rt_device, triple
case "rocm":
triple = get_rocm_target_chip(device_str)
return target_backend, rt_device, triple
case "llvm-cpu":
if "Ryzen 9" in device_str:
return target_backend, "local-task", "znver4"
else:
return "llvm-cpu", "local-task", "x86_64-linux-gnu"
def get_rocm_target_chip(device_str):
# TODO: Use a data file to map device_str to target chip.
rocm_chip_map = {
"6700": "gfx1031",
"6800": "gfx1030",
"6900": "gfx1030",
"7900": "gfx1100",
"MI300X": "gfx942",
"MI300A": "gfx940",
"MI210": "gfx90a",
"MI250": "gfx90a",
"MI100": "gfx908",
"MI50": "gfx906",
"MI60": "gfx906",
"780M": "gfx1103",
}
for key in rocm_chip_map:
if key in device_str:
return rocm_chip_map[key]
return None
def get_all_devices(driver_name):
@@ -138,183 +220,69 @@ def get_all_devices(driver_name):
driver = get_driver(driver_name)
device_list_src = driver.query_available_devices()
device_list_src.sort(key=lambda d: d["path"])
del driver
return device_list_src
def get_device_mapping(driver, key_combination=3):
"""This method ensures consistent device ordering when choosing
specific devices for execution
Args:
driver (str): execution driver (vulkan, cuda, rocm, etc)
key_combination (int, optional): choice for mapping value for
device name.
1 : path
2 : name
3 : (name, path)
Defaults to 3.
Returns:
dict: map to possible device names user can input mapped to desired
combination of name/path.
"""
from shark.iree_utils._common import iree_device_map
# def get_device_mapping(driver, key_combination=3):
# """This method ensures consistent device ordering when choosing
# specific devices for execution
# Args:
# driver (str): execution driver (vulkan, cuda, rocm, etc)
# key_combination (int, optional): choice for mapping value for
# device name.
# 1 : path
# 2 : name
# 3 : (name, path)
# Defaults to 3.
# Returns:
# dict: map to possible device names user can input mapped to desired
# combination of name/path.
# """
driver = iree_device_map(driver)
device_list = get_all_devices(driver)
device_map = dict()
# driver = iree_device_map(driver)
# device_list = get_all_devices(driver)
# device_map = dict()
def get_output_value(dev_dict):
if key_combination == 1:
return f"{driver}://{dev_dict['path']}"
if key_combination == 2:
return dev_dict["name"]
if key_combination == 3:
return dev_dict["name"], f"{driver}://{dev_dict['path']}"
# def get_output_value(dev_dict):
# if key_combination == 1:
# return f"{driver}://{dev_dict['path']}"
# if key_combination == 2:
# return dev_dict["name"]
# if key_combination == 3:
# return dev_dict["name"], f"{driver}://{dev_dict['path']}"
# mapping driver name to default device (driver://0)
device_map[f"{driver}"] = get_output_value(device_list[0])
for i, device in enumerate(device_list):
# mapping with index
device_map[f"{driver}://{i}"] = get_output_value(device)
# mapping with full path
device_map[f"{driver}://{device['path']}"] = get_output_value(device)
return device_map
# # mapping driver name to default device (driver://0)
# device_map[f"{driver}"] = get_output_value(device_list[0])
# for i, device in enumerate(device_list):
# # mapping with index
# device_map[f"{driver}://{i}"] = get_output_value(device)
# # mapping with full path
# device_map[f"{driver}://{device['path']}"] = get_output_value(device)
# return device_map
def get_opt_flags(model, precision="fp16"):
iree_flags = []
if len(cmd_opts.iree_vulkan_target_triple) > 0:
iree_flags.append(
f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
)
if "rocm" in cmd_opts.device:
from shark.iree_utils.gpu_utils import get_iree_rocm_args
# def get_opt_flags(model, precision="fp16"):
# iree_flags = []
# if len(cmd_opts.iree_vulkan_target_triple) > 0:
# iree_flags.append(
# f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
# )
# if "rocm" in cmd_opts.device:
# from shark.iree_utils.gpu_utils import get_iree_rocm_args
rocm_args = get_iree_rocm_args()
iree_flags.extend(rocm_args)
if cmd_opts.iree_constant_folding == False:
iree_flags.append("--iree-opt-const-expr-hoisting=False")
iree_flags.append(
"--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
)
if cmd_opts.data_tiling == False:
iree_flags.append("--iree-opt-data-tiling=False")
# rocm_args = get_iree_rocm_args()
# iree_flags.extend(rocm_args)
# if cmd_opts.iree_constant_folding == False:
# iree_flags.append("--iree-opt-const-expr-hoisting=False")
# iree_flags.append(
# "--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
# )
# if cmd_opts.data_tiling == False:
# iree_flags.append("--iree-opt-data-tiling=False")
if "vae" not in model:
# Due to lack of support for multi-reduce, we always collapse reduction
# dims before dispatch formation right now.
iree_flags += ["--iree-flow-collapse-reduction-dims"]
return iree_flags
def map_device_to_name_path(device, key_combination=3):
"""Gives the appropriate device data (supported name/path) for user
selected execution device
Args:
device (str): user
key_combination (int, optional): choice for mapping value for
device name.
1 : path
2 : name
3 : (name, path)
Defaults to 3.
Raises:
ValueError:
Returns:
str / tuple: returns the mapping str or tuple of mapping str for
the device depending on key_combination value
"""
driver = device.split("://")[0]
device_map = get_device_mapping(driver, key_combination)
try:
device_mapping = device_map[device]
except KeyError:
raise ValueError(f"Device '{device}' is not a valid device.")
return device_mapping
def get_devices_by_name(driver_name):
from shark.iree_utils._common import iree_device_map
device_list = []
try:
driver_name = iree_device_map(driver_name)
device_list_dict = get_all_devices(driver_name)
print(f"{driver_name} devices are available.")
except:
print(f"{driver_name} devices are not available.")
else:
cpu_name = get_cpu_info()["brand_raw"]
for i, device in enumerate(device_list_dict):
device_name = (
cpu_name if device["name"] == "default" else device["name"]
)
if "local" in driver_name:
device_list.append(
f"{device_name} => {driver_name.replace('local', 'cpu')}"
)
else:
# for drivers with single devices
# let the default device be selected without any indexing
if len(device_list_dict) == 1:
device_list.append(f"{device_name} => {driver_name}")
else:
device_list.append(f"{device_name} => {driver_name}://{i}")
return device_list
set_iree_runtime_flags()
available_devices = []
from shark.iree_utils.vulkan_utils import (
get_all_vulkan_devices,
)
vulkaninfo_list = get_all_vulkan_devices()
vulkan_devices = []
id = 0
for device in vulkaninfo_list:
vulkan_devices.append(f"{device.strip()} => vulkan://{id}")
id += 1
if id != 0:
print(f"vulkan devices are available.")
available_devices.extend(vulkan_devices)
metal_devices = get_devices_by_name("metal")
available_devices.extend(metal_devices)
cuda_devices = get_devices_by_name("cuda")
available_devices.extend(cuda_devices)
rocm_devices = get_devices_by_name("rocm")
available_devices.extend(rocm_devices)
cpu_device = get_devices_by_name("cpu-sync")
available_devices.extend(cpu_device)
cpu_device = get_devices_by_name("cpu-task")
available_devices.extend(cpu_device)
return available_devices
# Generate and return a new seed if the provided one is not in the
# supported range (including -1)
def sanitize_seed(seed: int | str):
seed = int(seed)
uint32_info = np.iinfo(np.uint32)
uint32_min, uint32_max = uint32_info.min, uint32_info.max
if seed < uint32_min or seed >= uint32_max:
seed = randint(uint32_min, uint32_max)
return seed
# take a seed expression in an input format and convert it to
# a list of integers, where possible
def parse_seed_input(seed_input: str | list | int):
if isinstance(seed_input, str):
try:
seed_input = json.loads(seed_input)
except (ValueError, TypeError):
seed_input = None
if isinstance(seed_input, int):
return [seed_input]
if isinstance(seed_input, list) and all(type(seed) is int for seed in seed_input):
return seed_input
raise TypeError(
"Seed input must be an integer or an array of integers in JSON format"
)
# if "vae" not in model:
# # Due to lack of support for multi-reduce, we always collapse reduction
# # dims before dispatch formation right now.
# iree_flags += ["--iree-flow-collapse-reduction-dims"]
# return iree_flags
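
For orientation, a few worked examples of the new device parsing above. The device strings are illustrative; the outputs follow from clean_device_info, iree_target_map, and get_rocm_target_chip as written:

parse_device("AMD Radeon RX 7900 XTX => rocm://0")
# -> ("rocm", "rocm", "gfx1100"): "7900" hits the rocm_chip_map entry. Note
#    that index 0 collapses to the bare driver name, because `if device_id:`
#    treats the integer 0 as falsy.

parse_device("AMD Ryzen 9 7950X CPU => cpu-task")
# -> ("llvm-cpu", "local-task", "znver4"): "Ryzen 9" in the device string
#    selects the znver4 triple.

parse_device("cpu")
# -> ("llvm-cpu", "local-task", "x86_64-linux-gnu"): the generic CPU fallback.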

View File

@@ -2,10 +2,16 @@ import os
 import json
 import re
 import requests
+import torch
+import safetensors
+from shark_turbine.aot.params import (
+    ParameterArchiveBuilder,
+)
 from io import BytesIO
 from pathlib import Path
 from tqdm import tqdm
 from omegaconf import OmegaConf
 from diffusers import StableDiffusionPipeline
 from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
 from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
     download_from_original_stable_diffusion_ckpt,
@@ -14,21 +20,21 @@ from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
 )
 
-def get_path_to_diffusers_checkpoint(custom_weights):
+def get_path_to_diffusers_checkpoint(custom_weights, precision="fp16"):
     path = Path(custom_weights)
     diffusers_path = path.parent.absolute()
-    diffusers_directory_name = os.path.join("diffusers", path.stem)
+    diffusers_directory_name = os.path.join("diffusers", path.stem + f"_{precision}")
     complete_path_to_diffusers = diffusers_path / diffusers_directory_name
     complete_path_to_diffusers.mkdir(parents=True, exist_ok=True)
     path_to_diffusers = complete_path_to_diffusers.as_posix()
     return path_to_diffusers
 
-def preprocessCKPT(custom_weights, is_inpaint=False):
-    path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights)
+def preprocessCKPT(custom_weights, precision="fp16", is_inpaint=False):
+    path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights, precision)
     if next(Path(path_to_diffusers).iterdir(), None):
         print("Checkpoint already loaded at : ", path_to_diffusers)
-        return
+        return path_to_diffusers
     else:
         print(
             "Diffusers' checkpoint will be identified here : ",
@@ -50,8 +56,31 @@ def preprocessCKPT(custom_weights, is_inpaint=False):
         from_safetensors=from_safetensors,
         num_in_channels=num_in_channels,
     )
+    if precision == "fp16":
+        pipe.to(dtype=torch.float16)
     pipe.save_pretrained(path_to_diffusers)
     del pipe
     print("Loading complete")
+    return path_to_diffusers
+
+def save_irpa(weights_path, prepend_str):
+    weights = safetensors.torch.load_file(weights_path)
+    archive = ParameterArchiveBuilder()
+    for key in weights.keys():
+        new_key = prepend_str + key
+        archive.add_tensor(new_key, weights[key])
+
+    if "safetensors" in weights_path:
+        irpa_file = weights_path.replace(".safetensors", ".irpa")
+    elif "irpa" in weights_path:
+        irpa_file = weights_path
+    else:
+        return Exception(
+            "Invalid file format. Please provide a .safetensors or .irpa file."
+        )
+    archive.save(irpa_file)
+    return irpa_file
 
 def convert_original_vae(vae_checkpoint):
@@ -87,6 +116,7 @@ def process_custom_pipe_weights(custom_weights):
         ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
         custom_weights_tgt = get_path_to_diffusers_checkpoint(custom_weights)
         custom_weights_params = custom_weights
+
     return custom_weights_params, custom_weights_tgt
@@ -98,7 +128,7 @@ def get_civitai_checkpoint(url: str):
         base_filename = re.findall(
             '"([^"]*)"', response.headers["Content-Disposition"]
         )[0]
-        destination_path = Path.cwd() / (cmd_opts.ckpt_dir or "models") / base_filename
+        destination_path = Path.cwd() / (cmd_opts.model_dir or "models") / base_filename
 
         # we don't have this model downloaded yet
         if not destination_path.is_file():
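
A short usage sketch for the new save_irpa helper (paths are illustrative): it loads a .safetensors file, prefixes every tensor key, and writes a .irpa parameter archive next to the source file, mirroring how the SD pipeline calls it for UNet, CLIP, and VAE weights. One caveat worth flagging: the invalid-format branch returns the Exception instead of raising it, so callers never actually see it thrown.

# Hypothetical input path; the prefix becomes part of every parameter key,
# e.g. "unet.conv_in.weight".
irpa_file = save_irpa(
    "diffusers/my_model_fp16/unet/diffusion_pytorch_model.safetensors",
    "unet.",
)
print(irpa_file)
# -> diffusers/my_model_fp16/unet/diffusion_pytorch_model.irpa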

View File

@@ -33,6 +33,8 @@ def save_output_img(output_img, img_seed, extra_info=None):
     if extra_info is None:
         extra_info = {}
+    elif "progress" in extra_info.keys():
+        extra_info.pop("progress")
 
     generated_imgs_path = Path(
         get_generated_imgs_path(), get_generated_imgs_todays_subdir()
     )
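
The new elif branch presumably exists because extra_info is serialized into the saved image's metadata, and a gradio Progress object riding along in the request kwargs is not JSON-serializable. A minimal illustration of the failure mode being avoided (assumed motivation, not taken from the diff):

import json

extra_info = {"prompt": "a photo of a shark", "progress": object()}
# json.dumps(extra_info) would raise "TypeError: Object of type object is not
# JSON serializable", so the offending key is dropped before serialization.
extra_info.pop("progress", None)
print(json.dumps(extra_info))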

View File

@@ -41,7 +41,7 @@ class SharkPipelineBase:
         self.device, self.device_id = clean_device_info(device)
         self.import_mlir = import_mlir
         self.iree_module_dict = {}
-        self.tmp_dir = get_resource_path(os.path.join("..", "shark_tmp"))
+        self.tmp_dir = get_resource_path(cmd_opts.tmp_dir)
         if not os.path.exists(self.tmp_dir):
             os.mkdir(self.tmp_dir)
         self.tempfiles = {}
@@ -55,9 +55,7 @@ class SharkPipelineBase:
         # and your model map is populated with any IR - unique model IDs and their static params,
         # call this method to get the artifacts associated with your map.
         self.pipe_id = self.safe_name(pipe_id)
-        self.pipe_vmfb_path = Path(
-            os.path.join(get_checkpoints_path(".."), self.pipe_id)
-        )
+        self.pipe_vmfb_path = Path(os.path.join(get_checkpoints_path(), self.pipe_id))
         self.pipe_vmfb_path.mkdir(parents=False, exist_ok=True)
         if submodel == "None":
             print("\n[LOG] Gathering any pre-compiled artifacts....")

View File

@@ -24,47 +24,47 @@ def get_schedulers(model_id):
         model_id,
         subfolder="scheduler",
     )
-    schedulers["DDPM"] = DDPMScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["KDPM2Discrete"] = KDPM2DiscreteScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["DDIM"] = DDIMScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["LCMScheduler"] = LCMScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["DPMSolverMultistep"] = DPMSolverMultistepScheduler.from_pretrained(
-        model_id, subfolder="scheduler", algorithm_type="dpmsolver"
-    )
-    schedulers["DPMSolverMultistep++"] = DPMSolverMultistepScheduler.from_pretrained(
-        model_id, subfolder="scheduler", algorithm_type="dpmsolver++"
-    )
-    schedulers["DPMSolverMultistepKarras"] = (
-        DPMSolverMultistepScheduler.from_pretrained(
-            model_id,
-            subfolder="scheduler",
-            use_karras_sigmas=True,
-        )
-    )
-    schedulers["DPMSolverMultistepKarras++"] = (
-        DPMSolverMultistepScheduler.from_pretrained(
-            model_id,
-            subfolder="scheduler",
-            algorithm_type="dpmsolver++",
-            use_karras_sigmas=True,
-        )
-    )
+    # schedulers["DDPM"] = DDPMScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["KDPM2Discrete"] = KDPM2DiscreteScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["DDIM"] = DDIMScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["LCMScheduler"] = LCMScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["DPMSolverMultistep"] = DPMSolverMultistepScheduler.from_pretrained(
+    #     model_id, subfolder="scheduler", algorithm_type="dpmsolver"
+    # )
+    # schedulers["DPMSolverMultistep++"] = DPMSolverMultistepScheduler.from_pretrained(
+    #     model_id, subfolder="scheduler", algorithm_type="dpmsolver++"
+    # )
+    # schedulers["DPMSolverMultistepKarras"] = (
+    #     DPMSolverMultistepScheduler.from_pretrained(
+    #         model_id,
+    #         subfolder="scheduler",
+    #         use_karras_sigmas=True,
+    #     )
+    # )
+    # schedulers["DPMSolverMultistepKarras++"] = (
+    #     DPMSolverMultistepScheduler.from_pretrained(
+    #         model_id,
+    #         subfolder="scheduler",
+    #         algorithm_type="dpmsolver++",
+    #         use_karras_sigmas=True,
+    #     )
+    # )
     schedulers["EulerDiscrete"] = EulerDiscreteScheduler.from_pretrained(
         model_id,
         subfolder="scheduler",
@@ -75,24 +75,24 @@
             subfolder="scheduler",
         )
     )
-    schedulers["DEISMultistep"] = DEISMultistepScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["DPMSolverSinglestep"] = DPMSolverSinglestepScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
-    schedulers["KDPM2AncestralDiscrete"] = (
-        KDPM2AncestralDiscreteScheduler.from_pretrained(
-            model_id,
-            subfolder="scheduler",
-        )
-    )
-    schedulers["HeunDiscrete"] = HeunDiscreteScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-    )
+    # schedulers["DEISMultistep"] = DEISMultistepScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["DPMSolverSinglestep"] = DPMSolverSinglestepScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
+    # schedulers["KDPM2AncestralDiscrete"] = (
+    #     KDPM2AncestralDiscreteScheduler.from_pretrained(
+    #         model_id,
+    #         subfolder="scheduler",
+    #     )
+    # )
+    # schedulers["HeunDiscrete"] = HeunDiscreteScheduler.from_pretrained(
+    #     model_id,
+    #     subfolder="scheduler",
+    # )
     return schedulers
@@ -101,17 +101,18 @@ def export_scheduler_model(model):
 scheduler_model_map = {
+    # "PNDM": export_scheduler_model("PNDMScheduler"),
     # "DPMSolverSDE": export_scheduler_model("DpmSolverSDEScheduler"),
     "EulerDiscrete": export_scheduler_model("EulerDiscreteScheduler"),
     "EulerAncestralDiscrete": export_scheduler_model("EulerAncestralDiscreteScheduler"),
-    "LCM": export_scheduler_model("LCMScheduler"),
-    "LMSDiscrete": export_scheduler_model("LMSDiscreteScheduler"),
-    "PNDM": export_scheduler_model("PNDMScheduler"),
-    "DDPM": export_scheduler_model("DDPMScheduler"),
-    "DDIM": export_scheduler_model("DDIMScheduler"),
-    "DPMSolverMultistep": export_scheduler_model("DPMSolverMultistepScheduler"),
-    "KDPM2Discrete": export_scheduler_model("KDPM2DiscreteScheduler"),
-    "DEISMultistep": export_scheduler_model("DEISMultistepScheduler"),
-    "DPMSolverSinglestep": export_scheduler_model("DPMSolverSingleStepScheduler"),
-    "KDPM2AncestralDiscrete": export_scheduler_model("KDPM2AncestralDiscreteScheduler"),
-    "HeunDiscrete": export_scheduler_model("HeunDiscreteScheduler"),
+    # "LCM": export_scheduler_model("LCMScheduler"),
+    # "LMSDiscrete": export_scheduler_model("LMSDiscreteScheduler"),
+    # "DDPM": export_scheduler_model("DDPMScheduler"),
+    # "DDIM": export_scheduler_model("DDIMScheduler"),
+    # "DPMSolverMultistep": export_scheduler_model("DPMSolverMultistepScheduler"),
# "KDPM2Discrete": export_scheduler_model("KDPM2DiscreteScheduler"),
# "DEISMultistep": export_scheduler_model("DEISMultistepScheduler"),
# "DPMSolverSinglestep": export_scheduler_model("DPMSolverSingleStepScheduler"),
# "KDPM2AncestralDiscrete": export_scheduler_model("KDPM2AncestralDiscreteScheduler"),
# "HeunDiscrete": export_scheduler_model("HeunDiscreteScheduler"),
}
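
After this trim only the Euler schedulers remain exported; a minimal sketch of a lookup against the map above:

available = list(scheduler_model_map.keys())
# ["EulerDiscrete", "EulerAncestralDiscrete"]
scheduler = scheduler_model_map["EulerAncestralDiscrete"]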

View File

@@ -23,7 +23,6 @@ p = argparse.ArgumentParser(
##############################################################################
# Stable Diffusion Params
##############################################################################
p.add_argument(
"-a",
"--app",
@@ -35,10 +34,7 @@ p.add_argument(
"--prompt",
nargs="+",
default=[
"a photo taken of the front of a super-car drifting on a road near "
"mountains at high speeds with smoke coming off the tires, front "
"angle, front point of view, trees in the mountains of the "
"background, ((sharp focus))"
"A hi-res photo of a red street racer drifting around a curve on a mountain, high altitude, at night, tokyo in the background, 8k"
],
help="Text of which images to be generated.",
)
@@ -62,7 +58,7 @@ p.add_argument(
p.add_argument(
"--steps",
type=int,
default=50,
default=2,
help="The number of steps to do the sampling.",
)
@@ -100,7 +96,7 @@ p.add_argument(
p.add_argument(
"--guidance_scale",
type=float,
default=7.5,
default=0,
help="The value to be used for guidance scaling.",
)
@@ -339,7 +335,7 @@ p.add_argument(
p.add_argument(
"--output_dir",
type=str,
default=None,
default=os.path.join(os.getcwd(), "generated_imgs"),
help="Directory path to save the output images and json.",
)
@@ -597,6 +593,12 @@ p.add_argument(
##############################################################################
# Web UI flags
##############################################################################
p.add_argument(
"--defaults",
default="sdxl-turbo.json",
type=str,
help="Path to the default API request .json file. Works for CLI and webui.",
)
p.add_argument(
"--webui",
@@ -613,12 +615,27 @@ p.add_argument(
)
p.add_argument(
"--ckpt_dir",
"--tmp_dir",
type=str,
default="../models",
default=os.path.join(os.getcwd(), "shark_tmp"),
help="Path to tmp directory",
)
p.add_argument(
"--config_dir",
type=str,
default=os.path.join(os.getcwd(), "configs"),
help="Path to config directory",
)
p.add_argument(
"--model_dir",
type=str,
default=os.path.join(os.getcwd(), "models"),
help="Path to directory where all .ckpts are stored in order to populate "
"them in the web UI.",
)
# TODO: replace API flag when these can be run together
p.add_argument(
"--ui",

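A short sketch of the reworked directory flags, assuming the argparse parser p defined above; values are illustrative:

args, _ = p.parse_known_args(["--defaults", "sdxl-turbo.json"])
print(args.tmp_dir)     # defaults to <cwd>/shark_tmp
print(args.config_dir)  # defaults to <cwd>/configs
print(args.model_dir)   # defaults to <cwd>/models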
View File

@@ -0,0 +1,45 @@
# -*- mode: python ; coding: utf-8 -*-
from apps.shark_studio.studio_imports_apionly import pathex, datas, hiddenimports
binaries = []
block_cipher = None
a = Analysis(
['web/index.py'],
pathex=pathex,
binaries=binaries,
datas=datas,
hiddenimports=hiddenimports,
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
a.binaries,
a.zipfiles,
a.datas,
[],
name='shark_sd3_server',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=False,
upx_exclude=[],
runtime_tmpdir=None,
console=True,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None,
)

View File

@@ -22,30 +22,25 @@ datas += copy_metadata("packaging")
datas += copy_metadata("filelock")
datas += copy_metadata("numpy")
datas += copy_metadata("importlib_metadata")
datas += copy_metadata("omegaconf")
datas += copy_metadata("safetensors")
datas += copy_metadata("Pillow")
datas += copy_metadata("sentencepiece")
datas += copy_metadata("pyyaml")
datas += copy_metadata("huggingface-hub")
datas += copy_metadata("gradio")
datas += copy_metadata("scipy")
datas += collect_data_files("torch")
datas += collect_data_files("tokenizers")
datas += collect_data_files("accelerate")
datas += collect_data_files("diffusers")
datas += collect_data_files("transformers")
datas += collect_data_files("gradio")
datas += collect_data_files("gradio_client")
datas += collect_data_files("iree", include_py_files=True)
datas += collect_data_files("shark", include_py_files=True)
datas += collect_data_files("shark-turbine", include_py_files=True)
datas += collect_data_files("tqdm")
datas += collect_data_files("tkinter")
datas += collect_data_files("sentencepiece")
datas += collect_data_files("jsonschema")
datas += collect_data_files("jsonschema_specifications")
datas += collect_data_files("cpuinfo")
datas += collect_data_files("scipy", include_py_files=True)
datas += [
("web/ui/css/*", "ui/css"),
("web/ui/js/*", "ui/js"),
@@ -54,7 +49,7 @@ datas += [
# hidden imports for pyinstaller
hiddenimports = ["shark", "apps"]
hiddenimports = ["apps", "shark-turbine"]
hiddenimports += [x for x in collect_submodules("gradio") if "tests" not in x]
hiddenimports += [x for x in collect_submodules("diffusers") if "tests" not in x]
blacklist = ["tests", "convert"]
@@ -65,4 +60,3 @@ hiddenimports += [
]
hiddenimports += [x for x in collect_submodules("iree") if "test" not in x]
hiddenimports += ["iree._runtime"]
hiddenimports += [x for x in collect_submodules("scipy") if "test" not in x]

View File

@@ -0,0 +1,46 @@
from PyInstaller.utils.hooks import collect_data_files
from PyInstaller.utils.hooks import copy_metadata
from PyInstaller.utils.hooks import collect_submodules
import sys
sys.setrecursionlimit(sys.getrecursionlimit() * 5)
# python path for pyinstaller
pathex = [
".",
]
# datafiles for pyinstaller
datas = []
datas += copy_metadata("torch")
datas += copy_metadata("tokenizers")
datas += copy_metadata("tqdm")
datas += copy_metadata("regex")
datas += copy_metadata("requests")
datas += copy_metadata("packaging")
datas += copy_metadata("filelock")
datas += copy_metadata("numpy")
datas += copy_metadata("importlib_metadata")
datas += copy_metadata("safetensors")
datas += copy_metadata("Pillow")
datas += copy_metadata("sentencepiece")
datas += copy_metadata("pyyaml")
datas += copy_metadata("huggingface-hub")
datas += copy_metadata("gradio")
datas += collect_data_files("torch")
datas += collect_data_files("tokenizers")
datas += collect_data_files("diffusers")
datas += collect_data_files("transformers")
datas += collect_data_files("iree", include_py_files=True)
datas += collect_data_files("tqdm")
datas += collect_data_files("jsonschema")
datas += collect_data_files("jsonschema_specifications")
datas += collect_data_files("cpuinfo")
# hidden imports for pyinstaller
hiddenimports = ["apps", "shark-turbine"]
hiddenimports += [x for x in collect_submodules("diffusers") if "tests" not in x]
hiddenimports += [x for x in collect_submodules("iree") if "test" not in x]
hiddenimports += ["iree._runtime"]

View File

@@ -36,6 +36,7 @@ class LLMAPITest(unittest.TestCase):
device="cpu",
precision="fp32",
quantization="None",
streaming_llm=True,
)
count = 0
label = "Turkishoure Turkish"

View File

@@ -0,0 +1,20 @@
from apps.shark_studio.modules.ckpt_processing import save_irpa
import argparse
import safetensors
parser = argparse.ArgumentParser()
parser.add_argument(
"--input",
type=str,
default="",
help="input safetensors/irpa",
)
parser.add_argument(
"--prefix",
type=str,
default="",
help="prefix to add to all the keys in the irpa",
)
args = parser.parse_args()
output_file = save_irpa(args.input, args.prefix)
print("saved irpa to", output_file, "with prefix", args.prefix)

View File

@@ -20,9 +20,6 @@ from fastapi.encoders import jsonable_encoder
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
# from sdapi_v1 import shark_sd_api
from apps.shark_studio.api.llm import llm_chat_api
def decode_base64_to_image(encoding):
if encoding.startswith("http://") or encoding.startswith("https://"):
@@ -183,50 +180,8 @@ class ApiCompat:
self.app = app
self.queue_lock = queue_lock
api_middleware(self.app)
# self.add_api_route("/sdapi/v1/txt2img", shark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/img2img", shark_sd_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/upscaler", self.upscaler_api, methods=["POST"])
# self.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=models.ExtrasSingleImageResponse)
# self.add_api_route("/sdapi/v1/extra-batch-images", self.extras_batch_images_api, methods=["POST"], response_model=models.ExtrasBatchImagesResponse)
# self.add_api_route("/sdapi/v1/png-info", self.pnginfoapi, methods=["POST"], response_model=models.PNGInfoResponse)
# self.add_api_route("/sdapi/v1/progress", self.progressapi, methods=["GET"], response_model=models.ProgressResponse)
# self.add_api_route("/sdapi/v1/interrogate", self.interrogateapi, methods=["POST"])
# self.add_api_route("/sdapi/v1/interrupt", self.interruptapi, methods=["POST"])
# self.add_api_route("/sdapi/v1/skip", self.skip, methods=["POST"])
# self.add_api_route("/sdapi/v1/options", self.get_config, methods=["GET"], response_model=models.OptionsModel)
# self.add_api_route("/sdapi/v1/options", self.set_config, methods=["POST"])
# self.add_api_route("/sdapi/v1/cmd-flags", self.get_cmd_flags, methods=["GET"], response_model=models.FlagsModel)
# self.add_api_route("/sdapi/v1/samplers", self.get_samplers, methods=["GET"], response_model=List[models.SamplerItem])
# self.add_api_route("/sdapi/v1/upscalers", self.get_upscalers, methods=["GET"], response_model=List[models.UpscalerItem])
# self.add_api_route("/sdapi/v1/latent-upscale-modes", self.get_latent_upscale_modes, methods=["GET"], response_model=List[models.LatentUpscalerModeItem])
# self.add_api_route("/sdapi/v1/sd-models", self.get_sd_models, methods=["GET"], response_model=List[models.SDModelItem])
# self.add_api_route("/sdapi/v1/sd-vae", self.get_sd_vaes, methods=["GET"], response_model=List[models.SDVaeItem])
# self.add_api_route("/sdapi/v1/hypernetworks", self.get_hypernetworks, methods=["GET"], response_model=List[models.HypernetworkItem])
# self.add_api_route("/sdapi/v1/face-restorers", self.get_face_restorers, methods=["GET"], response_model=List[models.FaceRestorerItem])
# self.add_api_route("/sdapi/v1/realesrgan-models", self.get_realesrgan_models, methods=["GET"], response_model=List[models.RealesrganItem])
# self.add_api_route("/sdapi/v1/prompt-styles", self.get_prompt_styles, methods=["GET"], response_model=List[models.PromptStyleItem])
# self.add_api_route("/sdapi/v1/embeddings", self.get_embeddings, methods=["GET"], response_model=models.EmbeddingsResponse)
# self.add_api_route("/sdapi/v1/refresh-checkpoints", self.refresh_checkpoints, methods=["POST"])
# self.add_api_route("/sdapi/v1/refresh-vae", self.refresh_vae, methods=["POST"])
# self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=models.CreateResponse)
# self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=models.CreateResponse)
# self.add_api_route("/sdapi/v1/preprocess", self.preprocess, methods=["POST"], response_model=models.PreprocessResponse)
# self.add_api_route("/sdapi/v1/train/embedding", self.train_embedding, methods=["POST"], response_model=models.TrainResponse)
# self.add_api_route("/sdapi/v1/train/hypernetwork", self.train_hypernetwork, methods=["POST"], response_model=models.TrainResponse)
# self.add_api_route("/sdapi/v1/memory", self.get_memory, methods=["GET"], response_model=models.MemoryResponse)
# self.add_api_route("/sdapi/v1/unload-checkpoint", self.unloadapi, methods=["POST"])
# self.add_api_route("/sdapi/v1/reload-checkpoint", self.reloadapi, methods=["POST"])
# self.add_api_route("/sdapi/v1/scripts", self.get_scripts_list, methods=["GET"], response_model=models.ScriptsList)
# self.add_api_route("/sdapi/v1/script-info", self.get_script_info, methods=["GET"], response_model=List[models.ScriptInfo])
# chat APIs needed for compatibility with multiple extensions using OpenAI API
self.add_api_route("/v1/chat/completions", llm_chat_api, methods=["POST"])
self.add_api_route("/v1/completions", llm_chat_api, methods=["POST"])
self.add_api_route("/chat/completions", llm_chat_api, methods=["POST"])
self.add_api_route("/completions", llm_chat_api, methods=["POST"])
self.add_api_route(
"/v1/engines/codegen/completions", llm_chat_api, methods=["POST"]
)
# self.add_api_route("/sdapi/v1/txt2img", shark_sd_api, methods=["POST"])
self.default_script_arg_txt2img = []
self.default_script_arg_img2img = []
@@ -234,27 +189,6 @@ class ApiCompat:
def add_api_route(self, path: str, endpoint, **kwargs):
return self.app.add_api_route(path, endpoint, **kwargs)
# def refresh_checkpoints(self):
# with self.queue_lock:
# studio_data.refresh_checkpoints()
# def refresh_vae(self):
# with self.queue_lock:
# studio_data.refresh_vae_list()
# def unloadapi(self):
# unload_model_weights()
# return {}
# def reloadapi(self):
# reload_model_weights()
# return {}
# def skip(self):
# studio.state.skip()
def launch(self, server_name, port, root_path):
self.app.include_router(self.router)
uvicorn.run(

View File

@@ -1 +1,115 @@
import base64
from fastapi import FastAPI
from io import BytesIO
from PIL import Image
from pydantic import BaseModel, Field
from fastapi.exceptions import HTTPException
from apps.shark_studio.api.sd import shark_sd_fn
sdapi = FastAPI()
class GenerationInputData(BaseModel):
prompt: list = [""]
negative_prompt: list = [""]
hf_model_id: str | None = None
height: int = Field(default=512, ge=128, le=1024, multiple_of=8)
width: int = Field(default=512, ge=128, le=1024, multiple_of=8)
sampler_name: str = "EulerDiscrete"
cfg_scale: float = Field(default=7.5, ge=1)
steps: int = Field(default=20, ge=1, le=100)
seed: int = Field(default=-1)
n_iter: int = Field(default=1)
config: dict = None
class GenerationResponseData(BaseModel):
images: list[str] = Field(description="Generated images, Base64 encoded")
properties: dict = {}
info: str
def encode_pil_to_base64(images: list[Image.Image]):
encoded_imgs = []
for image in images:
with BytesIO() as output_bytes:
image.save(output_bytes, format="PNG")
bytes_data = output_bytes.getvalue()
encoded_imgs.append(base64.b64encode(bytes_data).decode("utf-8"))
return encoded_imgs
def decode_base64_to_image(encoding: str):
if encoding.startswith("data:image/"):
encoding = encoding.split(";", 1)[1].split(",", 1)[1]
try:
image = Image.open(BytesIO(base64.b64decode(encoding)))
return image
except Exception as err:
print(err)
raise HTTPException(status_code=400, detail="Invalid encoded image")
@sdapi.post(
"/v1/txt2img",
summary="Does text to image generation",
response_model=GenerationResponseData,
)
def txt2img_api(InputData: GenerationInputData):
model_id = (
InputData.hf_model_id or "stabilityai/stable-diffusion-3-medium-diffusers"
)
scheduler = "FlowEulerDiscrete"
print(
f"Prompt: {InputData.prompt}, "
f"Negative Prompt: {InputData.negative_prompt}, "
f"Seed: {InputData.seed},"
f"Model: {model_id}, "
f"Scheduler: {scheduler}. "
)
if not getattr(InputData, "config"):
InputData.config = {
"precision": "fp16",
"device": "rocm",
"target_triple": "gfx1150",
}
res = shark_sd_fn(
InputData.prompt,
InputData.negative_prompt,
None,
InputData.height,
InputData.width,
InputData.steps,
None,
InputData.cfg_scale,
InputData.seed,
custom_vae=None,
batch_count=InputData.n_iter,
batch_size=1,
scheduler=scheduler,
base_model_id=model_id,
custom_weights=None,
precision=InputData.config["precision"],
device=InputData.config["device"],
target_triple=InputData.config["target_triple"],
output_type="pil",
ondemand=False,
compiled_pipeline=False,
resample_type=None,
controlnets=[],
embeddings=[],
)
# Since we're not streaming we just want the last generator result
for items_so_far in res:
items = items_so_far
return {
"images": encode_pil_to_base64(items[0]),
"parameters": {},
"info": items[1],
}
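
A hedged client-side sketch for this route once the sub-app is mounted at /sdapi/ (see the index.py hunk below); the host and port are assumptions, not taken from this diff:

import base64
import requests

payload = {"prompt": ["a cat on a skateboard"], "steps": 2, "seed": -1}
r = requests.post("http://localhost:8080/sdapi/v1/txt2img", json=payload)
r.raise_for_status()
for i, img_b64 in enumerate(r.json()["images"]):
    with open(f"txt2img_{i}.png", "wb") as f:
        f.write(base64.b64decode(img_b64))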

View File

@@ -1,28 +0,0 @@
{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "AMD Radeon RX 7900 XTX => vulkan://0",
"ondemand": false,
"repeatable_seeds": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}

View File

@@ -32,13 +32,15 @@ def create_api(app):
def api_only():
from fastapi import FastAPI
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.api.sd import sdapi
from fastapi import FastAPI
initialize.initialize()
app = FastAPI()
initialize.setup_middleware(app)
app.mount("/sdapi/", sdapi)
api = create_api(app)
# from modules import script_callbacks
@@ -56,6 +58,7 @@ def api_only():
def launch_webui(address):
from tkinter import Tk
import webview
import gradio as gr
window = Tk()
@@ -76,14 +79,14 @@ def launch_webui(address):
def webui():
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.ui.utils import (
nodicon_loc,
nodlogo_loc,
amdicon_loc,
amdlogo_loc,
)
launch_api = cmd_opts.api
initialize.initialize()
from ui.chat import chat_element
# from ui.chat import chat_element
from ui.sd import sd_element
from ui.outputgallery import outputgallery_element
@@ -170,11 +173,11 @@ def webui():
css=dark_theme,
js=gradio_workarounds,
analytics_enabled=False,
title="Shark Studio 2.0 Beta",
title="Shark Studio 2.0",
) as studio_web:
nod_logo = Image.open(nodlogo_loc)
amd_logo = Image.open(amdlogo_loc)
gr.Image(
value=nod_logo,
value=amd_logo,
show_label=False,
interactive=False,
elem_id="tab_bar_logo",
@@ -194,8 +197,8 @@ def webui():
sd_element.render()
with gr.TabItem(label="Output Gallery", id=1):
outputgallery_element.render()
with gr.TabItem(label="Chat Bot", id=2):
chat_element.render()
# with gr.TabItem(label="Chat Bot", id=2):
# chat_element.render()
studio_web.queue()
@@ -209,14 +212,15 @@ def webui():
inbrowser=True,
server_name="0.0.0.0",
server_port=cmd_opts.server_port,
favicon_path=nodicon_loc,
favicon_path=amdicon_loc,
)
if __name__ == "__main__":
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
if cmd_opts.webui == False:
api_only()
else:
webui()
api_only()
# if cmd_opts.webui == False:
# api_only()
# else:
# webui()

View File

@@ -9,6 +9,7 @@ from apps.shark_studio.api.llm import (
llm_model_map,
LanguageModel,
)
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import apps.shark_studio.web.utils.globals as global_obj
B_SYS, E_SYS = "<s>", "</s>"
@@ -64,6 +65,7 @@ def chat_fn(
external_weights="safetensors",
use_system_prompt=prompt_prefix,
streaming_llm=streaming_llm,
hf_auth_token=cmd_opts.hf_auth_token,
)
history[-1][-1] = "Getting the model ready... Done"
yield history, ""
@@ -135,7 +137,8 @@ with gr.Blocks(title="Chat") as chat_element:
streaming_llm = gr.Checkbox(
label="Run in streaming mode (requires recompilation)",
value=True,
interactive=True,
interactive=False,
visible=False,
)
prompt_prefix = gr.Checkbox(
label="Add System Prompt",

View File

@@ -367,7 +367,7 @@ footer {
#tab_bar_logo .image-container {
object-fit: scale-down;
position: absolute !important;
top: 14px;
top: 10px;
right: 0px;
height: 36px;
}
}

Binary files not shown: two AMD logo images added (7.1 KiB and 7.4 KiB) and two Nod logo images removed (16 KiB and 10 KiB).

View File

@@ -10,7 +10,7 @@ from apps.shark_studio.web.utils.file_utils import (
get_generated_imgs_path,
get_generated_imgs_todays_subdir,
)
from apps.shark_studio.web.ui.utils import nodlogo_loc
from apps.shark_studio.web.ui.utils import amdlogo_loc
from apps.shark_studio.web.utils.metadata import displayable_metadata
# -- Functions for file, directory and image info querying
@@ -60,7 +60,7 @@ def output_subdirs() -> list[str]:
# --- Define UI layout for Gradio
with gr.Blocks() as outputgallery_element:
nod_logo = Image.open(nodlogo_loc)
amd_logo = Image.open(amdlogo_loc)
with gr.Row(elem_id="outputgallery_gallery"):
# needed to workaround gradio issue:
@@ -73,7 +73,7 @@ with gr.Blocks() as outputgallery_element:
with gr.Column(scale=6):
logo = gr.Image(
label="Getting subdirectories...",
value=nod_logo,
value=amd_logo,
interactive=False,
visible=True,
show_label=True,

View File

@@ -14,12 +14,13 @@ from apps.shark_studio.web.utils.file_utils import (
get_checkpoints_path,
get_checkpoints,
get_configs_path,
write_default_sd_config,
get_configs,
write_default_sd_configs,
)
from apps.shark_studio.api.sd import (
sd_model_map,
shark_sd_fn_dict_input,
cancel_sd,
unload_sd,
)
from apps.shark_studio.api.controlnet import (
cnet_preview,
@@ -33,7 +34,7 @@ from apps.shark_studio.modules.img_processing import (
)
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
from apps.shark_studio.web.ui.utils import (
nodlogo_loc,
amdlogo_loc,
none_to_str_none,
str_none_to_none,
)
@@ -44,14 +45,15 @@ from apps.shark_studio.web.ui.common_events import lora_changed
from apps.shark_studio.modules import logger
import apps.shark_studio.web.utils.globals as global_obj
# Disabled some models for demo purposes
sd_default_models = [
"CompVis/stable-diffusion-v1-4",
"runwayml/stable-diffusion-v1-5",
"stabilityai/stable-diffusion-2-1-base",
"stabilityai/stable-diffusion-2-1",
"stabilityai/stable-diffusion-xl-1.0",
"stabilityai/sdxl-turbo",
# "runwayml/stable-diffusion-v1-5",
# "stabilityai/stable-diffusion-2-1-base",
# "stabilityai/stable-diffusion-2-1",
# "stabilityai/stable-diffusion-xl-base-1.0",
# "stabilityai/sdxl-turbo",
]
sd_default_models.extend(get_checkpoints(model_type="scripts"))
def view_json_file(file_path):
@@ -119,8 +121,9 @@ def pull_sd_configs(
custom_vae,
precision,
device,
target_triple,
ondemand,
repeatable_seeds,
compiled_pipeline,
resample_type,
controlnets,
embeddings,
@@ -146,7 +149,16 @@ def pull_sd_configs(
def load_sd_cfg(sd_json: dict, load_sd_config: str):
new_sd_config = none_to_str_none(json.loads(view_json_file(load_sd_config)))
if os.path.exists(load_sd_config):
config = load_sd_config
elif os.path.exists(os.path.join(get_configs_path(), load_sd_config)):
config = os.path.join(get_configs_path(), load_sd_config)
else:
print(
"Default config not found as absolute path or in configs folder. Using sdxl-turbo as default config."
)
config = sd_json
new_sd_config = none_to_str_none(json.loads(view_json_file(config)))
if sd_json:
for key in new_sd_config:
sd_json[key] = new_sd_config[key]
@@ -158,6 +170,8 @@ def load_sd_cfg(sd_json: dict, load_sd_config: str):
sd_image = [Image.open(i, mode="r")]
else:
sd_image = None
if not sd_json["device"]:
sd_json["device"] = gr.update()
return [
sd_json["prompt"][0],
@@ -165,7 +179,7 @@ def load_sd_cfg(sd_json: dict, load_sd_config: str):
sd_image,
sd_json["height"],
sd_json["width"],
sd_json["steps"],
gr.update(),
sd_json["strength"],
sd_json["guidance_scale"],
sd_json["seed"],
@@ -177,8 +191,9 @@ def load_sd_cfg(sd_json: dict, load_sd_config: str):
sd_json["custom_vae"],
sd_json["precision"],
sd_json["device"],
sd_json["target_triple"],
sd_json["ondemand"],
sd_json["repeatable_seeds"],
sd_json["compiled_pipeline"],
sd_json["resample_type"],
sd_json["controlnets"],
sd_json["embeddings"],
@@ -197,7 +212,7 @@ def save_sd_cfg(config: dict, save_name: str):
filepath += ".json"
with open(filepath, mode="w") as f:
f.write(json.dumps(config))
return "..."
return save_name
def create_canvas(width, height):
@@ -234,83 +249,253 @@ def base_model_changed(base_model_id):
new_choices = get_checkpoints(
os.path.join("checkpoints", os.path.basename(str(base_model_id)))
) + get_checkpoints(model_type="checkpoints")
if "turbo" in base_model_id:
new_steps = gr.Dropdown(
value=2,
choices=[1, 2],
label="\U0001F3C3\U0000FE0F Steps",
allow_custom_value=True,
)
if "stable-diffusion-xl-base-1.0" in base_model_id:
new_steps = gr.Dropdown(
value=40,
choices=[20, 25, 30, 35, 40, 45, 50],
label="\U0001F3C3\U0000FE0F Steps",
allow_custom_value=True,
)
elif ".py" in base_model_id:
new_steps = gr.Dropdown(
value=20,
choices=[10, 15, 20],
label="\U0001F3C3\U0000FE0F Steps",
allow_custom_value=True,
)
else:
new_steps = gr.Dropdown(
value=20,
choices=[10, 20, 30, 40, 50],
label="\U0001F3C3\U0000FE0F Steps",
allow_custom_value=True,
)
return gr.Dropdown(
value=new_choices[0] if len(new_choices) > 0 else "None",
choices=["None"] + new_choices,
)
return [
gr.Dropdown(
value=new_choices[0] if len(new_choices) > 0 else "None",
choices=["None"] + new_choices,
),
new_steps,
]
init_config = global_obj.get_init_config()
init_config = none_to_str_none(json.loads(view_json_file(init_config)))
with gr.Blocks(title="Stable Diffusion") as sd_element:
with gr.Column(elem_id="ui_body"):
with gr.Row():
with gr.Column(scale=2, min_width=600):
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="\U00002795\U0000FE0F Prompt",
value=init_config["prompt"][0],
lines=4,
elem_id="prompt_box",
show_copy_button=True,
)
negative_prompt = gr.Textbox(
label="\U00002796\U0000FE0F Negative Prompt",
value=init_config["negative_prompt"][0],
lines=4,
elem_id="negative_prompt_box",
show_copy_button=True,
)
with gr.Accordion(
label="\U0001F4D0\U0000FE0F Device Settings", open=False
label="\U0001F4D0\U0000FE0F Advanced Settings", open=False
):
device = gr.Dropdown(
elem_id="device",
label="Device",
value=global_obj.get_device_list()[0],
choices=global_obj.get_device_list(),
allow_custom_value=False,
)
with gr.Accordion(label="Device Settings", open=False):
device = gr.Dropdown(
elem_id="device",
label="Device",
value=(
init_config["device"]
if init_config["device"]
else "rocm"
),
choices=global_obj.get_device_list(),
allow_custom_value=True,
)
target_triple = gr.Textbox(
elem_id="target_triple",
label="Architecture",
value=init_config["target_triple"],
)
with gr.Row():
ondemand = gr.Checkbox(
value=init_config["ondemand"],
label="Low VRAM",
interactive=True,
visible=False,
)
precision = gr.Radio(
label="Precision",
value=init_config["precision"],
choices=[
"fp16",
"fp32",
],
visible=False,
)
with gr.Row():
ondemand = gr.Checkbox(
value=cmd_opts.lowvram,
label="Low VRAM",
height = gr.Slider(
512,
1024,
value=512,
step=512,
label="\U00002195\U0000FE0F Height",
interactive=False, # DEMO
visible=False, # DEMO
)
width = gr.Slider(
512,
1024,
value=512,
step=512,
label="\U00002194\U0000FE0F Width",
interactive=False, # DEMO
visible=False, # DEMO
)
with gr.Accordion(
label="\U0001F9EA\U0000FE0F Input Image Processing",
open=False,
visible=False,
):
strength = gr.Slider(
0,
1,
value=init_config["strength"],
step=0.01,
label="Denoising Strength",
)
resample_type = gr.Dropdown(
value=init_config["resample_type"],
choices=resampler_list,
label="Resample Type",
allow_custom_value=True,
)
with gr.Row():
sd_model_info = (
f"Checkpoint Path: {str(get_checkpoints_path())}"
)
base_model_id = gr.Dropdown(
label="\U000026F0\U0000FE0F Base Model",
info="Select or enter HF model ID",
elem_id="custom_model",
value=init_config["base_model_id"],
choices=sd_default_models,
allow_custom_value=True,
) # base_model_id
with gr.Row(equal_height=True):
seed = gr.Textbox(
value=init_config["seed"],
label="\U0001F331\U0000FE0F Seed",
info="An integer, -1 for random",
show_copy_button=True,
)
scheduler = gr.Dropdown(
elem_id="scheduler",
label="\U0001F4C5\U0000FE0F Scheduler",
info="\U000E0020", # forces same height as seed
value=init_config["scheduler"],
choices=scheduler_model_map.keys(),
allow_custom_value=False,
visible=False,
)
with gr.Row():
steps = gr.Dropdown(
value=20,
choices=[10, 15, 20],
label="\U0001F3C3\U0000FE0F Steps",
allow_custom_value=True,
)
guidance_scale = gr.Slider(
0,
5, # DEMO
value=4,
step=0.1,
label="\U0001F5C3\U0000FE0F CFG Scale",
visible=False,
)
with gr.Row():
batch_count = gr.Slider(
1,
100,
value=init_config["batch_count"],
step=1,
label="Batch Count",
interactive=True,
visible=False,
)
precision = gr.Radio(
label="Precision",
value=cmd_opts.precision,
choices=[
"fp16",
"fp32",
],
visible=True,
batch_size = gr.Slider(
1,
4,
value=init_config["batch_size"],
step=1,
label="Batch Size",
interactive=False, # DEMO
visible=False,
)
sd_model_info = f"Checkpoint Path: {str(get_checkpoints_path())}"
base_model_id = gr.Dropdown(
label="\U000026F0\U0000FE0F Base Model",
info="Select or enter HF model ID",
elem_id="custom_model",
value="stabilityai/stable-diffusion-2-1-base",
choices=sd_default_models,
) # base_model_id
with gr.Row():
height = gr.Slider(
384,
768,
value=cmd_opts.height,
step=8,
label="\U00002195\U0000FE0F Height",
)
width = gr.Slider(
384,
768,
value=cmd_opts.width,
step=8,
label="\U00002194\U0000FE0F Width",
)
compiled_pipeline = gr.Checkbox(
value=init_config["compiled_pipeline"],
label="Faster txt2img (SDXL only)",
visible=False, # DEMO
)
with gr.Row(elem_classes=["fill"], visible=False):
Path(get_configs_path()).mkdir(parents=True, exist_ok=True)
write_default_sd_configs(get_configs_path())
default_config_file = global_obj.get_init_config()
sd_json = gr.JSON(
elem_classes=["fill"],
value=view_json_file(default_config_file),
)
with gr.Row(visible=False):
with gr.Row():
load_sd_config = gr.Dropdown(
label="Load Config",
value=cmd_opts.defaults,
choices=get_configs(),
allow_custom_value=True,
visible=False,
)
with gr.Row():
save_sd_config = gr.Button(value="Save Config", size="sm")
clear_sd_config = gr.ClearButton(
value="Clear Config",
size="sm",
components=sd_json,
)
# with gr.Row():
sd_config_name = gr.Textbox(
value="Config Name",
info="Name of the file this config will be saved to.",
interactive=True,
show_label=False,
)
with gr.Accordion(
label="\U00002696\U0000FE0F Model Weights", open=False
label="\U00002696\U0000FE0F Model Weights",
open=False,
visible=False, # DEMO
):
with gr.Column():
custom_weights = gr.Dropdown(
label="Checkpoint Weights",
info="Select or enter HF model ID",
elem_id="custom_model",
value="None",
value=init_config["custom_weights"],
allow_custom_value=True,
choices=["None"]
+ get_checkpoints(os.path.basename(str(base_model_id))),
) # custom_weights
base_model_id.change(
fn=base_model_changed,
inputs=[base_model_id],
outputs=[custom_weights],
)
sd_vae_info = (str(get_checkpoints_path("vae"))).replace(
"\\", "\n\\"
)
@@ -319,11 +504,7 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
label=f"VAE Model",
info=sd_vae_info,
elem_id="custom_model",
value=(
os.path.basename(cmd_opts.custom_vae)
if cmd_opts.custom_vae
else "None"
),
value=init_config["custom_vae"],
choices=["None"] + get_checkpoints("vae"),
allow_custom_value=True,
scale=1,
@@ -336,7 +517,11 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
label=f"Standalone LoRA Weights",
info=sd_lora_info,
elem_id="lora_weights",
value=None,
value=(
init_config["embeddings"][0]
if (len(init_config["embeddings"].keys()) > 1)
else "None"
),
multiselect=True,
choices=[] + get_checkpoints("lora"),
scale=2,
@@ -361,67 +546,6 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
outputs=[embeddings_config],
show_progress=False,
)
with gr.Accordion(
label="\U0001F9EA\U0000FE0F Input Image Processing", open=False
):
strength = gr.Slider(
0,
1,
value=cmd_opts.strength,
step=0.01,
label="Denoising Strength",
)
resample_type = gr.Dropdown(
value=cmd_opts.resample_type,
choices=resampler_list,
label="Resample Type",
allow_custom_value=True,
)
with gr.Group(elem_id="prompt_box_outer"):
prompt = gr.Textbox(
label="\U00002795\U0000FE0F Prompt",
value=cmd_opts.prompt[0],
lines=2,
elem_id="prompt_box",
show_copy_button=True,
)
negative_prompt = gr.Textbox(
label="\U00002796\U0000FE0F Negative Prompt",
value=cmd_opts.negative_prompt[0],
lines=2,
elem_id="negative_prompt_box",
show_copy_button=True,
)
with gr.Row(equal_height=True):
seed = gr.Textbox(
value=cmd_opts.seed,
label="\U0001F331\U0000FE0F Seed",
info="An integer or a JSON list of integers, -1 for random",
show_copy_button=True,
)
scheduler = gr.Dropdown(
elem_id="scheduler",
label="\U0001F4C5\U0000FE0F Scheduler",
info="\U000E0020", # forces same height as seed
value="EulerDiscrete",
choices=scheduler_model_map.keys(),
allow_custom_value=False,
)
with gr.Row():
steps = gr.Slider(
1,
100,
value=cmd_opts.steps,
step=1,
label="\U0001F3C3\U0000FE0F Steps",
)
guidance_scale = gr.Slider(
0,
50,
value=cmd_opts.guidance_scale,
step=0.1,
label="\U0001F5C3\U0000FE0F CFG Scale",
)
with gr.Accordion(
label="Controlnet Options",
open=False,
@@ -471,17 +595,17 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
with gr.Row():
canvas_width = gr.Slider(
label="Canvas Width",
minimum=256,
minimum=512,
maximum=1024,
value=512,
step=8,
step=512,
)
canvas_height = gr.Slider(
label="Canvas Height",
minimum=256,
minimum=512,
maximum=1024,
value=512,
step=8,
step=512,
)
make_canvas = gr.Button(
value="Make Canvas!",
@@ -551,7 +675,9 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
lambda: gr.Tabs(selected=101),
outputs=[sd_tabs],
)
with gr.Tab(label="Input Image", id=100) as sd_tab_init_image:
with gr.Tab(
label="Input Image", id=100, visible=False
) as sd_tab_init_image: # DEMO
with gr.Column(elem_classes=["sd-right-panel"]):
with gr.Row(elem_classes=["fill"]):
# TODO: make this import image prompt info if it exists
@@ -581,137 +707,107 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
object_fit="fit",
preview=True,
)
with gr.Row():
std_output = gr.Textbox(
value=f"{sd_model_info}\n"
f"Images will be saved at "
f"{get_generated_imgs_path()}",
lines=2,
elem_id="std_output",
show_label=True,
label="Log",
show_copy_button=True,
)
sd_element.load(
logger.read_sd_logs, None, std_output, every=1
)
sd_status = gr.Textbox(visible=False)
with gr.Row():
batch_count = gr.Slider(
1,
100,
value=cmd_opts.batch_count,
step=1,
label="Batch Count",
interactive=True,
)
batch_size = gr.Slider(
1,
4,
value=cmd_opts.batch_size,
step=1,
label="Batch Size",
interactive=True,
visible=True,
)
repeatable_seeds = gr.Checkbox(
cmd_opts.repeatable_seeds,
label="Use Repeatable Seeds for Batches",
)
with gr.Row():
stable_diffusion = gr.Button("Start")
random_seed = gr.Button("Randomize Seed")
random_seed.click(
lambda: -1,
inputs=[],
outputs=[seed],
unload = gr.Button("Unload Models")
unload.click(
fn=unload_sd,
queue=False,
show_progress=False,
)
stop_batch = gr.Button("Stop")
with gr.Tab(label="Config", id=102) as sd_tab_config:
with gr.Column(elem_classes=["sd-right-panel"]):
with gr.Row(elem_classes=["fill"]):
Path(get_configs_path()).mkdir(
parents=True, exist_ok=True
)
default_config_file = os.path.join(
get_configs_path(),
"default_sd_config.json",
)
write_default_sd_config(default_config_file)
sd_json = gr.JSON(
elem_classes=["fill"],
value=view_json_file(default_config_file),
)
with gr.Row():
with gr.Column(scale=3):
load_sd_config = gr.FileExplorer(
label="Load Config",
file_count="single",
root_dir=(
cmd_opts.configs_path
if cmd_opts.configs_path
else get_configs_path()
),
height=75,
)
with gr.Column(scale=1):
save_sd_config = gr.Button(
value="Save Config", size="sm"
)
clear_sd_config = gr.ClearButton(
value="Clear Config",
size="sm",
components=sd_json,
)
with gr.Row():
sd_config_name = gr.Textbox(
value="Config Name",
info="Name of the file this config will be saved to.",
interactive=True,
show_label=False,
)
load_sd_config.change(
fn=load_sd_cfg,
inputs=[sd_json, load_sd_config],
outputs=[
prompt,
negative_prompt,
sd_init_image,
height,
width,
steps,
strength,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
base_model_id,
custom_weights,
custom_vae,
precision,
device,
ondemand,
repeatable_seeds,
resample_type,
cnet_config,
embeddings_config,
sd_json,
],
)
save_sd_config.click(
fn=save_sd_cfg,
inputs=[sd_json, sd_config_name],
outputs=[sd_config_name],
)
save_sd_config.click(
fn=save_sd_cfg,
inputs=[sd_json, sd_config_name],
outputs=[sd_config_name],
)
stop_batch = gr.Button("Stop", visible=False)
# with gr.Tab(label="Config", id=102) as sd_tab_config:
# with gr.Group():#elem_classes=["sd-right-panel"]):
# with gr.Row(elem_classes=["fill"], visible=False):
# Path(get_configs_path()).mkdir(
# parents=True, exist_ok=True
# )
# write_default_sd_configs(get_configs_path())
# default_config_file = global_obj.get_init_config()
# sd_json = gr.JSON(
# elem_classes=["fill"],
# value=view_json_file(default_config_file),
# )
# with gr.Row():
# with gr.Row():
# load_sd_config = gr.Dropdown(
# label="Load Config",
# value=cmd_opts.defaults,
# choices=get_configs(),
# allow_custom_value=True,
# )
# with gr.Row():
# save_sd_config = gr.Button(
# value="Save Config", size="sm"
# )
# clear_sd_config = gr.ClearButton(
# value="Clear Config",
# size="sm",
# components=sd_json,
# )
# # with gr.Row():
# sd_config_name = gr.Textbox(
# value="Config Name",
# info="Name of the file this config will be saved to.",
# interactive=True,
# show_label=False,
# )
with gr.Tab(label="Log", id=103, visible=False) as sd_tab_log:
with gr.Row():
std_output = gr.Textbox(
value=f"{sd_model_info}\n"
f"Images will be saved at "
f"{get_generated_imgs_path()}",
lines=2,
elem_id="std_output",
show_label=True,
label="Log",
show_copy_button=True,
)
sd_element.load(
logger.read_sd_logs, None, std_output, every=1
)
sd_status = gr.Textbox(visible=False)
base_model_id.change(
fn=base_model_changed,
inputs=[base_model_id],
outputs=[custom_weights, steps],
)
load_sd_config.change(
fn=load_sd_cfg,
inputs=[sd_json, load_sd_config],
outputs=[
prompt,
negative_prompt,
sd_init_image,
height,
width,
steps,
strength,
guidance_scale,
seed,
batch_count,
batch_size,
scheduler,
base_model_id,
custom_weights,
custom_vae,
precision,
device,
target_triple,
ondemand,
compiled_pipeline,
resample_type,
cnet_config,
embeddings_config,
sd_json,
],
)
save_sd_config.click(
fn=save_sd_cfg,
inputs=[sd_json, sd_config_name],
outputs=[sd_config_name],
)
pull_kwargs = dict(
fn=pull_sd_configs,
inputs=[
@@ -732,8 +828,9 @@ with gr.Blocks(title="Stable Diffusion") as sd_element:
custom_vae,
precision,
device,
target_triple,
ondemand,
repeatable_seeds,
compiled_pipeline,
resample_type,
cnet_config,
embeddings_config,

View File

@@ -10,8 +10,8 @@ def resource_path(relative_path):
return os.path.join(base_path, relative_path)
nodlogo_loc = resource_path("logos/nod-logo.png")
nodicon_loc = resource_path("logos/nod-icon.png")
amdlogo_loc = resource_path("logos/amd-logo.jpg")
amdicon_loc = resource_path("logos/amd-icon.jpg")
class HSLHue(IntEnum):

View File

@@ -0,0 +1,95 @@
default_sd_config = r"""{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
sdxl_30steps = r"""{
"prompt": [
"a cat under the snow with blue eyes, covered by snow, cinematic style, medium shot, professional photo, animal"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 1024,
"width": 1024,
"steps": 30,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-xl-base-1.0",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": true,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
sdxl_turbo = r"""{
"prompt": [
"A cat wearing a hat that says 'TURBO' on it. The cat is sitting on a skateboard."
],
"negative_prompt": [
""
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 2,
"strength": 0.8,
"guidance_scale": 0,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerAncestralDiscrete",
"base_model_id": "stabilityai/sdxl-turbo",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "",
"target_triple": "",
"ondemand": false,
"compiled_pipeline": true,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
default_sd_configs = {
# "default_sd_config.json": sdxl_turbo,
# "sdxl-30steps.json": sdxl_30steps,
"sdxl-turbo.json": sdxl_turbo,
}
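
Since each entry above is a raw JSON string, reading one back is a one-liner; a quick sketch (import path matches the file_utils hunk below):

import json

from apps.shark_studio.web.utils.default_configs import default_sd_configs

cfg = json.loads(default_sd_configs["sdxl-turbo.json"])
assert cfg["steps"] == 2 and cfg["guidance_scale"] == 0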

View File

@@ -11,43 +11,19 @@ checkpoints_filetypes = (
"*.safetensors",
)
default_sd_config = r"""{
"prompt": [
"a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
],
"negative_prompt": [
"watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
],
"sd_init_image": [null],
"height": 512,
"width": 512,
"steps": 50,
"strength": 0.8,
"guidance_scale": 7.5,
"seed": "-1",
"batch_count": 1,
"batch_size": 1,
"scheduler": "EulerDiscrete",
"base_model_id": "stabilityai/stable-diffusion-2-1-base",
"custom_weights": null,
"custom_vae": null,
"precision": "fp16",
"device": "AMD Radeon RX 7900 XTX => vulkan://0",
"ondemand": false,
"repeatable_seeds": false,
"resample_type": "Nearest Neighbor",
"controlnets": {},
"embeddings": {}
}"""
from apps.shark_studio.web.utils.default_configs import default_sd_configs
def write_default_sd_config(path):
with open(path, "w") as f:
f.write(default_sd_config)
def write_default_sd_configs(path):
for key in default_sd_configs.keys():
config_fpath = os.path.join(path, key)
if not os.path.exists(config_fpath):
with open(config_fpath, "w") as f:
f.write(default_sd_configs[key])
def safe_name(name):
return name.replace("/", "_").replace("-", "_")
return name.split("/")[-1].replace("-", "_")
def get_path_stem(path):
@@ -66,33 +42,39 @@ def get_resource_path(path):
def get_configs_path() -> Path:
configs = get_resource_path(os.path.join("..", "configs"))
configs = get_resource_path(cmd_opts.config_dir)
if not os.path.exists(configs):
os.mkdir(configs)
return Path(get_resource_path("../configs"))
return Path(configs)
def get_generated_imgs_path() -> Path:
return Path(
cmd_opts.output_dir
if cmd_opts.output_dir
else get_resource_path("../generated_imgs")
)
outputs = get_resource_path(cmd_opts.output_dir)
if not os.path.exists(outputs):
os.mkdir(outputs)
return Path(outputs)
def get_tmp_path() -> Path:
tmpdir = get_resource_path(cmd_opts.tmp_dir)
if not os.path.exists(tmpdir):
os.mkdir(tmpdir)
return Path(tmpdir)
def get_generated_imgs_todays_subdir() -> str:
return dt.now().strftime("%Y%m%d")
def create_checkpoint_folders():
def create_model_folders():
dir = ["checkpoints", "vae", "lora", "vmfb"]
if not os.path.isdir(cmd_opts.ckpt_dir):
if not os.path.isdir(cmd_opts.model_dir):
try:
os.makedirs(cmd_opts.ckpt_dir)
os.makedirs(cmd_opts.model_dir)
except OSError:
sys.exit(
f"Invalid --ckpt_dir argument, "
f"{cmd_opts.ckpt_dir} folder does not exist, and cannot be created."
f"Invalid --model_dir argument, "
f"{cmd_opts.model_dir} folder does not exist, and cannot be created."
)
for root in dir:
@@ -100,12 +82,14 @@ def create_checkpoint_folders():
def get_checkpoints_path(model_type=""):
return get_resource_path(os.path.join(cmd_opts.ckpt_dir, model_type))
return get_resource_path(os.path.join(cmd_opts.model_dir, model_type))
def get_checkpoints(model_type="checkpoints"):
ckpt_files = []
file_types = checkpoints_filetypes
if model_type == "scripts":
file_types = ["shark_*.py"]
if model_type == "lora":
file_types = file_types + ("*.pt", "*.bin")
for extn in file_types:
@@ -117,5 +101,15 @@ def get_checkpoints(model_type="checkpoints"):
return sorted(ckpt_files, key=str.casefold)
def get_configs():
return sorted(
[
os.path.basename(x)
for x in glob.glob(os.path.join(get_configs_path(), "*.json"))
],
key=str.casefold,
)
def get_checkpoint_pathfile(checkpoint_name, model_type="checkpoints"):
return os.path.join(get_checkpoints_path(model_type), checkpoint_name)
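
A brief sketch of the new helpers; the checkpoint file name is illustrative:

from apps.shark_studio.web.utils.file_utils import (
    get_checkpoint_pathfile,
    get_configs,
)

print(get_configs())  # e.g. ["sdxl-turbo.json"]
vae_path = get_checkpoint_pathfile("my_vae.safetensors", model_type="vae")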

View File

@@ -1,5 +1,8 @@
import gc
from ...api.utils import get_available_devices
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
import os
from apps.shark_studio.web.utils.file_utils import get_configs_path
"""
The global objects include SD pipeline and config.
@@ -8,6 +11,13 @@ Also we could avoid memory leak when switching models by clearing the cache.
"""
def view_json_file(file_path):
content = ""
with open(file_path, "r") as fopen:
content = fopen.read()
return content
def _init():
global _sd_obj
global _llm_obj
@@ -90,6 +100,20 @@ def get_device_list():
return _devices
def get_init_config():
global _init_config
if os.path.exists(cmd_opts.defaults):
_init_config = cmd_opts.defaults
elif os.path.exists(os.path.join(get_configs_path(), cmd_opts.defaults)):
_init_config = os.path.join(get_configs_path(), cmd_opts.defaults)
else:
print(
"Default config not found as absolute path or in configs folder. Using sdxl-turbo as default config."
)
_init_config = os.path.join(get_configs_path(), "sdxl-turbo.json")
return _init_config
def get_sd_status():
global _sd_obj
return _sd_obj.status
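
For clarity, the --defaults resolution order implemented in get_init_config above, restated as a standalone sketch:

import os

def resolve_default_config(defaults: str, configs_path: str) -> str:
    # 1) the value works as a path on its own
    if os.path.exists(defaults):
        return defaults
    # 2) the value names a file inside the configs directory
    candidate = os.path.join(configs_path, defaults)
    if os.path.exists(candidate):
        return candidate
    # 3) fall back to the bundled sdxl-turbo config
    return os.path.join(configs_path, "sdxl-turbo.json")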

View File

@@ -3,9 +3,8 @@ from pathlib import Path
from apps.shark_studio.web.utils.file_utils import (
get_checkpoint_pathfile,
)
from apps.shark_studio.api.sd import (
sd_model_map,
)
from apps.shark_studio.api.sd import EMPTY_SD_MAP as sd_model_map
from apps.shark_studio.modules.schedulers import (
scheduler_model_map,
)

View File

@@ -2,7 +2,9 @@ import os
import shutil
from time import time
shark_tmp = os.path.join(os.getcwd(), "shark_tmp/")
from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
shark_tmp = cmd_opts.tmp_dir # os.path.join(os.getcwd(), "shark_tmp/")
def clear_tmp_mlir():
@@ -15,7 +17,7 @@ def clear_tmp_mlir():
and filename.endswith(".mlir")
]
for filename in mlir_files:
os.remove(shark_tmp + filename)
os.remove(os.path.join(shark_tmp, filename))
print(f"Clearing .mlir temporary files took {time() - cleanup_start:.4f} seconds.")

View File

@@ -1,22 +0,0 @@
import torch
from shark.parser import parser
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
parser.add_argument(
"--model_name",
type=str,
required=True,
help='Specifies the name of the HF model to benchmark (for example "microsoft/MiniLM-L12-H384-uncased").',
)
load_args, unknown = parser.parse_known_args()
if __name__ == "__main__":
model_name = load_args.model_name
test_input = torch.randint(2, (1, 128))
shark_module = SharkHFBenchmarkRunner(
model_name, (test_input,), jit_trace=True
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)

View File

@@ -1,181 +0,0 @@
import torch
from shark.shark_benchmark_runner import SharkBenchmarkRunner
from shark.parser import shark_args
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from onnxruntime.transformers.benchmark import (
run_pytorch,
run_tensorflow,
run_onnxruntime,
)
from onnxruntime.transformers.huggingface_models import MODELS
from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
import os
import psutil
class OnnxFusionOptions(object):
def __init__(self):
self.disable_gelu = False
self.disable_layer_norm = False
self.disable_attention = False
self.disable_skip_layer_norm = False
self.disable_embed_layer_norm = False
self.disable_bias_skip_layer_norm = False
self.disable_bias_gelu = False
self.enable_gelu_approximation = False
self.use_mask_index = False
self.no_attention_mask = False
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
# SharkRunner derived class with Benchmarking capabilities.
def __init__(
self,
model_name: str,
input: tuple,
dynamic: bool = False,
device: str = None,
jit_trace: bool = False,
from_aot: bool = False,
frontend: str = "torch",
):
self.device = device if device is not None else shark_args.device
if self.device == "gpu":
raise ValueError(
"Currently GPU Benchmarking is not supported due to OOM from ORT."
)
self.model_name = model_name
model = HuggingFaceLanguage(model_name)
SharkBenchmarkRunner.__init__(
self,
model,
input,
dynamic,
self.device,
jit_trace,
from_aot,
frontend,
)
def benchmark_torch(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_pytorch(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
False,
cache_dir,
verbose,
)
print(
f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
# TODO: Currently non-functional due to a TF runtime error. There might be some issue with initializing TF.
def benchmark_tf(self, inputs):
use_gpu = self.device == "gpu"
# Set the model's layer number to automatic.
config_modifier = ConfigModifier(None)
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
verbose = False
result = run_tensorflow(
use_gpu,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
cache_dir,
verbose,
)
print(
f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)
def benchmark_onnx(self, inputs):
if self.model_name not in MODELS:
print(
f"{self.model_name} is currently not supported in ORT's HF. Check \
https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
for currently supported models. Exiting benchmark ONNX."
)
return
use_gpu = self.device == "gpu"
num_threads = psutil.cpu_count(logical=False)
batch_sizes = [inputs.shape[0]]
sequence_lengths = [inputs.shape[-1]]
cache_dir = os.path.join(".", "cache_models")
onnx_dir = os.path.join(".", "onnx_models")
verbose = False
input_counts = [1]
optimize_onnx = True
validate_onnx = False
disable_ort_io_binding = False
use_raw_attention_mask = True
model_fusion_statistics = {}
overwrite = False
model_source = "pt" # Either "pt" or "tf"
provider = None
config_modifier = ConfigModifier(None)
onnx_args = OnnxFusionOptions()
result = run_onnxruntime(
use_gpu,
provider,
[self.model_name],
None,
config_modifier,
Precision.FLOAT32,
num_threads,
batch_sizes,
sequence_lengths,
shark_args.num_iterations,
input_counts,
optimize_onnx,
validate_onnx,
cache_dir,
onnx_dir,
verbose,
overwrite,
disable_ort_io_binding,
use_raw_attention_mask,
model_fusion_statistics,
model_source,
onnx_args,
)
print(
f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
)

View File

@@ -1,231 +0,0 @@
from shark.shark_inference import SharkInference
from shark.iree_utils._common import check_device_drivers
import torch
import tensorflow as tf
import numpy as np
import torchvision.models as models
from transformers import (
AutoModelForSequenceClassification,
BertTokenizer,
TFBertModel,
)
import importlib
import pytest
import unittest
torch.manual_seed(0)
gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
##################### Tensorflow Hugging Face LM Models ###################################
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
tf_bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class TFHuggingFaceLanguage(tf.Module):
def __init__(self, hf_model_name):
super(TFHuggingFaceLanguage, self).__init__()
# Load the pretrained TF BERT model (converted from the PyTorch weights).
self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=tf_bert_input, jit_compile=True)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
def get_TFhf_model(name):
model = TFHuggingFaceLanguage(name)
tokenizer = BertTokenizer.from_pretrained(name)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
actual_out = model.forward(*test_input)
return model, test_input, actual_out
##################### Hugging Face LM Models ###################################
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self, hf_model_name):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
hf_model_name, # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
def get_hf_model(name):
model = HuggingFaceLanguage(name)
# TODO: Currently the test input is set to (1,128)
test_input = torch.randint(2, (1, 128))
actual_out = model(test_input)
return model, test_input, actual_out
################################################################################
##################### Torch Vision Models ###################################
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
def get_vision_model(torch_model):
model = VisionModule(torch_model)
# TODO: Currently the test input is set to (1, 3, 224, 224)
test_input = torch.randn(1, 3, 224, 224)
actual_out = model(test_input)
return model, test_input, actual_out
############################# Benchmark Tests ####################################
pytest_benchmark_param = pytest.mark.parametrize(
("dynamic", "device"),
[
pytest.param(False, "cpu"),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, "cpu", marks=pytest.mark.skip),
pytest.param(
False,
"cuda",
marks=pytest.mark.skipif(
check_device_drivers("cuda"), reason="nvidia-smi not found"
),
),
pytest.param(True, "cuda", marks=pytest.mark.skip),
pytest.param(
False,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
pytest.param(
True,
"vulkan",
marks=pytest.mark.skipif(
check_device_drivers("vulkan"),
reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
),
),
],
)
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_minilm_torch(dynamic, device):
model, test_input, act_out = get_hf_model(
"microsoft/MiniLM-L12-H384-uncased"
)
shark_module = SharkInference(
model,
(test_input,),
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking succeeds, assert True.
shark_module.compile()
shark_module.benchmark_all((test_input,))
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False
@pytest.mark.skipif(
importlib.util.find_spec("iree.tools") is None,
reason="Cannot find tools to import TF",
)
@pytest_benchmark_param
def test_bench_distilbert(dynamic, device):
model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking succeeds, assert True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False
@pytest.mark.skip(reason="XLM Roberta too large to test.")
@pytest_benchmark_param
def test_bench_xlm_roberta(dynamic, device):
model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
shark_module = SharkInference(
model,
test_input,
device=device,
dynamic=dynamic,
jit_trace=True,
benchmark_mode=True,
)
try:
# If benchmarking succeeds, assert True.
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False


@@ -1,45 +0,0 @@
import torch
from benchmarks.hf_transformer import SharkHFBenchmarkRunner
import importlib
import pytest
torch.manual_seed(0)
############################# HF Benchmark Tests ####################################
# Test that the benchmark module runs without failing.
pytest_benchmark_param = pytest.mark.parametrize(
("dynamic", "device"),
[
pytest.param(False, "cpu"),
# TODO: Language models are failing for the dynamic case.
pytest.param(True, "cpu", marks=pytest.mark.skip),
],
)
@pytest.mark.skipif(
importlib.util.find_spec("onnxruntime") is None,
reason="Cannot find ONNXRUNTIME.",
)
@pytest_benchmark_param
def test_HFbench_minilm_torch(dynamic, device):
model_name = "bert-base-uncased"
test_input = torch.randint(2, (1, 128))
try:
shark_module = SharkHFBenchmarkRunner(
model_name,
(test_input,),
jit_trace=True,
dynamic=dynamic,
device=device,
)
shark_module.benchmark_c()
shark_module.benchmark_python((test_input,))
shark_module.benchmark_torch(test_input)
shark_module.benchmark_onnx(test_input)
# If benchmarking succeeds, assert True.
assert True
except Exception as e:
# If anything goes wrong during benchmarking, assert False.
assert False

cpp/.gitignore

@@ -1,3 +0,0 @@
*.mlir
*.vmfb
*.ini


@@ -1,52 +0,0 @@
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
cmake_minimum_required(VERSION 3.21...3.23)
#-------------------------------------------------------------------------------
# Project configuration
#-------------------------------------------------------------------------------
project(iree-samples C CXX)
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
#-------------------------------------------------------------------------------
# Core project dependency
#-------------------------------------------------------------------------------
message(STATUS "Fetching core IREE repo (this may take a few minutes)...")
# Note: for log output, set -DFETCHCONTENT_QUIET=OFF,
# see https://gitlab.kitware.com/cmake/cmake/-/issues/18238#note_440475
include(FetchContent)
FetchContent_Declare(
iree
GIT_REPOSITORY https://github.com/nod-ai/srt.git
GIT_TAG shark
GIT_SUBMODULES_RECURSE OFF
GIT_SHALLOW OFF
GIT_PROGRESS ON
USES_TERMINAL_DOWNLOAD ON
)
# Extend module path to find MLIR CMake modules.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/lib/cmake/mlir")
# Disable core project features not needed for these out of tree samples.
set(IREE_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(IREE_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(iree)
FetchContent_GetProperties(iree SOURCE_DIR IREE_SOURCE_DIR)
#-------------------------------------------------------------------------------
# Individual samples
#-------------------------------------------------------------------------------
add_subdirectory(vulkan_gui)


@@ -1,82 +0,0 @@
# SHARK C/C++ Samples
These C/C++ samples can be built using CMake. The samples depend on the main
SHARK-Runtime project's C/C++ sources, including both the runtime and the compiler.
Individual samples may require additional dependencies. Watch CMake's output
for information about which dependencies you are missing for individual samples.
On Windows we recommend using https://github.com/microsoft/vcpkg to download packages for
your system. The general setup flow looks like:
*Install and activate SHARK*
```bash
source shark.venv/bin/activate # follow the main repo instructions to set up your venv
```
*Install Dependencies*
```bash
vcpkg install [library] --triplet [your platform]
vcpkg integrate install
# Then pass `-DCMAKE_TOOLCHAIN_FILE=[check logs for path]` when configuring CMake
```
On Ubuntu Linux you can install the SDL2 dependency with
```bash
sudo apt install libsdl2-dev
```
*Build*
```bash
cd cpp
cmake -GNinja -B build/
cmake --build build/
```
*Prepare the model*
```bash
wget https://storage.googleapis.com/shark_tank/latest/resnet50_tf/resnet50_tf.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvmcpu-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux resnet50_tf.mlir -o resnet50_tf.vmfb
```
*Prepare the input*
```bash
python save_img.py
```
Note that this requires tensorflow, e.g.
```bash
python -m pip install tensorflow
```
*Run the vulkan_gui*
```bash
./build/vulkan_gui/iree-samples-resnet-vulkan-gui
```
## Other models
A tool for benchmarking other models is also built; it can be invoked with a command like the following:
```bash
./build/vulkan_gui/iree-vulkan-gui --module-file=path/to/.vmfb --function_input=...
```
See `./build/vulkan_gui/iree-vulkan-gui --help` for an explanation of the function input format. For example, the Stable Diffusion UNet can be tested with the following commands:
```bash
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux stable_diff_tf.mlir -o stable_diff_tf.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=2x4x64x64xf32 --function_input=1xf32 --function_input=2x77x768xf32
```
The VAE and CLIP autoencoder are also available:
```bash
# VAE
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux vae.mlir -o vae.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=vae.vmfb --function_input=1x4x64x64xf32
# CLIP Autoencoder
wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux clip_autoencoder.mlir -o clip_autoencoder.vmfb
./build/vulkan_gui/iree-vulkan-gui --module-file=clip_autoencoder.vmfb --function_input=1x77xi32 --function_input=1x77xi32
```

(Deleted binary image file: 26 KiB)


@@ -1,18 +0,0 @@
import numpy as np
import tensorflow as tf
from shark.shark_inference import SharkInference
def load_and_preprocess_image(fname: str):
image = tf.io.read_file(fname)
image = tf.image.decode_image(image, channels=3)
image = tf.image.resize(image, (224, 224))
image = image[tf.newaxis, :]
# preprocessing pipeline
input_tensor = tf.keras.applications.resnet50.preprocess_input(image)
return input_tensor
data = load_and_preprocess_image("dog_imagenet.jpg").numpy()
data.tofile("dog.bin")


@@ -1,84 +0,0 @@
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
if(NOT IREE_TARGET_BACKEND_LLVM_CPU OR
NOT IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF)
message(STATUS "Missing LLVM backend and/or embeddded elf loader, skipping vision_inference sample")
return()
endif()
# vcpkg install stb
# tested with version 2021-09-10
find_package(Stb)
if(NOT Stb_FOUND)
message(STATUS "Could not find Stb, skipping vision inference sample")
return()
endif()
# Compile mnist.mlir to mnist.vmfb.
set(_COMPILE_TOOL_EXECUTABLE $<TARGET_FILE:iree-compile>)
set(_COMPILE_ARGS)
list(APPEND _COMPILE_ARGS "--iree-input-type=auto")
list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu")
list(APPEND _COMPILE_ARGS "${IREE_SOURCE_DIR}/samples/models/mnist.mlir")
list(APPEND _COMPILE_ARGS "-o")
list(APPEND _COMPILE_ARGS "mnist.vmfb")
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
COMMAND ${_COMPILE_TOOL_EXECUTABLE} ${_COMPILE_ARGS}
DEPENDS ${_COMPILE_TOOL_EXECUTABLE} "${IREE_SOURCE_DIR}/samples/models/mnist.mlir"
)
# Embed mnist.vmfb into a C file as mnist_bytecode_module_c.[h/c]
set(_EMBED_DATA_EXECUTABLE $<TARGET_FILE:generate_embed_data>)
set(_EMBED_ARGS)
list(APPEND _EMBED_ARGS "--output_header=mnist_bytecode_module_c.h")
list(APPEND _EMBED_ARGS "--output_impl=mnist_bytecode_module_c.c")
list(APPEND _EMBED_ARGS "--identifier=iree_samples_vision_inference_mnist_bytecode_module")
list(APPEND _EMBED_ARGS "--flatten")
list(APPEND _EMBED_ARGS "${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb")
add_custom_command(
OUTPUT "mnist_bytecode_module_c.h" "mnist_bytecode_module_c.c"
COMMAND ${_EMBED_DATA_EXECUTABLE} ${_EMBED_ARGS}
DEPENDS ${_EMBED_DATA_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
)
# Define a library target for mnist_bytecode_module_c.
add_library(iree_samples_vision_inference_mnist_bytecode_module_c OBJECT)
target_sources(iree_samples_vision_inference_mnist_bytecode_module_c
PRIVATE
mnist_bytecode_module_c.h
mnist_bytecode_module_c.c
)
# Define the sample executable.
set(_NAME "iree-run-mnist-module")
add_executable(${_NAME} "")
target_sources(${_NAME}
PRIVATE
"image_util.h"
"image_util.c"
"iree-run-mnist-module.c"
)
set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "iree-run-mnist-module")
target_include_directories(${_NAME} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
target_include_directories(${_NAME} PRIVATE
${Stb_INCLUDE_DIR}
)
target_link_libraries(${_NAME}
iree_base_base
iree_base_tracing
iree_hal_hal
iree_runtime_runtime
iree_samples_vision_inference_mnist_bytecode_module_c
)
# Define a target that copies the test image into the build directory.
add_custom_target(iree_samples_vision_inference_test_image
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/mnist_test.png" "${CMAKE_CURRENT_BINARY_DIR}/mnist_test.png")
add_dependencies(${_NAME} iree_samples_vision_inference_test_image)
message(STATUS "Configured vision_inference sample successfully")


@@ -1,8 +0,0 @@
# Vision Inference Sample (C code)
This sample demonstrates how to run a MNIST handwritten digit detection vision
model on an image using IREE's C API.
A similar sample is implemented using a Python script and IREE's command line
tools over in the primary iree repository at
https://github.com/iree-org/iree/tree/main/samples/vision_inference
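For reference, the same flow can be sketched from Python with IREE's runtime bindings. This is a minimal, illustrative sketch, not part of the sample: it assumes the `iree-runtime` pip package and an `mnist.vmfb` compiled by the CMake rules above, exposing the same `module.predict` entry point the C code calls.

```python
# Hypothetical sketch (not part of this sample): run mnist.vmfb from Python.
# Assumes `pip install iree-runtime` and an mnist.vmfb compiled for llvm-cpu.
import numpy as np
from iree import runtime as ireert

# Load the compiled module on the CPU ("local-task") driver.
module = ireert.load_vm_flatbuffer_file("mnist.vmfb", driver="local-task")

# The model takes a 1x28x28x1 float32 image with pixels in [0.0, 1.0].
image = np.zeros((1, 28, 28, 1), dtype=np.float32)
predictions = module.predict(image).to_host()  # 1x10 confidence values

print("Detected number:", int(np.argmax(predictions)))
```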


@@ -1,224 +0,0 @@
// Copyright 2021 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "image_util.h"
#include <math.h>
#include "iree/base/internal/flags.h"
#include "iree/base/tracing.h"
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
const uint8_t* pixel_data, iree_host_size_t buffer_length,
const float* input_range, iree_host_size_t range_length,
float* out_buffer) {
IREE_TRACE_ZONE_BEGIN(z0);
if (range_length != 2) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"range defined as 2-element [min, max] array.");
}
float input_scale = fabsf(input_range[1] - input_range[0]) / 2.0f;
float input_offset = (input_range[0] + input_range[1]) / 2.0f;
const float kUint8Mean = 127.5f;
for (int i = 0; i < buffer_length; ++i) {
out_buffer[i] =
(((float)(pixel_data[i])) - kUint8Mean) / kUint8Mean * input_scale +
input_offset;
}
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
iree_status_t iree_tools_utils_load_pixel_data_impl(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
int img_dims[3];
if (stbi_info(filename.data, img_dims, &(img_dims[1]), &(img_dims[2])) == 0) {
return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
(int)filename.size, filename.data);
}
if (!(element_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 ||
element_type == IREE_HAL_ELEMENT_TYPE_SINT_8 ||
element_type == IREE_HAL_ELEMENT_TYPE_UINT_8)) {
char element_type_str[16];
IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
element_type, sizeof(element_type_str), element_type_str, NULL));
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"element type %s not supported", element_type_str);
}
switch (shape_rank) {
case 2: { // Assume tensor <height x width>
if (img_dims[2] != 1 || (shape[0] != img_dims[1]) ||
(shape[1] != img_dims[0])) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"image size: %dx%dx%d, expected: %" PRIdim "x%" PRIdim, img_dims[0],
img_dims[1], img_dims[2], shape[1], shape[0]);
}
break;
}
case 3: { // Assume tensor <height x width x channel>
if (shape[0] != img_dims[1] || shape[1] != img_dims[0] ||
shape[2] != img_dims[2]) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"image size: %dx%dx%d, expected: %" PRIdim
"x%" PRIdim "x%" PRIdim,
img_dims[0], img_dims[1], img_dims[2], shape[1],
shape[0], shape[2]);
}
break;
}
case 4: { // Assume tensor <batch x height x width x channel>
if (shape[1] != img_dims[1] || shape[2] != img_dims[0] ||
shape[3] != img_dims[2]) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"image size: %dx%dx%d, expected: %" PRIdim
"x%" PRIdim "x%" PRIdim,
img_dims[0], img_dims[1], img_dims[2], shape[2],
shape[1], shape[3]);
}
break;
}
default:
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Input buffer shape rank %" PRIhsz " not supported", shape_rank);
}
// Drop the alpha channel if present.
int req_ch = (img_dims[2] >= 3) ? 3 : 0;
*out_pixel_data = stbi_load(filename.data, img_dims, &(img_dims[1]),
&(img_dims[2]), req_ch);
if (*out_pixel_data == NULL) {
return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
(int)filename.size, filename.data);
}
*out_buffer_length =
img_dims[0] * img_dims[1] * (img_dims[2] > 3 ? 3 : img_dims[2]);
return iree_ok_status();
}
iree_status_t iree_tools_utils_load_pixel_data(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t result = iree_tools_utils_load_pixel_data_impl(
filename, shape, shape_rank, element_type, out_pixel_data,
out_buffer_length);
IREE_TRACE_ZONE_END(z0);
return result;
}
iree_status_t iree_tools_utils_buffer_view_from_image(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view) {
IREE_TRACE_ZONE_BEGIN(z0);
*out_buffer_view = NULL;
if (element_type != IREE_HAL_ELEMENT_TYPE_SINT_8 &&
element_type != IREE_HAL_ELEMENT_TYPE_UINT_8) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"element type should be i8 or u8");
}
iree_status_t result;
uint8_t* pixel_data = NULL;
iree_host_size_t buffer_length;
result = iree_tools_utils_load_pixel_data(
filename, shape, shape_rank, element_type, &pixel_data, &buffer_length);
if (iree_status_is_ok(result)) {
iree_host_size_t element_byte =
iree_hal_element_dense_byte_count(element_type);
// SINT_8 and UINT_8 perform direct buffer wrap.
result = iree_hal_buffer_view_allocate_buffer(
allocator, shape_rank, shape, element_type,
IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
(iree_hal_buffer_params_t){
.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
.access = IREE_HAL_MEMORY_ACCESS_READ,
.usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
IREE_HAL_BUFFER_USAGE_TRANSFER,
},
iree_make_const_byte_span(pixel_data, element_byte * buffer_length),
out_buffer_view);
}
stbi_image_free(pixel_data);
IREE_TRACE_ZONE_END(z0);
return result;
}
typedef struct iree_tools_utils_buffer_view_load_params_t {
const uint8_t* pixel_data;
iree_host_size_t pixel_data_length;
const float* input_range;
iree_host_size_t input_range_length;
} iree_tools_utils_buffer_view_load_params_t;
static iree_status_t iree_tools_utils_buffer_view_load_image_rescaled(
iree_hal_buffer_mapping_t* mapping, void* user_data) {
iree_tools_utils_buffer_view_load_params_t* params =
(iree_tools_utils_buffer_view_load_params_t*)user_data;
return iree_tools_utils_pixel_rescaled_to_buffer(
params->pixel_data, params->pixel_data_length, params->input_range,
params->input_range_length, (float*)mapping->contents.data);
}
iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
iree_hal_allocator_t* allocator, const float* input_range,
iree_host_size_t input_range_length,
iree_hal_buffer_view_t** out_buffer_view) {
IREE_TRACE_ZONE_BEGIN(z0);
*out_buffer_view = NULL;
if (element_type != IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"element type should be f32");
}
// Classic row-major image layout.
iree_hal_encoding_type_t encoding_type =
IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR;
// Load pixel data from the file into a new host memory allocation (the only
// interface stb_image provides). A real application would want to use the
// generation callback to directly decode the image into the target mapped
// device buffer.
uint8_t* pixel_data = NULL;
iree_host_size_t buffer_length = 0;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_tools_utils_load_pixel_data(filename, shape, shape_rank,
element_type, &pixel_data,
&buffer_length));
iree_tools_utils_buffer_view_load_params_t params = {
.pixel_data = pixel_data,
.pixel_data_length = buffer_length,
.input_range = input_range,
.input_range_length = input_range_length,
};
iree_status_t status = iree_hal_buffer_view_generate_buffer(
allocator, shape_rank, shape, element_type, encoding_type,
(iree_hal_buffer_params_t){
.type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
.usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
IREE_HAL_BUFFER_USAGE_TRANSFER |
IREE_HAL_BUFFER_USAGE_MAPPING,
},
iree_tools_utils_buffer_view_load_image_rescaled, &params,
out_buffer_view);
stbi_image_free(pixel_data);
IREE_TRACE_ZONE_END(z0);
return status;
}


@@ -1,77 +0,0 @@
// Copyright 2021 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
#define IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
#include "iree/base/api.h"
#include "iree/hal/api.h"
#include "iree/hal/buffer_view.h"
#if __cplusplus
extern "C" {
#endif // __cplusplus
// Loads the image at |filename| into |out_pixel_data| and sets
// |out_buffer_length| to its length.
//
// The image dimensions must match the width, height, and channel count in |shape|,
// while 2 <= |shape_rank| <= 4 to match the image tensor format.
//
// The file must be in a format supported by stb_image.h.
// The returned |out_pixel_data| buffer must be released by the caller.
iree_status_t iree_tools_utils_load_pixel_data(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length);
// Parses the content of the image file |filename| into a HAL buffer view
// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
// |shape_rank|, and |element_type|, while being allocated by |allocator|.
//
// The |element_type| has to be SINT_8 or UINT_8. For FLOAT_32, use
// |iree_tools_utils_buffer_view_from_image_rescaled| instead.
//
// The returned |out_buffer_view| must be released by the caller.
iree_status_t iree_tools_utils_buffer_view_from_image(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view);
// Parses the content of the image file |filename| into a HAL buffer view
// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
// |shape_rank|, and |element_type|, while being allocated by |allocator|.
// The value in |out_buffer_view| is rescaled with |input_range|.
//
// The |element_type| has to be FLOAT_32. For SINT_8 or UINT_8, use
// |iree_tools_utils_buffer_view_from_image| instead.
//
// The returned |out_buffer_view| must be released by the caller.
iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
const iree_string_view_t filename, const iree_hal_dim_t* shape,
iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
iree_hal_allocator_t* allocator, const float* input_range,
iree_host_size_t input_range_length,
iree_hal_buffer_view_t** out_buffer_view);
// Normalize uint8_t |pixel_data| of size |pixel_count| to the float buffer
// |out_buffer| with the range |input_range|.
//
// float32_x = (uint8_x - 127.5) / 127.5 * input_scale + input_offset, where
//   input_scale = abs(input_range[1] - input_range[0]) / 2
//   input_offset = (input_range[0] + input_range[1]) / 2
//
// |out_buffer| needs to be allocated before the call.
iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
const uint8_t* pixel_data, iree_host_size_t pixel_count,
const float* input_range, iree_host_size_t input_range_length,
float* out_buffer);
#if __cplusplus
}
#endif // __cplusplus
#endif // IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
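To make the rescaling arithmetic above concrete, here is a small Python mirror of `iree_tools_utils_pixel_rescaled_to_buffer` (illustrative only, not part of the sample):

```python
# Illustrative Python mirror of iree_tools_utils_pixel_rescaled_to_buffer.
def pixel_rescale(pixel_data, input_range):
    input_scale = abs(input_range[1] - input_range[0]) / 2.0
    input_offset = (input_range[0] + input_range[1]) / 2.0
    # Map uint8 [0, 255] onto [-1, 1], then into [min, max] of input_range.
    return [(p - 127.5) / 127.5 * input_scale + input_offset
            for p in pixel_data]

# With input_range = [0.0, 1.0]: 0 -> 0.0 and 255 -> 1.0.
assert pixel_rescale([0, 255], [0.0, 1.0]) == [0.0, 1.0]
```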


@@ -1,121 +0,0 @@
// Copyright 2021 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// This sample uses image_util to load a hand-written image as an
// iree_hal_buffer_view_t then passes it to the bytecode module built from
// mnist.mlir on the CPU backend with the local-task driver.
#include <float.h>
#include "image_util.h"
#include "iree/runtime/api.h"
#include "mnist_bytecode_module_c.h"
iree_status_t Run(const iree_string_view_t image_path) {
iree_runtime_instance_options_t instance_options;
iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
&instance_options);
iree_runtime_instance_options_use_all_available_drivers(&instance_options);
iree_runtime_instance_t* instance = NULL;
IREE_RETURN_IF_ERROR(iree_runtime_instance_create(
&instance_options, iree_allocator_system(), &instance));
// TODO(#5724): move device selection into the compiled modules.
iree_hal_device_t* device = NULL;
IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(
instance, iree_make_cstring_view("local-task"), &device));
// Create one session per loaded module to hold the module state.
iree_runtime_session_options_t session_options;
iree_runtime_session_options_initialize(&session_options);
iree_runtime_session_t* session = NULL;
IREE_RETURN_IF_ERROR(iree_runtime_session_create_with_device(
instance, &session_options, device,
iree_runtime_instance_host_allocator(instance), &session));
iree_hal_device_release(device);
const struct iree_file_toc_t* module_file =
iree_samples_vision_inference_mnist_bytecode_module_create();
IREE_RETURN_IF_ERROR(iree_runtime_session_append_bytecode_module_from_memory(
session, iree_make_const_byte_span(module_file->data, module_file->size),
iree_allocator_null()));
iree_runtime_call_t call;
IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(
session, iree_make_cstring_view("module.predict"), &call));
// Prepare the input hal buffer view with image_util library.
// The input of the mnist model is a single 28x28 pixel image as a
// tensor<1x28x28x1xf32>, with pixels in [0.0, 1.0].
iree_hal_buffer_view_t* buffer_view = NULL;
iree_hal_dim_t buffer_shape[] = {1, 28, 28, 1};
iree_hal_element_type_t hal_element_type = IREE_HAL_ELEMENT_TYPE_FLOAT_32;
float input_range[2] = {0.0f, 1.0f};
IREE_RETURN_IF_ERROR(
iree_tools_utils_buffer_view_from_image_rescaled(
image_path, buffer_shape, IREE_ARRAYSIZE(buffer_shape),
hal_element_type, iree_hal_device_allocator(device), input_range,
IREE_ARRAYSIZE(input_range), &buffer_view),
"load image");
IREE_RETURN_IF_ERROR(
iree_runtime_call_inputs_push_back_buffer_view(&call, buffer_view));
iree_hal_buffer_view_release(buffer_view);
IREE_RETURN_IF_ERROR(iree_runtime_call_invoke(&call, /*flags=*/0));
// Get the result buffers from the invocation.
iree_hal_buffer_view_t* ret_buffer_view = NULL;
IREE_RETURN_IF_ERROR(
iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret_buffer_view));
// Read back the results. The output of the mnist model is a 1x10 tensor of
// prediction confidence values, one for each digit in [0, 9].
float predictions[1 * 10] = {0.0f};
IREE_RETURN_IF_ERROR(iree_hal_device_transfer_d2h(
iree_runtime_session_device(session),
iree_hal_buffer_view_buffer(ret_buffer_view), 0, predictions,
sizeof(predictions), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
iree_infinite_timeout()));
iree_hal_buffer_view_release(ret_buffer_view);
// Get the index of the highest-confidence prediction from the output.
float result_val = -FLT_MAX;  // FLT_MIN is the smallest positive float, not the most negative.
int result_idx = 0;
for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(predictions); ++i) {
if (predictions[i] > result_val) {
result_val = predictions[i];
result_idx = i;
}
}
fprintf(stdout, "Detected number: %d\n", result_idx);
iree_runtime_call_deinitialize(&call);
iree_runtime_session_release(session);
iree_runtime_instance_release(instance);
return iree_ok_status();
}
int main(int argc, char** argv) {
if (argc > 2) {
fprintf(stderr, "Usage: iree-run-mnist-module <image file>\n");
return -1;
}
iree_string_view_t image_path;
if (argc == 1) {
image_path = iree_make_cstring_view("mnist_test.png");
} else {
image_path = iree_make_cstring_view(argv[1]);
}
iree_status_t result = Run(image_path);
if (!iree_status_is_ok(result)) {
iree_status_fprint(stderr, result);
iree_status_ignore(result);
return -1;
}
iree_status_ignore(result);
return 0;
}

(Deleted binary image file: 261 B)


@@ -1,116 +0,0 @@
# Copyright 2022 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
if(NOT IREE_TARGET_BACKEND_VULKAN_SPIRV OR
NOT IREE_HAL_DRIVER_VULKAN)
message(STATUS "Missing Vulkan backend and/or driver, skipping vulkan_gui sample")
return()
endif()
# This target statically links against Vulkan.
# One way to achieve this is by installing the Vulkan SDK from
# https://vulkan.lunarg.com/.
include(FindVulkan)
if(NOT Vulkan_FOUND)
message(STATUS "Could not find Vulkan, skipping vulkan_gui sample")
return()
endif()
# vcpkg install sdl2[vulkan]
# tested with versions 2.0.14#4 - 2.0.22#1
find_package(SDL2)
if(NOT SDL2_FOUND)
message(STATUS "Could not find SDL2, skipping vulkan_gui sample")
return()
endif()
FetchContent_Declare(
imgui
GIT_REPOSITORY https://github.com/ocornut/imgui
GIT_TAG master
)
FetchContent_MakeAvailable(imgui)
# Dear ImGui
set(IMGUI_DIR ${CMAKE_BINARY_DIR}/_deps/imgui-src)
message("Looking for Imgui in ${IMGUI_DIR}")
include_directories(${IMGUI_DIR} ${IMGUI_DIR}/backends ..)
function(iree_vulkan_sample)
cmake_parse_arguments(
_RULE
""
"NAME"
"SRCS"
${ARGN}
)
# Define the sample executable.
set(_NAME "${_RULE_NAME}")
set(SRCS "${_RULE_SRCS}")
add_executable(${_NAME} "")
target_sources(${_NAME}
PRIVATE
${SRCS}
"${IMGUI_DIR}/backends/imgui_impl_sdl.cpp"
"${IMGUI_DIR}/backends/imgui_impl_vulkan.cpp"
"${IMGUI_DIR}/imgui.cpp"
"${IMGUI_DIR}/imgui_draw.cpp"
"${IMGUI_DIR}/imgui_demo.cpp"
"${IMGUI_DIR}/imgui_tables.cpp"
"${IMGUI_DIR}/imgui_widgets.cpp"
)
set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "${_NAME}")
target_include_directories(${_NAME} PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
target_link_libraries(${_NAME}
SDL2::SDL2
Vulkan::Vulkan
iree_runtime_runtime
iree_base_internal_main
iree_hal_drivers_vulkan_registration_registration
iree_modules_hal_hal
iree_vm_vm
iree_vm_bytecode_module
iree_vm_cc
iree_tooling_vm_util_cc
iree_tooling_context_util
)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
set(_GUI_LINKOPTS "-SUBSYSTEM:CONSOLE")
else()
set(_GUI_LINKOPTS "")
endif()
target_link_options(${_NAME}
PRIVATE
${_GUI_LINKOPTS}
)
endfunction()
iree_vulkan_sample(
NAME
iree-samples-resnet-vulkan-gui
SRCS
vulkan_resnet_inference_gui.cc
)
iree_vulkan_sample(
NAME
iree-vulkan-gui
SRCS
vulkan_inference_gui.cc
)
message(STATUS "Configured vulkan_gui sample successfully")


@@ -1,4 +0,0 @@
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
%0 = "arith.mulf"(%arg0, %arg1) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
return %0 : tensor<4xf32>
}
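For reference, this module can be compiled and invoked end-to-end with IREE's Python bindings; a minimal sketch, assuming the `iree-compiler` and `iree-runtime` pip packages (not part of this sample):

```python
# Hypothetical sketch: compile and run simple_mul with IREE's Python bindings.
# Assumes `pip install iree-compiler iree-runtime`.
import numpy as np
from iree import compiler as ireec
from iree import runtime as ireert

MLIR = """
func.func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
  %0 = arith.mulf %arg0, %arg1 : tensor<4xf32>
  return %0 : tensor<4xf32>
}
"""

# Compile for the CPU backend, then load on the local-task driver.
vmfb = ireec.compile_str(MLIR, target_backends=["llvm-cpu"])
module = ireert.load_vm_flatbuffer(vmfb, driver="local-task")

a = np.array([1, 2, 3, 4], dtype=np.float32)
b = np.array([5, 6, 7, 8], dtype=np.float32)
print(module.simple_mul(a, b).to_host())  # [ 5. 12. 21. 32.]
```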

(Deleted binary image file: 14 KiB)

File diff suppressed because it is too large


@@ -1,957 +0,0 @@
// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Vulkan Graphics + IREE API Integration Sample.
#include <SDL.h>
#include <SDL_vulkan.h>
#include <imgui.h>
#include <imgui_impl_sdl.h>
#include <imgui_impl_vulkan.h>
#include <vulkan/vulkan.h>
#include <cstring>
#include <set>
#include <vector>
#include <fstream>
#include <array>
#include <cstdio>
#include <cstdlib>
#include <iterator>
#include <string>
#include <utility>
#include "iree/hal/drivers/vulkan/api.h"
// IREE's C API:
#include "iree/base/api.h"
#include "iree/hal/api.h"
#include "iree/hal/drivers/vulkan/registration/driver_module.h"
#include "iree/modules/hal/module.h"
#include "iree/vm/api.h"
#include "iree/vm/bytecode_module.h"
#include "iree/vm/ref_cc.h"
// iree-run-module
#include "iree/base/internal/flags.h"
#include "iree/base/status_cc.h"
#include "iree/base/tracing.h"
#include "iree/modules/hal/types.h"
#include "iree/tooling/comparison.h"
#include "iree/tooling/context_util.h"
#include "iree/tooling/vm_util_cc.h"
// Other dependencies (helpers, etc.)
#include "iree/base/internal/main.h"
#define IMGUI_UNLIMITED_FRAME_RATE
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
IREE_FLAG(string, entry_function, "",
"Name of a function contained in the module specified by module_file "
"to run.");
// TODO(benvanik): move --function_input= flag into a util.
static iree_status_t parse_function_io(iree_string_view_t flag_name,
void* storage,
iree_string_view_t value) {
auto* list = (std::vector<std::string>*)storage;
list->push_back(std::string(value.data, value.size));
return iree_ok_status();
}
static void print_function_io(iree_string_view_t flag_name, void* storage,
FILE* file) {
auto* list = (std::vector<std::string>*)storage;
if (list->empty()) {
fprintf(file, "# --%.*s=\n", (int)flag_name.size, flag_name.data);
} else {
for (size_t i = 0; i < list->size(); ++i) {
fprintf(file, "--%.*s=\"%s\"\n", (int)flag_name.size, flag_name.data,
list->at(i).c_str());
}
}
}
static std::vector<std::string> FLAG_function_inputs;
IREE_FLAG_CALLBACK(
parse_function_io, print_function_io, &FLAG_function_inputs, function_input,
"An input (a) value or (b) buffer of the format:\n"
" (a) scalar value\n"
" value\n"
" e.g.: --function_input=\"3.14\"\n"
" (b) buffer:\n"
" [shape]xtype=[value]\n"
" e.g.: --function_input=\"2x2xi32=1 2 3 4\"\n"
"Optionally, brackets may be used to separate the element values:\n"
" 2x2xi32=[[1 2][3 4]]\n"
"Raw binary files can be read to provide buffer contents:\n"
" 2x2xi32=@some/file.bin\n"
"numpy npy files (from numpy.save) can be read to provide 1+ values:\n"
" @some.npy\n"
"Each occurrence of the flag indicates an input in the order they were\n"
"specified on the command line.");
typedef struct iree_file_toc_t {
const char* name; // the file's original name
char* data; // beginning of the file
size_t size; // length of the file
} iree_file_toc_t;
bool load_file(const char* filename, char** pOut, size_t* pSize)
{
FILE* f = fopen(filename, "rb");
if (f == NULL)
{
fprintf(stderr, "Can't open %s\n", filename);
return false;
}
fseek(f, 0L, SEEK_END);
*pSize = ftell(f);
fseek(f, 0L, SEEK_SET);
*pOut = (char*)malloc(*pSize);
size_t size = fread(*pOut, *pSize, 1, f);
fclose(f);
return size != 0;
}
static VkAllocationCallbacks* g_Allocator = NULL;
static VkInstance g_Instance = VK_NULL_HANDLE;
static VkPhysicalDevice g_PhysicalDevice = VK_NULL_HANDLE;
static VkDevice g_Device = VK_NULL_HANDLE;
static uint32_t g_QueueFamily = (uint32_t)-1;
static VkQueue g_Queue = VK_NULL_HANDLE;
static VkPipelineCache g_PipelineCache = VK_NULL_HANDLE;
static VkDescriptorPool g_DescriptorPool = VK_NULL_HANDLE;
static ImGui_ImplVulkanH_Window g_MainWindowData;
static uint32_t g_MinImageCount = 2;
static bool g_SwapChainRebuild = false;
static int g_SwapChainResizeWidth = 0;
static int g_SwapChainResizeHeight = 0;
static void check_vk_result(VkResult err) {
if (err == 0) return;
fprintf(stderr, "VkResult: %d\n", err);
abort();
}
// Returns the names of the Vulkan layers used for the given IREE
// |extensibility_set| and |features|.
std::vector<const char*> GetIreeLayers(
iree_hal_vulkan_extensibility_set_t extensibility_set,
iree_hal_vulkan_features_t features) {
iree_host_size_t required_count;
iree_hal_vulkan_query_extensibility_set(
features, extensibility_set, /*string_capacity=*/0, &required_count,
/*out_string_values=*/NULL);
std::vector<const char*> layers(required_count);
iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
layers.size(), &required_count,
layers.data());
return layers;
}
// Returns the names of the Vulkan extensions used for the given IREE
// |extensibility_set| and |features|.
std::vector<const char*> GetIreeExtensions(
iree_hal_vulkan_extensibility_set_t extensibility_set,
iree_hal_vulkan_features_t features) {
iree_host_size_t required_count;
iree_hal_vulkan_query_extensibility_set(
features, extensibility_set, /*string_capacity=*/0, &required_count,
/*out_string_values=*/NULL);
std::vector<const char*> extensions(required_count);
iree_hal_vulkan_query_extensibility_set(features, extensibility_set,
extensions.size(), &required_count,
extensions.data());
return extensions;
}
// Returns the names of the Vulkan extensions used for the given IREE
// |vulkan_features|.
std::vector<const char*> GetDeviceExtensions(
VkPhysicalDevice physical_device,
iree_hal_vulkan_features_t vulkan_features) {
std::vector<const char*> iree_required_extensions = GetIreeExtensions(
IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
vulkan_features);
std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
vulkan_features);
uint32_t extension_count = 0;
check_vk_result(vkEnumerateDeviceExtensionProperties(
physical_device, nullptr, &extension_count, nullptr));
std::vector<VkExtensionProperties> extension_properties(extension_count);
check_vk_result(vkEnumerateDeviceExtensionProperties(
physical_device, nullptr, &extension_count, extension_properties.data()));
// Merge extensions lists, including optional and required for simplicity.
std::set<const char*> ext_set;
ext_set.insert("VK_KHR_swapchain");
ext_set.insert(iree_required_extensions.begin(),
iree_required_extensions.end());
for (int i = 0; i < iree_optional_extensions.size(); ++i) {
const char* optional_extension = iree_optional_extensions[i];
for (int j = 0; j < extension_count; ++j) {
if (strcmp(optional_extension, extension_properties[j].extensionName) ==
0) {
ext_set.insert(optional_extension);
break;
}
}
}
std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
return extensions;
}
std::vector<const char*> GetInstanceLayers(
iree_hal_vulkan_features_t vulkan_features) {
// Query the layers that IREE wants / needs.
std::vector<const char*> required_layers = GetIreeLayers(
IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED, vulkan_features);
std::vector<const char*> optional_layers = GetIreeLayers(
IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL, vulkan_features);
// Query the layers that are available on the Vulkan ICD.
uint32_t layer_property_count = 0;
check_vk_result(
vkEnumerateInstanceLayerProperties(&layer_property_count, NULL));
std::vector<VkLayerProperties> layer_properties(layer_property_count);
check_vk_result(vkEnumerateInstanceLayerProperties(&layer_property_count,
layer_properties.data()));
// Match between optional/required and available layers.
std::vector<const char*> layers;
for (const char* layer_name : required_layers) {
bool found = false;
for (const auto& layer_property : layer_properties) {
if (std::strcmp(layer_name, layer_property.layerName) == 0) {
found = true;
layers.push_back(layer_name);
break;
}
}
if (!found) {
fprintf(stderr, "Required layer %s not available\n", layer_name);
abort();
}
}
for (const char* layer_name : optional_layers) {
for (const auto& layer_property : layer_properties) {
if (std::strcmp(layer_name, layer_property.layerName) == 0) {
layers.push_back(layer_name);
break;
}
}
}
return layers;
}
std::vector<const char*> GetInstanceExtensions(
SDL_Window* window, iree_hal_vulkan_features_t vulkan_features) {
// Ask SDL for its list of required instance extensions.
uint32_t sdl_extensions_count = 0;
SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count, NULL);
std::vector<const char*> sdl_extensions(sdl_extensions_count);
SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count,
sdl_extensions.data());
std::vector<const char*> iree_required_extensions = GetIreeExtensions(
IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED,
vulkan_features);
std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
vulkan_features);
// Merge extensions lists, including optional and required for simplicity.
std::set<const char*> ext_set;
ext_set.insert(sdl_extensions.begin(), sdl_extensions.end());
ext_set.insert(iree_required_extensions.begin(),
iree_required_extensions.end());
ext_set.insert(iree_optional_extensions.begin(),
iree_optional_extensions.end());
std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
return extensions;
}
void SetupVulkan(iree_hal_vulkan_features_t vulkan_features,
const char** instance_layers, uint32_t instance_layers_count,
const char** instance_extensions,
uint32_t instance_extensions_count,
const VkAllocationCallbacks* allocator, VkInstance* instance,
uint32_t* queue_family_index,
VkPhysicalDevice* physical_device, VkQueue* queue,
VkDevice* device, VkDescriptorPool* descriptor_pool) {
VkResult err;
// Create Vulkan Instance
{
VkInstanceCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
create_info.enabledLayerCount = instance_layers_count;
create_info.ppEnabledLayerNames = instance_layers;
create_info.enabledExtensionCount = instance_extensions_count;
create_info.ppEnabledExtensionNames = instance_extensions;
err = vkCreateInstance(&create_info, allocator, instance);
check_vk_result(err);
}
// Select GPU
{
uint32_t gpu_count;
err = vkEnumeratePhysicalDevices(*instance, &gpu_count, NULL);
check_vk_result(err);
IM_ASSERT(gpu_count > 0);
VkPhysicalDevice* gpus =
(VkPhysicalDevice*)malloc(sizeof(VkPhysicalDevice) * gpu_count);
err = vkEnumeratePhysicalDevices(*instance, &gpu_count, gpus);
check_vk_result(err);
// Use the first reported GPU for simplicity.
*physical_device = gpus[0];
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(*physical_device, &properties);
fprintf(stdout, "Selected Vulkan device: '%s'\n", properties.deviceName);
free(gpus);
}
// Select queue family. We want a single queue with graphics and compute for
// simplicity, but we could also discover and use separate queues for each.
{
uint32_t count;
vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, NULL);
VkQueueFamilyProperties* queues = (VkQueueFamilyProperties*)malloc(
sizeof(VkQueueFamilyProperties) * count);
vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, queues);
for (uint32_t i = 0; i < count; i++) {
if (queues[i].queueFlags &
(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) {
*queue_family_index = i;
break;
}
}
free(queues);
IM_ASSERT(*queue_family_index != (uint32_t)-1);
}
// Create Logical Device (with 1 queue)
{
std::vector<const char*> device_extensions =
GetDeviceExtensions(*physical_device, vulkan_features);
const float queue_priority[] = {1.0f};
VkDeviceQueueCreateInfo queue_info = {};
queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queue_info.queueFamilyIndex = *queue_family_index;
queue_info.queueCount = 1;
queue_info.pQueuePriorities = queue_priority;
VkDeviceCreateInfo create_info = {};
create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
create_info.queueCreateInfoCount = 1;
create_info.pQueueCreateInfos = &queue_info;
create_info.enabledExtensionCount =
static_cast<uint32_t>(device_extensions.size());
create_info.ppEnabledExtensionNames = device_extensions.data();
// Enable timeline semaphores.
VkPhysicalDeviceFeatures2 features2;
memset(&features2, 0, sizeof(features2));
features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
create_info.pNext = &features2;
VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
memset(&semaphore_features, 0, sizeof(semaphore_features));
semaphore_features.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
semaphore_features.pNext = features2.pNext;
features2.pNext = &semaphore_features;
semaphore_features.timelineSemaphore = VK_TRUE;
err = vkCreateDevice(*physical_device, &create_info, allocator, device);
check_vk_result(err);
vkGetDeviceQueue(*device, *queue_family_index, 0, queue);
}
// Create Descriptor Pool
{
VkDescriptorPoolSize pool_sizes[] = {
{VK_DESCRIPTOR_TYPE_SAMPLER, 1000},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1000},
{VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1000},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1000},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1000},
{VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1000},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1000},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1000},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1000},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1000},
{VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1000}};
VkDescriptorPoolCreateInfo pool_info = {};
pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
pool_info.maxSets = 1000 * IREE_ARRAYSIZE(pool_sizes);
pool_info.poolSizeCount = (uint32_t)IREE_ARRAYSIZE(pool_sizes);
pool_info.pPoolSizes = pool_sizes;
err =
vkCreateDescriptorPool(*device, &pool_info, allocator, descriptor_pool);
check_vk_result(err);
}
}
void SetupVulkanWindow(ImGui_ImplVulkanH_Window* wd,
const VkAllocationCallbacks* allocator,
VkInstance instance, uint32_t queue_family_index,
VkPhysicalDevice physical_device, VkDevice device,
VkSurfaceKHR surface, int width, int height,
uint32_t min_image_count) {
wd->Surface = surface;
// Check for WSI support
VkBool32 res;
vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index,
wd->Surface, &res);
if (res != VK_TRUE) {
fprintf(stderr, "Error no WSI support on physical device 0\n");
exit(-1);
}
// Select Surface Format
const VkFormat requestSurfaceImageFormat[] = {
VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM,
VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM};
const VkColorSpaceKHR requestSurfaceColorSpace =
VK_COLORSPACE_SRGB_NONLINEAR_KHR;
wd->SurfaceFormat = ImGui_ImplVulkanH_SelectSurfaceFormat(
physical_device, wd->Surface, requestSurfaceImageFormat,
(size_t)IREE_ARRAYSIZE(requestSurfaceImageFormat),
requestSurfaceColorSpace);
// Select Present Mode
#ifdef IMGUI_UNLIMITED_FRAME_RATE
VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_MAILBOX_KHR,
VK_PRESENT_MODE_IMMEDIATE_KHR,
VK_PRESENT_MODE_FIFO_KHR};
#else
VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_FIFO_KHR};
#endif
wd->PresentMode = ImGui_ImplVulkanH_SelectPresentMode(
physical_device, wd->Surface, &present_modes[0],
IREE_ARRAYSIZE(present_modes));
// Create SwapChain, RenderPass, Framebuffer, etc.
IM_ASSERT(min_image_count >= 2);
ImGui_ImplVulkanH_CreateOrResizeWindow(instance, physical_device, device, wd,
queue_family_index, allocator, width,
height, min_image_count);
// Set clear color.
ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
memcpy(&wd->ClearValue.color.float32[0], &clear_color, 4 * sizeof(float));
}
void RenderFrame(ImGui_ImplVulkanH_Window* wd, VkDevice device, VkQueue queue) {
VkResult err;
VkSemaphore image_acquired_semaphore =
wd->FrameSemaphores[wd->SemaphoreIndex].ImageAcquiredSemaphore;
VkSemaphore render_complete_semaphore =
wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
err = vkAcquireNextImageKHR(device, wd->Swapchain, UINT64_MAX,
image_acquired_semaphore, VK_NULL_HANDLE,
&wd->FrameIndex);
check_vk_result(err);
ImGui_ImplVulkanH_Frame* fd = &wd->Frames[wd->FrameIndex];
{
err = vkWaitForFences(
device, 1, &fd->Fence, VK_TRUE,
UINT64_MAX); // wait indefinitely instead of periodically checking
check_vk_result(err);
err = vkResetFences(device, 1, &fd->Fence);
check_vk_result(err);
}
{
err = vkResetCommandPool(device, fd->CommandPool, 0);
check_vk_result(err);
VkCommandBufferBeginInfo info = {};
info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
err = vkBeginCommandBuffer(fd->CommandBuffer, &info);
check_vk_result(err);
}
{
VkRenderPassBeginInfo info = {};
info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
info.renderPass = wd->RenderPass;
info.framebuffer = fd->Framebuffer;
info.renderArea.extent.width = wd->Width;
info.renderArea.extent.height = wd->Height;
info.clearValueCount = 1;
info.pClearValues = &wd->ClearValue;
vkCmdBeginRenderPass(fd->CommandBuffer, &info, VK_SUBPASS_CONTENTS_INLINE);
}
// Record Imgui Draw Data and draw funcs into command buffer
ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), fd->CommandBuffer);
// Submit command buffer
vkCmdEndRenderPass(fd->CommandBuffer);
{
VkPipelineStageFlags wait_stage =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo info = {};
info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
info.waitSemaphoreCount = 1;
info.pWaitSemaphores = &image_acquired_semaphore;
info.pWaitDstStageMask = &wait_stage;
info.commandBufferCount = 1;
info.pCommandBuffers = &fd->CommandBuffer;
info.signalSemaphoreCount = 1;
info.pSignalSemaphores = &render_complete_semaphore;
err = vkEndCommandBuffer(fd->CommandBuffer);
check_vk_result(err);
err = vkQueueSubmit(queue, 1, &info, fd->Fence);
check_vk_result(err);
}
}
void PresentFrame(ImGui_ImplVulkanH_Window* wd, VkQueue queue) {
VkSemaphore render_complete_semaphore =
wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
VkPresentInfoKHR info = {};
info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
info.waitSemaphoreCount = 1;
info.pWaitSemaphores = &render_complete_semaphore;
info.swapchainCount = 1;
info.pSwapchains = &wd->Swapchain;
info.pImageIndices = &wd->FrameIndex;
VkResult err = vkQueuePresentKHR(queue, &info);
check_vk_result(err);
wd->SemaphoreIndex =
(wd->SemaphoreIndex + 1) %
wd->ImageCount; // Now we can use the next set of semaphores
}
static void CleanupVulkan() {
vkDestroyDescriptorPool(g_Device, g_DescriptorPool, g_Allocator);
vkDestroyDevice(g_Device, g_Allocator);
vkDestroyInstance(g_Instance, g_Allocator);
}
static void CleanupVulkanWindow() {
ImGui_ImplVulkanH_DestroyWindow(g_Instance, g_Device, &g_MainWindowData,
g_Allocator);
}
namespace iree {
extern "C" int iree_main(int argc, char** argv) {
iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
if (argc > 1) {
// Avoid iree-run-module spinning endlessly on stdin if the user uses single
// dashes for flags.
printf(
"[ERROR] unexpected positional argument (expected none)."
" Did you use pass a flag with a single dash ('-')?"
" Use '--' instead.\n");
return 1;
}
// --------------------------------------------------------------------------
// Create a window.
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) != 0) {
fprintf(stderr, "Failed to initialize SDL\n");
abort();
return 1;
}
// Setup window
// clang-format off
SDL_WindowFlags window_flags = (SDL_WindowFlags)(
SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
// clang-format on
SDL_Window* window = SDL_CreateWindow(
"IREE Samples - Vulkan Inference GUI", SDL_WINDOWPOS_CENTERED,
SDL_WINDOWPOS_CENTERED, 1280, 720, window_flags);
if (window == nullptr)
{
const char* sdl_err = SDL_GetError();
fprintf(stderr, "Error, SDL_CreateWindow returned: %s\n", sdl_err);
abort();
return 1;
}
// Setup Vulkan
iree_hal_vulkan_features_t iree_vulkan_features =
static_cast<iree_hal_vulkan_features_t>(
IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS |
IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
std::vector<const char*> layers = GetInstanceLayers(iree_vulkan_features);
std::vector<const char*> extensions =
GetInstanceExtensions(window, iree_vulkan_features);
SetupVulkan(iree_vulkan_features, layers.data(),
static_cast<uint32_t>(layers.size()), extensions.data(),
static_cast<uint32_t>(extensions.size()), g_Allocator,
&g_Instance, &g_QueueFamily, &g_PhysicalDevice, &g_Queue,
&g_Device, &g_DescriptorPool);
// Create Window Surface
VkSurfaceKHR surface;
VkResult err;
if (SDL_Vulkan_CreateSurface(window, g_Instance, &surface) == 0) {
fprintf(stderr, "Failed to create Vulkan surface.\n");
abort();
return 1;
}
// Create Framebuffers
int w, h;
SDL_GetWindowSize(window, &w, &h);
ImGui_ImplVulkanH_Window* wd = &g_MainWindowData;
SetupVulkanWindow(wd, g_Allocator, g_Instance, g_QueueFamily,
g_PhysicalDevice, g_Device, surface, w, h, g_MinImageCount);
// Setup Dear ImGui context
IMGUI_CHECKVERSION();
ImGui::CreateContext();
ImGuiIO& io = ImGui::GetIO();
(void)io;
ImGui::StyleColorsDark();
// Setup Platform/Renderer bindings
ImGui_ImplSDL2_InitForVulkan(window);
ImGui_ImplVulkan_InitInfo init_info = {};
init_info.Instance = g_Instance;
init_info.PhysicalDevice = g_PhysicalDevice;
init_info.Device = g_Device;
init_info.QueueFamily = g_QueueFamily;
init_info.Queue = g_Queue;
init_info.PipelineCache = g_PipelineCache;
init_info.DescriptorPool = g_DescriptorPool;
init_info.Allocator = g_Allocator;
init_info.MinImageCount = g_MinImageCount;
init_info.ImageCount = wd->ImageCount;
init_info.CheckVkResultFn = check_vk_result;
ImGui_ImplVulkan_Init(&init_info, wd->RenderPass);
// Upload Fonts
{
// Use any command queue
VkCommandPool command_pool = wd->Frames[wd->FrameIndex].CommandPool;
VkCommandBuffer command_buffer = wd->Frames[wd->FrameIndex].CommandBuffer;
err = vkResetCommandPool(g_Device, command_pool, 0);
check_vk_result(err);
VkCommandBufferBeginInfo begin_info = {};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
begin_info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
err = vkBeginCommandBuffer(command_buffer, &begin_info);
check_vk_result(err);
ImGui_ImplVulkan_CreateFontsTexture(command_buffer);
VkSubmitInfo end_info = {};
end_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
end_info.commandBufferCount = 1;
end_info.pCommandBuffers = &command_buffer;
err = vkEndCommandBuffer(command_buffer);
check_vk_result(err);
err = vkQueueSubmit(g_Queue, 1, &end_info, VK_NULL_HANDLE);
check_vk_result(err);
err = vkDeviceWaitIdle(g_Device);
check_vk_result(err);
ImGui_ImplVulkan_DestroyFontUploadObjects();
}
// Demo state.
bool show_iree_window = true;
// --------------------------------------------------------------------------
// Setup IREE.
// Check API version.
iree_api_version_t actual_version;
iree_status_t status =
iree_api_version_check(IREE_API_VERSION_LATEST, &actual_version);
if (iree_status_is_ok(status)) {
fprintf(stdout, "IREE runtime API version: %d\n", actual_version);
} else {
fprintf(stderr, "Unsupported runtime API version: %d\n", actual_version);
abort();
}
// Create a runtime Instance.
iree_vm_instance_t* iree_instance = nullptr;
IREE_CHECK_OK(
iree_vm_instance_create(iree_allocator_system(), &iree_instance));
// Register HAL drivers and VM module types.
IREE_CHECK_OK(iree_hal_vulkan_driver_module_register(
iree_hal_driver_registry_default()));
IREE_CHECK_OK(iree_hal_module_register_all_types(iree_instance));
// Create IREE Vulkan Driver and Device, sharing our VkInstance/VkDevice.
fprintf(stdout, "Creating Vulkan driver/device\n");
// Load symbols from our static `vkGetInstanceProcAddr` for IREE to use.
iree_hal_vulkan_syms_t* iree_vk_syms = nullptr;
IREE_CHECK_OK(iree_hal_vulkan_syms_create(
reinterpret_cast<void*>(&vkGetInstanceProcAddr), iree_allocator_system(),
&iree_vk_syms));
// Create the driver sharing our VkInstance.
iree_hal_driver_t* iree_vk_driver = nullptr;
iree_string_view_t driver_identifier = iree_make_cstring_view("vulkan");
// Value-initialize so device_options (consumed below when wrapping the
// device) starts zeroed rather than indeterminate.
iree_hal_vulkan_driver_options_t driver_options = {};
driver_options.api_version = VK_API_VERSION_1_0;
driver_options.requested_features = static_cast<iree_hal_vulkan_features_t>(
IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
IREE_CHECK_OK(iree_hal_vulkan_driver_create_using_instance(
driver_identifier, &driver_options, iree_vk_syms, g_Instance,
iree_allocator_system(), &iree_vk_driver));
// Create a device sharing our VkDevice and queue.
// We could also create a separate (possibly low priority) compute queue for
// IREE, and/or provide a dedicated transfer queue.
iree_string_view_t device_identifier = iree_make_cstring_view("vulkan");
iree_hal_vulkan_queue_set_t compute_queue_set;
compute_queue_set.queue_family_index = g_QueueFamily;
compute_queue_set.queue_indices = 1 << 0;
iree_hal_vulkan_queue_set_t transfer_queue_set;
transfer_queue_set.queue_family_index = g_QueueFamily;
transfer_queue_set.queue_indices = 0;  // Empty set: transfers reuse the compute queue.
iree_hal_device_t* iree_vk_device = nullptr;
IREE_CHECK_OK(iree_hal_vulkan_wrap_device(
device_identifier, &driver_options.device_options, iree_vk_syms,
g_Instance, g_PhysicalDevice, g_Device, &compute_queue_set,
&transfer_queue_set, iree_allocator_system(), &iree_vk_device));
// Create a HAL module using the HAL device.
iree_vm_module_t* hal_module = nullptr;
IREE_CHECK_OK(iree_hal_module_create(iree_instance, iree_vk_device,
IREE_HAL_MODULE_FLAG_NONE,
iree_allocator_system(), &hal_module));
// Load bytecode module
//iree_file_toc_t module_file_toc;
//const char network_model[] = "resnet50_tf.vmfb";
//fprintf(stdout, "Loading: %s\n", network_model);
//if (load_file(network_model, &module_file_toc.data, &module_file_toc.size) == false)
//{
// abort();
// return 1;
//}
//fprintf(stdout, "module size: %zu\n", module_file_toc.size);
iree_vm_module_t* bytecode_module = nullptr;
iree_status_t module_status = iree_tooling_load_module_from_flags(
iree_instance, iree_allocator_system(), &bytecode_module);
if (!iree_status_is_ok(module_status)) return -1;
//IREE_CHECK_OK(iree_vm_bytecode_module_create(
// iree_instance,
// iree_const_byte_span_t{
// reinterpret_cast<const uint8_t*>(module_file_toc.data),
// module_file_toc.size},
// iree_allocator_null(), iree_allocator_system(), &bytecode_module));
//// Query for details about what is in the loaded module.
//iree_vm_module_signature_t bytecode_module_signature =
// iree_vm_module_signature(bytecode_module);
//fprintf(stdout, "Module loaded, have <%" PRIhsz "> exported functions:\n",
// bytecode_module_signature.export_function_count);
//for (int i = 0; i < bytecode_module_signature.export_function_count; ++i) {
// iree_vm_function_t function;
// IREE_CHECK_OK(iree_vm_module_lookup_function_by_ordinal(
// bytecode_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function));
// auto function_name = iree_vm_function_name(&function);
// auto function_signature = iree_vm_function_signature(&function);
// fprintf(stdout, " %d: '%.*s' with calling convention '%.*s'\n", i,
// (int)function_name.size, function_name.data,
// (int)function_signature.calling_convention.size,
// function_signature.calling_convention.data);
//}
// Allocate a context that will hold the module state across invocations.
iree_vm_context_t* iree_context = nullptr;
std::vector<iree_vm_module_t*> modules = {hal_module, bytecode_module};
IREE_CHECK_OK(iree_vm_context_create_with_modules(
iree_instance, IREE_VM_CONTEXT_FLAG_NONE, modules.size(), modules.data(),
iree_allocator_system(), &iree_context));
fprintf(stdout, "Context with modules is ready for use\n");
// Lookup the entry point function.
iree_vm_function_t main_function;
const char kMainFunctionName[] = "module.forward";
IREE_CHECK_OK(iree_vm_context_resolve_function(
iree_context,
iree_string_view_t{kMainFunctionName, sizeof(kMainFunctionName) - 1},
&main_function));
iree_string_view_t main_function_name = iree_vm_function_name(&main_function);
fprintf(stdout, "Resolved main function named '%.*s'\n",
(int)main_function_name.size, main_function_name.data);
// --------------------------------------------------------------------------
// Write inputs into mappable buffers.
iree_hal_allocator_t* allocator =
iree_hal_device_allocator(iree_vk_device);
//iree_hal_memory_type_t input_memory_type =
// static_cast<iree_hal_memory_type_t>(
// IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
// IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
//iree_hal_buffer_usage_t input_buffer_usage =
// static_cast<iree_hal_buffer_usage_t>(IREE_HAL_BUFFER_USAGE_DEFAULT);
//iree_hal_buffer_params_t buffer_params;
//buffer_params.type = input_memory_type;
//buffer_params.usage = input_buffer_usage;
//buffer_params.access = IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE;
// Wrap input buffers in buffer views.
vm::ref<iree_vm_list_t> inputs;
iree_status_t input_status = ParseToVariantList(
allocator,
iree::span<const std::string>{FLAG_function_inputs.data(),
FLAG_function_inputs.size()},
iree_allocator_system(), &inputs);
if (!iree_status_is_ok(input_status)) return -1;
//vm::ref<iree_vm_list_t> inputs;
//IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, 6, iree_allocator_system(), &inputs));
//iree_hal_buffer_view_t* input0_buffer_view = nullptr;
//constexpr iree_hal_dim_t input_buffer_shape[] = {1, 224, 224, 3};
//IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
// allocator,
// /*shape_rank=*/4, /*shape=*/input_buffer_shape,
// IREE_HAL_ELEMENT_TYPE_FLOAT_32,
// IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
// iree_make_const_byte_span(&input_res50, sizeof(input_res50)),
// &input0_buffer_view));
//auto input0_buffer_view_ref = iree_hal_buffer_view_move_ref(input0_buffer_view);
//IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), &input0_buffer_view_ref));
// Prepare outputs list to accept results from the invocation.
vm::ref<iree_vm_list_t> outputs;
constexpr iree_hal_dim_t kOutputCount = 1000;
// Note: the capacity argument counts list elements, not bytes.
IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, kOutputCount,
iree_allocator_system(), &outputs));
// --------------------------------------------------------------------------
// Main loop.
bool done = false;
while (!done) {
SDL_Event event;
while (SDL_PollEvent(&event)) {
ImGui_ImplSDL2_ProcessEvent(&event);
if (event.type == SDL_QUIT) done = true;
if (event.type == SDL_WINDOWEVENT &&
event.window.event == SDL_WINDOWEVENT_RESIZED &&
event.window.windowID == SDL_GetWindowID(window)) {
g_SwapChainResizeWidth = (int)event.window.data1;
g_SwapChainResizeHeight = (int)event.window.data2;
g_SwapChainRebuild = true;
}
}
if (g_SwapChainRebuild) {
g_SwapChainRebuild = false;
ImGui_ImplVulkan_SetMinImageCount(g_MinImageCount);
ImGui_ImplVulkanH_CreateOrResizeWindow(
g_Instance, g_PhysicalDevice, g_Device, &g_MainWindowData,
g_QueueFamily, g_Allocator, g_SwapChainResizeWidth,
g_SwapChainResizeHeight, g_MinImageCount);
g_MainWindowData.FrameIndex = 0;
}
// Start the Dear ImGui frame
ImGui_ImplVulkan_NewFrame();
ImGui_ImplSDL2_NewFrame(window);
ImGui::NewFrame();
// Custom window.
{
ImGui::Begin("IREE Vulkan Integration Demo", &show_iree_window);
ImGui::Separator();
// ImGui Inputs for two input tensors.
// Run computation whenever any of the values changes.
static bool dirty = true;
if (dirty) {
// Synchronously invoke the function.
IREE_CHECK_OK(iree_vm_invoke(iree_context, main_function,
IREE_VM_INVOCATION_FLAG_NONE,
/*policy=*/nullptr, inputs.get(),
outputs.get(), iree_allocator_system()));
// Keep 'dirty' set so the invocation runs every frame; that makes it easy
// to capture with tools like RenderDoc, RGP, etc.
dirty = true;
}
// Framerate counter.
ImGui::Text("Application average %.3f ms/frame (%.1f FPS)",
1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate);
ImGui::End();
}
// Rendering
ImGui::Render();
RenderFrame(wd, g_Device, g_Queue);
PresentFrame(wd, g_Queue);
}
// --------------------------------------------------------------------------
// --------------------------------------------------------------------------
// Cleanup
iree_vm_module_release(hal_module);
iree_vm_module_release(bytecode_module);
iree_vm_context_release(iree_context);
iree_hal_device_release(iree_vk_device);
iree_hal_allocator_release(allocator);
iree_hal_driver_release(iree_vk_driver);
iree_hal_vulkan_syms_release(iree_vk_syms);
iree_vm_instance_release(iree_instance);
err = vkDeviceWaitIdle(g_Device);
check_vk_result(err);
ImGui_ImplVulkan_Shutdown();
ImGui_ImplSDL2_Shutdown();
ImGui::DestroyContext();
CleanupVulkanWindow();
CleanupVulkan();
SDL_DestroyWindow(window);
SDL_Quit();
// --------------------------------------------------------------------------
return 0;
}
} // namespace iree
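
For comparison, the same load-and-invoke flow is much shorter through IREE's Python bindings. The sketch below is illustrative only and is not part of the sample above: it assumes an iree-runtime wheel with the Vulkan driver and a module compiled to resnet50_tf.vmfb, and exact binding signatures vary across IREE releases.

import numpy as np
import iree.runtime as ireert

# Assumed names: "vulkan" driver, "resnet50_tf.vmfb", and a "module.forward"
# entry point taking a 1x224x224x3 float32 tensor -- mirroring the C++ sample.
config = ireert.Config("vulkan")
with open("resnet50_tf.vmfb", "rb") as f:
    vm_module = ireert.VmModule.from_flatbuffer(config.vm_instance, f.read())
ctx = ireert.SystemContext(vm_modules=[vm_module], config=config)
forward = ctx.modules.module["forward"]
result = forward(np.zeros((1, 224, 224, 3), dtype=np.float32))
print(result.to_host() if hasattr(result, "to_host") else result)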

File diff suppressed because it is too large.


@@ -10,7 +10,7 @@ from utils import get_datasets
shark_root = Path(__file__).parent.parent
demo_css = shark_root.joinpath("web/demo.css").resolve()
nodlogo_loc = shark_root.joinpath("web/models/stable_diffusion/logos/nod-logo.png")
nodlogo_loc = shark_root.joinpath("web/models/stable_diffusion/logos/amd-logo.jpg")
with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as shark_web:


@@ -1,41 +1,26 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-f https://openxla.github.io/iree/pip-release-links.html
-r https://raw.githubusercontent.com/llvm/torch-mlir/main/requirements.txt
-r https://raw.githubusercontent.com/llvm/torch-mlir/main/torchvision-requirements.txt
-f https://download.pytorch.org/whl/nightly/cpu
-f https://iree.dev/pip-release-links.html
--pre
setuptools
wheel
torch==2.3.0.dev20240305
shark-turbine @ git+https://github.com/nod-ai/SHARK-Turbine.git@main#subdirectory=core
turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine.git@main#subdirectory=models
# SHARK Runner
tqdm
# SHARK Downloader
google-cloud-storage
# Testing
pytest
pytest-xdist
pytest-forked
shark-turbine @ git+https://github.com/iree-org/iree-turbine.git@main
turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine.git@merge_punet_sdxl#subdirectory=models
diffusers @ git+https://github.com/nod-ai/diffusers@0.29.0.dev0-shark
Pillow
parameterized
# Add transformers, diffusers and scipy since they are most commonly used
# accelerate is now required for diffusers import from ckpt.
accelerate
scipy
transformers==4.43.3
ftfy
gradio==4.19.2
altair
omegaconf
# 0.3.2 doesn't have binaries for arm64
safetensors==0.3.1
safetensors
py-cpuinfo
pydantic==2.4.1 # pin until pyinstaller-hooks-contrib works with beta versions
mpmath==1.3.0
# Testing
pytest
# Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
pefile
pyinstaller


@@ -0,0 +1,77 @@
import requests
from pydantic import BaseModel, Field
import json
def view_json_file(file_path):
content = ""
with open(file_path, "r") as fopen:
content = fopen.read()
return content
# Define the URL of the REST API endpoint
api_url = "http://127.0.0.1:8080/sdapi/v1/txt2img/" # Replace with your actual API URL
class GenerationInputData(BaseModel):
prompt: list = [""]
negative_prompt: list = [""]
hf_model_id: str | None = None
height: int = Field(default=512, ge=128, le=1024, multiple_of=8)
width: int = Field(default=512, ge=128, le=1024, multiple_of=8)
sampler_name: str = "EulerDiscrete"
cfg_scale: float = Field(default=7.5, ge=1)
steps: int = Field(default=20, ge=1, le=100)
seed: int = Field(default=-1)
n_iter: int = Field(default=1)
config: dict | None = None
# Create an instance of GenerationInputData with example arguments
data = GenerationInputData(
prompt=[
"A phoenix made of diamond, black background, dream sequence, rising from coals"
],
negative_prompt=[
"cropped, cartoon, lowres, low quality, black and white, bad scan, pixelated"
],
hf_model_id="shark_sd3.py",
height=512,
width=512,
sampler_name="EulerDiscrete",
cfg_scale=7.5,
steps=20,
seed=-1,
n_iter=1,
config=json.loads(view_json_file("../configs/sd3_phoenix_npu.json")),
)
# Convert the data to a dictionary (pydantic v2; on v1 use .dict())
data_dict = data.model_dump()
# Optional: Define headers if needed (e.g., for authentication)
headers = {
"User-Agent": "PythonTest",
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br",
}
def test_post_request(url, data, headers=None):
try:
# Send a POST request to the API endpoint
response = requests.post(url, json=data, headers=headers)
# Print the status code and response content
print(f"Status Code: {response.status_code}")
print("Response Content:")
# print(response.json()) # Print the JSON response
except requests.RequestException as e:
# Handle any exceptions that occur during the request
print(f"An error occurred: {e}")
# Run the test
test_post_request(api_url, data_dict, headers)
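
The test above only prints the status code; what to do with the body depends on the server's response schema, which this diff does not show. As a hypothetical sketch, if the endpoint follows the common A1111-style convention of returning base64-encoded images under an "images" key (an assumption, not confirmed here), decoding could look like this:

import base64

def save_images(response_json, prefix="txt2img_out"):
    # Assumes an A1111-style payload: {"images": ["<base64 png>", ...]}.
    for i, b64_image in enumerate(response_json.get("images", [])):
        path = f"{prefix}_{i}.png"
        with open(path, "wb") as out:
            out.write(base64.b64decode(b64_image))
        print(f"Wrote {path}")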


@@ -87,9 +87,8 @@ if ($NULL -ne $PyVer) {py -3.11 -m venv .\shark.venv\}
else {python -m venv .\shark.venv\}
.\shark.venv\Scripts\activate
python -m pip install --upgrade pip
pip install wheel
pip install -r requirements.txt
# remove this when the Windows DLL issues caused by LLVM changes are fixed
pip install --force-reinstall https://github.com/openxla/iree/releases/download/candidate-20240326.843/iree_compiler-20240326.843-cp311-cp311-win_amd64.whl https://github.com/openxla/iree/releases/download/candidate-20240326.843/iree_runtime-20240326.843-cp311-cp311-win_amd64.whl
pip install https://github.com/nod-ai/SRT/releases/download/candidate-20240619.291/iree_compiler-20240619.291-cp311-cp311-win_amd64.whl https://github.com/nod-ai/SRT/releases/download/candidate-20240619.291/iree_runtime-20240619.291-cp311-cp311-win_amd64.whl
pip install --pre -r requirements.txt
pip install -e .
Write-Host "Source your venv with ./shark.venv/Scripts/activate"


@@ -84,21 +84,7 @@ else
PYTORCH_URL=https://download.pytorch.org/whl/nightly/cpu/
fi
$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f ${PYTORCH_URL}
if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
T_VER=$($PYTHON -m pip show torch | grep Version)
T_VER_MIN=${T_VER:14:12}
TV_VER=$($PYTHON -m pip show torchvision | grep Version)
TV_VER_MAJ=${TV_VER:9:6}
$PYTHON -m pip uninstall -y torchvision
$PYTHON -m pip install torchvision==${TV_VER_MAJ}${T_VER_MIN} --no-deps -f https://download.pytorch.org/whl/nightly/cpu/torchvision/
if [ $? -eq 0 ];then
echo "Successfully Installed torch + cu118."
else
echo "Could not install torch + cu118." >&2
fi
fi
$PYTHON -m pip install --no-warn-conflicts -e . -f ${RUNTIME} -f ${PYTORCH_URL}
if [[ -z "${NO_BREVITAS}" ]]; then
$PYTHON -m pip install git+https://github.com/Xilinx/brevitas.git@dev


@@ -1,28 +0,0 @@
import importlib
import logging
from torch._dynamo import register_backend
log = logging.getLogger(__name__)
@register_backend
def shark(model, inputs, *, options):
try:
from shark.dynamo_backend.utils import SharkBackend
except ImportError:
log.exception(
"Unable to import SHARK - High Performance Machine Learning Distribution"
"Please install the right version of SHARK that matches the PyTorch version being used. "
"Refer to https://github.com/nod-ai/SHARK/ for details."
)
raise
return SharkBackend(model, inputs, options)
def has_shark():
try:
importlib.import_module("shark")
return True
except ImportError:
return False


@@ -1,78 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch._decomp import get_decompositions
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn.utils import stateless
from torch import fx
import tempfile
class MakeFxModule:
def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
self.model = model
self.inputs = inputs
self.custom_inference_fn = custom_inference_fn
self.training_graph = None
# Doesn't replace the None type.
def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
for node in fx_g.graph.nodes:
if node.op == "output":
# output nodes always have one argument
node_arg = node.args[0]
out_nodes = []
if isinstance(node_arg, list):
# Don't return NoneType elements.
for out_node in node_arg:
if not isinstance(out_node, type(None)):
out_nodes.append(out_node)
# If there is a single tensor/element to be returned, don't
# wrap it in a tuple.
if len(out_nodes) == 1:
node.args = out_nodes
else:
node.args = (tuple(out_nodes),)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
def generate_graph(self):
fx_g = make_fx(
self.custom_inference_fn,
decomposition_table=get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
]
),
)(
dict(self.model.named_parameters()),
dict(self.model.named_buffers()),
self.inputs,
)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
fx_g = self.change_fx_graph_return_to_tuple(fx_g)
ts_g = torch.jit.script(fx_g)
temp = tempfile.NamedTemporaryFile(
suffix="_shark_ts", prefix="temp_ts_"
)
ts_g.save(temp.name)
new_ts = torch.jit.load(temp.name)
self.training_graph = new_ts


@@ -1,154 +0,0 @@
import functools
from typing import List, Optional
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._functorch.compile_utils import strip_overloads
from shark.shark_inference import SharkInference
from torch._decomp import get_decompositions
from torch.func import functionalize
import io
import torch_mlir
# TODO: Control decompositions.
def default_decompositions():
return get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
torch.ops.aten.norm.ScalarOpt_dim,
torch.ops.aten.native_group_norm,
torch.ops.aten.upsample_bilinear2d.vec,
torch.ops.aten.split.Tensor,
torch.ops.aten.split_with_sizes,
torch.ops.aten.native_layer_norm,
torch.ops.aten.masked_fill.Tensor,
torch.ops.aten.masked_fill.Scalar,
]
)
def _remove_nones(fx_g: torch.fx.GraphModule) -> List[int]:
removed_indexes = []
for node in fx_g.graph.nodes:
if node.op == "output":
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, (list, tuple)):
node_arg = list(node_arg)
node_args_len = len(node_arg)
for i in range(node_args_len):
curr_index = node_args_len - (i + 1)
if node_arg[curr_index] is None:
removed_indexes.append(curr_index)
node_arg.pop(curr_index)
node.args = (tuple(node_arg),)
break
if len(removed_indexes) > 0:
fx_g.graph.lint()
fx_g.graph.eliminate_dead_code()
fx_g.recompile()
removed_indexes.sort()
return removed_indexes
def _returns_nothing(fx_g: torch.fx.GraphModule) -> bool:
for node in fx_g.graph.nodes:
if node.op == "output":
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple):
return len(node_arg) == 0
return False
def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule) -> bool:
"""
Replace tuple with tuple element in functions that return one-element tuples.
Returns true if an unwrapping took place, and false otherwise.
"""
unwrapped_tuple = False
for node in fx_g.graph.nodes:
if node.op == "output":
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple):
if len(node_arg) == 1:
node.args = (node_arg[0],)
unwrapped_tuple = True
break
if unwrapped_tuple:
fx_g.graph.lint()
fx_g.recompile()
return unwrapped_tuple
class SharkBackend:
def __init__(
self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
):
self.fx_g = fx_g
self.inputs = inputs
self.shark_module = None
self.device: str = options.get("device", "cpu")
self.was_unwrapped: bool = False
self.none_indices: list = []
self._modify_fx_g()
self.compile()
def _modify_fx_g(self):
self.none_indices = _remove_nones(self.fx_g)
self.was_unwrapped = _unwrap_single_tuple_return(self.fx_g)
def compile(self):
gm = make_fx(
functionalize(self.fx_g),
decomposition_table=default_decompositions(),
)(*self.inputs)
gm.graph.set_codegen(torch.fx.graph.CodeGen())
gm.recompile()
strip_overloads(gm)
ts_g = torch.jit.script(gm)
mlir_module = torch_mlir.compile(
ts_g, self.inputs, output_type="linalg-on-tensors"
)
bytecode_stream = io.BytesIO()
mlir_module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
from shark.shark_inference import SharkInference
shark_module = SharkInference(
mlir_module=bytecode,
device=self.device,
mlir_dialect="tm_tensor",
)
shark_module.compile(extra_args=[])
self.shark_module = shark_module
def __call__(self, *inputs):
np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
np_outs = self.shark_module("forward", np_inputs)
if self.was_unwrapped:
np_outs = [
np_outs,
]
if not isinstance(np_outs, list):
res = torch.from_numpy(np_outs)
return res
result = [torch.from_numpy(x) for x in np_outs]
for r_in in self.none_indices:
result.insert(r_in, None)
result = tuple(result)
return result


@@ -1,25 +0,0 @@
import torch
import shark
def foo(x, a):
if x.shape[0] > 3:
return x + a
else:
return x + 3
shark_options = {"device": "cpu"}
compiled = torch.compile(foo, backend="shark", options=shark_options)
input = torch.ones(4)
x = compiled(input, input)
print(x)
input = torch.ones(3)
x = compiled(input, input)
print(x)


@@ -1,309 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from shark.iree_utils import get_iree_compiled_module"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# torch dynamo related imports\n",
"try:\n",
" import torchdynamo\n",
" from torchdynamo.optimizations.backends import create_backend\n",
" from torchdynamo.optimizations.subgraph import SubGraph\n",
"except ModuleNotFoundError:\n",
" print(\n",
" \"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\"\n",
" )\n",
" exit()\n",
"\n",
"# torch-mlir imports for compiling\n",
"from torch_mlir import compile, OutputType"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"def toy_example(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# compiler that lowers fx_graph to through MLIR\n",
"def __torch_mlir(fx_graph, *args, **kwargs):\n",
" assert isinstance(\n",
" fx_graph, torch.fx.GraphModule\n",
" ), \"Model must be an FX GraphModule.\"\n",
"\n",
" def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
" \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
"\n",
" for node in fx_g.graph.nodes:\n",
" if node.op == \"output\":\n",
" assert (\n",
" len(node.args) == 1\n",
" ), \"Output node must have a single argument\"\n",
" node_arg = node.args[0]\n",
" if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
" node.args = (node_arg[0],)\n",
" fx_g.graph.lint()\n",
" fx_g.recompile()\n",
" return fx_g\n",
"\n",
" fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
" ts_graph = torch.jit.script(fx_graph)\n",
"\n",
" # torchdynamo does munges the args differently depending on whether you use\n",
" # the @torchdynamo.optimize decorator or the context manager\n",
" if isinstance(args, tuple):\n",
" args = list(args)\n",
" assert isinstance(args, list)\n",
" if len(args) == 1 and isinstance(args[0], list):\n",
" args = args[0]\n",
"\n",
" linalg_module = compile(\n",
" ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS\n",
" )\n",
" callable, _ = get_iree_compiled_module(\n",
" linalg_module, \"cuda\", func_name=\"forward\"\n",
" )\n",
"\n",
" def forward(*inputs):\n",
" return callable(*inputs)\n",
"\n",
" return forward"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.40066046 -0.4210303 0.03225489 -0.44849953 0.10370405 -0.04422468\n",
" 0.33262825 -0.20109026 0.02102537 -0.24882983]\n",
"[-0.07824923 -0.17004533 0.06439921 -0.06163602 0.26633525 -1.1560082\n",
" -0.06660341 0.24227881 0.1462235 -0.32055548]\n",
"[-0.01464001 0.442209 -0.0607936 -0.5477967 -0.25226554 -0.08588809\n",
" -0.30497575 0.00061084 -0.50069696 0.2317973 ]\n",
"[ 0.25726247 0.39388427 -0.24093066 0.12316308 -0.01981307 0.5661146\n",
" 0.26199922 0.8123446 -0.01576749 0.30846444]\n",
"[ 0.7878203 -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
" -1.6837492 -0.38442805 0.28220773 -1.5325156 ]\n",
"[ 0.07975311 0.67754704 -0.30927914 0.00347631 -0.07326564 0.01893554\n",
" -0.7518105 -0.03078967 -0.07623022 0.38865626]\n",
"[-0.7751679 -0.5841397 -0.6622711 0.18574935 -0.6049372 0.02844244\n",
" -0.20471913 0.3337415 -0.3619432 -0.35087156]\n",
"[-0.08569919 -0.10775139 -0.02338934 0.21933547 -0.46712473 0.00062137\n",
" -0.58207744 0.06457533 0.18276742 0.03866556]\n",
"[-0.2311981 -0.43036282 0.20561649 -0.10363232 -0.13248594 0.02885137\n",
" -0.31241602 -0.36907142 0.08861586 0.2331427 ]\n",
"[-0.07273526 -0.31246194 -0.24218291 -0.24145737 0.0364486 0.14382267\n",
" -0.00531162 0.15447603 -0.5220248 -0.09016377]\n"
]
}
],
"source": [
"with torchdynamo.optimize(__torch_mlir):\n",
" for _ in range(10):\n",
" print(toy_example(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"It can also be used through a decorator:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"@create_backend\n",
"def torch_mlir(subgraph, *args, **kwargs):\n",
" assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
" return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
"\n",
"\n",
"@torchdynamo.optimize(\"torch_mlir\")\n",
"def toy_example2(*args):\n",
" a, b = args\n",
"\n",
" x = a / (torch.abs(a) + 1)\n",
" if b.sum() < 0:\n",
" b = b * -1\n",
" return x * b"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 1 device(s).\n",
"Device: 0\n",
" Name: NVIDIA GeForce RTX 3080\n",
" Compute Capability: 8.6\n",
"[-0.35494277 0.03409214 -0.02271946 0.7335942 0.03122527 -0.41881397\n",
" -0.6609761 -0.6418614 0.29336175 -0.01973678]\n",
"[-2.7246824e-01 -3.5543957e-01 6.0087401e-01 -7.4570496e-03\n",
" -4.2481605e-02 -5.0296803e-04 7.2928613e-01 -1.4673788e-03\n",
" -2.7621329e-01 -6.0995776e-02]\n",
"[-0.03165906 0.3889693 0.24052973 0.27279532 -0.02773128 -0.12602475\n",
" -1.0124422 0.5720256 -0.35437614 -0.20992722]\n",
"[-0.41831446 0.5525326 -0.29749998 -0.17044766 0.11804754 -0.05210691\n",
" -0.46145165 -0.8776549 0.10090438 0.17463352]\n",
"[ 0.02194221 0.20959911 0.26973712 0.12551276 -0.0020404 0.1490246\n",
" -0.04456685 1.1100804 0.8105744 0.6676846 ]\n",
"[ 0.06528181 -0.13591261 0.5370964 -0.4398162 -0.03372452 0.9691372\n",
" -0.01120087 0.2947028 0.4804801 -0.3324341 ]\n",
"[ 0.33549032 -0.23001772 -0.08681437 0.16490957 -0.11223086 0.09168988\n",
" 0.02403045 0.17344482 0.46406478 -0.00129451]\n",
"[-0.27475086 0.42384806 1.9090122 -0.41147137 -0.6888369 0.08435658\n",
" -0.26628923 -0.17436793 -0.8058869 -0.02582378]\n",
"[-0.10109414 0.08681287 -0.10055986 0.6858881 0.29267687 -0.02797117\n",
" -0.01425194 0.4882803 0.3551982 -0.858935 ]\n",
"[-0.22086617 0.524994 0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
" 0.11938014 -0.01122053 0.39294165 -0.61770755]\n"
]
}
],
"source": [
"for _ in range(10):\n",
" print(toy_example2(torch.randn(10), torch.randn(10)))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
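
The deleted notebook targets the standalone torchdynamo package, whose create_backend/SubGraph APIs predate the merge into PyTorch. On PyTorch 2.x the same hook is just a callable passed to torch.compile; a minimal sketch under that assumption (not SHARK code):

import torch

def inspect_backend(gm: torch.fx.GraphModule, example_inputs):
    # TorchDynamo hands over the captured FX graph plus example inputs; a real
    # backend would lower gm here (e.g. through torch-mlir and IREE).
    print(f"captured {len(list(gm.graph.nodes))} FX nodes")
    return gm.forward  # fall back to eager execution of the captured graph

def toy_example(a, b):
    x = a / (torch.abs(a) + 1)
    if b.sum() < 0:
        b = b * -1
    return x * b

compiled = torch.compile(toy_example, backend=inspect_backend)
print(compiled(torch.randn(10), torch.randn(10)))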


@@ -1,92 +0,0 @@
import torch
from torch_mlir import compile, OutputType
from shark.iree_utils import get_iree_compiled_module
try:
import torchdynamo
from torchdynamo.optimizations.backends import create_backend
from torchdynamo.optimizations.subgraph import SubGraph
except ModuleNotFoundError:
print(
"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo"
)
exit()
NUM_ITERS = 10
def __torch_mlir(fx_graph, *args, **kwargs):
assert isinstance(
fx_graph, torch.fx.GraphModule
), "Model must be an FX GraphModule."
def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
"""Replace tuple with tuple element in functions that return one-element tuples."""
for node in fx_g.graph.nodes:
if node.op == "output":
assert (
len(node.args) == 1
), "Output node must have a single argument"
node_arg = node.args[0]
if isinstance(node_arg, tuple) and len(node_arg) == 1:
node.args = (node_arg[0],)
fx_g.graph.lint()
fx_g.recompile()
return fx_g
fx_graph = _unwrap_single_tuple_return(fx_graph)
ts_graph = torch.jit.script(fx_graph)
if isinstance(args, tuple):
args = list(args)
assert isinstance(args, list)
if len(args) == 1 and isinstance(args[0], list):
args = args[0]
linalg_module = compile(
ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS
)
callable, _ = get_iree_compiled_module(
linalg_module, "cuda", func_name="forward"
)
def forward(*inputs):
return callable(*inputs)
return forward
def toy_example(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
with torchdynamo.optimize(__torch_mlir):
for _ in range(10):
print(toy_example(torch.randn(10), torch.randn(10)))
@create_backend
def torch_mlir(subgraph, *args, **kwargs):
assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))
@torchdynamo.optimize("torch_mlir")
def toy_example2(*args):
a, b = args
x = a / (torch.abs(a) + 1)
if b.sum() < 0:
b = b * -1
return x * b
for _ in range(10):
print(toy_example2(torch.randn(10), torch.randn(10)))


@@ -1,805 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# standard imports\n",
"import torch\n",
"from torch_mlir.eager_mode import torch_mlir_tensor"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# eager mode imports\n",
"from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
"from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"and wrapping all your `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
]
}
],
"source": [
"NUM_ITERS = 10\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"for i in range(NUM_ITERS):\n",
" yy = tt + uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = tt * uu\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
" [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
]
}
],
"source": [
"torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = 2 * torch.ones((10, 10))\n",
"\n",
"tt = TorchMLIRTensor(t)\n",
"print(tt)\n",
"uu = TorchMLIRTensor(u)\n",
"print(uu)\n",
"\n",
"yy = tt + uu\n",
"print(yy.elem.to_host())\n",
"yy = tt * uu\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"# eager mode RAII\n",
"from shark.shark_runner import SharkEagerMode\n",
"\n",
"shark_eager_mode = SharkEagerMode(\"cpu\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"for i in range(NUM_ITERS):\n",
" yy = t + u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())\n",
" yy = t * u\n",
" print(type(yy))\n",
" print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
" [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
"<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
"[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
" [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
]
}
],
"source": [
"del shark_eager_mode\n",
"shark_eager_mode = SharkEagerMode(\"cuda\")\n",
"\n",
"t = torch.ones((10, 10))\n",
"u = torch.ones((10, 10))\n",
"\n",
"print(t)\n",
"print(u)\n",
"\n",
"yy = t + u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())\n",
"yy = t * u\n",
"print(type(yy))\n",
"print(yy.elem.to_host())"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
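
A note on the `SharkEagerMode` pattern shown in the notebook above: it relies on a deleter that fires when the guard object is garbage collected. The snippet below is a minimal, self-contained sketch of that idea using `weakref.finalize`; it is not SHARK's actual implementation, and every name in it is illustrative:

```
import weakref

class EagerModeGuard:
    """Install a backend on construction; restore the previous one when collected."""

    _active_backend = None  # stands in for torch_mlir_tensor.backend

    def __init__(self, backend_name):
        self._previous = EagerModeGuard._active_backend
        EagerModeGuard._active_backend = backend_name
        # weakref.finalize runs its callback once this instance is
        # garbage collected (in CPython, right after `del guard`).
        self._finalizer = weakref.finalize(
            self, EagerModeGuard._restore, self._previous
        )

    @staticmethod
    def _restore(previous):
        EagerModeGuard._active_backend = previous

guard = EagerModeGuard("cpu")
assert EagerModeGuard._active_backend == "cpu"
del guard  # the deleter fires here, restoring the previous backend
assert EagerModeGuard._active_backend is None
```

This is why switching from `"cpu"` to `"cuda"` in the cells above requires `del shark_eager_mode` first: until the old instance is collected, its backend stays installed.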


@@ -1,148 +0,0 @@
# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from shark.shark_runner import SharkEagerMode
def test_cpu():
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(type(yy))
print(yy.elem.to_host())
yy = tt * uu
print(type(yy))
print(yy.elem.to_host())
def test_gpu():
source = """
#include <iostream>
#include "cuda.h"
#include "cuda_runtime_api.h"
using namespace std;
void print_free_mem() {
int num_gpus;
size_t free, total;
cudaSetDevice(0);
int id;
cudaGetDevice(&id);
cudaMemGetInfo(&free, &total);
cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
}
"""
gpu_stats = load_inline(
name="inline_extension",
cpp_sources=[source],
extra_include_paths=include_paths(cuda=True),
functions=["print_free_mem"],
)
torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")
t = torch.ones((10, 10), device="cpu")
u = 2 * torch.ones((10, 10), device="cpu")
tt = TorchMLIRTensor(t)
print(tt)
uu = TorchMLIRTensor(u)
print(uu)
for i in range(NUM_ITERS):
yy = tt + uu
print(yy.elem.to_host())
yy = tt * uu
print(yy.elem.to_host())
gpu_stats.print_free_mem()
def test_python_mode_ref_backend():
# hide this wherever you want?
_ = SharkEagerMode("refbackend")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
print(i)
yy = t + u
print(yy.elem)
yy = t * u
print(yy.elem)
def test_python_mode_iree_cpu():
# hide this wherever you want?
_ = SharkEagerMode("cpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
def test_python_mode_iree_gpu():
_ = SharkEagerMode("gpu")
t = torch.ones((10, 10), device="cpu")
u = torch.ones((10, 10), device="cpu")
print(t)
print(u)
for i in range(NUM_ITERS):
yy = t + u
print(type(yy))
print(yy.elem.to_host())
yy = t * u
print(type(yy))
print(yy.elem.to_host())
if __name__ == "__main__":
NUM_ITERS = 10
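# NUM_ITERS is assigned at module scope here, so the test functions above can
# read it as a global; it is only defined when this file runs as a script.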
test_cpu()
if torch.cuda.is_available():
test_gpu()
test_python_mode_ref_backend()
test_python_mode_iree_cpu()
test_python_mode_iree_gpu()


@@ -1,73 +0,0 @@
import torch
import numpy as np
model = torch.hub.load(
"pytorch/vision:v0.10.0", "squeezenet1_0", pretrained=True
)
model.eval()
# from PIL import Image
# from torchvision import transforms
# import urllib
#
# url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
# try: urllib.URLopener().retrieve(url, filename)
# except: urllib.request.urlretrieve(url, filename)
#
#
# input_image = Image.open(filename)
# preprocess = transforms.Compose([
# transforms.Resize(256),
# transforms.CenterCrop(224),
# transforms.ToTensor(),
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])
# input_tensor = preprocess(input_image)
# input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model
# print(input_batch.shape) # size = [1, 3, 224, 224]
# The commented-out code above generates a sample input from a real image;
# for accuracy testing, random values work just as well.
input_batch = torch.randn(1, 3, 224, 224)
# Focus on CPU for now
if False and torch.cuda.is_available():
input_batch = input_batch.to("cuda")
model.to("cuda")
with torch.no_grad():
output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
golden_confidences = output[0]
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
golden_probabilities = torch.nn.functional.softmax(
golden_confidences, dim=0
).numpy()
golden_confidences = golden_confidences.numpy()
from shark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
input_detached_clone = input_batch.clone()
eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
print("getting torch-mlir result")
output = model(eager_input_batch)
static_output = output.elem
confidences = static_output[0]
probabilities = torch.nn.functional.softmax(
torch.from_numpy(confidences), dim=0
).numpy()
print("The obtained result via shark is: ", confidences)
print("The golden result is:", golden_confidences)
np.testing.assert_allclose(
golden_confidences, confidences, rtol=1e-02, atol=1e-03
)
np.testing.assert_allclose(
golden_probabilities, probabilities, rtol=1e-02, atol=1e-03
)


@@ -1,65 +0,0 @@
from PIL import Image
import requests
from transformers import CLIPProcessor, TFCLIPModel
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
clip_vit_inputs = [
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
]
class CLIPModule(tf.Module):
def __init__(self):
super(CLIPModule, self).__init__()
self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
self.m.predict = lambda x, y, z: self.m(
input_ids=x, attention_mask=y, pixel_values=z
)
@tf.function(input_signature=clip_vit_inputs, jit_compile=True)
def forward(self, input_ids, attention_mask, pixel_values):
return self.m.predict(
input_ids, attention_mask, pixel_values
).logits_per_image
if __name__ == "__main__":
# Prepping Data
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(
text=["a photo of a cat", "a photo of a dog"],
images=image,
return_tensors="tf",
padding=True,
)
shark_module = SharkInference(
CLIPModule(),
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(
inputs["input_ids"],
inputs["attention_mask"],
inputs["pixel_values"],
)
)
)


@@ -1,15 +0,0 @@
## Running ESRGAN
```
1. pip install numpy opencv-python
2. mkdir InputImages    # all input images go here
3. mkdir OutputImages   # generated images are written here
4. mkdir models         # save the .pth checkpoint file here
5. python esrgan.py
```
- Download [RRDB_ESRGAN_x4.pth](https://drive.google.com/drive/u/0/folders/17VYV_SoZZesU6mbxz2dMAIccSSlqLecY) and place it in the `models` directory as mentioned above in step 4.
- Credits : [ESRGAN](https://github.com/xinntao/ESRGAN)


@@ -1,239 +0,0 @@
import os.path as osp
import glob
import cv2
import numpy as np
import torch
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference
import torch_mlir
import tempfile
import functools
import torch.nn as nn
import torch.nn.functional as F
def make_layer(block, n_layers):
layers = []
for _ in range(n_layers):
layers.append(block())
return nn.Sequential(*layers)
class ResidualDenseBlock_5C(nn.Module):
def __init__(self, nf=64, gc=32, bias=True):
super(ResidualDenseBlock_5C, self).__init__()
# gc: growth channel, i.e. intermediate channels
self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
# initialization
# mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
def forward(self, x):
x1 = self.lrelu(self.conv1(x))
x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
return x5 * 0.2 + x
class RRDB(nn.Module):
"""Residual in Residual Dense Block"""
def __init__(self, nf, gc=32):
super(RRDB, self).__init__()
self.RDB1 = ResidualDenseBlock_5C(nf, gc)
self.RDB2 = ResidualDenseBlock_5C(nf, gc)
self.RDB3 = ResidualDenseBlock_5C(nf, gc)
def forward(self, x):
out = self.RDB1(x)
out = self.RDB2(out)
out = self.RDB3(out)
return out * 0.2 + x
class RRDBNet(nn.Module):
def __init__(self, in_nc, out_nc, nf, nb, gc=32):
super(RRDBNet, self).__init__()
RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
self.RRDB_trunk = make_layer(RRDB_block_f, nb)
self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
#### upsampling
self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
def forward(self, x):
fea = self.conv_first(x)
trunk = self.trunk_conv(self.RRDB_trunk(fea))
fea = fea + trunk
fea = self.lrelu(
self.upconv1(F.interpolate(fea, scale_factor=2, mode="nearest"))
)
fea = self.lrelu(
self.upconv2(F.interpolate(fea, scale_factor=2, mode="nearest"))
)
out = self.conv_last(self.lrelu(self.HRconv(fea)))
return out
############### Parsing args #####################
import argparse
p = argparse.ArgumentParser(
description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
p.add_argument("--device", type=str, default="cpu", help="the device to use")
p.add_argument(
"--mlir_loc",
type=str,
default=None,
help="location of the model's mlir file",
)
args = p.parse_args()
###################################################
def inference(input_m):
return model(input_m)
def load_mlir(mlir_loc):
import os
if mlir_loc is None:
return None
print(f"Trying to load the model from {mlir_loc}.")
with open(os.path.join(mlir_loc)) as f:
mlir_module = f.read()
return mlir_module
def compile_through_fx(model, inputs, mlir_loc=None):
module = load_mlir(mlir_loc)
if module is None:
fx_g = make_fx(
model,
decomposition_table=get_decompositions(
[
torch.ops.aten.embedding_dense_backward,
torch.ops.aten.native_layer_norm_backward,
torch.ops.aten.slice_backward,
torch.ops.aten.select_backward,
torch.ops.aten.norm.ScalarOpt_dim,
torch.ops.aten.native_group_norm,
torch.ops.aten.upsample_bilinear2d.vec,
torch.ops.aten.split.Tensor,
torch.ops.aten.split_with_sizes,
]
),
)(inputs)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
def strip_overloads(gm):
"""
Modifies the target of graph nodes in :attr:`gm` to strip overloads.
Args:
gm(fx.GraphModule): The input Fx graph module to be modified
"""
for node in gm.graph.nodes:
if isinstance(node.target, torch._ops.OpOverload):
node.target = node.target.overloadpacket
gm.recompile()
strip_overloads(fx_g)
ts_g = torch.jit.script(fx_g)
print("Torchscript graph generated successfully")
module = torch_mlir.compile(
ts_g,
inputs,
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
mlir_model = str(module)
func_name = "forward"
shark_module = SharkInference(
mlir_model, device=args.device, mlir_dialect="linalg"
)
shark_module.compile()
return shark_module
model_path = "models/RRDB_ESRGAN_x4.pth" # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
# device = torch.device('cuda') # if you want to run on CPU, change 'cuda' -> cpu
device = torch.device("cpu")
test_img_folder = "InputImages/*"
model = RRDBNet(3, 3, 64, 23, gc=32)
model.load_state_dict(torch.load(model_path), strict=True)
model.eval()
model = model.to(device)
print("Model path {:s}. \nTesting...".format(model_path))
if __name__ == "__main__":
idx = 0
for path in glob.glob(test_img_folder):
idx += 1
base = osp.splitext(osp.basename(path))[0]
print(idx, base)
# read images
img = cv2.imread(path, cv2.IMREAD_COLOR)
img = img * 1.0 / 255
img = torch.from_numpy(
np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))
).float()
img_LR = img.unsqueeze(0)
img_LR = img_LR.to(device)
with torch.no_grad():
shark_module = compile_through_fx(inference, img_LR)
shark_output = shark_module.forward((img_LR,))
shark_output = torch.from_numpy(shark_output)
shark_output = (
shark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
)
esrgan_output = (
model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
)
# SHARK OUTPUT
shark_output = np.transpose(shark_output[[2, 1, 0], :, :], (1, 2, 0))
shark_output = (shark_output * 255.0).round()
cv2.imwrite(
"OutputImages/{:s}_rlt_shark_output.png".format(base), shark_output
)
print("Generated SHARK's output")
# ESRGAN OUTPUT
esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
esrgan_output = (esrgan_output * 255.0).round()
cv2.imwrite(
"OutputImages/{:s}_rlt_esrgan_output.png".format(base),
esrgan_output,
)
print("Generated ESRGAN's output")


@@ -1,86 +0,0 @@
from transformers import AutoModelForMaskedLM, AutoTokenizer
import torch
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
class AlbertModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
self.model.eval()
def forward(self, input_ids, attention_mask):
return self.model(
input_ids=input_ids, attention_mask=attention_mask
).logits
if __name__ == "__main__":
# Prepping Data
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
text = "This [MASK] is very tasty."
encoded_inputs = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="pt",
)
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter(
AlbertModule(),
inputs,
frontend="torch",
)
minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=True
)
shark_module = SharkInference(minilm_mlir)
shark_module.compile()
token_logits = torch.tensor(shark_module.forward(inputs))
mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1]
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
for token in top_5_tokens:
print(
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
while True:
try:
new_text = input("Give me a sentence with [MASK] to fill: ")
encoded_inputs = tokenizer(
new_text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="pt",
)
inputs = (
encoded_inputs["input_ids"],
encoded_inputs["attention_mask"],
)
token_logits = torch.tensor(shark_module.forward(inputs))
mask_id = torch.where(
encoded_inputs["input_ids"] == tokenizer.mask_token_id
)[1]
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = (
torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
)
for token in top_5_tokens:
print(
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
except KeyboardInterrupt:
print("Exiting program.")
break


@@ -1,100 +0,0 @@
from PIL import Image
import requests
from transformers import TFAutoModelForMaskedLM, AutoTokenizer
import tensorflow as tf
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
from iree.compiler import tf as tfc
from iree.compiler import compile_str
from iree import runtime as ireert
import os
import numpy as np
import sys
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of inputs
albert_inputs = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class AlbertModule(tf.Module):
def __init__(self):
super(AlbertModule, self).__init__()
self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
@tf.function(input_signature=albert_inputs, jit_compile=True)
def forward(self, input_ids, attention_mask):
return self.m.predict(input_ids, attention_mask)
if __name__ == "__main__":
# Prepping Data
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
# text = "This is a great [MASK]."
text = "This [MASK] is very tasty."
encoded_inputs = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="tf",
)
inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
mlir_importer = SharkImporter(
AlbertModule(),
inputs,
frontend="tf",
)
minilm_mlir, func_name = mlir_importer.import_mlir(
is_dynamic=False, tracing_required=False
)
shark_module = SharkInference(minilm_mlir, mlir_dialect="mhlo")
shark_module.compile()
output_idx = 0
data_idx = 1
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
)
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
for token in top_5_tokens:
print(
f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
while True:
try:
new_text = input("Give me a sentence with [MASK] to fill: ")
encoded_inputs = tokenizer(
new_text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
return_tensors="tf",
)
inputs = (
encoded_inputs["input_ids"],
encoded_inputs["attention_mask"],
)
token_logits = shark_module.forward(inputs)[output_idx][data_idx]
mask_id = np.where(
tf.squeeze(encoded_inputs["input_ids"])
== tokenizer.mask_token_id
)
mask_token_logits = token_logits[0, mask_id, :]
top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[
0:5
]
for token in top_5_tokens:
print(
f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
)
except KeyboardInterrupt:
print("Exiting program.")
sys.exit()


@@ -1,14 +0,0 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"bloom", frontend="torch"
)
shark_module = SharkInference(
mlir_model, device="cpu", mlir_dialect="tm_tensor"
)
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)


@@ -1,40 +0,0 @@
from PIL import Image
import requests
from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
gpt2_inputs = [
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
]
class GPT2Module(tf.Module):
def __init__(self):
super(GPT2Module, self).__init__()
self.m = TFGPT2Model.from_pretrained("distilgpt2")
self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
@tf.function(input_signature=gpt2_inputs, jit_compile=True)
def forward(self, input_ids, attention_mask):
return self.m.predict(input_ids, attention_mask)
if __name__ == "__main__":
# Prepping Data
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf")
shark_module = SharkInference(
GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
)

View File

@@ -1,18 +0,0 @@
# SHARK LLaMA
## TORCH-MLIR Version
```
https://github.com/nod-ai/torch-mlir.git
```
Then check out the `complex` branch, run `git submodule update --init`, and build with `.\build_tools\python_deploy\build_windows.ps1`.
### Setup & Run
```
git clone https://github.com/nod-ai/llama.git
```
Then, inside the cloned repository:
```
pip install -e .
python llama/shark_model.py
```


@@ -1,72 +0,0 @@
import torch
import torch_mlir
from shark.shark_inference import SharkInference
from shark.shark_compile import shark_compile_through_fx
from MEGABYTE_pytorch import MEGABYTE
import os
class MegaModel(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = MEGABYTE(
num_tokens=16000, # number of tokens
dim=(
512,
256,
), # transformer model dimension (512 for coarsest, 256 for fine in this example)
max_seq_len=(
1024,
4,
), # sequence length for global and then local. this can be more than 2
depth=(
6,
4,
), # number of layers for global and then local. this can be more than 2, but length must match the max_seq_len's
dim_head=64, # dimension per head
heads=8, # number of attention heads
flash_attn=True, # use flash attention
)
def forward(self, input):
return self.model(input)
megaModel = MegaModel()
inputs = [torch.randint(0, 16000, (1, 1024, 4))]
# CURRENTLY IT BAILS OUT HERE BECAUSE OF MISSING OP LOWERINGS :-
# 1. aten.alias
shark_module, _ = shark_compile_through_fx(
model=megaModel,
inputs=inputs,
extended_model_name="mega_shark",
is_f16=False,
f16_input_mask=None,
save_dir=os.getcwd(),
debug=False,
generate_or_load_vmfb=True,
extra_args=[],
device="cuda",
mlir_dialect="tm_tensor",
)
# logits = model(x)
def print_output_info(output, msg):
print("\n", msg)
print("\n\t", output.shape)
ans = shark_module("forward", inputs)
print_output_info(torch.from_numpy(ans), "SHARK's output")
ans = megaModel.forward(*inputs)
print_output_info(ans, "ORIGINAL Model's output")
# and sample from the logits accordingly
# or you can use the generate function
# NEED TO LOOK AT THIS LATER IF REQUIRED IN SHARK.
# sampled = model.generate(temperature = 0.9, filter_thres = 0.9) # (1, 1024, 4)


@@ -1,31 +0,0 @@
from shark.shark_inference import SharkInference
import numpy as np
mhlo_ir = r"""builtin.module {
func.func @forward(%arg0: tensor<1x4xf32>, %arg1: tensor<4x1xf32>) -> tensor<4x4xf32> {
%0 = chlo.broadcast_add %arg0, %arg1 : (tensor<1x4xf32>, tensor<4x1xf32>) -> tensor<4x4xf32>
%1 = "mhlo.abs"(%0) : (tensor<4x4xf32>) -> tensor<4x4xf32>
return %1 : tensor<4x4xf32>
}
}"""
arg0 = np.ones((1, 4)).astype(np.float32)
arg1 = np.ones((4, 1)).astype(np.float32)
print("Running shark on cpu backend")
shark_module = SharkInference(mhlo_ir, device="cpu", mlir_dialect="mhlo")
# Generate the random inputs and feed into the graph.
x = shark_module.generate_random_inputs()
shark_module.compile()
print(shark_module.forward(x))
print("Running shark on cuda backend")
shark_module = SharkInference(mhlo_ir, device="cuda", mlir_dialect="mhlo")
shark_module.compile()
print(shark_module.forward(x))
print("Running shark on vulkan backend")
shark_module = SharkInference(mhlo_ir, device="vulkan", mlir_dialect="mhlo")
shark_module.compile()
print(shark_module.forward(x))


@@ -1,35 +0,0 @@
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from shark.shark_inference import SharkInference
torch.manual_seed(0)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
class MiniLMSequenceClassification(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = AutoModelForSequenceClassification.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", # The pretrained model.
num_labels=2, # The number of output labels--2 for binary classification.
output_attentions=False, # Whether the model returns attentions weights.
output_hidden_states=False, # Whether the model returns all hidden-states.
torchscript=True,
)
def forward(self, tokens):
return self.model.forward(tokens)[0]
test_input = torch.randint(2, (1, 128))
shark_module = SharkInference(
MiniLMSequenceClassification(),
(test_input,),
jit_trace=True,
benchmark_mode=True,
)
shark_module.compile()
shark_module.forward((test_input,))
shark_module.benchmark_all((test_input,))


@@ -1,61 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input, jit_compile=True)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
test_input = (
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
shark_module = SharkInference(
BertModule(), test_input, benchmark_mode=True
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
shark_module.benchmark_all(test_input)


@@ -1,73 +0,0 @@
from transformers import AutoTokenizer, FlaxAutoModel
import torch
import jax
from typing import Union, Dict, List, Any
import numpy as np
from shark.shark_inference import SharkInference
import io
NumpyTree = Union[np.ndarray, Dict[str, np.ndarray], List[np.ndarray]]
def convert_torch_tensor_tree_to_numpy(
tree: Union[torch.tensor, Dict[str, torch.tensor], List[torch.tensor]]
) -> NumpyTree:
return jax.tree_util.tree_map(
lambda torch_tensor: torch_tensor.cpu().detach().numpy(), tree
)
def convert_int64_to_int32(tree: NumpyTree) -> NumpyTree:
return jax.tree_util.tree_map(
lambda tensor: np.array(tensor, dtype=np.int32)
if tensor.dtype == np.int64
else tensor,
tree,
)
def get_sample_input():
tokenizer = AutoTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased"
)
inputs_torch = tokenizer("Hello, World!", return_tensors="pt")
return convert_int64_to_int32(
convert_torch_tensor_tree_to_numpy(inputs_torch.data)
)
def get_jax_model():
return FlaxAutoModel.from_pretrained("microsoft/MiniLM-L12-H384-uncased")
def export_jax_to_mlir(jax_model: Any, sample_input: NumpyTree):
model_mlir = jax.jit(jax_model).lower(**sample_input).compiler_ir()
byte_stream = io.BytesIO()
model_mlir.operation.write_bytecode(file=byte_stream)
return byte_stream.getvalue()
def assert_array_list_allclose(x, y, *args, **kwargs):
assert len(x) == len(y)
for a, b in zip(x, y):
np.testing.assert_allclose(
np.asarray(a), np.asarray(b), *args, **kwargs
)
sample_input = get_sample_input()
jax_model = get_jax_model()
mlir = export_jax_to_mlir(jax_model, sample_input)
# Compile and load module.
shark_inference = SharkInference(mlir_module=mlir, mlir_dialect="mhlo")
shark_inference.compile()
# Run main function.
result = shark_inference("main", jax.tree_util.tree_flatten(sample_input)[0])
# Run JAX model.
reference_result = jax.tree_util.tree_flatten(jax_model(**sample_input))[0]
# Verify result.
assert_array_list_allclose(result, reference_result, atol=1e-5)


@@ -1,6 +0,0 @@
flax
jax[cpu]
nodai-SHARK
orbax
transformers
torch


@@ -1,23 +0,0 @@
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_model
mlir_model, func_name, inputs, golden_out = download_model(
"microsoft/MiniLM-L12-H384-uncased",
frontend="torch",
)
shark_module = SharkInference(mlir_model, device="cpu", mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward(inputs)
print("The obtained result via shark is: ", result)
print("The golden result is:", golden_out)
# Let's generate random inputs, currently supported
# for static models.
rand_inputs = shark_module.generate_random_inputs()
rand_results = shark_module.forward(rand_inputs)
print("Running shark_module with random_inputs is: ", rand_results)


@@ -1,70 +0,0 @@
import tensorflow as tf
from transformers import BertModel, BertTokenizer, TFBertModel
from shark.shark_inference import SharkInference
MAX_SEQUENCE_LENGTH = 512
BATCH_SIZE = 1
# Create a set of 2-dimensional inputs
bert_input = [
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
]
class BertModule(tf.Module):
def __init__(self):
super(BertModule, self).__init__()
# Create a BERT trainer with the created network.
self.m = TFBertModel.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased", from_pt=True
)
# Invoke the trainer model on the inputs. This causes the layer to be built.
self.m.predict = lambda x, y, z: self.m.call(
input_ids=x, attention_mask=y, token_type_ids=z, training=False
)
@tf.function(input_signature=bert_input, jit_compile=True)
def forward(self, input_ids, attention_mask, token_type_ids):
return self.m.predict(input_ids, attention_mask, token_type_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = BertTokenizer.from_pretrained(
"microsoft/MiniLM-L12-H384-uncased"
)
text = "Replace me by any text you'd like."
encoded_input = tokenizer(
text,
padding="max_length",
truncation=True,
max_length=MAX_SEQUENCE_LENGTH,
)
for key in encoded_input:
encoded_input[key] = tf.expand_dims(
tf.convert_to_tensor(encoded_input[key]), 0
)
shark_module = SharkInference(
BertModule(),
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
),
)
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(
shark_module.forward(
(
encoded_input["input_ids"],
encoded_input["attention_mask"],
encoded_input["token_type_ids"],
)
)
)

File diff suppressed because one or more lines are too long


@@ -1,39 +0,0 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
torch.hub.list("zhanghang1989/ResNeSt", force_reload=True)
class ResnestModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"zhanghang1989/ResNeSt", "resnest50", pretrained=True
)
self.model.eval()
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter(
ResnestModule(),
(input,),
frontend="torch",
)
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=True
)
print(golden_out)
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input,))
print("Obtained result", result)


@@ -1,74 +0,0 @@
from shark.shark_inference import SharkInference
from shark.parser import shark_args
import torch
import numpy as np
import sys
import torchvision.models as models
import torch_mlir
torch.manual_seed(0)
class VisionModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = models.resnet50(pretrained=True)
self.train(False)
def forward(self, input):
return self.model.forward(input)
model = VisionModule()
test_input = torch.randn(1, 3, 224, 224)
actual_out = model(test_input)
test_input_fp16 = test_input.to(device=torch.device("cuda"), dtype=torch.half)
model_fp16 = model.half()
model_fp16.eval()
model_fp16.to("cuda")
actual_out_fp16 = model_fp16(test_input_fp16)
ts_g = torch.jit.trace(model_fp16, [test_input_fp16])
module = torch_mlir.compile(
ts_g,
(test_input_fp16),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=True,
verbose=False,
)
# from contextlib import redirect_stdout
# with open('resnet50_fp16_linalg_ir.mlir', 'w') as f:
# with redirect_stdout(f):
# print(module.operation.get_asm())
mlir_model = module
func_name = "forward"
shark_module = SharkInference(mlir_model, device="cuda", mlir_dialect="linalg")
shark_module.compile()
def shark_result(x):
x_ny = x.cpu().detach().numpy()
inputs = (x_ny,)
result = shark_module.forward(inputs)
return torch.from_numpy(result)
observed_out = shark_result(test_input_fp16)
print("Golden result:", actual_out_fp16)
print("SHARK result:", observed_out)
actual_out_fp16 = actual_out_fp16.to(device=torch.device("cpu"))
torch.testing.assert_allclose(
actual_out_fp16, observed_out, rtol=1e-2, atol=1e-2
)
print("SHARK and golden fp16 results match within tolerance.")


@@ -1,85 +0,0 @@
from PIL import Image
import requests
import torch
import torchvision.models as models
from torchvision import transforms
import sys
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_model
################################## Preprocessing inputs and model ############
def load_and_preprocess_image(url: str):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}
img = Image.open(
requests.get(url, headers=headers, stream=True).raw
).convert("RGB")
# preprocessing pipeline
preprocess = transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
)
img_preprocessed = preprocess(img)
return torch.unsqueeze(img_preprocessed, 0)
def load_labels():
classes_text = requests.get(
"https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
stream=True,
).text
labels = [line.strip() for line in classes_text.splitlines()]
return labels
def top3_possibilities(res):
_, indexes = torch.sort(res, descending=True)
percentage = torch.nn.functional.softmax(res, dim=1)[0] * 100
top3 = [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
return top3
class Resnet50Module(torch.nn.Module):
def __init__(self):
super().__init__()
self.resnet = models.resnet50(pretrained=True)
self.train(False)
def forward(self, img):
return self.resnet.forward(img)
image_url = "https://upload.wikimedia.org/wikipedia/commons/2/26/YellowLabradorLooking_new.jpg"
print("load image from " + image_url, file=sys.stderr)
img = load_and_preprocess_image(image_url)
labels = load_labels()
##############################################################################
## Can pass any img or input to the forward module.
mlir_model, func_name, inputs, golden_out = download_model(
"resnet50", frontend="torch"
)
shark_module = SharkInference(mlir_model, mlir_dialect="linalg")
shark_module.compile()
path = shark_module.save_module()
shark_module.load_module(path)
result = shark_module("forward", (img.detach().numpy(),))
print("The top 3 results obtained via shark_runner is:")
print(top3_possibilities(torch.from_numpy(result)))
print()
print("The top 3 results obtained via torch is:")
print(top3_possibilities(Resnet50Module()(img)))


@@ -1,842 +0,0 @@
####################################################################################
# Please make sure you have transformers 4.21.2 installed before running this demo
#
# -p --model_path: the directory in which you want to store the bloom files.
# -dl --device_list: the list of device indices you want to use. if you want to only use the first device, or you are running on cpu leave this blank.
# Otherwise, please give this argument in this format: "[0, 1, 2]"
# -de --device: the device you want to run bloom on, e.g. cpu or cuda
# -c, --recompile: set to true if you want to recompile to vmfb.
# -d, --download: set to true if you want to redownload the mlir files
# -cm, --create_mlirs: set to true if you want to create the mlir files from scratch. please make sure you have transformers 4.21.2 before using this option
# -t --token_count: the number of tokens you want to generate
# -pr --prompt: the prompt you want to feed to the model
# -m --model_name: the name of the model, e.g. bloom-560m
#
# If you don't specify a prompt when you run this example, you will be prompted for input in the terminal. Run the
# example this way if you want to try multiple prompts without reinitializing the model.
#####################################################################################
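# Example invocation (the script and directory names here are illustrative only):
#   python sharded_bloom.py -p ./bloom-560m -m bloom-560m -d True -t 10 -pr "Hello, my name is"
# Note: the boolean flags use argparse's type=bool, so any non-empty value
# (even "False") parses as True; omit a flag entirely to leave it off.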
import os
import io
import torch
import torch.nn as nn
from collections import OrderedDict
import torch_mlir
from torch_mlir import TensorPlaceholder
import re
from transformers.models.bloom.configuration_bloom import BloomConfig
import json
import sys
import argparse
import json
import urllib.request
import subprocess
from torch.fx.experimental.proxy_tensor import make_fx
from torch._decomp import get_decompositions
from shark.shark_inference import SharkInference
from shark.shark_downloader import download_public_file
from transformers import (
BloomTokenizerFast,
BloomForSequenceClassification,
BloomForCausalLM,
)
from transformers.models.bloom.modeling_bloom import (
BloomBlock,
build_alibi_tensor,
)
IS_CUDA = False
class ShardedBloom:
def __init__(self, src_folder):
f = open(f"{src_folder}/config.json")
config = json.load(f)
f.close()
self.layers_initialized = False
self.src_folder = src_folder
try:
self.n_embed = config["n_embed"]
except KeyError:
self.n_embed = config["hidden_size"]
self.vocab_size = config["vocab_size"]
self.n_layer = config["n_layer"]
try:
self.n_head = config["num_attention_heads"]
except KeyError:
self.n_head = config["n_head"]
def _init_layer(self, layer_name, device, replace, device_idx):
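# Compile this layer's MLIR to a .vmfb if the artifact is missing (or replace
# is set); otherwise build an empty SharkInference whose cached .vmfb is
# loaded later by load_layers().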
if replace or not os.path.exists(
f"{self.src_folder}/{layer_name}.vmfb"
):
f_ = open(f"{self.src_folder}/{layer_name}.mlir", encoding="utf-8")
module = f_.read()
f_.close()
module = bytes(module, "utf-8")
shark_module = SharkInference(
module,
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
shark_module.save_module(
module_name=f"{self.src_folder}/{layer_name}",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
else:
shark_module = SharkInference(
"",
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
return shark_module
def init_layers(self, device, replace=False, device_idx=[0]):
if device_idx is not None:
n_devices = len(device_idx)
self.word_embeddings_module = self._init_layer(
"word_embeddings",
device,
replace,
device_idx if device_idx is None else device_idx[0 % n_devices],
)
self.word_embeddings_layernorm_module = self._init_layer(
"word_embeddings_layernorm",
device,
replace,
device_idx if device_idx is None else device_idx[1 % n_devices],
)
self.ln_f_module = self._init_layer(
"ln_f",
device,
replace,
device_idx if device_idx is None else device_idx[2 % n_devices],
)
self.lm_head_module = self._init_layer(
"lm_head",
device,
replace,
device_idx if device_idx is None else device_idx[3 % n_devices],
)
self.block_modules = [
self._init_layer(
f"bloom_block_{i}",
device,
replace,
device_idx
if device_idx is None
else device_idx[(i + 4) % n_devices],
)
for i in range(self.n_layer)
]
self.layers_initialized = True
def load_layers(self):
assert self.layers_initialized
self.word_embeddings_module.load_module(
f"{self.src_folder}/word_embeddings.vmfb"
)
self.word_embeddings_layernorm_module.load_module(
f"{self.src_folder}/word_embeddings_layernorm.vmfb"
)
for block_module, i in zip(self.block_modules, range(self.n_layer)):
block_module.load_module(f"{self.src_folder}/bloom_block_{i}.vmfb")
self.ln_f_module.load_module(f"{self.src_folder}/ln_f.vmfb")
self.lm_head_module.load_module(f"{self.src_folder}/lm_head.vmfb")
def forward_pass(self, input_ids, device):
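# One full decoding step: token embeddings -> embedding layernorm ->
# all n_layer transformer blocks -> final layernorm -> lm_head, returning
# the argmax token id at the last sequence position.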
if IS_CUDA:
cudaSetDevice(self.word_embeddings_module.device_idx)
input_embeds = self.word_embeddings_module(
inputs=(input_ids,), function_name="forward"
)
input_embeds = torch.tensor(input_embeds).float()
if IS_CUDA:
cudaSetDevice(self.word_embeddings_layernorm_module.device_idx)
hidden_states = self.word_embeddings_layernorm_module(
inputs=(input_embeds,), function_name="forward"
)
hidden_states = torch.tensor(hidden_states).float()
attention_mask = torch.ones(
[hidden_states.shape[0], len(input_ids[0])]
)
alibi = build_alibi_tensor(
attention_mask,
self.n_head,
hidden_states.dtype,
hidden_states.device,
)
causal_mask = _prepare_attn_mask(
attention_mask, input_ids.size(), input_embeds, 0
)
causal_mask = torch.tensor(causal_mask).float()
presents = ()
all_hidden_states = (hidden_states,)
for block_module, i in zip(self.block_modules, range(self.n_layer)):
if IS_CUDA:
cudaSetDevice(block_module.device_idx)
output = block_module(
inputs=(
hidden_states.detach().numpy(),
alibi.detach().numpy(),
causal_mask.detach().numpy(),
),
function_name="forward",
)
hidden_states = torch.tensor(output[0]).float()
all_hidden_states = all_hidden_states + (hidden_states,)
presents = presents + (
tuple(
(
output[1],
output[2],
)
),
)
if IS_CUDA:
cudaSetDevice(self.ln_f_module.device_idx)
hidden_states = self.ln_f_module(
inputs=(hidden_states,), function_name="forward"
)
if IS_CUDA:
cudaSetDevice(self.lm_head_module.device_idx)
logits = self.lm_head_module(
inputs=(hidden_states,), function_name="forward"
)
logits = torch.tensor(logits).float()
return torch.argmax(logits[:, -1, :], dim=-1)
def _make_causal_mask(
input_ids_shape: torch.Size,
dtype: torch.dtype,
past_key_values_length: int = 0,
):
"""
Make the causal mask used for autoregressive (uni-directional) self-attention.
"""
batch_size, target_length = input_ids_shape
mask = torch.full((target_length, target_length), torch.finfo(dtype).min)
mask_cond = torch.arange(mask.size(-1))
intermediate_mask = mask_cond < (mask_cond + 1).view(mask.size(-1), 1)
mask.masked_fill_(intermediate_mask, 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat(
[
torch.zeros(
target_length, past_key_values_length, dtype=dtype
),
mask,
],
dim=-1,
)
expanded_mask = mask[None, None, :, :].expand(
batch_size, 1, target_length, target_length + past_key_values_length
)
return expanded_mask
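# Worked example: for input_ids_shape == (1, 3) and past_key_values_length == 0,
# the [3, 3] core of the mask (before expansion to [1, 1, 3, 3]) is
#   [[0, m, m],
#    [0, 0, m],
#    [0, 0, 0]]  where m == torch.finfo(dtype).min,
# i.e. each position attends only to itself and to earlier positions.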
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: int = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
batch_size, source_length = mask.size()
tgt_len = tgt_len if tgt_len is not None else source_length
expanded_mask = (
mask[:, None, None, :]
.expand(batch_size, 1, tgt_len, source_length)
.to(dtype)
)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(
inverted_mask.to(torch.bool), torch.finfo(dtype).min
)
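# e.g. mask == [[1, 1, 0]] (last source position padded) expands to a
# [1, 1, tgt_len, 3] tensor that is 0 where attention is allowed and
# torch.finfo(dtype).min at the padded position.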
def _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
past_key_values_length=past_key_values_length,
).to(attention_mask.device)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(
attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
)
combined_attention_mask = (
expanded_attn_mask
if combined_attention_mask is None
else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
def download_model(destination_folder, model_name):
download_public_file(
f"gs://shark_tank/sharded_bloom/{model_name}/", destination_folder
)
def compile_embeddings(embeddings_layer, input_ids, path):
input_ids_placeholder = torch_mlir.TensorPlaceholder.like(
input_ids, dynamic_axes=[1]
)
module = torch_mlir.compile(
embeddings_layer,
(input_ids_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_word_embeddings_layernorm(
embeddings_layer_layernorm, embeds, path
):
embeds_placeholder = torch_mlir.TensorPlaceholder.like(
embeds, dynamic_axes=[1]
)
module = torch_mlir.compile(
embeddings_layer_layernorm,
(embeds_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def strip_overloads(gm):
"""
Modifies the target of graph nodes in :attr:`gm` to strip overloads.
Args:
gm(fx.GraphModule): The input Fx graph module to be modified
"""
for node in gm.graph.nodes:
if isinstance(node.target, torch._ops.OpOverload):
node.target = node.target.overloadpacket
gm.recompile()
def compile_to_mlir(
bblock,
hidden_states,
layer_past=None,
attention_mask=None,
head_mask=None,
use_cache=None,
output_attentions=False,
alibi=None,
block_index=0,
path=".",
):
fx_g = make_fx(
bblock,
decomposition_table=get_decompositions(
[
torch.ops.aten.split.Tensor,
torch.ops.aten.split_with_sizes,
]
),
tracing_mode="real",
_allow_non_fake_inputs=False,
)(hidden_states, alibi, attention_mask)
fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
fx_g.recompile()
strip_overloads(fx_g)
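# The sequence-length axes are marked dynamic below so a single compiled
# block can serve prompts of any length.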
hidden_states_placeholder = TensorPlaceholder.like(
hidden_states, dynamic_axes=[1]
)
attention_mask_placeholder = TensorPlaceholder.like(
attention_mask, dynamic_axes=[2, 3]
)
alibi_placeholder = TensorPlaceholder.like(alibi, dynamic_axes=[2])
ts_g = torch.jit.script(fx_g)
module = torch_mlir.compile(
ts_g,
(
hidden_states_placeholder,
alibi_placeholder,
attention_mask_placeholder,
),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
module_placeholder = module
module_context = module_placeholder.context
def check_valid_line(line, line_n, mlir_file_len):
if "private" in line:
return False
if "attributes" in line:
return False
if mlir_file_len - line_n == 2:
return False
return True
mlir_file_len = len(str(module).split("\n"))
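# The tracing inputs use a hard-coded sequence length of 17; the textual
# rewrites below replace that constant with a dynamic dimension ("?"/%dim)
# so the compiled block accepts sequences of any length.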
def remove_constant_dim(line):
if "17x" in line:
line = re.sub("17x", "?x", line)
line = re.sub("tensor.empty\(\)", "tensor.empty(%dim)", line)
if "tensor.empty" in line and "?x?" in line:
line = re.sub(
r"tensor.empty\(%dim\)", "tensor.empty(%dim, %dim)", line
)
if "arith.cmpi eq" in line:
line = re.sub("c17", "dim", line)
if " 17," in line:
line = re.sub(" 17,", " %dim,", line)
return line
module = "\n".join(
[
remove_constant_dim(line)
for line, line_n in zip(
str(module).split("\n"), range(mlir_file_len)
)
if check_valid_line(line, line_n, mlir_file_len)
]
)
module = module_placeholder.parse(module, context=module_context)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_ln_f(ln_f, hidden_layers, path):
hidden_layers_placeholder = torch_mlir.TensorPlaceholder.like(
hidden_layers, dynamic_axes=[1]
)
module = torch_mlir.compile(
ln_f,
(hidden_layers_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def compile_lm_head(lm_head, hidden_layers, path):
hidden_layers_placeholder = torch_mlir.TensorPlaceholder.like(
hidden_layers, dynamic_axes=[1]
)
module = torch_mlir.compile(
lm_head,
(hidden_layers_placeholder),
torch_mlir.OutputType.LINALG_ON_TENSORS,
use_tracing=False,
verbose=False,
)
bytecode_stream = io.BytesIO()
module.operation.write_bytecode(bytecode_stream)
bytecode = bytecode_stream.getvalue()
f_ = open(path, "w+")
f_.write(str(module))
f_.close()
return
def create_mlirs(destination_folder, model_name):
model_config = "bigscience/" + model_name
sample_input_ids = torch.ones([1, 17], dtype=torch.int64)
urllib.request.urlretrieve(
f"https://huggingface.co/bigscience/{model_name}/resolve/main/config.json",
filename=f"{destination_folder}/config.json",
)
urllib.request.urlretrieve(
f"https://huggingface.co/bigscience/bloom/resolve/main/tokenizer.json",
filename=f"{destination_folder}/tokenizer.json",
)
class HuggingFaceLanguage(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = BloomForCausalLM.from_pretrained(model_config)
def forward(self, tokens):
return self.model.forward(tokens)[0]
class HuggingFaceBlock(torch.nn.Module):
def __init__(self, block):
super().__init__()
self.model = block
def forward(self, tokens, alibi, attention_mask):
output = self.model(
hidden_states=tokens,
alibi=alibi,
attention_mask=attention_mask,
use_cache=True,
output_attentions=False,
)
return (output[0], output[1][0], output[1][1])
model = HuggingFaceLanguage()
compile_embeddings(
model.model.transformer.word_embeddings,
sample_input_ids,
f"{destination_folder}/word_embeddings.mlir",
)
inputs_embeds = model.model.transformer.word_embeddings(sample_input_ids)
compile_word_embeddings_layernorm(
model.model.transformer.word_embeddings_layernorm,
inputs_embeds,
f"{destination_folder}/word_embeddings_layernorm.mlir",
)
hidden_states = model.model.transformer.word_embeddings_layernorm(
inputs_embeds
)
input_shape = sample_input_ids.size()
current_sequence_length = hidden_states.shape[1]
past_key_values_length = 0
past_key_values = tuple([None] * len(model.model.transformer.h))
attention_mask = torch.ones(
(hidden_states.shape[0], current_sequence_length), device="cpu"
)
alibi = build_alibi_tensor(
attention_mask,
model.model.transformer.n_head,
hidden_states.dtype,
"cpu",
)
causal_mask = _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
)
head_mask = model.model.transformer.get_head_mask(
None, model.model.transformer.config.n_layer
)
output_attentions = model.model.transformer.config.output_attentions
all_hidden_states = ()
for i, (block, layer_past) in enumerate(
zip(model.model.transformer.h, past_key_values)
):
all_hidden_states = all_hidden_states + (hidden_states,)
proxy_model = HuggingFaceBlock(block)
compile_to_mlir(
proxy_model,
hidden_states,
layer_past=layer_past,
attention_mask=causal_mask,
head_mask=head_mask[i],
use_cache=True,
output_attentions=output_attentions,
alibi=alibi,
block_index=i,
path=f"{destination_folder}/bloom_block_{i}.mlir",
)
compile_ln_f(
model.model.transformer.ln_f,
hidden_states,
f"{destination_folder}/ln_f.mlir",
)
hidden_states = model.model.transformer.ln_f(hidden_states)
compile_lm_head(
model.model.lm_head,
hidden_states,
f"{destination_folder}/lm_head.mlir",
)
def run_large_model(
token_count,
recompile,
model_path,
prompt,
device_list,
script_path,
device,
):
f = open(f"{model_path}/prompt.txt", "w+")
f.write(prompt)
f.close()
for i in range(token_count):
if i == 0:
will_compile = recompile
else:
will_compile = False
f = open(f"{model_path}/prompt.txt", "r")
prompt = f.read()
f.close()
subprocess.run(
[
"python",
script_path,
model_path,
"start",
str(will_compile),
"cpu",
"None",
prompt,
]
)
for i in range(config["n_layer"]):
if device_list is not None:
device_idx = str(device_list[i % len(device_list)])
else:
device_idx = "None"
subprocess.run(
[
"python",
script_path,
model_path,
str(i),
str(will_compile),
device,
device_idx,
prompt,
]
)
subprocess.run(
[
"python",
script_path,
model_path,
"end",
str(will_compile),
"cpu",
"None",
prompt,
]
)
f = open(f"{model_path}/prompt.txt", "r")
output = f.read()
f.close()
print(output)
if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="Bloom-560m")
parser.add_argument("-p", "--model_path")
parser.add_argument("-dl", "--device_list", default=None)
parser.add_argument("-de", "--device", default="cpu")
parser.add_argument("-c", "--recompile", default=False, type=bool)
parser.add_argument("-d", "--download", default=False, type=bool)
parser.add_argument("-t", "--token_count", default=10, type=int)
parser.add_argument("-m", "--model_name", default="bloom-560m")
parser.add_argument("-cm", "--create_mlirs", default=False, type=bool)
parser.add_argument(
"-lm", "--large_model_memory_efficient", default=False, type=bool
)
parser.add_argument(
"-pr",
"--prompt",
default=None,
)
args = parser.parse_args()
if args.create_mlirs and args.large_model_memory_efficient:
print(
"Warning: If you need to use memory efficient mode, you probably want to use 'download' instead"
)
if not os.path.isdir(args.model_path):
os.mkdir(args.model_path)
if args.device_list is not None:
args.device_list = json.loads(args.device_list)
if args.device == "cuda" and args.device_list is not None:
IS_CUDA = True
from cuda.cudart import cudaSetDevice
if args.download and args.create_mlirs:
print(
"WARNING: It is not advised to turn on both download and create_mlirs"
)
if args.download:
download_model(args.model_path, args.model_name)
if args.create_mlirs:
create_mlirs(args.model_path, args.model_name)
from transformers import AutoTokenizer, AutoModelForCausalLM, BloomConfig
tokenizer = AutoTokenizer.from_pretrained(args.model_path)
if args.prompt is not None:
input_ids = tokenizer.encode(args.prompt, return_tensors="pt")
if args.large_model_memory_efficient:
f = open(f"{args.model_path}/config.json")
config = json.load(f)
f.close()
self_path = os.path.dirname(os.path.abspath(__file__))
script_path = os.path.join(self_path, "sharded_bloom_large_models.py")
if args.prompt is not None:
run_large_model(
args.token_count,
args.recompile,
args.model_path,
args.prompt,
args.device_list,
script_path,
args.device,
)
else:
while True:
prompt = input("Enter Prompt: ")
try:
token_count = int(
input("Enter number of tokens you want to generate: ")
)
except ValueError:
print("Invalid integer entered. Using default value of 10")
token_count = 10
run_large_model(
token_count,
args.recompile,
args.model_path,
prompt,
args.device_list,
script_path,
args.device,
)
else:
shardedbloom = ShardedBloom(args.model_path)
shardedbloom.init_layers(
device=args.device,
replace=args.recompile,
device_idx=args.device_list,
)
shardedbloom.load_layers()
if args.prompt is not None:
for _ in range(args.token_count):
next_token = shardedbloom.forward_pass(
input_ids, device=args.device
)
input_ids = torch.cat(
[input_ids, next_token.unsqueeze(-1)], dim=-1
)
print(tokenizer.decode(input_ids.squeeze()))
else:
while True:
prompt = input("Enter Prompt: ")
try:
token_count = int(
input("Enter number of tokens you want to generate: ")
)
except ValueError:
print("Invalid integer entered. Using default value of 10")
token_count = 10
input_ids = tokenizer.encode(prompt, return_tensors="pt")
for _ in range(token_count):
next_token = shardedbloom.forward_pass(
input_ids, device=args.device
)
input_ids = torch.cat(
[input_ids, next_token.unsqueeze(-1)], dim=-1
)
print(tokenizer.decode(input_ids.squeeze()))


@@ -1,381 +0,0 @@
import sys
import os
import json
from collections import OrderedDict
import torch
import torch.nn as nn
from transformers import AutoTokenizer
from transformers.models.bloom.modeling_bloom import build_alibi_tensor
from shark.shark_inference import SharkInference
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: int = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
batch_size, source_length = mask.size()
tgt_len = tgt_len if tgt_len is not None else source_length
expanded_mask = (
mask[:, None, None, :]
.expand(batch_size, 1, tgt_len, source_length)
.to(dtype)
)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(
inverted_mask.to(torch.bool), torch.finfo(dtype).min
)
def _prepare_attn_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
past_key_values_length=past_key_values_length,
).to(attention_mask.device)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(
attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
)
combined_attention_mask = (
expanded_attn_mask
if combined_attention_mask is None
else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
def _make_causal_mask(
input_ids_shape: torch.Size,
dtype: torch.dtype,
past_key_values_length: int = 0,
):
"""
Make causal mask used for bi-directional self-attention.
"""
batch_size, target_length = input_ids_shape
mask = torch.full((target_length, target_length), torch.finfo(dtype).min)
mask_cond = torch.arange(mask.size(-1))
intermediate_mask = mask_cond < (mask_cond + 1).view(mask.size(-1), 1)
mask.masked_fill_(intermediate_mask, 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat(
[
torch.zeros(
target_length, past_key_values_length, dtype=dtype
),
mask,
],
dim=-1,
)
expanded_mask = mask[None, None, :, :].expand(
batch_size, 1, target_length, target_length + past_key_values_length
)
return expanded_mask
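# Illustrative check (not part of the original file): for input_ids_shape
# (1, 3) and past_key_values_length=0, the result has shape (1, 1, 3, 3),
# with 0 at positions j <= i and torch.finfo(dtype).min above the diagonal,
# so each token can only attend to itself and earlier positions.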
if __name__ == "__main__":
working_dir = sys.argv[1]
layer_name = sys.argv[2]
will_compile = sys.argv[3]
device = sys.argv[4]
device_idx = sys.argv[5]
prompt = sys.argv[6]
if device_idx.lower().strip() == "none":
device_idx = None
else:
device_idx = int(device_idx)
if will_compile.lower().strip() == "true":
will_compile = True
else:
will_compile = False
f = open(f"{working_dir}/config.json")
config = json.load(f)
f.close()
layers_initialized = False
try:
n_embed = config["n_embed"]
except KeyError:
n_embed = config["hidden_size"]
vocab_size = config["vocab_size"]
n_layer = config["n_layer"]
try:
n_head = config["num_attention_heads"]
except KeyError:
n_head = config["n_head"]
if not os.path.isdir(working_dir):
os.mkdir(working_dir)
if layer_name == "start":
tokenizer = AutoTokenizer.from_pretrained(working_dir)
input_ids = tokenizer.encode(prompt, return_tensors="pt")
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/word_embeddings.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/word_embeddings",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/word_embeddings.vmfb")
input_embeds = shark_module(
inputs=(input_ids,), function_name="forward"
)
input_embeds = torch.tensor(input_embeds).float()
mlir_str = ""
if will_compile:
f = open(
f"{working_dir}/word_embeddings_layernorm.mlir",
encoding="utf-8",
)
mlir_str = f.read()
f.close()
# Convert to bytes for consistency with the other SharkInference call sites.
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/word_embeddings_layernorm",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(
f"{working_dir}/word_embeddings_layernorm.vmfb"
)
hidden_states = shark_module(
inputs=(input_embeds,), function_name="forward"
)
hidden_states = torch.tensor(hidden_states).float()
torch.save(hidden_states, f"{working_dir}/hidden_states_0.pt")
attention_mask = torch.ones(
[hidden_states.shape[0], len(input_ids[0])]
).float()
alibi = build_alibi_tensor(
attention_mask,
n_head,
hidden_states.dtype,
device="cpu",
)
torch.save(alibi, f"{working_dir}/alibi.pt")
causal_mask = _prepare_attn_mask(
attention_mask, input_ids.size(), input_embeds, 0
)
causal_mask = causal_mask.float()
torch.save(causal_mask, f"{working_dir}/causal_mask.pt")
elif layer_name in [str(x) for x in range(n_layer)]:
hidden_states = torch.load(
f"{working_dir}/hidden_states_{layer_name}.pt"
)
alibi = torch.load(f"{working_dir}/alibi.pt")
causal_mask = torch.load(f"{working_dir}/causal_mask.pt")
mlir_str = ""
if will_compile:
f = open(
f"{working_dir}/bloom_block_{layer_name}.mlir",
encoding="utf-8",
)
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device=device,
mlir_dialect="tm_tensor",
device_idx=device_idx,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/bloom_block_{layer_name}",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(
f"{working_dir}/bloom_block_{layer_name}.vmfb"
)
output = shark_module(
inputs=(
hidden_states.detach().numpy(),
alibi.detach().numpy(),
causal_mask.detach().numpy(),
),
function_name="forward",
)
hidden_states = torch.tensor(output[0]).float()
torch.save(
hidden_states,
f"{working_dir}/hidden_states_{int(layer_name) + 1}.pt",
)
elif layer_name == "end":
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/ln_f.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/ln_f",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/ln_f.vmfb")
hidden_states = torch.load(f"{working_dir}/hidden_states_{n_layer}.pt")
hidden_states = shark_module(
inputs=(hidden_states,), function_name="forward"
)
mlir_str = ""
if will_compile:
f = open(f"{working_dir}/lm_head.mlir", encoding="utf-8")
mlir_str = f.read()
f.close()
mlir_str = bytes(mlir_str, "utf-8")
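# n_embed == 14336 identifies the full 176B Bloom checkpoint, whose lm_head
# is impractical to run through the compiled module here. Assuming Bloom's
# usual tying of lm_head to the word embeddings, the embedding weights from
# the first checkpoint shard are loaded into a plain nn.Linear instead.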
if config["n_embed"] == 14336:
def get_state_dict():
d = torch.load(
f"{working_dir}/pytorch_model_00001-of-00072.bin"
)
return OrderedDict(
(k.replace("word_embeddings.", ""), v)
for k, v in d.items()
)
def load_causal_lm_head():
linear = nn.utils.skip_init(
nn.Linear, 14336, 250880, bias=False, dtype=torch.float
)
linear.load_state_dict(get_state_dict(), strict=False)
return linear.float()
lm_head = load_causal_lm_head()
logits = lm_head(torch.tensor(hidden_states).float())
else:
shark_module = SharkInference(
mlir_str,
device="cpu",
mlir_dialect="tm_tensor",
device_idx=None,
)
if will_compile:
shark_module.save_module(
module_name=f"{working_dir}/lm_head",
extra_args=[
"--iree-vm-bytecode-module-output-format=flatbuffer-binary",
"--iree-stream-resource-max-allocation-size=1000000000",
"--iree-codegen-check-ir-before-llvm-conversion=false",
],
)
shark_module.load_module(f"{working_dir}/lm_head.vmfb")
logits = shark_module(
inputs=(hidden_states,), function_name="forward"
)
logits = torch.tensor(logits).float()
tokenizer = AutoTokenizer.from_pretrained(working_dir)
next_token = tokenizer.decode(torch.argmax(logits[:, -1, :], dim=-1))
f = open(f"{working_dir}/prompt.txt", "w+")
f.write(prompt + next_token)
f.close()


@@ -1,390 +0,0 @@
# Description: an implementation of a deep learning recommendation model (DLRM)
# The model input consists of dense and sparse features. The former is a vector
# of floating point values. The latter is a list of sparse indices into
# embedding tables, which consist of vectors of floating point values.
# The selected vectors are passed to mlp networks denoted by triangles,
# in some cases the vectors are interacted through operators (Ops).
#
# output:
# vector of values
# model: |
# /\
# /__\
# |
# _____________________> Op <___________________
# / | \
# /\ /\ /\
# /__\ /__\ ... /__\
# | | |
# | Op Op
# | ____/__\_____ ____/__\____
# | |_Emb_|____|__| ... |_Emb_|__|___|
# input:
# [ dense features ] [sparse indices] , ..., [sparse indices]
#
# More precise definition of model layers:
# 1) fully connected layers of an mlp
# z = f(y)
# y = Wx + b
#
# 2) embedding lookup (for a list of sparse indices p=[p1,...,pk])
# z = Op(e1,...,ek)
# obtain vectors e1=E[:,p1], ..., ek=E[:,pk]
#
# 3) Operator Op can be one of the following
# Sum(e1,...,ek) = e1 + ... + ek
# Dot(e1,...,ek) = [e1'e1, ..., e1'ek, ..., ek'e1, ..., ek'ek]
# Cat(e1,...,ek) = [e1', ..., ek']'
# where ' denotes transpose operation
#
# References:
# [1] Maxim Naumov, Dheevatsa Mudigere, Hao-Jun Michael Shi, Jianyu Huang,
# Narayanan Sundaram, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu,
# Alisson G. Azzolini, Dmytro Dzhulgakov, Andrey Mallevich, Ilia Cherniavskii,
# Yinghai Lu, Raghuraman Krishnamoorthi, Ansha Yu, Volodymyr Kondratenko,
# Stephanie Pereira, Xianjie Chen, Wenlin Chen, Vijay Rao, Bill Jia, Liang Xiong,
# Misha Smelyanskiy, "Deep Learning Recommendation Model for Personalization and
# Recommendation Systems", CoRR, arXiv:1906.00091, 2019
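# Worked toy example of the interaction Ops above (illustrative only):
# for e1 = [1, 0] and e2 = [2, 3],
#   Sum(e1, e2) = [3, 3]
#   Dot(e1, e2) = [e1'e1, e1'e2, e2'e1, e2'e2] = [1, 2, 2, 13]
#   Cat(e1, e2) = [1, 0, 2, 3]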
import argparse
import sys
import numpy as np
import torch
import torch.nn as nn
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
torch.manual_seed(0)
np.random.seed(0)
### define dlrm in PyTorch ###
class DLRM_Net(nn.Module):
def create_mlp(self, ln, sigmoid_layer):
# build MLP layer by layer
layers = nn.ModuleList()
for i in range(0, ln.size - 1):
n = ln[i]
m = ln[i + 1]
# construct fully connected operator
LL = nn.Linear(int(n), int(m), bias=True)
# initialize the weights
# with torch.no_grad():
# custom Xavier input, output or two-sided fill
mean = 0.0 # std_dev = np.sqrt(variance)
std_dev = np.sqrt(2 / (m + n)) # np.sqrt(1 / m) # np.sqrt(1 / n)
W = np.random.normal(mean, std_dev, size=(m, n)).astype(np.float32)
std_dev = np.sqrt(1 / m) # np.sqrt(2 / (m + 1))
bt = np.random.normal(mean, std_dev, size=m).astype(np.float32)
LL.weight.data = torch.tensor(W, requires_grad=True)
LL.bias.data = torch.tensor(bt, requires_grad=True)
# approach 2
# LL.weight.data.copy_(torch.tensor(W))
# LL.bias.data.copy_(torch.tensor(bt))
# approach 3
# LL.weight = Parameter(torch.tensor(W),requires_grad=True)
# LL.bias = Parameter(torch.tensor(bt),requires_grad=True)
layers.append(LL)
# construct sigmoid or relu operator
if i == sigmoid_layer:
layers.append(nn.Sigmoid())
else:
layers.append(nn.ReLU())
# approach 1: use ModuleList
# return layers
# approach 2: use Sequential container to wrap all layers
return torch.nn.Sequential(*layers)
def create_emb(self, m, ln, weighted_pooling=None):
emb_l = nn.ModuleList()
v_W_l = []
for i in range(0, ln.size):
n = ln[i]
# construct embedding operator
EE = nn.EmbeddingBag(n, m, mode="sum")
# initialize embeddings
# nn.init.uniform_(EE.weight, a=-np.sqrt(1 / n), b=np.sqrt(1 / n))
W = np.random.uniform(
low=-np.sqrt(1 / n), high=np.sqrt(1 / n), size=(n, m)
).astype(np.float32)
# approach 1
# print(W)  # debug: dump the randomly initialized embedding table
EE.weight.data = torch.tensor(W, requires_grad=True)
# approach 2
# EE.weight.data.copy_(torch.tensor(W))
# approach 3
# EE.weight = Parameter(torch.tensor(W),requires_grad=True)
if weighted_pooling is None:
v_W_l.append(None)
else:
v_W_l.append(torch.ones(n, dtype=torch.float32))
emb_l.append(EE)
return emb_l, v_W_l
def __init__(
self,
m_spa=None,
ln_emb=None,
ln_bot=None,
ln_top=None,
arch_interaction_op=None,
arch_interaction_itself=False,
sigmoid_bot=-1,
sigmoid_top=-1,
weighted_pooling=None,
):
super(DLRM_Net, self).__init__()
if (
(m_spa is not None)
and (ln_emb is not None)
and (ln_bot is not None)
and (ln_top is not None)
and (arch_interaction_op is not None)
):
# save arguments
self.output_d = 0
self.arch_interaction_op = arch_interaction_op
self.arch_interaction_itself = arch_interaction_itself
if weighted_pooling is not None and weighted_pooling != "fixed":
self.weighted_pooling = "learned"
else:
self.weighted_pooling = weighted_pooling
# create operators
self.emb_l, w_list = self.create_emb(
m_spa, ln_emb, weighted_pooling
)
if self.weighted_pooling == "learned":
self.v_W_l = nn.ParameterList()
for w in w_list:
self.v_W_l.append(nn.Parameter(w))
else:
self.v_W_l = w_list
self.bot_l = self.create_mlp(ln_bot, sigmoid_bot)
self.top_l = self.create_mlp(ln_top, sigmoid_top)
def apply_mlp(self, x, layers):
return layers(x)
def apply_emb(self, lS_o, lS_i, emb_l, v_W_l):
# WARNING: notice that we are processing the batch at once. We implicitly
# assume that the data is laid out such that:
# 1. each embedding is indexed with a group of sparse indices,
# corresponding to a single lookup
# 2. for each embedding the lookups are further organized into a batch
# 3. for a list of embedding tables there is a list of batched lookups
# TORCH-MLIR
# We are passing all the embeddings as arguments for easy parsing.
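# Illustrative layout (EmbeddingBag semantics): lS_i[k] = tensor([1, 2, 3, 4])
# with lS_o[k] = tensor([0, 2]) encodes a batch of two pooled lookups into
# table k: indices [1, 2] and indices [3, 4].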
ly = []
for k, sparse_index_group_batch in enumerate(lS_i):
sparse_offset_group_batch = lS_o[k]
# embedding lookup
# We are using EmbeddingBag, which implicitly uses sum operator.
# The embeddings are represented as tall matrices, with sum
# happening vertically across 0 axis, resulting in a row vector
# E = emb_l[k]
if v_W_l[k] is not None:
per_sample_weights = v_W_l[k].gather(
0, sparse_index_group_batch
)
else:
per_sample_weights = None
E = emb_l[k]
V = E(
sparse_index_group_batch,
sparse_offset_group_batch,
per_sample_weights=per_sample_weights,
)
ly.append(V)
return ly
def interact_features(self, x, ly):
if self.arch_interaction_op == "dot":
# concatenate dense and sparse features
(batch_size, d) = x.shape
T = torch.cat([x] + ly, dim=1).view((batch_size, -1, d))
# perform a dot product
Z = torch.bmm(T, torch.transpose(T, 1, 2))
# append dense feature with the interactions (into a row vector)
# approach 1: all
# Zflat = Z.view((batch_size, -1))
# approach 2: unique
_, ni, nj = Z.shape
# approach 1: tril_indices
# offset = 0 if self.arch_interaction_itself else -1
# li, lj = torch.tril_indices(ni, nj, offset=offset)
# approach 2: custom
offset = 1 if self.arch_interaction_itself else 0
li = torch.tensor(
[i for i in range(ni) for j in range(i + offset)]
)
lj = torch.tensor(
[j for i in range(nj) for j in range(i + offset)]
)
Zflat = Z[:, li, lj]
# concatenate dense features and interactions
R = torch.cat([x] + [Zflat], dim=1)
elif self.arch_interaction_op == "cat":
# concatenation features (into a row vector)
R = torch.cat([x] + ly, dim=1)
else:
sys.exit(
"ERROR: --arch-interaction-op="
+ self.arch_interaction_op
+ " is not supported"
)
return R
def forward(self, dense_x, lS_o, *lS_i):
return self.sequential_forward(dense_x, lS_o, lS_i)
def sequential_forward(self, dense_x, lS_o, lS_i):
# process dense features (using bottom mlp), resulting in a row vector
x = self.apply_mlp(dense_x, self.bot_l)
# debug prints
# print("intermediate")
# print(x.detach().cpu().numpy())
# process sparse features(using embeddings), resulting in a list of row vectors
ly = self.apply_emb(lS_o, lS_i, self.emb_l, self.v_W_l)
# for y in ly:
# print(y.detach().cpu().numpy())
# interact features (dense and sparse)
z = self.interact_features(x, ly)
# print(z.detach().cpu().numpy())
# obtain probability of a click (using top mlp)
p = self.apply_mlp(z, self.top_l)
# # clamp output if needed
# if 0.0 < self.loss_threshold and self.loss_threshold < 1.0:
# z = torch.clamp(p, min=self.loss_threshold, max=(1.0 - self.loss_threshold))
# else:
# z = p
return p
def dash_separated_ints(value):
vals = value.split("-")
for val in vals:
try:
int(val)
except ValueError:
raise argparse.ArgumentTypeError(
"%s is not a valid dash separated list of ints" % value
)
return value
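# e.g. dash_separated_ints("4-3-2") validates and returns "4-3-2" unchanged,
# while "4-x-2" raises an ArgumentTypeError.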
# model related parameters
parser = argparse.ArgumentParser(
description="Train Deep Learning Recommendation Model (DLRM)"
)
parser.add_argument("--arch-sparse-feature-size", type=int, default=2)
parser.add_argument(
"--arch-embedding-size", type=dash_separated_ints, default="4-3-2"
)
# j will be replaced with the table number
parser.add_argument(
"--arch-mlp-bot", type=dash_separated_ints, default="4-3-2"
)
parser.add_argument(
"--arch-mlp-top", type=dash_separated_ints, default="8-2-1"
)
parser.add_argument(
"--arch-interaction-op", type=str, choices=["dot", "cat"], default="dot"
)
parser.add_argument(
"--arch-interaction-itself", action="store_true", default=False
)
parser.add_argument("--weighted-pooling", type=str, default=None)
args = parser.parse_args()
# np.fromstring is deprecated for text parsing, so split the dash-separated
# strings explicitly.
ln_bot = np.array([int(v) for v in args.arch_mlp_bot.split("-")])
ln_top = np.array([int(v) for v in args.arch_mlp_top.split("-")])
m_den = ln_bot[0]
ln_emb = np.array([int(v) for v in args.arch_embedding_size.split("-")])
m_spa = args.arch_sparse_feature_size
num_fea = ln_emb.size + 1 # num sparse + num dense features
# Initialize the model.
dlrm_model = DLRM_Net(
m_spa=m_spa,
ln_emb=ln_emb,
ln_bot=ln_bot,
ln_top=ln_top,
arch_interaction_op=args.arch_interaction_op,
)
# Inputs to the model.
dense_inp = torch.tensor([[0.6965, 0.2861, 0.2269, 0.5513]])
vs0 = torch.tensor([[0], [0], [0]], dtype=torch.int64)
vsi = torch.tensor([1, 2, 3]), torch.tensor([1]), torch.tensor([1])
input_dlrm = (dense_inp, vs0, *vsi)
golden_output = dlrm_model(dense_inp, vs0, *vsi)
mlir_importer = SharkImporter(
dlrm_model,
input_dlrm,
frontend="torch",
)
(dlrm_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=True
)
shark_module = SharkInference(
dlrm_mlir, device="vulkan", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(input_dlrm)
np.testing.assert_allclose(
golden_output.detach().numpy(), result, rtol=1e-02, atol=1e-03
)
# Verified via torch-mlir.
# import torch_mlir
# from torch_mlir_e2e_test.linalg_on_tensors_backends import refbackend
# module = torch_mlir.compile(
# dlrm_model, inputs, use_tracing=True, output_type="linalg-on-tensors"
# )
# backend = refbackend.RefBackendLinalgOnTensorsBackend()
# compiled = backend.compile(module)
# jit_module = backend.load(compiled)
# dense_numpy = dense_inp.numpy()
# vs0_numpy = vs0.numpy()
# vsi_numpy = [inp.numpy() for inp in vsi]
# numpy_inp = (dense_numpy, vs0_numpy, *vsi_numpy)
# print(jit_module.forward(*numpy_inp))


@@ -1,311 +0,0 @@
import torch
from torch import nn
from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
from torchrec.modules.embedding_configs import EmbeddingBagConfig
from torchrec.modules.embedding_modules import EmbeddingBagCollection
from typing import List
from torchrec.models.dlrm import (
choose,
DenseArch,
DLRM,
InteractionArch,
SparseArch,
OverArch,
)
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
import numpy as np
torch.manual_seed(0)
np.random.seed(0)
def calculate_offsets(tensor_list, prev_values, prev_offsets):
offset_init = 0
offset_list = []
if prev_offsets is not None:
offset_init = prev_values.shape[-1]
for tensor in tensor_list:
offset_list.append(offset_init)
offset_init += tensor.shape[0]
concatenated_tensor_list = torch.cat(tensor_list)
if prev_values is not None:
concatenated_tensor_list = torch.cat(
[prev_values, concatenated_tensor_list]
)
concatenated_offsets = torch.tensor(offset_list)
if prev_offsets is not None:
concatenated_offsets = torch.cat([prev_offsets, concatenated_offsets])
return concatenated_tensor_list, concatenated_offsets
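# Example (illustrative): calculate_offsets([torch.ones(3), torch.ones(2)],
# None, None) returns a concatenated length-5 values tensor together with
# offsets tensor([0, 3]), marking where each source tensor begins.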
# combined_keys maps each feature key to the embedding bag it uses,
# e.g. {"f1": 0, "f3": 0, "f2": 1}.
# The result is, per key, a (values, offsets, bag pointer) triple.
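# Flattened example (illustrative): SparseArchShark.forward consumes the
# returned list three elements at a time, so {"f1": 0, "f3": 0, "f2": 1}
# yields [vals_f1, offs_f1, 0, vals_f3, offs_f3, 0, vals_f2, offs_f2, 1].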
def to_list(key_jagged, combined_keys):
key_jagged_dict = key_jagged.to_dict()
combined_list = []
for key in combined_keys:
prev_values, prev_offsets = calculate_offsets(
key_jagged_dict[key].to_dense(), None, None
)
# print(prev_values)  # debug
# print(prev_offsets)  # debug
combined_list.append(prev_values)
combined_list.append(prev_offsets)
combined_list.append(torch.tensor(combined_keys[key]))
return combined_list
class SparseArchShark(nn.Module):
def create_emb(self, embedding_dim, num_embeddings_list):
embedding_list = nn.ModuleList()
for i in range(0, num_embeddings_list.size):
num_embeddings = num_embeddings_list[i]
EE = nn.EmbeddingBag(num_embeddings, embedding_dim, mode="sum")
W = np.random.uniform(
low=-np.sqrt(1 / num_embeddings),
high=np.sqrt(1 / num_embeddings),
size=(num_embeddings, embedding_dim),
).astype(np.float32)
EE.weight.data = torch.tensor(W, requires_grad=True)
embedding_list.append(EE)
return embedding_list
def __init__(
self,
embedding_dim,
total_features,
num_embeddings_list,
):
super(SparseArchShark, self).__init__()
self.embedding_dim = embedding_dim
self.num_features = total_features
self.embedding_list = self.create_emb(
embedding_dim, num_embeddings_list
)
def forward(self, *batched_inputs):
concatenated_list = []
input_enum, embedding_enum = 0, 0
for k in range(len(batched_inputs) // 3):
values = batched_inputs[input_enum]
input_enum += 1
offsets = batched_inputs[input_enum]
input_enum += 1
embedding_pointer = int(batched_inputs[input_enum])
input_enum += 1
E = self.embedding_list[embedding_pointer]
V = E(values, offsets)
concatenated_list.append(V)
return torch.cat(concatenated_list, dim=1).reshape(
-1, self.num_features, self.embedding_dim
)
def test_sparse_arch() -> None:
D = 3
eb1_config = EmbeddingBagConfig(
name="t1",
embedding_dim=D,
num_embeddings=10,
feature_names=["f1", "f3"],
)
eb2_config = EmbeddingBagConfig(
name="t2",
embedding_dim=D,
num_embeddings=10,
feature_names=["f2"],
)
ebc = EmbeddingBagCollection(tables=[eb1_config, eb2_config])
w1 = ebc.embedding_bags["t1"].weight
w2 = ebc.embedding_bags["t2"].weight
sparse_arch = SparseArch(ebc)
keys = ["f1", "f2", "f3", "f4", "f5"]
offsets = torch.tensor([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 19])
features = KeyedJaggedTensor.from_offsets_sync(
keys=keys,
values=torch.tensor(
[1, 2, 4, 5, 4, 3, 2, 9, 1, 2, 4, 5, 4, 3, 2, 9, 1, 2, 3]
),
offsets=offsets,
)
sparse_archi = SparseArchShark(D, 3, np.array([10, 10]))
sparse_archi.embedding_list[0].weight = w1
sparse_archi.embedding_list[1].weight = w2
inputs = to_list(features, {"f1": 0, "f3": 0, "f2": 1})
test_results = sparse_archi(*inputs)
sparse_features = sparse_arch(features)
assert torch.allclose(
sparse_features,
test_results,
rtol=1e-4,
atol=1e-4,
)
test_sparse_arch()
class DLRMShark(nn.Module):
def __init__(
self,
embedding_dim,
total_features,
num_embeddings_list,
dense_in_features: int,
dense_arch_layer_sizes: List[int],
over_arch_layer_sizes: List[int],
) -> None:
super().__init__()
self.sparse_arch: SparseArchShark = SparseArchShark(
embedding_dim, total_features, num_embeddings_list
)
num_sparse_features: int = total_features
self.dense_arch = DenseArch(
in_features=dense_in_features,
layer_sizes=dense_arch_layer_sizes,
)
self.inter_arch = InteractionArch(
num_sparse_features=num_sparse_features,
)
over_in_features: int = (
embedding_dim
+ choose(num_sparse_features, 2)
+ num_sparse_features
)
self.over_arch = OverArch(
in_features=over_in_features,
layer_sizes=over_arch_layer_sizes,
)
def forward(
self, dense_features: torch.Tensor, *sparse_features
) -> torch.Tensor:
embedded_dense = self.dense_arch(dense_features)
embedded_sparse = self.sparse_arch(*sparse_features)
concatenated_dense = self.inter_arch(
dense_features=embedded_dense, sparse_features=embedded_sparse
)
logits = self.over_arch(concatenated_dense)
return logits
def test_dlrm() -> None:
B = 2
D = 8
dense_in_features = 100
eb1_config = EmbeddingBagConfig(
name="t1",
embedding_dim=D,
num_embeddings=100,
feature_names=["f1", "f3"],
)
eb2_config = EmbeddingBagConfig(
name="t2",
embedding_dim=D,
num_embeddings=100,
feature_names=["f2"],
)
ebc = EmbeddingBagCollection(tables=[eb1_config, eb2_config])
sparse_features = KeyedJaggedTensor.from_offsets_sync(
keys=["f1", "f3", "f2"],
values=torch.tensor([1, 2, 4, 5, 4, 3, 2, 9, 1, 2, 3]),
offsets=torch.tensor([0, 2, 4, 6, 8, 10, 11]),
)
sparse_nn = DLRM(
embedding_bag_collection=ebc,
dense_in_features=dense_in_features,
dense_arch_layer_sizes=[20, D],
over_arch_layer_sizes=[5, 1],
)
sparse_nn_nod = DLRMShark(
embedding_dim=8,
total_features=3,
num_embeddings_list=np.array([100, 100]),
dense_in_features=dense_in_features,
dense_arch_layer_sizes=[20, D],
over_arch_layer_sizes=[5, 1],
)
dense_features = torch.rand((B, dense_in_features))
x = to_list(sparse_features, {"f1": 0, "f3": 0, "f2": 1})
w1 = ebc.embedding_bags["t1"].weight
w2 = ebc.embedding_bags["t2"].weight
sparse_nn_nod.sparse_arch.embedding_list[0].weight = w1
sparse_nn_nod.sparse_arch.embedding_list[1].weight = w2
sparse_nn_nod.dense_arch.load_state_dict(sparse_nn.dense_arch.state_dict())
sparse_nn_nod.inter_arch.load_state_dict(sparse_nn.inter_arch.state_dict())
sparse_nn_nod.over_arch.load_state_dict(sparse_nn.over_arch.state_dict())
logits = sparse_nn(
dense_features=dense_features,
sparse_features=sparse_features,
)
logits_nod = sparse_nn_nod(dense_features, *x)
# print(logits)
# print(logits_nod)
# Import the module and print.
mlir_importer = SharkImporter(
sparse_nn_nod,
(dense_features, *x),
frontend="torch",
)
(dlrm_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=True
)
shark_module = SharkInference(
dlrm_mlir, device="cpu", mlir_dialect="linalg"
)
shark_module.compile()
result = shark_module.forward(inputs)
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)
assert torch.allclose(
logits,
logits_nod,
rtol=1e-4,
atol=1e-4,
)
test_dlrm()


@@ -1,35 +0,0 @@
from transformers import T5Tokenizer, TFT5Model
import tensorflow as tf
from shark.shark_inference import SharkInference
# Create a set of inputs
t5_inputs = [
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
tf.TensorSpec(shape=[1, 10], dtype=tf.int32),
]
class T5Module(tf.Module):
def __init__(self):
super(T5Module, self).__init__()
self.m = TFT5Model.from_pretrained("t5-small")
self.m.predict = lambda x, y: self.m(input_ids=x, decoder_input_ids=y)
@tf.function(input_signature=t5_inputs, jit_compile=True)
def forward(self, input_ids, decoder_input_ids):
return self.m.predict(input_ids, decoder_input_ids)
if __name__ == "__main__":
# Prepping Data
tokenizer = T5Tokenizer.from_pretrained("t5-small")
text = "I love the distilled version of models."
inputs = tokenizer(text, return_tensors="tf").input_ids
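# Note: the tf.function signature above fixes the sequence length at 10, so
# other prompts would likely need explicit padding/truncation, e.g.
# tokenizer(text, return_tensors="tf", padding="max_length", max_length=10,
# truncation=True).input_ids (a hedged suggestion, not in the original).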
shark_module = SharkInference(T5Module(), (inputs, inputs))
shark_module.set_frontend("tensorflow")
shark_module.compile()
print(shark_module.forward((inputs, inputs)))


@@ -1,43 +0,0 @@
import torch
import torchvision.models as models
from shark.shark_inference import SharkInference
class VisionModule(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.model = model
self.train(False)
def forward(self, input):
return self.model.forward(input)
input = torch.randn(1, 3, 224, 224)
## The vision models present here: https://pytorch.org/vision/stable/models.html
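# Note: torchvision >= 0.13 prefers the weights= argument; pretrained=True
# still works there but emits a deprecation warning.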
vision_models_list = [
models.resnet18(pretrained=True),
models.alexnet(pretrained=True),
models.vgg16(pretrained=True),
models.squeezenet1_0(pretrained=True),
models.densenet161(pretrained=True),
models.inception_v3(pretrained=True),
models.shufflenet_v2_x1_0(pretrained=True),
models.mobilenet_v2(pretrained=True),
models.mobilenet_v3_small(pretrained=True),
models.resnext50_32x4d(pretrained=True),
models.wide_resnet50_2(pretrained=True),
models.mnasnet1_0(pretrained=True),
models.efficientnet_b0(pretrained=True),
models.regnet_y_400mf(pretrained=True),
models.regnet_x_400mf(pretrained=True),
]
for i, vision_model in enumerate(vision_models_list):
shark_module = SharkInference(
VisionModule(vision_model),
(input,),
)
shark_module.compile()
shark_module.forward((input,))


@@ -1,39 +0,0 @@
import torch
import numpy as np
from shark.shark_inference import SharkInference
from shark.shark_importer import SharkImporter
class UnetModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.model = torch.hub.load(
"mateuszbuda/brain-segmentation-pytorch",
"unet",
in_channels=3,
out_channels=1,
init_features=32,
pretrained=True,
)
self.model.eval()
def forward(self, input):
return self.model(input)
input = torch.randn(1, 3, 224, 224)
mlir_importer = SharkImporter(
UnetModule(),
(input,),
frontend="torch",
)
(vision_mlir, func_name), inputs, golden_out = mlir_importer.import_debug(
tracing_required=False
)
shark_module = SharkInference(vision_mlir, mlir_dialect="linalg")
shark_module.compile()
result = shark_module.forward((input,))
np.testing.assert_allclose(golden_out, result, rtol=1e-02, atol=1e-03)


@@ -1,21 +0,0 @@
import requests
from PIL import Image
from io import BytesIO
from pipeline_shark_stable_diffusion_upscale import (
SharkStableDiffusionUpscalePipeline,
)
model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipeline = SharkStableDiffusionUpscalePipeline(model_id)
# let's download an image
url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png"
response = requests.get(url)
low_res_img = Image.open(BytesIO(response.content)).convert("RGB")
low_res_img = low_res_img.resize((128, 128))
prompt = "a white cat"
upscaled_image = pipeline(prompt=prompt, image=low_res_img).images[0]
upscaled_image.save("upsampled_cat.png")

Some files were not shown because too many files have changed in this diff.