Revert python version change and update actions/setup-python to v5

Change python version in nightly .yml to 3.11.9
REST API support and cleanup
2026-01-11 14:58:11 -05:00 · 2024-08-08 16:50:15 -05:00 · 2024-08-08 16:48:41 -05:00 · 2024-08-08 11:37:53 -05:00 · 2024-06-17 18:16:44 -05:00 · 2024-06-17 17:57:40 -05:00
165 changed files with 7181 additions and 26637 deletions
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -19,7 +19,7 @@ jobs:
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
+      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

@@ -46,17 +46,18 @@ jobs:
        draft: true
        prerelease: true

-    - name: Build Package 
+    - name: Build Package (api only)
      shell: powershell
      run: |
        ./setup_venv.ps1
-        $env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
-        pip wheel -v -w dist . --pre -f https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html
        python process_skipfiles.py
-        pyinstaller .\apps\stable_diffusion\shark_sd.spec
+        $env:SHARK_PACKAGE_VERSION=${{ env.package_version }}
+        pip install -e .
+        pip freeze -l
+        pyinstaller .\apps\shark_studio\shark_studio_apionly.spec
        mv ./dist/nodai_shark_studio.exe ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
        signtool sign /f c:\g\shark_02152023.cer /fd certHash /csp "eToken Base Cryptographic Provider" /k "${{ secrets.CI_CERT }}" ./dist/nodai_shark_studio_${{ env.package_version_ }}.exe
-  
+
    - name: Upload Release Assets
      id: upload-release-assets
      uses: dwenegar/upload-release-assets@v1
@@ -74,80 +75,3 @@ jobs:
        GITHUB_TOKEN: ${{ secrets.NODAI_INVOCATION_TOKEN }}
      with:
        release_id: ${{ steps.create_release.outputs.id }}
-
-  linux-build:
-
-    runs-on: a100
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.11"]
-        backend: [IREE, SHARK]
-
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    
-    - name: Setup pip cache
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
-
-    - name: Install dependencies
-      run: |
-        echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
-        python -m pip install --upgrade pip
-        python -m pip install flake8 pytest toml
-        if [ -f requirements.txt ]; then pip install -r requirements.txt -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html; fi
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude shark.venv,lit.cfg.py 
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude shark.venv,lit.cfg.py 
-    - name: Build and validate the IREE package
-      if: ${{ matrix.backend == 'IREE' }}
-      continue-on-error: true
-      run: |
-        cd $GITHUB_WORKSPACE
-        USE_IREE=1 VENV_DIR=iree.venv ./setup_venv.sh
-        source iree.venv/bin/activate
-        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
-        SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://openxla.github.io/iree/pip-release-links.html
-        # Install the built wheel
-        pip install ./wheelhouse/nodai*
-        # Validate the Models
-        /bin/bash "$GITHUB_WORKSPACE/build_tools/populate_sharktank_ci.sh"
-        pytest --ci --ci_sha=${SHORT_SHA} --local_tank_cache="./gen_shark_tank/" -k "not metal" |
-          tail -n 1 |
-          tee -a pytest_results.txt
-        if !(grep -Fxq " failed" pytest_results.txt) 
-          then 
-            export SHA=$(git log -1 --format='%h')
-            gsutil -m cp -r $GITHUB_WORKSPACE/gen_shark_tank/* gs://shark_tank/${DATE}_$SHA
-            gsutil -m cp -r gs://shark_tank/${DATE}_$SHA/* gs://shark_tank/nightly/
-        fi
-        rm -rf ./wheelhouse/nodai*
-
-    - name: Build and validate the SHARK Runtime package
-      if: ${{ matrix.backend == 'SHARK' }}
-      run: |
-        cd $GITHUB_WORKSPACE
-        ./setup_venv.sh
-        source shark.venv/bin/activate
-        package_version="$(printf '%(%Y%m%d)T.${{ github.run_number }}')"
-        SHARK_PACKAGE_VERSION=${package_version} \
-        pip wheel -v -w wheelhouse . --pre -f https://download.pytorch.org/whl/nightly/torch -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html
-        # Install the built wheel
-        pip install ./wheelhouse/nodai*
-        # Validate the Models
-        pytest --ci --ci_sha=${SHORT_SHA} -k "not metal" |
-          tail -n 1 |
-          tee -a pytest_results.txt
--- a/.github/workflows/test-studio.yml
+++ b/.github/workflows/test-studio.yml
@@ -81,6 +81,5 @@ jobs:
        source shark.venv/bin/activate
        pip install -r requirements.txt --no-cache-dir
        pip install -e .
-        pip uninstall -y torch
-        pip install torch==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-        python apps/shark_studio/tests/api_test.py
+        # Disabled due to hang when exporting test llama2
+        # python apps/shark_studio/tests/api_test.py
--- a/.gitignore
+++ b/.gitignore
@@ -164,7 +164,7 @@ cython_debug/
 # vscode related
 .vscode

-# Shark related artefacts
+# Shark related artifacts
 *venv/
 shark_tmp/
 *.vmfb
@@ -172,6 +172,7 @@ shark_tmp/
 tank/dict_configs.py
 *.csv
 reproducers/
+apps/shark_studio/web/configs

 # ORT related artefacts
 cache_models/
@@ -188,6 +189,11 @@ variants.json
 # models folder
 apps/stable_diffusion/web/models/

+# model artifacts (SHARK)
+*.tempfile
+*.mlir
+*.vmfb
+
 # Stencil annotators.
 stencil_annotator/

--- a/README.md
+++ b/README.md
@@ -372,7 +372,7 @@ For a complete list of the models supported in SHARK, please refer to [tank/READ

 *   [Upstream IREE issues](https://github.com/google/iree/issues): Feature requests,
    bugs, and other work tracking
-*   [Upstream IREE Discord server](https://discord.gg/26P4xW4): Daily development
+*   [Upstream IREE Discord server](https://discord.gg/wEWh6Z9nMU): Daily development
    discussions with the core team and collaborators
 *   [iree-discuss email list](https://groups.google.com/forum/#!forum/iree-discuss):
    Announcements, general and low-priority discussion
--- a/apps/shark_studio/api/controlnet.py
+++ b/apps/shark_studio/api/controlnet.py
@@ -0,0 +1,107 @@
+# from turbine_models.custom_models.controlnet import control_adapter, preprocessors
+import os
+import PIL
+import numpy as np
+from apps.shark_studio.web.utils.file_utils import (
+    get_generated_imgs_path,
+)
+from datetime import datetime
+from PIL import Image
+from gradio.components.image_editor import (
+    EditorValue,
+)
+
+
+class control_adapter:
+    def __init__(
+        self,
+        model: str,
+    ):
+        self.model = None
+
+    def export_control_adapter_model(model_keyword):
+        return None
+
+    def export_xl_control_adapter_model(model_keyword):
+        return None
+
+
+class preprocessors:
+    def __init__(
+        self,
+        model: str,
+    ):
+        self.model = None
+
+    def export_controlnet_model(model_keyword):
+        return None
+
+
+control_adapter_map = {
+    "sd15": {
+        "canny": {"initializer": control_adapter.export_control_adapter_model},
+        "openpose": {"initializer": control_adapter.export_control_adapter_model},
+        "scribble": {"initializer": control_adapter.export_control_adapter_model},
+        "zoedepth": {"initializer": control_adapter.export_control_adapter_model},
+    },
+    "sdxl": {
+        "canny": {"initializer": control_adapter.export_xl_control_adapter_model},
+    },
+}
+preprocessor_model_map = {
+    "canny": {"initializer": preprocessors.export_controlnet_model},
+    "openpose": {"initializer": preprocessors.export_controlnet_model},
+    "scribble": {"initializer": preprocessors.export_controlnet_model},
+    "zoedepth": {"initializer": preprocessors.export_controlnet_model},
+}
+
+
+class PreprocessorModel:
+    def __init__(
+        self,
+        hf_model_id,
+        device="cpu",
+    ):
+        self.model = hf_model_id
+        self.device = device
+
+    def compile(self):
+        print("compile not implemented for preprocessor.")
+        return
+
+    def run(self, inputs):
+        print("run not implemented for preprocessor.")
+        return inputs
+
+
+def cnet_preview(model, input_image):
+    """Write a control-hint preview PNG for `model` and return (hint, path).
+
+    Unknown `model` names return (None, None); the hint folder is still created.
+    """
+    curr_datetime = datetime.now().strftime("%Y-%m-%d.%H-%M-%S")
+    control_imgs_path = os.path.join(get_generated_imgs_path(), "control_hints")
+    os.makedirs(control_imgs_path, exist_ok=True)  # also creates missing parents
+    img_dest = os.path.join(control_imgs_path, model + curr_datetime + ".png")
+    # NOTE(review): PreprocessorModel has no __call__; the calls below would raise.
+    match model:
+        case "canny":
+            canny = PreprocessorModel("canny")
+            result = canny(np.array(input_image), 100, 200)
+            Image.fromarray(result).save(fp=img_dest)
+            return result, img_dest
+        case "openpose":
+            openpose = PreprocessorModel("openpose")
+            result = openpose(np.array(input_image))
+            Image.fromarray(result[0]).save(fp=img_dest)
+            return result, img_dest
+        case "zoedepth":
+            zoedepth = PreprocessorModel("ZoeDepth")
+            result = zoedepth(np.array(input_image))
+            Image.fromarray(result).save(fp=img_dest)
+            return result, img_dest
+        case "scribble":
+            input_image.save(fp=img_dest)
+            return input_image, img_dest
+        case _:
+            return None, None
--- a/apps/shark_studio/api/initializers.py
+++ b/apps/shark_studio/api/initializers.py
@@ -0,0 +1,130 @@
+import importlib
+import os
+import signal
+import sys
+import warnings
+import json
+from threading import Thread
+
+from apps.shark_studio.modules.timer import startup_timer
+
+from apps.shark_studio.web.utils.tmp_configs import (
+    config_tmp,
+    clear_tmp_mlir,
+    clear_tmp_imgs,
+    shark_tmp,
+)
+
+
+def imports():
+    """Import torch, mute noisy library warnings, and initialize globals."""
+    import torch  # noqa: F401
+
+    startup_timer.record("import torch")
+    warnings.filterwarnings(
+        action="ignore", category=DeprecationWarning, module="torch"
+    )
+    warnings.filterwarnings(action="ignore", category=UserWarning, module="torchvision")
+    warnings.filterwarnings(action="ignore", category=UserWarning, module="torch")
+    warnings.filterwarnings(action="ignore", category=UserWarning, module="diffusers")
+    warnings.filterwarnings(action="ignore", category=FutureWarning, module="diffusers")
+    # `module` is a regex matched against the start of the warning's module
+    # name, so use the package name `huggingface_hub`, not the PyPI spelling.
+    warnings.filterwarnings(
+        action="ignore", category=FutureWarning, module="huggingface_hub"
+    )
+    warnings.filterwarnings(
+        action="ignore", category=UserWarning, module="huggingface_hub"
+    )
+
+    # import gradio  # noqa: F401
+    # startup_timer.record("import gradio")
+
+    import apps.shark_studio.web.utils.globals as global_obj
+
+    global_obj._init()
+    startup_timer.record("initialize globals")
+
+    from apps.shark_studio.modules import img_processing  # noqa: F401
+
+    startup_timer.record("other imports")
+
+
+def initialize():
+    configure_sigint_handler()
+    # Setup to use shark_tmp for gradio's temporary image files and clear any
+    # existing temporary images there if they exist. Then we can import gradio.
+    # It has to be in this order or gradio ignores what we've set up.
+
+    # config_tmp()
+    # clear_tmp_imgs()
+
+    from apps.shark_studio.web.utils.file_utils import (
+        create_model_folders,
+    )
+
+    # Create custom models folders if they don't exist
+    create_model_folders()
+
+    # initialize_rest(reload_script_modules=False)
+
+
+def initialize_rest(*, reload_script_modules=False):
+    """
+    Called both from initialize() and when reloading the webui.
+    """
+    # Keep this for adding reload options to the webUI.
+
+
+def dumpstacks():
+    import threading
+    import traceback
+
+    id2name = {th.ident: th.name for th in threading.enumerate()}
+    code = []
+    for threadId, stack in sys._current_frames().items():
+        code.append(f"\n# Thread: {id2name.get(threadId, '')}({threadId})")
+        for filename, lineno, name, line in traceback.extract_stack(stack):
+            code.append(f"""File: "{filename}", line {lineno}, in {name}""")
+            if line:
+                code.append("  " + line.strip())
+    with open(os.path.join(shark_tmp, "stack_dump.log"), "w") as f:
+        f.write("\n".join(code))
+
+
+def setup_middleware(app):
+    from starlette.middleware.gzip import GZipMiddleware
+
+    app.middleware_stack = (
+        None  # reset current middleware to allow modifying user provided list
+    )
+    app.add_middleware(GZipMiddleware, minimum_size=1000)
+    configure_cors_middleware(app)
+    app.build_middleware_stack()  # rebuild middleware stack on-the-fly
+
+
+def configure_cors_middleware(app):
+    from starlette.middleware.cors import CORSMiddleware
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+    cors_options = {
+        "allow_methods": ["*"],
+        "allow_headers": ["*"],
+        "allow_credentials": True,
+    }
+    if cmd_opts.api_accept_origin:
+        cors_options["allow_origins"] = cmd_opts.api_accept_origin.split(",")
+
+    app.add_middleware(CORSMiddleware, **cors_options)
+
+
+def configure_sigint_handler():
+    # make the program just exit at ctrl+c without waiting for anything
+    def sigint_handler(sig, frame):
+        print(f"Interrupted with signal {sig} in {frame}")
+
+        dumpstacks()
+
+        os._exit(0)
+
+    signal.signal(signal.SIGINT, sigint_handler)
--- a/apps/shark_studio/api/llm.py
+++ b/apps/shark_studio/api/llm.py
@@ -3,7 +3,13 @@ from turbine_models.model_runner import vmfbRunner
 from turbine_models.gen_external_params.gen_external_params import gen_external_params
 import time
 from shark.iree_utils.compile_utils import compile_module_to_flatbuffer
-from apps.shark_studio.web.utils import get_resource_path
+from apps.shark_studio.web.utils.file_utils import (
+    get_resource_path,
+    get_checkpoints_path,
+)
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.shark_studio.api.utils import parse_device
+from urllib.request import urlopen
 import iree.runtime as ireert
 from itertools import chain
 import gc
@@ -12,7 +18,7 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM

 llm_model_map = {
-    "llama2_7b": {
+    "meta-llama/Llama-2-7b-chat-hf": {
        "initializer": stateless_llama.export_transformer_model,
        "hf_model_name": "meta-llama/Llama-2-7b-chat-hf",
        "compile_flags": ["--iree-opt-const-expr-hoisting=False"],
@@ -64,6 +70,7 @@ class LanguageModel:
        use_system_prompt=True,
        streaming_llm=False,
    ):
+        _, _, self.triple = parse_device(device)
        self.hf_model_name = llm_model_map[model_name]["hf_model_name"]
        self.device = device.split("=>")[-1].strip()
        self.backend = self.device.split("://")[0]
@@ -88,21 +95,29 @@ class LanguageModel:
        if self.quantization != "None":
            self.file_spec += "_" + self.quantization

-        if external_weights is not None:
+        if external_weights in ["safetensors", "gguf"]:
            self.external_weight_file = get_resource_path(
-                self.file_spec + "." + external_weights
+                os.path.join("..", self.file_spec + "." + external_weights)
            )
+        else:
+            self.external_weights = None
+            self.external_weight_file = None

        if streaming_llm:
            # Add streaming suffix to file spec after setting external weights filename.
            self.file_spec += "_streaming"
        self.streaming_llm = streaming_llm

-        self.tempfile_name = get_resource_path(f"{self.file_spec}.tempfile")
-        # TODO: Tag vmfb with target triple of device instead of HAL backend
-        self.vmfb_name = get_resource_path(
-            f"{self.file_spec}_{self.backend}.vmfb.tempfile"
+        self.tempfile_name = get_resource_path(
+            os.path.join("..", f"{self.file_spec}.tempfile")
        )
+        # TODO: Tag vmfb with target triple of device instead of HAL backend
+        self.vmfb_name = str(
+            get_resource_path(
+                os.path.join("..", f"{self.file_spec}_{self.backend}.vmfb.tempfile")
+            )
+        )
+
        self.max_tokens = llm_model_map[model_name]["max_tokens"]
        self.iree_module_dict = None
        self.use_system_prompt = use_system_prompt
@@ -126,6 +141,8 @@ class LanguageModel:
                print(
                    f"External weight file {self.external_weight_file} found for {self.vmfb_name}"
                )
+            self.external_weight_file = str(self.external_weight_file)
+
        if os.path.exists(self.vmfb_name) and (
            external_weights is None or os.path.exists(str(self.external_weight_file))
        ):
@@ -144,7 +161,9 @@ class LanguageModel:
                use_auth_token=hf_auth_token,
            )
        elif not os.path.exists(self.tempfile_name):
-            self.torch_ir, self.tokenizer = llm_model_map[model_name]["initializer"](
+            self.torch_ir, self.tokenizer = llm_model_map[self.hf_model_name][
+                "initializer"
+            ](
                self.hf_model_name,
                hf_auth_token,
                compile_to="torch",
@@ -152,6 +171,7 @@ class LanguageModel:
                precision=self.precision,
                quantization=self.quantization,
                streaming_llm=self.streaming_llm,
+                decomp_attn=True,
            )
            with open(self.tempfile_name, "w+") as f:
                f.write(self.torch_ir)
@@ -181,11 +201,27 @@ class LanguageModel:
            )
        elif self.backend == "vulkan":
            flags.extend(["--iree-stream-resource-max-allocation-size=4294967296"])
+        elif self.backend == "rocm":
+            flags.extend(
+                [
+                    "--iree-codegen-llvmgpu-enable-transform-dialect-jit=false",
+                    "--iree-llvmgpu-enable-prefetch=true",
+                    "--iree-opt-outer-dim-concat=true",
+                    "--iree-flow-enable-aggressive-fusion",
+                ]
+            )
+            if "gfx9" in self.triple:
+                flags.extend(
+                    [
+                        f"--iree-codegen-transform-dialect-library={get_mfma_spec_path(self.triple, get_checkpoints_path())}",
+                        "--iree-codegen-llvmgpu-use-vector-distribution=true",
+                    ]
+                )
        flags.extend(llm_model_map[self.hf_model_name]["compile_flags"])
        flatbuffer_blob = compile_module_to_flatbuffer(
            self.tempfile_name,
            device=self.device,
-            frontend="torch",
+            frontend="auto",
            model_config_path=None,
            extra_args=flags,
            write_to=self.vmfb_name,
@@ -209,10 +245,8 @@ class LanguageModel:
        prompt = prompt.replace("\r", " ")
        if self.use_system_prompt and self.global_iter == 0:
            prompt = append_user_prompt(DEFAULT_CHAT_SYS_PROMPT, prompt)
-            print(prompt)
            return prompt
        else:
-            print(prompt)
            return f"{B_INST} {prompt} {E_INST}"

    def chat(self, prompt):
@@ -248,7 +282,10 @@ class LanguageModel:
                token_len += 1

            history.append(format_out(token))
-            while format_out(token) != llm_model_map["llama2_7b"]["stop_token"]:
+            while (
+                format_out(token) != llm_model_map[self.hf_model_name]["stop_token"]
+                and len(history) < self.max_tokens
+            ):
                dec_time = time.time()
                if self.streaming_llm and self.model["get_seq_step"]() > 600:
                    print("Evicting cache space!")
@@ -260,7 +297,7 @@ class LanguageModel:

            self.prev_token_len = token_len + len(history)

-            if format_out(token) == llm_model_map["llama2_7b"]["stop_token"]:
+            if format_out(token) == llm_model_map[self.hf_model_name]["stop_token"]:
                break

        for i in range(len(history)):
@@ -294,7 +331,7 @@ class LanguageModel:
                self.first_input = False

            history.append(int(token))
-            while token != llm_model_map["llama2_7b"]["stop_token"]:
+            while token != llm_model_map[self.hf_model_name]["stop_token"]:
                dec_time = time.time()
                result = self.hf_mod(token.reshape([1, 1]), past_key_values=pkv)
                history.append(int(token))
@@ -305,7 +342,7 @@ class LanguageModel:

            self.prev_token_len = token_len + len(history)

-            if token == llm_model_map["llama2_7b"]["stop_token"]:
+            if token == llm_model_map[self.hf_model_name]["stop_token"]:
                break
        for i in range(len(history)):
            if type(history[i]) != int:
@@ -315,6 +352,116 @@ class LanguageModel:
        return result_output, total_time


+def get_mfma_spec_path(target_chip, save_dir):  # target_chip is currently unused
+    """Return the MFMA spec path in save_dir, downloading the file if absent."""
+    spec_path = os.path.join(save_dir, "attention_and_matmul_spec_mfma.mlir")
+    if not os.path.exists(spec_path):  # only touch the network on a cache miss
+        url = "https://raw.githubusercontent.com/iree-org/iree/main/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir"
+        attn_spec = urlopen(url).read().decode("utf-8")
+        with open(spec_path, "w") as f:
+            f.write(attn_spec)
+    return spec_path
+
+
+def llm_chat_api(InputData: dict):
+    """Run one LLM request and return a completion-style response dict.
+
+    A "messages" key selects the chat-completion format (only the first
+    message is used); otherwise the legacy format reads "prompt". Optional
+    keys: "model", "device", "max_tokens". Raises KeyError for unknown models.
+    """
+    from datetime import datetime as dt
+
+    import apps.shark_studio.web.utils.globals as global_obj
+
+    print(f"Input keys : {InputData.keys()}")
+
+    is_chat_completion_api = (
+        "messages" in InputData.keys()
+    )  # else it is the legacy `completion` api
+
+    # For Debugging input data from API
+    if is_chat_completion_api:
+        print(f"message -> role : {InputData['messages'][0]['role']}")
+        print(f"message -> content : {InputData['messages'][0]['content']}")
+    else:
+        print(f"prompt : {InputData['prompt']}")
+
+    model_name = InputData.get("model", "meta-llama/Llama-2-7b-chat-hf")
+    if model_name not in llm_model_map:
+        raise KeyError(f"unknown model: {model_name}")
+    device = InputData.get("device", "cpu")
+    max_tokens = InputData.get("max_tokens", 4096)
+
+    if not global_obj.get_llm_obj():
+        print("\n[LOG] Initializing new pipeline...")
+        global_obj.clear_cache()
+        gc.collect()
+        # Collapse device strings such as "vulkan://0" to the bare backend name.
+        if "cuda" in device:
+            device = "cuda"
+        elif "vulkan" in device:
+            device = "vulkan"
+        elif "cpu" in device:
+            device = "cpu"
+        else:
+            print("unrecognized device")
+        llm_model = LanguageModel(
+            model_name=model_name,
+            hf_auth_token=cmd_opts.hf_auth_token,
+            device=device,
+            quantization=cmd_opts.quantization,
+            external_weights="safetensors",
+            use_system_prompt=True,
+            streaming_llm=False,
+        )
+        global_obj.set_llm_obj(llm_model)
+    else:
+        llm_model = global_obj.get_llm_obj()
+
+    llm_model.max_tokens = max_tokens
+    # TODO: add role dict for different models
+    if is_chat_completion_api:
+        # TODO: add functionality for multiple messages
+        prompt = append_user_prompt(
+            InputData["messages"][0]["role"], InputData["messages"][0]["content"]
+        )
+    else:
+        prompt = InputData["prompt"]
+    print("prompt = ", prompt)
+
+    # Stays empty if chat() yields nothing, instead of an UnboundLocalError below.
+    choices = []
+    for res_op, _ in llm_model.chat(prompt):
+        if is_chat_completion_api:
+            choices = [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": res_op,  # since we are yielding the result
+                    },
+                    "finish_reason": "stop",  # or length
+                }
+            ]
+        else:
+            choices = [
+                {
+                    "text": res_op,
+                    "index": 0,
+                    "logprobs": None,
+                    "finish_reason": "stop",  # or length
+                }
+            ]
+    end_time = dt.now().strftime("%Y%m%d%H%M%S%f")
+    return {
+        "id": end_time,
+        "object": "chat.completion" if is_chat_completion_api else "text_completion",
+        "created": int(end_time),
+        "choices": choices,
+    }
+
+
 if __name__ == "__main__":
    lm = LanguageModel(
        "Trelis/Llama-2-7b-chat-hf-function-calling-v2",
--- a/apps/shark_studio/api/sd.py
+++ b/apps/shark_studio/api/sd.py
@@ -0,0 +1,579 @@
+import gc
+import torch
+import gradio as gr
+import time
+import os
+import json
+import numpy as np
+import copy
+import importlib.util
+import sys
+from tqdm.auto import tqdm
+
+from pathlib import Path
+from random import randint
+
+
+from apps.shark_studio.api.controlnet import control_adapter_map
+from apps.shark_studio.api.utils import parse_device
+from apps.shark_studio.web.utils.state import status_label
+from apps.shark_studio.web.utils.file_utils import (
+    safe_name,
+    get_resource_path,
+    get_checkpoints_path,
+)
+
+from apps.shark_studio.modules.img_processing import (
+    save_output_img,
+)
+
+
+from subprocess import check_output
+
+EMPTY_SD_MAP = {
+    "clip": None,
+    "scheduler": None,
+    "unet": None,
+    "vae_decode": None,
+}
+
+EMPTY_SDXL_MAP = {
+    "prompt_encoder": None,
+    "scheduled_unet": None,
+    "vae_decode": None,
+    "pipeline": None,
+    "full_pipeline": None,
+}
+
+EMPTY_FLAGS = {
+    "clip": None,
+    "unet": None,
+    "vae": None,
+    "pipeline": None,
+}
+
+
+def load_script(source, module_name):
+    """
+    reads file source and loads it as a module
+
+    :param source: file to load
+    :param module_name: name of module to register in sys.modules
+    :return: loaded module
+    """
+    spec = importlib.util.spec_from_file_location(module_name, source)
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    spec.loader.exec_module(module)
+
+    return module
+
+
+class StableDiffusion:
+    # This class is responsible for executing image generation and creating
+    # /managing a set of compiled modules to run Stable Diffusion. The init
+    # aims to be as general as possible, and the class will infer and compile
+    # a list of necessary modules or a combined "pipeline module" for a
+    # specified job based on the inference task.
+
+    def __init__(
+        self,
+        base_model_id,
+        height: int,
+        width: int,
+        batch_size: int,
+        steps: int,
+        scheduler: str,
+        precision: str,
+        device: str,
+        target_triple: str = None,
+        custom_vae: str = None,
+        num_loras: int = 0,
+        import_ir: bool = True,
+        is_controlled: bool = False,
+        external_weights: str = "safetensors",
+        progress=gr.Progress(),
+    ):
+        progress(0, desc="Initializing pipeline...")
+        self.ui_device = device
+        self.precision = precision
+        self.compiled_pipeline = False
+        self.base_model_id = base_model_id
+        self.custom_vae = custom_vae
+        self.is_sdxl = "xl" in self.base_model_id.lower()
+        self.is_custom = ".py" in self.base_model_id.lower()
+        if self.is_custom:
+            custom_module = load_script(
+                os.path.join(get_checkpoints_path("scripts"), self.base_model_id),
+                "custom_pipeline",
+            )
+            self.turbine_pipe = custom_module.StudioPipeline
+            self.dynamic_steps = False
+            self.model_map = custom_module.MODEL_MAP
+        elif self.is_sdxl:
+            from turbine_models.custom_models.sdxl_inference.sdxl_compiled_pipeline import (
+                SharkSDXLPipeline,
+            )
+
+            self.turbine_pipe = SharkSDXLPipeline
+            self.dynamic_steps = False
+            self.model_map = EMPTY_SDXL_MAP
+        else:
+            from turbine_models.custom_models.sd_inference.sd_pipeline import (
+                SharkSDPipeline,
+            )
+
+            self.turbine_pipe = SharkSDPipeline
+            self.dynamic_steps = True
+            self.model_map = EMPTY_SD_MAP
+        max_length = 64
+        target_backend, self.rt_device, triple = parse_device(device, target_triple)
+        pipe_id_list = [
+            safe_name(base_model_id),
+            str(batch_size),
+            str(max_length),
+            f"{str(height)}x{str(width)}",
+            precision,
+            triple,
+        ]
+        if num_loras > 0:
+            pipe_id_list.append(str(num_loras) + "lora")
+        if is_controlled:
+            pipe_id_list.append("controlled")
+        if custom_vae:
+            pipe_id_list.append(custom_vae)
+        self.pipe_id = "_".join(pipe_id_list)
+        self.pipeline_dir = Path(os.path.join(get_checkpoints_path(), self.pipe_id))
+        self.weights_path = Path(
+            os.path.join(
+                get_checkpoints_path(), safe_name(self.base_model_id + "_" + precision)
+            )
+        )
+        if not os.path.exists(self.weights_path):
+            os.mkdir(self.weights_path)
+
+        decomp_attn = True
+        attn_spec = None
+        if triple in ["gfx940", "gfx942", "gfx90a"]:
+            decomp_attn = False
+            attn_spec = "mfma"
+        elif triple in ["gfx1100", "gfx1103", "gfx1150"]:
+            decomp_attn = False
+            attn_spec = "wmma"
+            if triple in ["gfx1103", "gfx1150"]:
+                # external weights have issues on igpu
+                external_weights = None
+        elif target_backend == "llvm-cpu":
+            decomp_attn = False
+        progress(0.5, desc="Initializing pipeline...")
+        self.sd_pipe = self.turbine_pipe(
+            hf_model_name=base_model_id,
+            scheduler_id=scheduler,
+            height=height,
+            width=width,
+            precision=precision,
+            max_length=max_length,
+            batch_size=batch_size,
+            num_inference_steps=steps,
+            device=target_backend,
+            iree_target_triple=triple,
+            ireec_flags=EMPTY_FLAGS,
+            attn_spec=attn_spec,
+            decomp_attn=decomp_attn,
+            pipeline_dir=self.pipeline_dir,
+            external_weights_dir=self.weights_path,
+            external_weights=external_weights,
+            custom_vae=custom_vae,
+        )
+        progress(1, desc="Pipeline initialized!...")
+        gc.collect()
+
+    def prepare_pipe(
+        self,
+        custom_weights,
+        adapters,
+        embeddings,
+        is_img2img,
+        compiled_pipeline,
+        progress=gr.Progress(),
+    ):
+        """Prepare artifacts for the pipeline and load them onto the runtime device.
+
+        Args:
+            custom_weights: Checkpoint file name, resolved below the
+                "checkpoints" directory for the current base model. When falsy,
+                no custom weights are converted.
+            adapters: Accepted but not read by this method.
+            embeddings: Accepted but not read by this method.
+            is_img2img: Accepted but not read; ``self.is_img2img`` is always set
+                to False here.
+            compiled_pipeline: Requested compiled-pipeline mode; forced to False
+                unless ``self.is_sdxl`` is set.
+            progress: Callable used to report progress (fraction, description).
+
+        Returns:
+            None.
+        """
+        progress(0, desc="Preparing models...")
+
+        # NOTE(review): the is_img2img argument is ignored here — confirm intent.
+        self.is_img2img = False
+        # Three independent copies of the model map, handed to check_prepared
+        # below as the mlir / vmfb / weights tables.
+        mlirs = copy.deepcopy(self.model_map)
+        vmfbs = copy.deepcopy(self.model_map)
+        weights = copy.deepcopy(self.model_map)
+        if not self.is_sdxl:
+            compiled_pipeline = False
+        self.compiled_pipeline = compiled_pipeline
+
+        if custom_weights:
+            from apps.shark_studio.modules.ckpt_processing import (
+                preprocessCKPT,
+                save_irpa,
+            )
+
+            # <checkpoints>/<safe base model name>/<custom_weights>
+            custom_weights = os.path.join(
+                get_checkpoints_path("checkpoints"),
+                safe_name(self.base_model_id.split("/")[-1]),
+                custom_weights,
+            )
+            diffusers_weights_path = preprocessCKPT(custom_weights, self.precision)
+            # For every submodel key, convert the matching safetensors file of
+            # the diffusers-format checkpoint and record the path save_irpa returns.
+            for key in weights:
+                if key in ["scheduled_unet", "unet"]:
+                    unet_weights_path = os.path.join(
+                        diffusers_weights_path,
+                        "unet",
+                        "diffusion_pytorch_model.safetensors",
+                    )
+                    weights[key] = save_irpa(unet_weights_path, "unet.")
+                # This `if` starts a second, separate chain; the unet branch
+                # above is not part of the elif sequence that follows.
+                if key in ["mmdit"]:
+                    mmdit_weights_path = os.path.join(
+                        diffusers_weights_path,
+                        "mmdit",
+                        "diffusion_pytorch_model_fp16.safetensors",
+                    )
+                    weights[key] = save_irpa(mmdit_weights_path, "mmdit.")
+                elif key in ["clip", "prompt_encoder", "text_encoder"]:
+                    if not self.is_sdxl and not self.is_custom:
+                        # Single text encoder.
+                        sd1_path = os.path.join(
+                            diffusers_weights_path, "text_encoder", "model.safetensors"
+                        )
+                        weights[key] = save_irpa(sd1_path, "text_encoder_model.")
+                    elif self.is_sdxl:
+                        # Two text encoders; the entry becomes a two-item list.
+                        clip_1_path = os.path.join(
+                            diffusers_weights_path, "text_encoder", "model.safetensors"
+                        )
+                        clip_2_path = os.path.join(
+                            diffusers_weights_path,
+                            "text_encoder_2",
+                            "model.safetensors",
+                        )
+                        weights[key] = [
+                            save_irpa(clip_1_path, "text_encoder_model_1."),
+                            save_irpa(clip_2_path, "text_encoder_model_2."),
+                        ]
+                    elif self.is_custom:
+                        # Three text encoders (fp16 files); three-item list.
+                        clip_g_path = os.path.join(
+                            diffusers_weights_path,
+                            "text_encoder",
+                            "model.fp16.safetensors",
+                        )
+                        clip_l_path = os.path.join(
+                            diffusers_weights_path,
+                            "text_encoder_2",
+                            "model.fp16.safetensors",
+                        )
+                        t5xxl_path = os.path.join(
+                            diffusers_weights_path,
+                            "text_encoder_3",
+                            "model.fp16.safetensors",
+                        )
+                        weights[key] = [
+                            save_irpa(clip_g_path, "clip_g.transformer."),
+                            save_irpa(clip_l_path, "clip_l.transformer."),
+                            save_irpa(t5xxl_path, "t5xxl.transformer."),
+                        ]
+                # Only fill in VAE weights when the map has no entry for them yet.
+                elif key in ["vae_decode"] and weights[key] is None:
+                    vae_weights_path = os.path.join(
+                        diffusers_weights_path,
+                        "vae",
+                        "diffusion_pytorch_model.safetensors",
+                    )
+                    weights[key] = save_irpa(vae_weights_path, "vae.")
+
+        progress(0.25, desc=f"Preparing pipeline for {self.ui_device}...")
+
+        # check_prepared returns the vmfb and weights tables that are then
+        # passed to load_pipeline.
+        vmfbs, weights = self.sd_pipe.check_prepared(
+            mlirs, vmfbs, weights, interactive=False
+        )
+        progress(0.5, desc=f"Artifacts ready!")
+        progress(0.75, desc=f"Loading models and weights...")
+
+        self.sd_pipe.load_pipeline(
+            vmfbs, weights, self.rt_device, self.compiled_pipeline
+        )
+        progress(1, desc="Pipeline loaded! Generating images...")
+        return
+
+    def generate_images(
+        self,
+        prompt,
+        negative_prompt,
+        image,
+        strength,
+        guidance_scale,
+        seed,
+        ondemand,
+        resample_type,
+        control_mode,
+        hints,
+        progress=gr.Progress(),
+    ):
+        """Run one generation pass on the loaded pipeline and return its result.
+
+        Only ``prompt``, ``negative_prompt``, ``guidance_scale`` and ``seed`` are
+        forwarded to ``self.sd_pipe.generate_images``; the remaining parameters
+        are accepted for interface compatibility but are not read here.
+        """
+        # The literal 1 is forwarded as the third positional argument.
+        forwarded = (prompt, negative_prompt, 1, guidance_scale, seed)
+        return self.sd_pipe.generate_images(*forwarded, return_imgs=True)
+
+
+def shark_sd_fn(
+    prompt,
+    negative_prompt,
+    sd_init_image: list,
+    height: int,
+    width: int,
+    steps: int,
+    strength: float,
+    guidance_scale: float,
+    seed: list,
+    batch_count: int,
+    batch_size: int,
+    scheduler: str,
+    base_model_id: str,
+    custom_weights: str,
+    custom_vae: str,
+    precision: str,
+    device: str,
+    target_triple: str,
+    ondemand: bool,
+    compiled_pipeline: bool,
+    resample_type: str,
+    controlnets: dict,
+    embeddings: dict,
+    seed_increment: str | int = 1,
+    output_type: str = "png",
+    # progress=gr.Progress(),
+):
+    """Generate images batch by batch, reusing the cached pipeline when possible.
+
+    This is a generator. After every batch it yields
+    ``(generated_imgs, status_label(...))`` where ``generated_imgs`` is the
+    list of all images produced so far; its final return value is
+    ``(generated_imgs, "")``.
+
+    The pipeline object stored in the studio globals is rebuilt only when the
+    pipeline-level arguments differ from the cached ones, and ``prepare_pipe``
+    is re-run only when the preparation arguments differ.
+
+    A ``seed`` of ``-1`` / ``"-1"`` selects a random seed and switches the
+    per-batch seed progression to random as well. Otherwise each batch after
+    the first advances the previous batch's seed by ``seed_increment``.
+    """
+    # Must run before any other local is bound so the snapshot holds only the
+    # call arguments; it is handed to save_output_img with every saved image.
+    sd_kwargs = locals()
+    if not isinstance(sd_init_image, list):
+        sd_init_image = [sd_init_image]
+    # An empty list means "no init image" rather than an IndexError.
+    is_img2img = bool(sd_init_image) and sd_init_image[0] is not None
+
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+    import apps.shark_studio.web.utils.globals as global_obj
+
+    adapters = {}
+    is_controlled = False
+    control_mode = None
+    hints = []
+    import_ir = True
+    # Number of embeddings entries that carry a truthy value.
+    num_loras = sum(1 for key in embeddings if embeddings[key])
+    if "model" in controlnets:
+        for i, model in enumerate(controlnets["model"]):
+            # The adapter table is chosen by whether the base model id
+            # contains "xl".
+            if "xl" not in base_model_id.lower():
+                adapter_family = "runwayml/stable-diffusion-v1-5"
+            else:
+                adapter_family = "stabilityai/stable-diffusion-xl-1.0"
+            # NOTE(review): the lookup runs even when `model` is None —
+            # confirm control_adapter_map tolerates that key.
+            adapters[f"control_adapter_{model}"] = {
+                "hf_id": control_adapter_map[adapter_family][model],
+                "strength": controlnets["strength"][i],
+            }
+            if model is not None:
+                is_controlled = True
+        control_mode = controlnets["control_mode"]
+        # Was `hints.append[i]`, which subscripts the bound method and raises
+        # TypeError as soon as any hint is supplied.
+        hints.extend(controlnets["hint"])
+
+    submit_pipe_kwargs = {
+        "base_model_id": base_model_id,
+        "height": height,
+        "width": width,
+        "batch_size": batch_size,
+        "precision": precision,
+        "device": device,
+        "target_triple": target_triple,
+        "custom_vae": custom_vae,
+        "num_loras": num_loras,
+        "import_ir": import_ir,
+        "is_controlled": is_controlled,
+        "steps": steps,
+        "scheduler": scheduler,
+    }
+    submit_prep_kwargs = {
+        "custom_weights": custom_weights,
+        "adapters": adapters,
+        "embeddings": embeddings,
+        "is_img2img": is_img2img,
+        "compiled_pipeline": compiled_pipeline,
+    }
+    submit_run_kwargs = {
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "image": sd_init_image,
+        "strength": strength,
+        "guidance_scale": guidance_scale,
+        "seed": seed,
+        "ondemand": ondemand,
+        "resample_type": resample_type,
+        "control_mode": control_mode,
+        "hints": hints,
+    }
+    # When the cached pipeline reports dynamic_steps, the step count moves
+    # from the pipeline arguments to the run arguments, so changing it does
+    # not by itself invalidate the cached pipeline.
+    if global_obj.get_sd_obj() and global_obj.get_sd_obj().dynamic_steps:
+        submit_run_kwargs["steps"] = submit_pipe_kwargs["steps"]
+        submit_pipe_kwargs.pop("steps")
+    if (
+        not global_obj.get_sd_obj()
+        or global_obj.get_pipe_kwargs() != submit_pipe_kwargs
+    ):
+        print("\n[LOG] Initializing new pipeline...")
+        global_obj.clear_cache()
+        gc.collect()
+
+        # Initializes the pipeline and retrieves IR based on all
+        # parameters that are static in the turbine output format,
+        # which is currently MLIR in the torch dialect.
+
+        sd_pipe = StableDiffusion(
+            **submit_pipe_kwargs,
+        )
+        global_obj.set_sd_obj(sd_pipe)
+        global_obj.set_pipe_kwargs(submit_pipe_kwargs)
+    if (
+        not global_obj.get_prep_kwargs()
+        or global_obj.get_prep_kwargs() != submit_prep_kwargs
+    ):
+        global_obj.set_prep_kwargs(submit_prep_kwargs)
+        global_obj.get_sd_obj().prepare_pipe(**submit_prep_kwargs)
+
+    generated_imgs = []
+    if submit_run_kwargs["seed"] in [-1, "-1"]:
+        submit_run_kwargs["seed"] = randint(0, 4294967295)
+        seed_increment = "random"
+        # print(f"\n[LOG] Random seed: {seed}")
+    # progress(None, desc=f"Generating...")
+
+    for current_batch in range(batch_count):
+        start_time = time.time()
+        # The seed this batch actually runs with. It is used for saving and as
+        # the base for the next batch; the original `seed` argument may be -1
+        # and never advances.
+        current_seed = submit_run_kwargs["seed"]
+        out_imgs = global_obj.get_sd_obj().generate_images(**submit_run_kwargs)
+        if not isinstance(out_imgs, list):
+            out_imgs = [out_imgs]
+        # total_time = time.time() - start_time
+        # text_output = f"Total image(s) generation time: {total_time:.4f}sec"
+        # print(f"\n[LOG] {text_output}")
+        # if global_obj.get_sd_status() == SD_STATE_CANCEL:
+        #     break
+        # else:
+        for batch in range(batch_size):
+            if output_type == "png":
+                save_output_img(
+                    out_imgs[batch],
+                    current_seed,
+                    sd_kwargs,
+                )
+        generated_imgs.extend(out_imgs)
+
+        yield generated_imgs, status_label(
+            "Stable Diffusion", current_batch + 1, batch_count, batch_size
+        )
+        if batch_count > 1:
+            # Advance from the seed just used so successive batches differ.
+            submit_run_kwargs["seed"] = get_next_seed(current_seed, seed_increment)
+
+    return (generated_imgs, "")
+
+
+def shark_sd_fn_dict_input(sd_kwargs: dict, *, progress=gr.Progress()):
+    """Normalize and validate a kwargs dict, then delegate to ``shark_sd_fn``.
+
+    The dict is modified in place: ``None`` / ``[]`` values become ``None``,
+    the string ``"None"`` becomes ``""``, and the integer-valued fields are
+    cast with ``int``. If a validation rule fails, a gradio warning is raised
+    and the generator finishes with ``(None, "")`` without yielding anything.
+    Otherwise everything ``shark_sd_fn`` yields is re-yielded and its return
+    value is returned.
+    """
+    print("\n[LOG] Submitting Request...")
+
+    integer_fields = ("steps", "height", "width", "batch_count", "batch_size", "seed")
+    for name in sd_kwargs:
+        if sd_kwargs[name] in [None, []]:
+            sd_kwargs[name] = None
+        if sd_kwargs[name] in ["None"]:
+            sd_kwargs[name] = ""
+        if name in integer_fields:
+            sd_kwargs[name] = int(sd_kwargs[name])
+
+    def first_problem():
+        # Returns the message of the first failed check, or None when all pass.
+        # The checks run in the same order as they are reported to the user.
+        if not sd_kwargs["device"]:
+            return "No device specified. Please specify a device."
+        if sd_kwargs["height"] not in [512, 1024]:
+            return "Height must be 512 or 1024. This is a temporary limitation."
+        if sd_kwargs["height"] != sd_kwargs["width"]:
+            return "Height and width must be the same. This is a temporary limitation."
+        if sd_kwargs["base_model_id"] == "stabilityai/sdxl-turbo":
+            if sd_kwargs["steps"] > 10:
+                return "Max steps for sdxl-turbo is 10. 1 to 4 steps are recommended."
+            if sd_kwargs["guidance_scale"] > 3:
+                return "sdxl-turbo CFG scale should be less than 2.0 if using negative prompt, 0 otherwise."
+        if sd_kwargs["target_triple"] == "":
+            inferred = parse_device(sd_kwargs["device"], sd_kwargs["target_triple"])
+            if not inferred[2]:
+                return "Target device architecture could not be inferred. Please specify a target triple, e.g. 'gfx1100' for a Radeon 7900xtx."
+        return None
+
+    # TODO: move these checks into the UI code so we don't have gradio warnings in a generalized dict input function.
+    problem = first_problem()
+    if problem is not None:
+        gr.Warning(problem)
+        return None, ""
+
+    return (yield from shark_sd_fn(**sd_kwargs))
+
+
+def get_next_seed(seed, seed_increment: str | int = 10):
+    """Return the seed that follows ``seed``.
+
+    An integer ``seed_increment`` is added to ``seed`` and the sum is cast to
+    ``int``. The string ``"random"`` draws a fresh seed from
+    ``[0, 4294967295]``. Any other value yields ``None``.
+    """
+    if not isinstance(seed_increment, int):
+        if seed_increment == "random":
+            # print(f"\n[LOG] Random seed: {seed}")
+            return randint(0, 4294967295)
+        return None
+    # print(f"\n[LOG] Seed after batch increment: {seed + seed_increment}")
+    return int(seed + seed_increment)
+
+
+def unload_sd():
+    """Clear the studio's global cache and run a garbage-collection pass."""
+    print("Unloading models.")
+    from apps.shark_studio.web.utils import globals as studio_globals
+
+    studio_globals.clear_cache()
+    gc.collect()
+
+
+def cancel_sd():
+    """Placeholder cancel hook: prints a notice and returns ``None``."""
+    print("Inject call to cancel longer API calls.")
+
+
+def view_json_file(file_path):
+    """Return the full text content of the file at ``file_path``."""
+    with open(file_path, "r") as handle:
+        return handle.read()
+
+
+def safe_name(name):
+    """Return ``name`` with every "/", "\\" and "." replaced by "_"."""
+    sanitized = name
+    for unsafe_char in ("/", "\\", "."):
+        sanitized = sanitized.replace(unsafe_char, "_")
+    return sanitized
+
+
+if __name__ == "__main__":
+    # Script entry point: run one generation request built from the default
+    # JSON config and print everything the generator yields.
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+    import apps.shark_studio.web.utils.globals as global_obj
+
+    global_obj._init()
+
+    default_config_path = get_resource_path(
+        os.path.join(cmd_opts.config_dir, cmd_opts.default_config)
+    )
+    sd_kwargs = json.loads(view_json_file(default_config_path))
+    # Command-line overrides of the config values are currently disabled:
+    # for arg in vars(cmd_opts):
+    #     if arg in sd_kwargs:
+    #         sd_kwargs[arg] = getattr(cmd_opts, arg)
+    for result in shark_sd_fn_dict_input(sd_kwargs):
+        print(result)
--- a/apps/shark_studio/api/utils.py
+++ b/apps/shark_studio/api/utils.py
@@ -8,21 +8,65 @@ from random import (
 )

 from pathlib import Path
-
-# from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
 from cpuinfo import get_cpu_info

-# TODO: migrate these utils to studio
-from shark.iree_utils.vulkan_utils import (
-    set_iree_vulkan_runtime_flags,
-    get_vulkan_target_triple,
-    get_iree_vulkan_runtime_flags,
-)
+
+def iree_device_map(device):
+    """Translate a device string into its IREE driver form.
+
+    The part before "://" is looked up in ``_IREE_DEVICE_MAP`` and used as-is
+    when it has no entry. Without a "://" suffix only the driver name is
+    returned. If any piece of the split string equals "rocm" the result is
+    plain "rocm"; otherwise it is "<driver>://<second piece>".
+    """
+    pieces = device.split("://", 2)
+    scheme = pieces[0]
+    driver = _IREE_DEVICE_MAP.get(scheme, scheme)
+    if len(pieces) == 1:
+        return driver
+    if "rocm" in pieces:
+        return "rocm"
+    return f"{driver}://{pieces[1]}"
+
+
+def get_supported_device_list():
+    """Return the device names that have an entry in ``_IREE_DEVICE_MAP``."""
+    return [device_name for device_name in _IREE_DEVICE_MAP]
+
+
+# Device name (the part before "://") -> IREE driver name, as used by
+# iree_device_map(). The keys double as the supported-device list returned by
+# get_supported_device_list().
+_IREE_DEVICE_MAP = {
+    "cpu": "local-task",
+    "cpu-task": "local-task",
+    "cpu-sync": "local-sync",
+    "cuda": "cuda",
+    "vulkan": "vulkan",
+    "metal": "metal",
+    "rocm": "rocm",
+    "hip": "hip",
+    "intel-gpu": "level_zero",
+}
+
+
+def iree_target_map(device):
+    """Translate a device string into its entry in ``_IREE_TARGET_MAP``.
+
+    Anything from the first "://" onwards is ignored. Names without an entry
+    in the map are returned unchanged.
+    """
+    scheme = device.split("://")[0] if "://" in device else device
+    return _IREE_TARGET_MAP.get(scheme, scheme)
+
+
+# Device name (the part before "://") -> target name, as used by
+# iree_target_map(); parse_device() treats the result as the target backend.
+_IREE_TARGET_MAP = {
+    "cpu": "llvm-cpu",
+    "cpu-task": "llvm-cpu",
+    "cpu-sync": "llvm-cpu",
+    "cuda": "cuda",
+    "vulkan": "vulkan-spirv",
+    "metal": "metal",
+    "rocm": "rocm",
+    "hip": "rocm",
+    "intel-gpu": "opencl-spirv",
+}


 def get_available_devices():
+    return ["rocm", "cpu"]
+
    def get_devices_by_name(driver_name):
-        from shark.iree_utils._common import iree_device_map

        device_list = []
        try:
@@ -50,41 +94,119 @@ def get_available_devices():
                        device_list.append(f"{device_name} => {driver_name}://{i}")
        return device_list

-    set_iree_runtime_flags()
+    # set_iree_runtime_flags()

    available_devices = []
-    from shark.iree_utils.vulkan_utils import (
-        get_all_vulkan_devices,
-    )
-
-    vulkaninfo_list = get_all_vulkan_devices()
-    vulkan_devices = []
-    id = 0
-    for device in vulkaninfo_list:
-        vulkan_devices.append(f"{device.strip()} => vulkan://{id}")
-        id += 1
-    if id != 0:
-        print(f"vulkan devices are available.")
-    available_devices.extend(vulkan_devices)
-    metal_devices = get_devices_by_name("metal")
-    available_devices.extend(metal_devices)
-    cuda_devices = get_devices_by_name("cuda")
-    available_devices.extend(cuda_devices)
    rocm_devices = get_devices_by_name("rocm")
    available_devices.extend(rocm_devices)
-    cpu_device = get_devices_by_name("cpu-sync")
-    available_devices.extend(cpu_device)
+    # cpu_device = get_devices_by_name("cpu-sync")
+    # available_devices.extend(cpu_device)
    cpu_device = get_devices_by_name("cpu-task")
    available_devices.extend(cpu_device)
+
+    # from shark.iree_utils.vulkan_utils import (
+    #     get_all_vulkan_devices,
+    # )
+
+    # vulkaninfo_list = get_all_vulkan_devices()
+    # vulkan_devices = []
+    # id = 0
+    # for device in vulkaninfo_list:
+    #     vulkan_devices.append(f"{device.strip()} => vulkan://{id}")
+    #     id += 1
+    # if id != 0:
+    #     print(f"vulkan devices are available.")
+
+    # available_devices.extend(vulkan_devices)
+    # metal_devices = get_devices_by_name("metal")
+    # available_devices.extend(metal_devices)
+    # cuda_devices = get_devices_by_name("cuda")
+    # available_devices.extend(cuda_devices)
+    # hip_devices = get_devices_by_name("hip")
+    # available_devices.extend(hip_devices)
+
+    for idx, device_str in enumerate(available_devices):
+        if "AMD Radeon(TM) Graphics =>" in device_str:
+            igpu_id_candidates = [
+                x.split("w/")[-1].split("=>")[0]
+                for x in available_devices
+                if "M Graphics" in x
+            ]
+            for igpu_name in igpu_id_candidates:
+                if igpu_name:
+                    available_devices[idx] = device_str.replace(
+                        "AMD Radeon(TM) Graphics", igpu_name
+                    )
+                break
    return available_devices


-def set_iree_runtime_flags():
-    # TODO: This function should be device-agnostic and piped properly
-    # to general runtime driver init.
-    vulkan_runtime_flags = get_iree_vulkan_runtime_flags()
+def clean_device_info(raw_device):
+    # Split a device entry into (device, device_id) for consumption by the
+    # Studio pipeline. Entries may look like "<name> => <driver>://<id>",
+    # "<driver>://<id>" or just "<driver>".
+    # A device id is only kept for hip, rocm and vulkan (defaulting to 0);
+    # every other device always gets device_id None.

-    set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
+    device_id = None
+    # Keep only the part after "=>" when a display name is present.
+    device = raw_device if "=>" not in raw_device else raw_device.split("=>")[1].strip()
+    if "://" in device:
+        device, device_id = device.split("://")
+        # Ids of at most two characters are converted to int; longer ids are
+        # kept as strings.
+        # NOTE(review): int() raises ValueError for a short non-numeric id.
+        if len(device_id) <= 2:
+            device_id = int(device_id)
+
+    if device not in ["hip", "rocm", "vulkan"]:
+        device_id = None
+    if device in ["hip", "rocm", "vulkan"] and device_id == None:
+        device_id = 0
+    return device, device_id
+
+
+def parse_device(device_str, target_override=""):
+    """Resolve a device entry into ``(target_backend, rt_device, triple)``.
+
+    Args:
+        device_str: Device entry as accepted by ``clean_device_info``.
+        target_override: When non-empty, returned as the triple without any
+            inference; for entries containing "cpu" the runtime device is
+            forced to "local-task".
+
+    Returns:
+        A 3-tuple. ``triple`` is ``None`` when it cannot be inferred, which
+        includes every backend this function has no inference rule for.
+    """
+    rt_driver, device_id = clean_device_info(device_str)
+    target_backend = iree_target_map(rt_driver)
+    # A falsy id (None or 0) leaves the bare driver name as the runtime device.
+    if device_id:
+        rt_device = f"{rt_driver}://{device_id}"
+    else:
+        rt_device = rt_driver
+
+    if target_override:
+        if "cpu" in device_str:
+            rt_device = "local-task"
+        return target_backend, rt_device, target_override
+    match target_backend:
+        case "vulkan-spirv":
+            # NOTE(review): get_iree_target_triple is not defined in the
+            # visible part of this module — confirm it is in scope.
+            triple = get_iree_target_triple(device_str)
+            return target_backend, rt_device, triple
+        case "rocm":
+            triple = get_rocm_target_chip(device_str)
+            return target_backend, rt_device, triple
+        case "llvm-cpu":
+            if "Ryzen 9" in device_str:
+                return target_backend, "local-task", "znver4"
+            return target_backend, "local-task", "x86_64-linux-gnu"
+        case _:
+            # Previously this fell off the end and returned None, so callers
+            # indexing the result (e.g. `parse_device(...)[2]`) raised
+            # TypeError for cuda/metal/etc. instead of seeing "no triple".
+            return target_backend, rt_device, None
+
+
+def get_rocm_target_chip(device_str):
+    """Return the gfx target for the first known model marker in ``device_str``.
+
+    Markers are tested in the order listed below using substring matching;
+    ``None`` is returned when none of them occurs.
+    """
+    # TODO: Use a data file to map device_str to target chip.
+    marker_to_chip = (
+        ("6700", "gfx1031"),
+        ("6800", "gfx1030"),
+        ("6900", "gfx1030"),
+        ("7900", "gfx1100"),
+        ("MI300X", "gfx942"),
+        ("MI300A", "gfx940"),
+        ("MI210", "gfx90a"),
+        ("MI250", "gfx90a"),
+        ("MI100", "gfx908"),
+        ("MI50", "gfx906"),
+        ("MI60", "gfx906"),
+        ("780M", "gfx1103"),
+    )
+    return next(
+        (chip for marker, chip in marker_to_chip if marker in device_str),
+        None,
+    )


 def get_all_devices(driver_name):
@@ -98,100 +220,69 @@ def get_all_devices(driver_name):
    driver = get_driver(driver_name)
    device_list_src = driver.query_available_devices()
    device_list_src.sort(key=lambda d: d["path"])
+    del driver
    return device_list_src


-def get_device_mapping(driver, key_combination=3):
-    """This method ensures consistent device ordering when choosing
-    specific devices for execution
-    Args:
-        driver (str): execution driver (vulkan, cuda, rocm, etc)
-        key_combination (int, optional): choice for mapping value for
-            device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Returns:
-        dict: map to possible device names user can input mapped to desired
-            combination of name/path.
-    """
-    from shark.iree_utils._common import iree_device_map
+# def get_device_mapping(driver, key_combination=3):
+#     """This method ensures consistent device ordering when choosing
+#     specific devices for execution
+#     Args:
+#         driver (str): execution driver (vulkan, cuda, rocm, etc)
+#         key_combination (int, optional): choice for mapping value for
+#             device name.
+#         1 : path
+#         2 : name
+#         3 : (name, path)
+#         Defaults to 3.
+#     Returns:
+#         dict: map to possible device names user can input mapped to desired
+#             combination of name/path.
+#     """

-    driver = iree_device_map(driver)
-    device_list = get_all_devices(driver)
-    device_map = dict()
+#     driver = iree_device_map(driver)
+#     device_list = get_all_devices(driver)
+#     device_map = dict()

-    def get_output_value(dev_dict):
-        if key_combination == 1:
-            return f"{driver}://{dev_dict['path']}"
-        if key_combination == 2:
-            return dev_dict["name"]
-        if key_combination == 3:
-            return dev_dict["name"], f"{driver}://{dev_dict['path']}"
+#     def get_output_value(dev_dict):
+#         if key_combination == 1:
+#             return f"{driver}://{dev_dict['path']}"
+#         if key_combination == 2:
+#             return dev_dict["name"]
+#         if key_combination == 3:
+#             return dev_dict["name"], f"{driver}://{dev_dict['path']}"

-    # mapping driver name to default device (driver://0)
-    device_map[f"{driver}"] = get_output_value(device_list[0])
-    for i, device in enumerate(device_list):
-        # mapping with index
-        device_map[f"{driver}://{i}"] = get_output_value(device)
-        # mapping with full path
-        device_map[f"{driver}://{device['path']}"] = get_output_value(device)
-    return device_map
+#     # mapping driver name to default device (driver://0)
+#     device_map[f"{driver}"] = get_output_value(device_list[0])
+#     for i, device in enumerate(device_list):
+#         # mapping with index
+#         device_map[f"{driver}://{i}"] = get_output_value(device)
+#         # mapping with full path
+#         device_map[f"{driver}://{device['path']}"] = get_output_value(device)
+#     return device_map


-def map_device_to_name_path(device, key_combination=3):
-    """Gives the appropriate device data (supported name/path) for user
-        selected execution device
-    Args:
-        device (str): user
-        key_combination (int, optional): choice for mapping value for
-            device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Raises:
-        ValueError:
-    Returns:
-        str / tuple: returns the mapping str or tuple of mapping str for
-        the device depending on key_combination value
-    """
-    driver = device.split("://")[0]
-    device_map = get_device_mapping(driver, key_combination)
-    try:
-        device_mapping = device_map[device]
-    except KeyError:
-        raise ValueError(f"Device '{device}' is not a valid device.")
-    return device_mapping
+# def get_opt_flags(model, precision="fp16"):
+#     iree_flags = []
+#     if len(cmd_opts.iree_vulkan_target_triple) > 0:
+#         iree_flags.append(
+#             f"-iree-vulkan-target-triple={cmd_opts.iree_vulkan_target_triple}"
+#         )
+#     if "rocm" in cmd_opts.device:
+#         from shark.iree_utils.gpu_utils import get_iree_rocm_args

+#         rocm_args = get_iree_rocm_args()
+#         iree_flags.extend(rocm_args)
+#     if cmd_opts.iree_constant_folding == False:
+#         iree_flags.append("--iree-opt-const-expr-hoisting=False")
+#         iree_flags.append(
+#             "--iree-codegen-linalg-max-constant-fold-elements=9223372036854775807"
+#         )
+#     if cmd_opts.data_tiling == False:
+#         iree_flags.append("--iree-opt-data-tiling=False")

-# Generate and return a new seed if the provided one is not in the
-# supported range (including -1)
-def sanitize_seed(seed: int | str):
-    seed = int(seed)
-    uint32_info = np.iinfo(np.uint32)
-    uint32_min, uint32_max = uint32_info.min, uint32_info.max
-    if seed < uint32_min or seed >= uint32_max:
-        seed = randint(uint32_min, uint32_max)
-    return seed
-
-
-# take a seed expression in an input format and convert it to
-# a list of integers, where possible
-def parse_seed_input(seed_input: str | list | int):
-    if isinstance(seed_input, str):
-        try:
-            seed_input = json.loads(seed_input)
-        except (ValueError, TypeError):
-            seed_input = None
-
-    if isinstance(seed_input, int):
-        return [seed_input]
-
-    if isinstance(seed_input, list) and all(type(seed) is int for seed in seed_input):
-        return seed_input
-
-    raise TypeError(
-        "Seed input must be an integer or an array of integers in JSON format"
-    )
+#     if "vae" not in model:
+#         # Due to lack of support for multi-reduce, we always collapse reduction
+#         # dims before dispatch formation right now.
+#         iree_flags += ["--iree-flow-collapse-reduction-dims"]
+#     return iree_flags
--- a/apps/shark_studio/modules/ckpt_processing.py
+++ b/apps/shark_studio/modules/ckpt_processing.py
@@ -0,0 +1,152 @@
+import os
+import json
+import re
+import requests
+import torch
+import safetensors
+from shark_turbine.aot.params import (
+    ParameterArchiveBuilder,
+)
+from io import BytesIO
+from pathlib import Path
+from tqdm import tqdm
+from omegaconf import OmegaConf
+from diffusers import StableDiffusionPipeline
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
+    download_from_original_stable_diffusion_ckpt,
+    create_vae_diffusers_config,
+    convert_ldm_vae_checkpoint,
+)
+
+
+def get_path_to_diffusers_checkpoint(custom_weights, precision="fp16"):
+    """Create and return the diffusers output directory for a checkpoint.
+
+    The directory is ``<checkpoint dir>/diffusers/<checkpoint stem>_<precision>``,
+    made absolute. It is created if missing and returned as a POSIX-style
+    string.
+    """
+    checkpoint = Path(custom_weights)
+    target_dir = checkpoint.parent.absolute().joinpath(
+        "diffusers", f"{checkpoint.stem}_{precision}"
+    )
+    target_dir.mkdir(parents=True, exist_ok=True)
+    return target_dir.as_posix()
+
+
+def preprocessCKPT(custom_weights, precision="fp16", is_inpaint=False):
+    """Convert an original checkpoint into a diffusers directory, once.
+
+    Returns the diffusers directory for ``custom_weights``. If that directory
+    already contains anything, it is returned without converting. Otherwise
+    the checkpoint is loaded, cast to float16 when ``precision`` is "fp16",
+    and saved there. ``is_inpaint`` selects 9 input channels instead of 4.
+    """
+    path_to_diffusers = get_path_to_diffusers_checkpoint(custom_weights, precision)
+    first_entry = next(Path(path_to_diffusers).iterdir(), None)
+    if first_entry:
+        print("Checkpoint already loaded at : ", path_to_diffusers)
+        return path_to_diffusers
+
+    print(
+        "Diffusers' checkpoint will be identified here : ",
+        path_to_diffusers,
+    )
+    is_safetensors_file = custom_weights.lower().endswith(".safetensors")
+    print("Loading diffusers' pipeline from original stable diffusion checkpoint")
+    pipe = download_from_original_stable_diffusion_ckpt(
+        checkpoint_path_or_dict=custom_weights,
+        # EMA weights usually yield higher quality images for inference but
+        # non-EMA weights have been yielding better results in our case.
+        # TODO: Add an option `--ema` (`--no-ema`) for users to specify if
+        #  they want to go for EMA weight extraction or not.
+        extract_ema=False,
+        from_safetensors=is_safetensors_file,
+        num_in_channels=9 if is_inpaint else 4,
+    )
+    if precision == "fp16":
+        pipe.to(dtype=torch.float16)
+    pipe.save_pretrained(path_to_diffusers)
+    del pipe
+    print("Loading complete")
+    return path_to_diffusers
+
+
+def save_irpa(weights_path, prepend_str):
+    """Re-save the tensors of a weights file as an ``.irpa`` parameter archive.
+
+    Args:
+        weights_path: Path to a ``.safetensors`` (or ``.irpa``) file.
+        prepend_str: Prefix added to every tensor name in the archive.
+
+    Returns:
+        The path of the written ``.irpa`` file: the input path with its
+        ``.safetensors`` suffix replaced, or the input path itself when it
+        already ends in ``.irpa``.
+
+    Raises:
+        ValueError: If ``weights_path`` has neither suffix. This used to
+            *return* an ``Exception`` instance, which callers then stored as
+            if it were a path.
+    """
+    weights_path = os.fspath(weights_path)
+    # Validate the name before doing any expensive loading.
+    if weights_path.endswith(".safetensors"):
+        irpa_file = weights_path[: -len(".safetensors")] + ".irpa"
+    elif weights_path.endswith(".irpa"):
+        # NOTE(review): the file is still read with the safetensors loader
+        # below, as before — confirm an .irpa input is actually expected.
+        irpa_file = weights_path
+    else:
+        raise ValueError(
+            "Invalid file format. Please provide a .safetensors or .irpa file."
+        )
+
+    # `import safetensors` alone does not load the `torch` submodule, so
+    # import the loader explicitly instead of relying on another import.
+    from safetensors.torch import load_file
+
+    weights = load_file(weights_path)
+    archive = ParameterArchiveBuilder()
+    for key, tensor in weights.items():
+        archive.add_tensor(prepend_str + key, tensor)
+    archive.save(irpa_file)
+    return irpa_file
+
+
+def convert_original_vae(vae_checkpoint):
+    """Convert an original-format VAE state dict to the diffusers layout.
+
+    Every key is prefixed with "first_stage_model." and the result is passed
+    to ``convert_ldm_vae_checkpoint`` together with a VAE config built from
+    the v1-inference config, which is downloaded on every call.
+
+    Raises:
+        requests.RequestException: If the config download fails, times out or
+            returns an HTTP error status.
+    """
+    vae_state_dict = {
+        "first_stage_model." + key: vae_checkpoint.get(key)
+        for key in list(vae_checkpoint.keys())
+    }
+
+    config_url = (
+        "https://raw.githubusercontent.com/CompVis/stable-diffusion/"
+        "main/configs/stable-diffusion/v1-inference.yaml"
+    )
+    # Fail on HTTP errors instead of parsing an error page as YAML, and bound
+    # the wait so a stalled connection cannot hang the conversion forever.
+    response = requests.get(config_url, timeout=30)
+    response.raise_for_status()
+    original_config = OmegaConf.load(BytesIO(response.content))
+    vae_config = create_vae_diffusers_config(original_config, image_size=512)
+
+    return convert_ldm_vae_checkpoint(vae_state_dict, vae_config)
+
+
+def process_custom_pipe_weights(custom_weights):
+    """Resolve custom weights into ``(weights file, diffusers directory)``.
+
+    A civitai API URL is downloaded first (unless already present) and the
+    local file is then used. Any other value must end in ".ckpt" or
+    ".safetensors". Returns ``None`` for an empty string.
+    """
+    if custom_weights == "":
+        return None
+
+    if custom_weights.startswith("https://civitai.com/api/"):
+        # download the checkpoint from civitai if we don't already have it,
+        # then treat the local file as if it had been passed in directly
+        local_file = get_civitai_checkpoint(custom_weights)
+        return local_file, get_path_to_diffusers_checkpoint(local_file)
+
+    has_supported_suffix = custom_weights.lower().endswith((".ckpt", ".safetensors"))
+    assert (
+        has_supported_suffix
+    ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
+    diffusers_dir = get_path_to_diffusers_checkpoint(custom_weights)
+    return custom_weights, diffusers_dir
+
+
+def get_civitai_checkpoint(url: str):
+    """Download a civitai checkpoint unless it is already present locally.
+
+    The file name is taken from the response's Content-Disposition header and
+    the file is stored in ``<cwd>/<cmd_opts.model_dir or "models">``.
+
+    Returns:
+        POSIX-style path of the local checkpoint file.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        ValueError: If the header does not yield a usable file name.
+    """
+    with requests.get(url, allow_redirects=True, stream=True) as response:
+        response.raise_for_status()
+
+        # civitai api returns the filename in the content disposition
+        header_filename = re.findall(
+            '"([^"]*)"', response.headers["Content-Disposition"]
+        )[0]
+        # The header is server-controlled: keep only the last path component
+        # so the download can never be written outside the models directory.
+        base_filename = Path(header_filename.replace("\\", "/")).name
+        if base_filename in ("", ".", ".."):
+            raise ValueError(
+                f"civitai response has no usable filename: {header_filename!r}"
+            )
+        destination_dir = Path.cwd() / (cmd_opts.model_dir or "models")
+        destination_path = destination_dir / base_filename
+
+        # we already have this model downloaded
+        if destination_path.is_file():
+            print(f"civitai model already downloaded to {destination_path}")
+            return destination_path.as_posix()
+
+        # we don't have this model downloaded yet
+        print(f"downloading civitai model from {url} to {destination_path}")
+        destination_dir.mkdir(parents=True, exist_ok=True)
+
+        # The header is absent for chunked responses; tqdm then shows an
+        # open-ended counter.
+        size = int(response.headers.get("content-length", 0))
+        # Write to a side file and rename when done, so an interrupted
+        # download is not mistaken for a complete model on the next call.
+        partial_path = destination_path.with_name(destination_path.name + ".part")
+        with tqdm(total=size or None, unit="iB", unit_scale=True) as progress_bar:
+            with open(partial_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=65536):
+                    f.write(chunk)
+                    progress_bar.update(len(chunk))
+        partial_path.replace(destination_path)
+
+        return destination_path.as_posix()
--- a/apps/shark_studio/modules/embeddings.py
+++ b/apps/shark_studio/modules/embeddings.py
@@ -0,0 +1,185 @@
+import os
+import sys
+import torch
+import json
+import safetensors
+from dataclasses import dataclass
+from safetensors.torch import load_file
+from apps.shark_studio.web.utils.file_utils import (
+    get_checkpoint_pathfile,
+    get_path_stem,
+)
+
+
+@dataclass
+class LoRAweight:
+    """Weights of one LoRA-adapted layer, as gathered by ``processLoRA``."""
+
+    # ``torch.Tensor`` is the tensor type (``torch.tensor`` is a factory function)
+    up: torch.Tensor
+    down: torch.Tensor
+    # None when the checkpoint has no "mid.weight" entry for the layer
+    mid: "torch.Tensor | None"
+    # alpha / rank; processLoRA may store a tensor here when the checkpoint
+    # provides an ".alpha" entry
+    alpha: "float | torch.Tensor" = 1.0
+
+
+def processLoRA(model, use_lora, splitting_prefix, lora_strength=0.75):
+    """Merge the weights of a LoRA checkpoint directly into ``model``.
+
+    Args:
+        model: module whose sub-layers are looked up by attribute name.
+        use_lora: path of the LoRA file; loaded with safetensors when the
+            path contains ".safetensors", otherwise with ``torch.load``.
+        splitting_prefix: key prefix selecting the entries that belong to
+            this model (the callers in this file pass "lora_unet_" or
+            "lora_te_").
+        lora_strength: multiplier applied on top of each layer's alpha.
+
+    Returns:
+        ``model``, with the affected ``weight`` tensors updated in place.
+
+    Raises:
+        IndexError: a key's layer path cannot be resolved to a sub-module.
+        KeyError: an "up.weight" entry has no matching "down.weight".
+    """
+    if ".safetensors" in use_lora:
+        state_dict = load_file(use_lora)
+    else:
+        # NOTE(review): torch.load unpickles the file; only use it on
+        # checkpoints from a trusted source.
+        state_dict = torch.load(use_lora)
+
+    # gather the weights from the LoRA in a more convenient form, assumes
+    # everything will have an up.weight.
+    weight_dict: dict[str, LoRAweight] = {}
+    for key in state_dict:
+        if key.startswith(splitting_prefix) and key.endswith("up.weight"):
+            stem = key.split("up.weight")[0]
+            weight_key = stem.removesuffix(".lora_")
+            weight_key = weight_key.removesuffix("_lora_")
+            weight_key = weight_key.removesuffix(".lora_linear_layer.")
+
+            if weight_key not in weight_dict:
+                weight_dict[weight_key] = LoRAweight(
+                    state_dict[f"{stem}up.weight"],
+                    state_dict[f"{stem}down.weight"],
+                    state_dict.get(f"{stem}mid.weight", None),
+                    (
+                        # alpha is stored unscaled; divide by the rank
+                        state_dict[f"{weight_key}.alpha"]
+                        / state_dict[f"{stem}up.weight"].shape[1]
+                        if f"{weight_key}.alpha" in state_dict
+                        else 1.0
+                    ),
+                )
+
+    # Directly update weight in model
+
+    # Mostly adaptions of https://github.com/kohya-ss/sd-scripts/blob/main/networks/merge_lora.py
+    # and similar code in https://github.com/huggingface/diffusers/issues/3064
+
+    # TODO: handle mid weights (how do they even work?)
+    for key, lora_weight in weight_dict.items():
+        curr_layer = model
+        layer_infos = key.split(".")[0].split(splitting_prefix)[-1].split("_")
+
+        # find the target layer: attribute names may themselves contain "_",
+        # so on a failed lookup the next fragment is glued onto the name and
+        # the lookup is retried. Running out of fragments raises IndexError.
+        temp_name = layer_infos.pop(0)
+        while True:
+            try:
+                curr_layer = curr_layer.__getattr__(temp_name)
+                if len(layer_infos) > 0:
+                    temp_name = layer_infos.pop(0)
+                else:
+                    break
+            except Exception:
+                if len(temp_name) > 0:
+                    temp_name += "_" + layer_infos.pop(0)
+                else:
+                    temp_name = layer_infos.pop(0)
+
+        weight = curr_layer.weight.data
+        scale = lora_weight.alpha * lora_strength
+        if len(weight.size()) == 2:
+            # linear layer
+            weight_up = lora_weight.up
+            weight_down = lora_weight.down
+            if len(weight_up.shape) == 4:
+                # LoRA stored as 1x1 conv: flatten to matrices. The product
+                # must stay 2-D so it can be added to the 2-D weight.
+                weight_up = weight_up.squeeze(3).squeeze(2).to(torch.float32)
+                weight_down = weight_down.squeeze(3).squeeze(2).to(torch.float32)
+            change = torch.mm(weight_up, weight_down)
+        elif lora_weight.down.size()[2:4] == (1, 1):
+            # conv2d 1x1
+            weight_up = lora_weight.up.squeeze(3).squeeze(2).to(torch.float32)
+            weight_down = lora_weight.down.squeeze(3).squeeze(2).to(torch.float32)
+            change = torch.mm(weight_up, weight_down).unsqueeze(2).unsqueeze(3)
+        else:
+            # conv2d with a larger kernel
+            change = torch.nn.functional.conv2d(
+                lora_weight.down.permute(1, 0, 2, 3),
+                lora_weight.up,
+            ).permute(1, 0, 2, 3)
+
+        curr_layer.weight.data += change * scale
+
+    return model
+
+
+def update_lora_weight_for_unet(unet, use_lora, lora_strength):
+    """Apply a LoRA to ``unet``.
+
+    Args:
+        unet: object exposing ``load_attn_procs``.
+        use_lora: a local ".bin" / ".safetensors" / ".pt" file, or anything
+            else, which is handed to ``load_attn_procs`` unchanged (assumed
+            to be a HF ID with standalone LoRA weights).
+        lora_strength: only used when falling back to ``processLoRA``.
+
+    Returns:
+        ``unet`` when ``load_attn_procs`` succeeds, otherwise the result of
+        ``processLoRA`` with the "lora_unet_" key prefix.
+    """
+    # Extensions are matched by substring and in this order.
+    extension = next(
+        (ext for ext in (".bin", ".safetensors", ".pt") if ext in use_lora),
+        None,
+    )
+    if extension is None:
+        # We assume if it is a HF ID with standalone LoRA weights.
+        unet.load_attn_procs(use_lora)
+        return unet
+
+    main_file_name = get_path_stem(use_lora) + extension
+
+    try:
+        dir_name = os.path.dirname(use_lora)
+        unet.load_attn_procs(dir_name, weight_name=main_file_name)
+        return unet
+    except Exception:
+        # Not loadable as attention processors: merge the weights manually.
+        # KeyboardInterrupt / SystemExit are no longer swallowed here.
+        return processLoRA(unet, use_lora, "lora_unet_", lora_strength)
+
+
+def update_lora_weight(model, use_lora, model_name, lora_strength=1.0):
+    """Apply the LoRA ``use_lora`` to ``model``.
+
+    Models whose ``model_name`` contains "unet" are handled by
+    ``update_lora_weight_for_unet``; every other model is merged with
+    ``processLoRA`` using the "lora_te_" key prefix.
+
+    Returns:
+        The updated model, or None when merging into a non-unet model fails
+        (best effort: the failure is reported, not raised).
+    """
+    if "unet" in model_name:
+        return update_lora_weight_for_unet(model, use_lora, lora_strength)
+    try:
+        return processLoRA(model, use_lora, "lora_te_", lora_strength)
+    except Exception as err:
+        # Keep the best-effort contract, but say why nothing was applied and
+        # let KeyboardInterrupt / SystemExit propagate.
+        print(f"Could not apply LoRA {use_lora} to {model_name}: {err}")
+        return None
+
+
+def get_lora_metadata(lora_filename):
+    """Summarise the training metadata stored in a LoRA safetensors file.
+
+    Args:
+        lora_filename: name resolved through
+            ``get_checkpoint_pathfile(lora_filename, "lora")``.
+
+    Returns:
+        None when the file carries no metadata, otherwise a dict with
+        "model" (hash, name and base version of the trained-on model joined
+        by spaces) and "frequencies" (list of ``(tag, ratio)`` pairs sorted
+        by descending ratio).
+    """
+    # get the metadata from the file
+    filename = get_checkpoint_pathfile(lora_filename, "lora")
+    with safetensors.safe_open(filename, framework="pt", device="cpu") as f:
+        metadata = f.metadata()
+
+    # guard clause for if there isn't any metadata
+    if not metadata:
+        return None
+
+    # metadata is a dictionary of strings, the values of the keys we're
+    # interested in are actually json, and need to be loaded as such
+    tag_frequencies = json.loads(metadata.get("ss_tag_frequency", str("{}")))
+    dataset_dirs = json.loads(metadata.get("ss_dataset_dirs", str("{}")))
+
+    # gather the tag frequency information for all the datasets trained
+    all_frequencies = {}
+    for dataset, tag_counts in tag_frequencies.items():
+        frequencies = sorted(
+            tag_counts.items(),
+            reverse=True,
+            key=lambda x: x[1],
+        )
+        if not frequencies:
+            # dataset without any tags: nothing to contribute
+            continue
+
+        # get a figure for the total number of images processed for this
+        # dataset: either the number listed in its dataset_dirs entry or,
+        # if that doesn't exist, the highest frequency's number.
+        # (The lookup key must be the dataset name; the previous code passed
+        # the builtin ``dir`` and therefore never found an entry.)
+        img_count = (
+            dataset_dirs.get(dataset, {}).get("img_count") or frequencies[0][1]
+        )
+        if not img_count:
+            # avoid dividing by zero on degenerate metadata
+            continue
+
+        # add the dataset frequencies to the overall frequencies replacing the
+        # frequency counts on the tags with a percentage/ratio
+        all_frequencies.update(
+            [(entry[0], entry[1] / img_count) for entry in frequencies]
+        )
+
+    trained_model_id = " ".join(
+        [
+            metadata.get("ss_sd_model_hash", ""),
+            metadata.get("ss_sd_model_name", ""),
+            metadata.get("ss_base_model_version", ""),
+        ]
+    ).strip()
+
+    # return all frequencies of all datasets, most frequent first
+    return {
+        "model": trained_model_id,
+        "frequencies": sorted(
+            all_frequencies.items(), reverse=True, key=lambda x: x[1]
+        ),
+    }
--- a/apps/shark_studio/modules/img_processing.py
+++ b/apps/shark_studio/modules/img_processing.py
@@ -0,0 +1,204 @@
+import os
+import re
+import json
+import torch
+import numpy as np
+
+from csv import DictWriter
+from PIL import Image, PngImagePlugin
+from pathlib import Path
+from datetime import datetime as dt
+from base64 import decode
+
+
+# Maps resampler display names to Pillow resampling filters.
+resamplers = {
+    "Lanczos": Image.Resampling.LANCZOS,
+    "Nearest Neighbor": Image.Resampling.NEAREST,
+    "Bilinear": Image.Resampling.BILINEAR,
+    "Bicubic": Image.Resampling.BICUBIC,
+    "Hamming": Image.Resampling.HAMMING,
+    "Box": Image.Resampling.BOX,
+}
+
+# Live view of the names above (a dict keys view, not a list).
+resampler_list = resamplers.keys()
+
+
+# save output images and the inputs corresponding to it.
+def save_output_img(output_img, img_seed, extra_info=None):
+    """Save a generated image together with the inputs that produced it.
+
+    Writes the image (jpg or png, per ``cmd_opts.output_img_format``) into
+    today's sub-directory of the generated-images path, appends a row to
+    ``imgs_details.csv`` there and writes a ``<image name>.json`` sidecar.
+
+    Args:
+        output_img: image object providing ``save`` (a PIL image).
+        img_seed: seed used for the image; part of the file name.
+        extra_info: dict of generation parameters. The keys "prompt",
+            "base_model_id", "custom_weights", "custom_vae" and
+            "embeddings" are read unconditionally; "negative_prompt",
+            "steps", "scheduler", "guidance_scale", "width" and "height"
+            are read when png metadata is written. A "progress" key is
+            removed from the passed dict.
+    """
+    from apps.shark_studio.web.utils.file_utils import (
+        get_generated_imgs_path,
+        get_generated_imgs_todays_subdir,
+    )
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+    if extra_info is None:
+        extra_info = {}
+    elif "progress" in extra_info.keys():
+        extra_info.pop("progress")
+    generated_imgs_path = Path(
+        get_generated_imgs_path(), get_generated_imgs_todays_subdir()
+    )
+    generated_imgs_path.mkdir(parents=True, exist_ok=True)
+    csv_path = Path(generated_imgs_path, "imgs_details.csv")
+
+    # file name: time of day, first 15 prompt characters (sanitised), seed
+    prompt_slice = re.sub("[^a-zA-Z0-9]", "_", extra_info["prompt"][0][:15])
+    out_img_name = f"{dt.now().strftime('%H%M%S')}_{prompt_slice}_{img_seed}"
+
+    img_model = extra_info["base_model_id"]
+    if extra_info["custom_weights"] not in [None, "None"]:
+        img_model = Path(os.path.basename(extra_info["custom_weights"])).stem
+
+    img_vae = None
+    if extra_info["custom_vae"]:
+        img_vae = Path(os.path.basename(extra_info["custom_vae"])).stem
+
+    img_loras = None
+    if extra_info["embeddings"]:
+        # One name per entry. The previous code ignored the loop variable and
+        # did ``list += str``, which appended single characters.
+        # NOTE(review): assumes each entry (or dict key) is a LoRA path/name
+        # - confirm against the caller.
+        img_loras = ", ".join(
+            Path(os.path.basename(str(entry))).stem
+            for entry in extra_info["embeddings"]
+        )
+
+    if cmd_opts.output_img_format == "jpg":
+        out_img_path = Path(generated_imgs_path, f"{out_img_name}.jpg")
+        output_img.save(out_img_path, quality=95, subsampling=0)
+    else:
+        out_img_path = Path(generated_imgs_path, f"{out_img_name}.png")
+        pngInfo = PngImagePlugin.PngInfo()
+
+        if cmd_opts.write_metadata_to_png:
+            # Using a conditional expression caused problems, so setting a new
+            # variable for now.
+            # if cmd_opts.use_hiresfix:
+            #    png_size_text = (
+            #        f"{cmd_opts.hiresfix_width}x{cmd_opts.hiresfix_height}"
+            #    )
+            # else:
+            png_size_text = f"{extra_info['width']}x{extra_info['height']}"
+
+            pngInfo.add_text(
+                "parameters",
+                f"{extra_info['prompt'][0]}"
+                f"\nNegative prompt: {extra_info['negative_prompt'][0]}"
+                f"\nSteps: {extra_info['steps']},"
+                f"Sampler: {extra_info['scheduler']}, "
+                f"CFG scale: {extra_info['guidance_scale']}, "
+                f"Seed: {img_seed},"
+                f"Size: {png_size_text}, "
+                f"Model: {img_model}, "
+                f"VAE: {img_vae}, "
+                f"LoRA: {img_loras}",
+            )
+
+        output_img.save(out_img_path, "PNG", pnginfo=pngInfo)
+
+        # any format other than jpg ends up here and is stored as png
+        if cmd_opts.output_img_format not in ["png", "jpg"]:
+            print(
+                f"[ERROR] Format {cmd_opts.output_img_format} is not "
+                f"supported yet. Image saved as png instead."
+                f"Supported formats: png / jpg"
+            )
+
+    # To be as low-impact as possible to the existing CSV format, we append
+    # "VAE" and "LORA" to the end. However, it does not fit the hierarchy of
+    # importance for each data point. Something to consider.
+    new_entry = {}
+
+    new_entry.update(extra_info)
+
+    csv_mode = "a" if os.path.isfile(csv_path) else "w"
+    # newline="" is what the csv module asks for; without it every row is
+    # followed by a blank line on Windows.
+    with open(csv_path, csv_mode, encoding="utf-8", newline="") as csv_obj:
+        dictwriter_obj = DictWriter(csv_obj, fieldnames=list(new_entry.keys()))
+        if csv_mode == "w":
+            dictwriter_obj.writeheader()
+        dictwriter_obj.writerow(new_entry)
+
+    json_path = Path(generated_imgs_path, f"{out_img_name}.json")
+    with open(json_path, "w") as f:
+        json.dump(new_entry, f, indent=4)
+
+
+# For stencil, the input image can be of any size, but we need to ensure that
+# it conforms with our model constraints :-
+#   Both width and height should be in the range of [128, 768] and multiple of 8.
+# This utility function performs the transformation on the input image while
+# also maintaining the aspect ratio before sending it to the stencil pipeline.
+def resize_stencil(image: Image.Image, width, height, resampler_type=None):
+    """Resize ``image`` for the stencil pipeline.
+
+    The requested size is scaled up (keeping the aspect ratio) when its
+    smaller side is below 128, scaled down when its smaller side is above
+    768, and both sides are then rounded down to a multiple of 8.
+
+    Args:
+        image: image to resize.
+        width: requested width.
+        height: requested height.
+        resampler_type: a key of ``resamplers``; any other value selects
+            "Nearest Neighbor".
+
+    Returns:
+        ``(new_image, n_width, n_height)``.
+    """
+    aspect_ratio = width / height
+    min_size = min(width, height)
+    if min_size < 128:
+        n_size = 128
+        if width == min_size:
+            width = n_size
+            height = n_size / aspect_ratio
+        else:
+            height = n_size
+            width = n_size * aspect_ratio
+    width = int(width)
+    height = int(height)
+
+    min_size = min(width, height)
+    if min_size > 768:
+        n_size = 768
+        if width == min_size:
+            height = n_size
+            width = n_size * aspect_ratio
+        else:
+            width = n_size
+            height = n_size / aspect_ratio
+    width = int(width)
+    height = int(height)
+    # round down to the nearest multiple of 8
+    n_width = (width // 8) * 8
+    n_height = (height // 8) * 8
+    if resampler_type in resamplers:
+        resampler = resamplers[resampler_type]
+    else:
+        resampler = resamplers["Nearest Neighbor"]
+    # Pillow's keyword is ``resample``; ``resampler=`` raised a TypeError.
+    new_image = image.resize((n_width, n_height), resample=resampler)
+    return new_image, n_width, n_height
+
+
+def process_sd_init_image(self, sd_init_image, resample_type):
+    """Turn an init image into the tensor used for img2img.
+
+    Args:
+        self: object providing ``width``, ``height`` and ``dtype`` (and
+            ``process_sd_init_image`` itself when a list is passed).
+        sd_init_image: a PIL image, a path to an image file, a mapping with
+            an "image" entry, a list of any of these, or a falsy value.
+        resample_type: a key of ``resamplers``; anything else falls back to
+            Lanczos.
+
+    Returns:
+        ``(image, is_img2img)``. ``image`` is a tensor with the channel axis
+        moved to position 1 and values rescaled from [0, 255] to [-1, 1], a
+        list of such tensors for list input, or None (with ``is_img2img``
+        False) when there is no usable image.
+    """
+    if isinstance(sd_init_image, list) and sd_init_image:
+        images = []
+        for img in sd_init_image:
+            img, _ = self.process_sd_init_image(img, resample_type)
+            images.append(img)
+        # return only after every entry has been processed (the return used
+        # to sit inside the loop and stopped after the first image)
+        return images, True
+
+    if isinstance(sd_init_image, str):
+        if os.path.isfile(sd_init_image):
+            # Load the file and fall through to the shared conversion below;
+            # recursing here handed an already converted tensor to the
+            # truthiness check and the resize further down.
+            image = Image.open(sd_init_image, mode="r").convert("RGB")
+        else:
+            image = None
+    elif isinstance(sd_init_image, Image.Image):
+        image = sd_init_image.convert("RGB")
+    elif sd_init_image:
+        image = sd_init_image["image"].convert("RGB")
+    else:
+        image = None
+
+    if image is None:
+        return None, False
+
+    resample_type = (
+        resamplers[resample_type]
+        if resample_type in resampler_list
+        # Fallback to Lanczos
+        else Image.Resampling.LANCZOS
+    )
+    image = image.resize((self.width, self.height), resample=resample_type)
+    # add a leading batch axis, scale to [0, 1], move channels to axis 1 and
+    # finally map to [-1, 1]
+    image_arr = np.stack([np.array(i) for i in (image,)], axis=0)
+    image_arr = image_arr / 255.0
+    image_arr = torch.from_numpy(image_arr).permute(0, 3, 1, 2).to(self.dtype)
+    image_arr = 2 * (image_arr - 0.5)
+    return image_arr, True
--- a/apps/shark_studio/modules/logger.py
+++ b/apps/shark_studio/modules/logger.py
@@ -0,0 +1,37 @@
+import sys
+
+
+class Logger:
+    """File-like stand-in for ``sys.stdout`` that diverts tagged output.
+
+    Messages containing ``filter`` are written to the log file, everything
+    else to the stream that was ``sys.stdout`` when the Logger was created.
+    """
+
+    def __init__(self, filename, filter=None):
+        """Open ``filename`` for writing (truncating it) and keep stdout.
+
+        Args:
+            filename: path of the log file.
+            filter: marker text selecting the log file; None sends
+                everything to the terminal.
+        """
+        self.terminal = sys.stdout
+        self.log = open(filename, "w")
+        self.filter = filter
+
+    def write(self, message):
+        """Route ``message`` to the log file or the terminal."""
+        if self.filter is None:
+            # No marker configured: plain pass-through. (``None in x`` used
+            # to raise a TypeError here.)
+            self.terminal.write(message)
+            return
+        # NOTE(review): the whole message is written once per contained
+        # line, so a multi-line message is emitted several times and can
+        # reach both targets. Left unchanged because the contents of the
+        # log file depend on it.
+        for x in message.split("\n"):
+            if self.filter in x:
+                self.log.write(message)
+            else:
+                self.terminal.write(message)
+
+    def flush(self):
+        """Flush both the terminal stream and the log file."""
+        self.terminal.flush()
+        self.log.flush()
+
+    def isatty(self):
+        """Report that this stream is not a terminal."""
+        return False
+
+
+def logger_test(x):
+    """Print one line with the "[LOG]" marker and one without; return ``x``."""
+    for line in (
+        "[LOG] This is a test",
+        "This is another test, without the filter",
+    ):
+        print(line)
+    return x
+
+
+def read_sd_logs():
+    """Flush ``sys.stdout`` and return the text of "shark_tmp/sd.log"."""
+    sys.stdout.flush()
+    log_file = open("shark_tmp/sd.log", "r")
+    try:
+        contents = log_file.read()
+    finally:
+        log_file.close()
+    return contents
+
+
+# Import-time side effect: importing this module replaces sys.stdout, so
+# printed messages containing "[LOG]" go to shark_tmp/sd.log. The file is
+# opened in "w" mode (truncated), and the path is relative to the current
+# working directory.
+sys.stdout = Logger("shark_tmp/sd.log", filter="[LOG]")
--- a/apps/shark_studio/modules/pipeline.py
+++ b/apps/shark_studio/modules/pipeline.py
@@ -0,0 +1,205 @@
+from shark.iree_utils.compile_utils import (
+    get_iree_compiled_module,
+    load_vmfb_using_mmap,
+    clean_device_info,
+    get_iree_target_triple,
+)
+from apps.shark_studio.web.utils.file_utils import (
+    get_checkpoints_path,
+    get_resource_path,
+)
+from apps.shark_studio.modules.shared_cmd_opts import (
+    cmd_opts,
+)
+from iree import runtime as ireert
+from pathlib import Path
+import gc
+import os
+
+
+class SharkPipelineBase:
+    # This class is a lightweight base for managing an
+    # inference API class. It should provide methods for:
+    # - compiling a set (model map) of torch IR modules
+    # - preparing weights for an inference job
+    # - loading weights for an inference job
+    # - utilities like benchmarks, tests
+
+    def __init__(
+        self,
+        model_map: dict,
+        base_model_id: str,
+        static_kwargs: dict,
+        device: str,
+        import_mlir: bool = True,
+    ):
+        """Store the pipeline description and prepare the temp directory.
+
+        Args:
+            model_map: per-submodel dicts; "initializer" (callable) and the
+                optional "ireec_flags" (list) entries are used by this class.
+            base_model_id: identifier of the base model (stored only).
+            static_kwargs: per-submodel keyword arguments plus a "pipe"
+                entry whose values act as defaults for every submodel.
+            device: device string understood by ``clean_device_info``.
+            import_mlir: stored only.
+        """
+        self.model_map = model_map
+        self.pipe_map = {}
+        self.static_kwargs = static_kwargs
+        self.base_model_id = base_model_id
+        self.triple = get_iree_target_triple(device)
+        self.device, self.device_id = clean_device_info(device)
+        self.import_mlir = import_mlir
+        self.iree_module_dict = {}
+        self.tmp_dir = get_resource_path(cmd_opts.tmp_dir)
+        # makedirs also creates missing parents and does not fail when the
+        # directory appears between a check and the creation
+        os.makedirs(self.tmp_dir, exist_ok=True)
+        self.tempfiles = {}
+        self.pipe_vmfb_path = ""
+
+    def get_compiled_map(self, pipe_id, submodel="None", init_kwargs=None) -> None:
+        """Make sure every submodel has a compiled artifact.
+
+        With the default ``submodel="None"`` this runs once per key of
+        ``model_map``. For a single submodel it records a precompiled .vmfb
+        if one is found, otherwise imports the torch IR (when no tempfile
+        exists yet) and compiles it into ``iree_module_dict``.
+
+        Args:
+            pipe_id: pipeline ID; sanitised with ``safe_name`` and used as
+                the artifact directory name below the checkpoints path.
+            submodel: key of ``model_map`` or the string "None" for all.
+            init_kwargs: initializer kwargs used when the submodel has no
+                entry in ``static_kwargs``.
+        """
+        # First checks whether we have .vmfbs precompiled, then populates the map
+        # with the precompiled executables and fetches executables for the rest of the map.
+        # The weights aren't static here anymore so this function should be a part of pipeline
+        # initialization. As soon as you have a pipeline ID unique to your static torch IR parameters,
+        # and your model map is populated with any IR - unique model IDs and their static params,
+        # call this method to get the artifacts associated with your map.
+        if init_kwargs is None:
+            # a fresh dict per call; a ``{}`` default would be shared and is
+            # written to below
+            init_kwargs = {}
+        self.pipe_id = self.safe_name(pipe_id)
+        self.pipe_vmfb_path = Path(os.path.join(get_checkpoints_path(), self.pipe_id))
+        self.pipe_vmfb_path.mkdir(parents=True, exist_ok=True)
+        if submodel == "None":
+            print("\n[LOG] Gathering any pre-compiled artifacts....")
+            for key in self.model_map:
+                self.get_compiled_map(pipe_id, submodel=key)
+            return
+
+        self.pipe_map[submodel] = {}
+        self.get_precompiled(self.pipe_id, submodel)
+        if submodel in self.iree_module_dict:
+            # already compiled or loaded
+            return
+        if "vmfb_path" in self.pipe_map[submodel]:
+            # precompiled artifact found on disk; load_submodels picks it up
+            return
+        if submodel not in self.tempfiles:
+            print(
+                f"\n[LOG] Tempfile for {submodel} not found. Fetching torch IR..."
+            )
+            if submodel in self.static_kwargs:
+                # NOTE: the pipe-level defaults below are merged into this
+                # very dict, i.e. into static_kwargs[submodel] (kept as is).
+                init_kwargs = self.static_kwargs[submodel]
+            for key in self.static_kwargs["pipe"]:
+                if key not in init_kwargs:
+                    init_kwargs[key] = self.static_kwargs["pipe"][key]
+            self.import_torch_ir(submodel, init_kwargs)
+            # the tempfile exists now, so this second pass compiles it
+            self.get_compiled_map(pipe_id, submodel)
+            return
+
+        # Copy the flags: appending to the list stored in model_map would
+        # add the extra flag again on every compilation.
+        ireec_flags = list(self.model_map[submodel].get("ireec_flags", []))
+
+        weights_path = self.get_io_params(submodel)
+        if weights_path:
+            ireec_flags.append("--iree-opt-const-eval=False")
+
+        self.iree_module_dict[submodel] = get_iree_compiled_module(
+            self.tempfiles[submodel],
+            device=self.device,
+            frontend="torch",
+            mmap=True,
+            external_weight_file=weights_path,
+            extra_args=ireec_flags,
+            write_to=os.path.join(self.pipe_vmfb_path, submodel + ".vmfb"),
+        )
+        return
+
+    def get_io_params(self, submodel):
+        """Return the external weights path of ``submodel``, or None.
+
+        "external_weight_file" takes precedence over "external_weight_path";
+        a submodel without either (or without any static kwargs) is assumed
+        to carry its weights inside the torch IR.
+        """
+        submodel_kwargs = self.static_kwargs.get(submodel, {})
+        if "external_weight_file" in submodel_kwargs:
+            # we are using custom weights
+            weights_path = submodel_kwargs["external_weight_file"]
+        elif "external_weight_path" in submodel_kwargs:
+            # we are using the default weights for the HF model
+            weights_path = submodel_kwargs["external_weight_path"]
+        else:
+            # assume the torch IR contains the weights.
+            weights_path = None
+        return weights_path
+
+    def get_precompiled(self, pipe_id, submodel="None"):
+        """Record already compiled artifacts found in ``pipe_vmfb_path``.
+
+        For ``submodel="None"`` every key of ``model_map`` is checked.
+        Otherwise each file directly inside ``pipe_vmfb_path`` whose name
+        contains ``submodel`` is stored as ``pipe_map[submodel]["vmfb_path"]``
+        (the last match wins). ``pipe_map[submodel]`` must already exist.
+        """
+        if submodel == "None":
+            for model in self.model_map:
+                self.get_precompiled(pipe_id, model)
+            # "None" is not a submodel itself; continuing would look up
+            # pipe_map["None"] for any file name containing "None"
+            return
+        vmfbs = []
+        # only the top level of the directory is inspected
+        for dirpath, dirnames, filenames in os.walk(self.pipe_vmfb_path):
+            vmfbs.extend(filenames)
+            break
+        for file in vmfbs:
+            # NOTE(review): substring match, so "vae" would also match
+            # "vae_encode.vmfb" - confirm this is intended.
+            if submodel in file:
+                self.pipe_map[submodel]["vmfb_path"] = os.path.join(
+                    self.pipe_vmfb_path, file
+                )
+        return
+
+    def import_torch_ir(self, submodel, kwargs):
+        """Export ``submodel`` to torch IR and write it to a tempfile.
+
+        Calls the submodel's "initializer" with the flattened ``kwargs`` and
+        ``compile_to="torch"``, and registers the written file in
+        ``self.tempfiles``.
+        """
+        torch_ir = self.model_map[submodel]["initializer"](
+            **self.safe_dict(kwargs), compile_to="torch"
+        )
+        if submodel == "clip":
+            # clip.export_clip_model returns (torch_ir, tokenizer)
+            torch_ir = torch_ir[0]
+
+        self.tempfiles[submodel] = os.path.join(
+            self.tmp_dir, f"{submodel}.torch.tempfile"
+        )
+
+        with open(self.tempfiles[submodel], "w+") as f:
+            f.write(torch_ir)
+        # the IR text can be large; release it right away
+        del torch_ir
+        gc.collect()
+        return
+
+    def load_submodels(self, submodels: list):
+        """Load (or compile) each listed submodel into ``iree_module_dict``."""
+        for submodel in submodels:
+            if submodel in self.iree_module_dict:
+                print(f"\n[LOG] {submodel} is ready for inference.")
+                continue
+            if "vmfb_path" in self.pipe_map[submodel]:
+                weights_path = self.get_io_params(submodel)
+                # print(
+                #     f"\n[LOG] Loading .vmfb for {submodel} from {self.pipe_map[submodel]['vmfb_path']}"
+                # )
+                self.iree_module_dict[submodel] = {}
+                # NOTE(review): device_idx is fixed to 0 and self.device_id
+                # is not used here - confirm this is intended.
+                (
+                    self.iree_module_dict[submodel]["vmfb"],
+                    self.iree_module_dict[submodel]["config"],
+                    self.iree_module_dict[submodel]["temp_file_to_unlink"],
+                ) = load_vmfb_using_mmap(
+                    self.pipe_map[submodel]["vmfb_path"],
+                    self.device,
+                    device_idx=0,
+                    rt_flags=[],
+                    external_weight_file=weights_path,
+                )
+            else:
+                self.get_compiled_map(self.pipe_id, submodel)
+        return
+
+    def unload_submodels(self, submodels: list):
+        """Drop the listed submodels from ``iree_module_dict``."""
+        for submodel in submodels:
+            if submodel in self.iree_module_dict:
+                del self.iree_module_dict[submodel]
+                gc.collect()
+        return
+
+    def run(self, submodel, inputs):
+        """Invoke the "main" function of a loaded submodel.
+
+        ``inputs`` may be a single value or a list; every value is moved to
+        the submodel's device before the call.
+        """
+        if not isinstance(inputs, list):
+            inputs = [inputs]
+        device = self.iree_module_dict[submodel]["config"].device
+        inp = [ireert.asdevicearray(device, arg) for arg in inputs]
+        return self.iree_module_dict[submodel]["vmfb"]["main"](*inp)
+
+    def safe_name(self, name):
+        """Return ``name`` with "/", "-" and backslashes replaced by "_"."""
+        return name.replace("/", "_").replace("-", "_").replace("\\", "_")
+
+    def safe_dict(self, kwargs: dict):
+        """Flatten nested dict values of ``kwargs`` into lists of their values.
+
+        A dict value that contains the key "pass_dict" is passed through
+        unchanged, as are all non-dict values.
+        """
+        flat_args = {}
+        for i in kwargs:
+            if isinstance(kwargs[i], dict) and "pass_dict" not in kwargs[i]:
+                flat_args[i] = [kwargs[i][j] for j in kwargs[i]]
+            else:
+                flat_args[i] = kwargs[i]
+
+        return flat_args
--- a/apps/shark_studio/modules/prompt_encoding.py
+++ b/apps/shark_studio/modules/prompt_encoding.py
@@ -0,0 +1,376 @@
+from typing import List, Optional, Union
+from iree import runtime as ireert
+import re
+import torch
+import numpy as np
+
+# Tokenizer for the attention syntax, compiled in verbose mode. Alternatives
+# are tried in order: escaped brackets / backslash, opening brackets, an
+# explicit ":<weight>)" close (group 1 is the weight), plain closing
+# brackets, a run of ordinary text, and finally a lone ":".
+re_attention = re.compile(
+    r"""
+\\\(|
+\\\)|
+\\\[|
+\\]|
+\\\\|
+\\|
+\(|
+\[|
+:([+-]?[.\d]+)\)|
+\)|
+]|
+[^\\()\[\]:]+|
+:
+""",
+    re.X,
+)
+
+
+def parse_prompt_attention(text):
+    r"""
+    Parses a string with attention tokens and returns a list of pairs:
+        text and its associated weight.
+    Accepted tokens are:
+      (abc) - increases attention to abc by a multiplier of 1.1
+      (abc:3.12) - increases attention to abc by a multiplier of 3.12
+      [abc] - decreases attention to abc by a multiplier of 1.1
+      \( - literal character '('
+      \[ - literal character '['
+      \) - literal character ')'
+      \] - literal character ']'
+      \\ - literal character '\'
+      anything else - just text
+    >>> parse_prompt_attention('normal text')
+    [['normal text', 1.0]]
+    >>> parse_prompt_attention('an (important) word')
+    [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
+    >>> parse_prompt_attention('(unbalanced')
+    [['unbalanced', 1.1]]
+    >>> parse_prompt_attention('\(literal\]')
+    [['(literal]', 1.0]]
+    >>> parse_prompt_attention('(unnecessary)(parens)')
+    [['unnecessaryparens', 1.1]]
+    >>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).')
+    [['a ', 1.0],
+     ['house', 1.5730000000000004],
+     [' ', 1.1],
+     ['on', 1.0],
+     [' a ', 1.1],
+     ['hill', 0.55],
+     [', sun, ', 1.1],
+     ['sky', 1.4641000000000006],
+     ['.', 1.1]]
+    """
+    # The docstring is a raw string: "\(" and friends are invalid escape
+    # sequences in a normal string literal (SyntaxWarning on Python 3.12).
+
+    res = []
+    # positions in ``res`` at which still-open brackets started
+    round_brackets = []
+    square_brackets = []
+
+    round_bracket_multiplier = 1.1
+    square_bracket_multiplier = 1 / 1.1
+
+    def multiply_range(start_position, multiplier):
+        # scale every fragment collected since the bracket was opened
+        for p in range(start_position, len(res)):
+            res[p][1] *= multiplier
+
+    for m in re_attention.finditer(text):
+        token = m.group(0)
+        weight = m.group(1)
+
+        if token.startswith("\\"):
+            # escaped character: keep it literally, without the backslash
+            res.append([token[1:], 1.0])
+        elif token == "(":
+            round_brackets.append(len(res))
+        elif token == "[":
+            square_brackets.append(len(res))
+        elif weight is not None and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), float(weight))
+        elif token == ")" and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), round_bracket_multiplier)
+        elif token == "]" and len(square_brackets) > 0:
+            multiply_range(square_brackets.pop(), square_bracket_multiplier)
+        else:
+            res.append([token, 1.0])
+
+    # brackets that were never closed still apply up to the end of the text
+    for pos in round_brackets:
+        multiply_range(pos, round_bracket_multiplier)
+
+    for pos in square_brackets:
+        multiply_range(pos, square_bracket_multiplier)
+
+    if len(res) == 0:
+        res = [["", 1.0]]
+
+    # merge runs of identical weights
+    i = 0
+    while i + 1 < len(res):
+        if res[i][1] == res[i + 1][1]:
+            res[i][0] += res[i + 1][0]
+            res.pop(i + 1)
+        else:
+            i += 1
+
+    return res
+
+
+def get_prompts_with_weights(pipe, prompt: List[str], max_length: int):
+    r"""
+    Tokenize each prompt and pair every token with its attention weight.
+
+    The prompts are split with ``parse_prompt_attention``; each fragment is
+    tokenized with ``pipe.tokenizer`` and its first and last token ids are
+    dropped. No padding, starting or ending token is included, and every
+    prompt is cut to at most ``max_length`` tokens.
+
+    Returns ``(tokens, weights)``: two lists holding one list per prompt.
+    """
+    all_tokens = []
+    all_weights = []
+    was_truncated = False
+    for single_prompt in prompt:
+        ids = []
+        id_weights = []
+        for fragment, fragment_weight in parse_prompt_attention(single_prompt):
+            # tokenize and discard the starting and the ending token
+            fragment_ids = pipe.tokenizer(fragment).input_ids[1:-1]
+            ids += fragment_ids
+            # one weight entry per token of the fragment
+            id_weights += [fragment_weight] * len(fragment_ids)
+            # no need to tokenize further once over the limit
+            if len(ids) > max_length:
+                was_truncated = True
+                break
+        if len(ids) > max_length:
+            was_truncated = True
+            ids = ids[:max_length]
+            id_weights = id_weights[:max_length]
+        all_tokens.append(ids)
+        all_weights.append(id_weights)
+    if was_truncated:
+        print(
+            "Prompt was truncated. Try to shorten the prompt or increase max_embeddings_multiples"
+        )
+    return all_tokens, all_weights
+
+
+def pad_tokens_and_weights(
+    tokens,
+    weights,
+    max_length,
+    bos,
+    eos,
+    no_boseos_middle=True,
+    chunk_length=77,
+):
+    r"""
+    Wrap every token list in ``bos`` ... ``eos`` and pad it with ``eos`` up
+    to ``max_length``; pad the matching weights with 1.0.
+
+    With ``no_boseos_middle`` the weights get a single leading 1.0 and are
+    padded to ``max_length``. Otherwise they are laid out chunk by chunk
+    (``chunk_length - 2`` weights framed by a 1.0 on each side) and padded
+    to ``max_embeddings_multiples * chunk_length``.
+
+    Both input lists are modified in place and also returned.
+    """
+    per_chunk = chunk_length - 2
+    n_chunks = (max_length - 2) // per_chunk
+    if no_boseos_middle:
+        target_len = max_length
+    else:
+        target_len = n_chunks * chunk_length
+
+    for idx, row in enumerate(tokens):
+        tokens[idx] = [bos] + row + [eos] * (max_length - 1 - len(row))
+        row_weights = weights[idx]
+        if no_boseos_middle:
+            tail = [1.0] * (max_length - 1 - len(row_weights))
+            weights[idx] = [1.0] + row_weights + tail
+            continue
+
+        if len(row_weights) == 0:
+            laid_out = [1.0] * target_len
+        else:
+            laid_out = []
+            for chunk_no in range(n_chunks):
+                begin = chunk_no * per_chunk
+                end = min(len(row_weights), (chunk_no + 1) * per_chunk)
+                laid_out.append(1.0)  # weight for starting token in this chunk
+                laid_out += row_weights[begin:end]
+                laid_out.append(1.0)  # weight for ending token in this chunk
+            laid_out += [1.0] * (target_len - len(laid_out))
+        weights[idx] = laid_out[:]
+
+    return tokens, weights
+
+
+def get_unweighted_text_embeddings(
+    pipe,
+    text_input,
+    chunk_length: int,
+    no_boseos_middle: Optional[bool] = True,
+):
+    """
+    When the length of tokens is a multiple of the capacity of the text encoder,
+    it should be split into chunks and sent to the text encoder individually.
+
+    Args:
+        pipe: object whose ``run("clip", tokens)`` returns a sequence; its
+            first item provides ``to_host()`` yielding a numpy array.
+        text_input: token id tensor indexed as ``[batch, position]``.
+        chunk_length: number of tokens the text encoder accepts per call,
+            including the starting and ending token.
+        no_boseos_middle: when True, drop the embeddings of the starting /
+            ending tokens at the seams between chunks.
+
+    Returns:
+        A torch tensor with the embeddings; chunk results are joined along
+        the token axis (axis 1).
+    """
+    max_embeddings_multiples = (text_input.shape[1] - 2) // (chunk_length - 2)
+    if max_embeddings_multiples > 1:
+        text_embeddings = []
+        for i in range(max_embeddings_multiples):
+            # extract the i-th chunk
+            text_input_chunk = text_input[
+                :, i * (chunk_length - 2) : (i + 1) * (chunk_length - 2) + 2
+            ].clone()
+
+            # cover the head and the tail by the starting and the ending tokens
+            text_input_chunk[:, 0] = text_input[0, 0]
+            text_input_chunk[:, -1] = text_input[0, -1]
+
+            text_embedding = pipe.run("clip", text_input_chunk)[0].to_host()
+
+            if no_boseos_middle:
+                if i == 0:
+                    # discard the ending token
+                    text_embedding = text_embedding[:, :-1]
+                elif i == max_embeddings_multiples - 1:
+                    # discard the starting token
+                    text_embedding = text_embedding[:, 1:]
+                else:
+                    # discard both starting and ending tokens
+                    text_embedding = text_embedding[:, 1:-1]
+
+            text_embeddings.append(text_embedding)
+        # SHARK: Convert the result to tensor.
+        # Join along the token axis, like the torch.concat(..., axis=1) this
+        # replaces. Stacking the chunks with np.array() first joined them
+        # along the batch axis and failed for chunks of different length.
+        text_embeddings_np = np.concatenate(text_embeddings, axis=1)
+        text_embeddings = torch.from_numpy(text_embeddings_np)
+    else:
+        text_embeddings = pipe.run("clip", text_input)[0]
+        text_embeddings = torch.from_numpy(text_embeddings.to_host())
+    return text_embeddings
+
+
+# This function deals with NoneType values occurring in tokens after padding.
+# It replaces None with 49407, because truncating the None values instead
+# causes matrix dimension errors.
+# NOTE(review): only the first row of ``tokens`` is returned; any further
+# rows are dropped.
+def filter_nonetype_tokens(tokens: List[List]):
+    first_row = []
+    for token in tokens[0]:
+        first_row.append(token if token is not None else 49407)
+    return [first_row]
+
+
+def get_weighted_text_embeddings(
+    pipe,
+    prompt: List[str],
+    uncond_prompt: List[str] = None,
+    max_embeddings_multiples: Optional[int] = 8,
+    no_boseos_middle: Optional[bool] = True,
+    skip_parsing: Optional[bool] = False,
+    skip_weighting: Optional[bool] = False,
+):
+    """
+    Encode prompts that may be longer than the text encoder's window, with
+    optional per-token weighting.
+
+    The prompts are tokenized, padded to a common length that is a whole
+    number of encoder chunks, encoded with `get_unweighted_text_embeddings`
+    and, unless disabled, multiplied by their per-token weights and rescaled
+    so that each embedding's mean over its last two axes is unchanged.
+
+    Args:
+        pipe: pipeline object. Only `pipe.model_max_length` and
+            `pipe.tokenizer` are read here; `pipe` is also forwarded to the
+            tokenizing and encoding helpers.
+        prompt: prompt or list of prompts to encode. A bare string is treated
+            as a one-element list.
+        uncond_prompt: optional unconditional prompt or list of prompts,
+            encoded the same way as `prompt`.
+        max_embeddings_multiples: upper bound on the number of
+            `(pipe.model_max_length - 2)`-token chunks a prompt may span.
+        no_boseos_middle: forwarded to the padding and encoding helpers.
+        skip_parsing: tokenize the raw text with `pipe.tokenizer` and give
+            every token weight 1.0 instead of calling
+            `get_prompts_with_weights`.
+        skip_weighting: leave the embeddings unscaled even when weights were
+            parsed.
+
+    Returns:
+        `(text_embeddings, uncond_embeddings)`; `uncond_embeddings` is `None`
+        when `uncond_prompt` is `None`.
+    """
+    max_length = (pipe.model_max_length - 2) * max_embeddings_multiples + 2
+
+    # Accept a bare string for either prompt so that both the parsing and the
+    # skip_parsing paths always work on a list of prompts.
+    if isinstance(prompt, str):
+        prompt = [prompt]
+    if isinstance(uncond_prompt, str):
+        uncond_prompt = [uncond_prompt]
+
+    if not skip_parsing:
+        prompt_tokens, prompt_weights = get_prompts_with_weights(
+            pipe, prompt, max_length - 2
+        )
+        if uncond_prompt is not None:
+            uncond_tokens, uncond_weights = get_prompts_with_weights(
+                pipe, uncond_prompt, max_length - 2
+            )
+    else:
+        # token[1:-1] strips the first and last id the tokenizer adds to each
+        # sequence; padding re-adds bos/eos below.
+        prompt_tokens = [
+            token[1:-1]
+            for token in pipe.tokenizer(
+                prompt, max_length=max_length, truncation=True
+            ).input_ids
+        ]
+        prompt_weights = [[1.0] * len(token) for token in prompt_tokens]
+        if uncond_prompt is not None:
+            uncond_tokens = [
+                token[1:-1]
+                for token in pipe.tokenizer(
+                    uncond_prompt, max_length=max_length, truncation=True
+                ).input_ids
+            ]
+            uncond_weights = [[1.0] * len(token) for token in uncond_tokens]
+
+    # round up the longest length of tokens to a multiple of (model_max_length - 2)
+    max_length = max([len(token) for token in prompt_tokens])
+    if uncond_prompt is not None:
+        max_length = max(max_length, max([len(token) for token in uncond_tokens]))
+    max_embeddings_multiples = min(
+        max_embeddings_multiples,
+        (max_length - 1) // (pipe.model_max_length - 2) + 1,
+    )
+    max_embeddings_multiples = max(1, max_embeddings_multiples)
+
+    max_length = (pipe.model_max_length - 2) * max_embeddings_multiples + 2
+
+    # pad the length of tokens and weights
+    bos = pipe.tokenizer.bos_token_id
+    eos = pipe.tokenizer.eos_token_id
+    prompt_tokens, prompt_weights = pad_tokens_and_weights(
+        prompt_tokens,
+        prompt_weights,
+        max_length,
+        bos,
+        eos,
+        no_boseos_middle=no_boseos_middle,
+        chunk_length=pipe.model_max_length,
+    )
+
+    # FIXME: This is a hacky fix caused by tokenizer padding with None values
+    prompt_tokens = filter_nonetype_tokens(prompt_tokens)
+
+    # prompt_tokens = torch.tensor(prompt_tokens, dtype=torch.long, device=pipe.device)
+    prompt_tokens = torch.tensor(prompt_tokens, dtype=torch.long, device="cpu")
+    if uncond_prompt is not None:
+        uncond_tokens, uncond_weights = pad_tokens_and_weights(
+            uncond_tokens,
+            uncond_weights,
+            max_length,
+            bos,
+            eos,
+            no_boseos_middle=no_boseos_middle,
+            chunk_length=pipe.model_max_length,
+        )
+
+        # FIXME: This is a hacky fix caused by tokenizer padding with None values
+        uncond_tokens = filter_nonetype_tokens(uncond_tokens)
+
+        # uncond_tokens = torch.tensor(uncond_tokens, dtype=torch.long, device=pipe.device)
+        uncond_tokens = torch.tensor(uncond_tokens, dtype=torch.long, device="cpu")
+
+    # get the embeddings
+    text_embeddings = get_unweighted_text_embeddings(
+        pipe,
+        prompt_tokens,
+        pipe.model_max_length,
+        no_boseos_middle=no_boseos_middle,
+    )
+    # prompt_weights = torch.tensor(prompt_weights, dtype=text_embeddings.dtype, device=pipe.device)
+    prompt_weights = torch.tensor(prompt_weights, dtype=torch.float, device="cpu")
+    if uncond_prompt is not None:
+        uncond_embeddings = get_unweighted_text_embeddings(
+            pipe,
+            uncond_tokens,
+            pipe.model_max_length,
+            no_boseos_middle=no_boseos_middle,
+        )
+        # uncond_weights = torch.tensor(uncond_weights, dtype=uncond_embeddings.dtype, device=pipe.device)
+        uncond_weights = torch.tensor(uncond_weights, dtype=torch.float, device="cpu")
+
+    # assign weights to the prompts and normalize in the sense of mean
+    # TODO: should we normalize by chunk or in a whole (current implementation)?
+    if (not skip_parsing) and (not skip_weighting):
+        # Scale by the weights, then multiply by previous_mean / current_mean
+        # so the overall mean of each embedding is restored.
+        previous_mean = (
+            text_embeddings.float().mean(axis=[-2, -1]).to(text_embeddings.dtype)
+        )
+        text_embeddings *= prompt_weights.unsqueeze(-1)
+        current_mean = (
+            text_embeddings.float().mean(axis=[-2, -1]).to(text_embeddings.dtype)
+        )
+        text_embeddings *= (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1)
+        if uncond_prompt is not None:
+            previous_mean = (
+                uncond_embeddings.float()
+                .mean(axis=[-2, -1])
+                .to(uncond_embeddings.dtype)
+            )
+            uncond_embeddings *= uncond_weights.unsqueeze(-1)
+            current_mean = (
+                uncond_embeddings.float()
+                .mean(axis=[-2, -1])
+                .to(uncond_embeddings.dtype)
+            )
+            uncond_embeddings *= (
+                (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1)
+            )
+
+    if uncond_prompt is not None:
+        return text_embeddings, uncond_embeddings
+    return text_embeddings, None
--- a/apps/shark_studio/modules/schedulers.py
+++ b/apps/shark_studio/modules/schedulers.py
@@ -0,0 +1,118 @@
+# from shark_turbine.turbine_models.schedulers import export_scheduler_model
+from diffusers import (
+    LCMScheduler,
+    LMSDiscreteScheduler,
+    PNDMScheduler,
+    DDPMScheduler,
+    DDIMScheduler,
+    DPMSolverMultistepScheduler,
+    KDPM2DiscreteScheduler,
+    EulerDiscreteScheduler,
+    EulerAncestralDiscreteScheduler,
+    DEISMultistepScheduler,
+    DPMSolverSinglestepScheduler,
+    KDPM2AncestralDiscreteScheduler,
+    HeunDiscreteScheduler,
+)
+
+
+def get_schedulers(model_id):
+    """
+    Load the schedulers that are currently enabled.
+
+    Every scheduler is created with `from_pretrained`, reading from the
+    "scheduler" subfolder of `model_id`.
+
+    Args:
+        model_id: model identifier handed to each `from_pretrained` call.
+
+    Returns:
+        dict mapping scheduler name to the loaded scheduler instance, in the
+        order listed in the table below.
+    """
+    # TODO: switch over to turbine and run all on GPU
+    print(f"\n[LOG] Initializing schedulers from model id: {model_id}")
+
+    # (name, scheduler class, extra from_pretrained keyword arguments).
+    # Commented-out rows are schedulers that are currently disabled.
+    scheduler_table = (
+        ("PNDM", PNDMScheduler, {}),
+        # ("DDPM", DDPMScheduler, {}),
+        # ("KDPM2Discrete", KDPM2DiscreteScheduler, {}),
+        # ("LMSDiscrete", LMSDiscreteScheduler, {}),
+        # ("DDIM", DDIMScheduler, {}),
+        # ("LCMScheduler", LCMScheduler, {}),
+        # (
+        #     "DPMSolverMultistep",
+        #     DPMSolverMultistepScheduler,
+        #     {"algorithm_type": "dpmsolver"},
+        # ),
+        # (
+        #     "DPMSolverMultistep++",
+        #     DPMSolverMultistepScheduler,
+        #     {"algorithm_type": "dpmsolver++"},
+        # ),
+        # (
+        #     "DPMSolverMultistepKarras",
+        #     DPMSolverMultistepScheduler,
+        #     {"use_karras_sigmas": True},
+        # ),
+        # (
+        #     "DPMSolverMultistepKarras++",
+        #     DPMSolverMultistepScheduler,
+        #     {"algorithm_type": "dpmsolver++", "use_karras_sigmas": True},
+        # ),
+        ("EulerDiscrete", EulerDiscreteScheduler, {}),
+        ("EulerAncestralDiscrete", EulerAncestralDiscreteScheduler, {}),
+        # ("DEISMultistep", DEISMultistepScheduler, {}),
+        # ("DPMSolverSinglestep", DPMSolverSinglestepScheduler, {}),
+        # ("KDPM2AncestralDiscrete", KDPM2AncestralDiscreteScheduler, {}),
+        # ("HeunDiscrete", HeunDiscreteScheduler, {}),
+    )
+
+    loaded = {}
+    for name, scheduler_cls, extra_kwargs in scheduler_table:
+        loaded[name] = scheduler_cls.from_pretrained(
+            model_id,
+            subfolder="scheduler",
+            **extra_kwargs,
+        )
+    return loaded
+
+
+def export_scheduler_model(model):
+    """
+    Placeholder export: ignores `model` and returns the pair of strings
+    ("None", "None").
+    """
+    placeholder = "None"
+    return placeholder, placeholder
+
+
+# Maps a scheduler name to the value returned by export_scheduler_model() for
+# the matching scheduler class name. The calls run once, when this module is
+# imported. Commented-out entries are schedulers that are currently disabled.
+scheduler_model_map = {
+    # "PNDM": export_scheduler_model("PNDMScheduler"),
+    # "DPMSolverSDE": export_scheduler_model("DpmSolverSDEScheduler"),
+    "EulerDiscrete": export_scheduler_model("EulerDiscreteScheduler"),
+    "EulerAncestralDiscrete": export_scheduler_model("EulerAncestralDiscreteScheduler"),
+    # "LCM": export_scheduler_model("LCMScheduler"),
+    # "LMSDiscrete": export_scheduler_model("LMSDiscreteScheduler"),
+    # "DDPM": export_scheduler_model("DDPMScheduler"),
+    # "DDIM": export_scheduler_model("DDIMScheduler"),
+    # "DPMSolverMultistep": export_scheduler_model("DPMSolverMultistepScheduler"),
+    # "KDPM2Discrete": export_scheduler_model("KDPM2DiscreteScheduler"),
+    # "DEISMultistep": export_scheduler_model("DEISMultistepScheduler"),
+    # "DPMSolverSinglestep": export_scheduler_model("DPMSolverSingleStepScheduler"),
+    # "KDPM2AncestralDiscrete": export_scheduler_model("KDPM2AncestralDiscreteScheduler"),
+    # "HeunDiscrete": export_scheduler_model("HeunDiscreteScheduler"),
+}
--- a/apps/shark_studio/modules/seed.py
+++ b/apps/shark_studio/modules/seed.py
@@ -0,0 +1,66 @@
+import numpy as np
+import json
+from random import (
+    randint,
+    seed as seed_random,
+    getstate as random_getstate,
+    setstate as random_setstate,
+)
+
+
+# Return `seed` as an int when it lies in the unsigned 32-bit range
+# [0, 2**32 - 1]; otherwise (including -1) generate and return a new random
+# seed from that range. Raises ValueError if `seed` cannot be converted by
+# int().
+def sanitize_seed(seed: int | str):
+    seed = int(seed)
+    uint32_info = np.iinfo(np.uint32)
+    uint32_min, uint32_max = uint32_info.min, uint32_info.max
+    # Both bounds are inclusive: randint() below can itself return uint32_max,
+    # so that value has to be accepted as a valid seed.
+    if seed < uint32_min or seed > uint32_max:
+        seed = randint(uint32_min, uint32_max)
+    return seed
+
+
+# Take a seed expression in one of the supported input formats and convert it
+# to a list of integers, where possible.
+#
+# Accepted inputs: an int, a list of ints, or a string holding the JSON form of
+# either. Anything else raises TypeError. Booleans are rejected in both the
+# scalar and the list case (e.g. the JSON text "true" is not a seed).
+def parse_seed_input(seed_input: str | list | int):
+    if isinstance(seed_input, str):
+        try:
+            seed_input = json.loads(seed_input)
+        except (ValueError, TypeError):
+            # not valid JSON; fall through to the TypeError below
+            seed_input = None
+
+    # `type(...) is int` rather than isinstance(): bool is a subclass of int
+    # and must not be accepted, matching the per-element check for lists.
+    if type(seed_input) is int:
+        return [seed_input]
+
+    if isinstance(seed_input, list) and all(type(seed) is int for seed in seed_input):
+        return seed_input
+
+    raise TypeError(
+        "Seed input must be an integer or an array of integers in JSON format"
+    )
+
+
+# Generate a list of seeds from an input expression for batch_count batches,
+# optionally using that input as the rng seed for any randomly generated seeds.
+#
+# seed_input is parsed with parse_seed_input(); the resulting list is cut or
+# padded with -1 to batch_count entries, and every entry outside the supported
+# range is replaced by sanitize_seed(). With repeatable=True the module-level
+# `random` generator is temporarily seeded from the specified seeds, so the
+# same input yields the same generated seeds, and its previous state is
+# restored afterwards.
+def batch_seeds(seed_input: str | list | int, batch_count: int, repeatable=False):
+    # turn the input into a list if possible
+    seeds = parse_seed_input(seed_input)
+
+    # slice or pad the list to be of batch_count length
+    seeds = seeds[:batch_count] + [-1] * (batch_count - len(seeds))
+
+    if not repeatable:
+        # generate any seeds that are unspecified
+        return [sanitize_seed(seed) for seed in seeds]
+
+    # With nothing specified, pick the first seed now so there is something to
+    # seed the rng with. The `seeds and` guard keeps an empty batch from
+    # indexing into an empty list.
+    if seeds and all(seed < 0 for seed in seeds):
+        seeds[0] = sanitize_seed(seeds[0])
+
+    saved_random_state = random_getstate()
+    try:
+        # set seed for the rng based on what we have so far
+        seed_random(str([n for n in seeds if n > -1]))
+
+        # generate any seeds that are unspecified
+        return [sanitize_seed(seed) for seed in seeds]
+    finally:
+        # reset the rng back to normal, even if seed generation raised
+        random_setstate(saved_random_state)
--- a/apps/shark_studio/modules/shared_cmd_opts.py
+++ b/apps/shark_studio/modules/shared_cmd_opts.py
@@ -0,0 +1,793 @@
+import argparse
+import os
+from pathlib import Path
+
+from apps.shark_studio.modules.img_processing import resampler_list
+
+
+def path_expand(s):
+    """Return `s` as a `Path` with `~` expanded and then resolved."""
+    expanded = Path(s).expanduser()
+    return expanded.resolve()
+
+
+def is_valid_file(arg):
+    """Return `arg` unchanged if that path exists, otherwise `None`."""
+    return arg if os.path.exists(arg) else None
+
+
+p = argparse.ArgumentParser(
+    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
+)
+
+##############################################################################
+# Stable Diffusion Params
+##############################################################################
+p.add_argument(
+    "-a",
+    "--app",
+    default="txt2img",
+    help="Which app to use, one of: txt2img, img2img, outpaint, inpaint.",
+)
+p.add_argument(
+    "-p",
+    "--prompt",
+    nargs="+",
+    default=[
+        "A hi-res photo of a red street racer drifting around a curve on a mountain, high altitude, at night, tokyo in the background, 8k"
+    ],
+    help="Text of which images to be generated.",
+)
+
+p.add_argument(
+    "--negative_prompt",
+    nargs="+",
+    default=[
+        "watermark, signature, logo, text, lowres, ((monochrome, grayscale)), "
+        "blurry, ugly, blur, oversaturated, cropped"
+    ],
+    help="Text you don't want to see in the generated image.",
+)
+
+p.add_argument(
+    "--sd_init_image",
+    type=str,
+    help="Path to the image input for img2img/inpainting.",
+)
+
+p.add_argument(
+    "--steps",
+    type=int,
+    default=2,
+    help="The number of steps to do the sampling.",
+)
+
+p.add_argument(
+    "--seed",
+    type=str,
+    default=-1,
+    help="The seed or list of seeds to use. -1 for a random one.",
+)
+
+p.add_argument(
+    "--batch_size",
+    type=int,
+    default=1,
+    choices=range(1, 4),
+    help="The number of inferences to be made in a single `batch_count`.",
+)
+
+p.add_argument(
+    "--height",
+    type=int,
+    default=512,
+    choices=range(128, 1025, 8),
+    help="The height of the output image.",
+)
+
+p.add_argument(
+    "--width",
+    type=int,
+    default=512,
+    choices=range(128, 1025, 8),
+    help="The width of the output image.",
+)
+
+p.add_argument(
+    "--guidance_scale",
+    type=float,
+    default=0,
+    help="The value to be used for guidance scaling.",
+)
+
+p.add_argument(
+    "--noise_level",
+    type=int,
+    default=20,
+    help="The value to be used for noise level of upscaler.",
+)
+
+p.add_argument(
+    "--max_length",
+    type=int,
+    default=64,
+    help="Max length of the tokenizer output, options are 64 and 77.",
+)
+
+p.add_argument(
+    "--max_embeddings_multiples",
+    type=int,
+    default=5,
+    help="The max multiple length of prompt embeddings compared to the max "
+    "output length of text encoder.",
+)
+
+p.add_argument(
+    "--strength",
+    type=float,
+    default=0.8,
+    help="The strength of change applied on the given input image for " "img2img.",
+)
+
+p.add_argument(
+    "--use_hiresfix",
+    # `type=bool` would turn every non-empty string, including "False", into
+    # True, so the value is parsed explicitly instead.
+    type=lambda value: str(value).strip().lower() in ("1", "true", "yes", "y", "on"),
+    default=False,
+    help="Use Hires Fix to do higher resolution images, while trying to "
+    "avoid the issues that come with it. This is accomplished by first "
+    "generating an image using txt2img, then running it through img2img.",
+)
+
+p.add_argument(
+    "--hiresfix_height",
+    type=int,
+    default=768,
+    choices=range(128, 769, 8),
+    help="The height of the Hires Fix image.",
+)
+
+p.add_argument(
+    "--hiresfix_width",
+    type=int,
+    default=768,
+    choices=range(128, 769, 8),
+    help="The width of the Hires Fix image.",
+)
+
+p.add_argument(
+    "--hiresfix_strength",
+    type=float,
+    default=0.6,
+    help="The denoising strength to apply for the Hires Fix.",
+)
+
+p.add_argument(
+    "--resample_type",
+    type=str,
+    default="Nearest Neighbor",
+    choices=resampler_list,
+    help="The resample type to use when resizing an image before being run "
+    "through stable diffusion.",
+)
+
+##############################################################################
+# Stable Diffusion Training Params
+##############################################################################
+
+p.add_argument(
+    "--lora_save_dir",
+    type=str,
+    default="models/lora/",
+    help="Directory to save the lora fine tuned model.",
+)
+
+p.add_argument(
+    "--training_images_dir",
+    type=str,
+    default="models/lora/training_images/",
+    help="Directory containing images that are an example of the prompt.",
+)
+
+p.add_argument(
+    "--training_steps",
+    type=int,
+    default=2000,
+    help="The number of steps to train.",
+)
+
+##############################################################################
+# Inpainting and Outpainting Params
+##############################################################################
+
+p.add_argument(
+    "--mask_path",
+    type=str,
+    help="Path to the mask image input for inpainting.",
+)
+
+p.add_argument(
+    "--inpaint_full_res",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="If inpaint only masked area or whole picture.",
+)
+
+p.add_argument(
+    "--inpaint_full_res_padding",
+    type=int,
+    default=32,
+    choices=range(0, 257, 4),
+    help="Number of pixels for only masked padding.",
+)
+
+p.add_argument(
+    "--pixels",
+    type=int,
+    default=128,
+    choices=range(8, 257, 8),
+    help="Number of expended pixels for one direction for outpainting.",
+)
+
+p.add_argument(
+    "--mask_blur",
+    type=int,
+    default=8,
+    choices=range(0, 65),
+    help="Number of blur pixels for outpainting.",
+)
+
+p.add_argument(
+    "--left",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="If extend left for outpainting.",
+)
+
+p.add_argument(
+    "--right",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="If extend right for outpainting.",
+)
+
+p.add_argument(
+    "--up",
+    "--top",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="If extend top for outpainting.",
+)
+
+p.add_argument(
+    "--down",
+    "--bottom",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="If extend bottom for outpainting.",
+)
+
+p.add_argument(
+    "--noise_q",
+    type=float,
+    default=1.0,
+    help="Fall-off exponent for outpainting (lower=higher detail) "
+    "(min=0.0, max=4.0).",
+)
+
+p.add_argument(
+    "--color_variation",
+    type=float,
+    default=0.05,
+    help="Color variation for outpainting (min=0.0, max=1.0).",
+)
+
+##############################################################################
+# Model Config and Usage Params
+##############################################################################
+
+p.add_argument("--device", type=str, default="vulkan", help="Device to run the model.")
+
+p.add_argument(
+    "--precision", type=str, default="fp16", help="Precision to run the model."
+)
+
+p.add_argument(
+    "--import_mlir",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Imports the model from torch module to shark_module otherwise "
+    "downloads the model from shark_tank.",
+)
+
+p.add_argument(
+    "--use_tuned",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Download and use the tuned version of the model if available.",
+)
+
+p.add_argument(
+    "--use_base_vae",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Do conversion from the VAE output to pixel space on cpu.",
+)
+
+p.add_argument(
+    "--scheduler",
+    type=str,
+    default="DDIM",
+    help="Other supported schedulers are [DDIM, PNDM, LMSDiscrete, "
+    "DPMSolverMultistep, DPMSolverMultistep++, DPMSolverMultistepKarras, "
+    "DPMSolverMultistepKarras++, EulerDiscrete, EulerAncestralDiscrete, "
+    "DEISMultistep, KDPM2AncestralDiscrete, DPMSolverSinglestep, DDPM, "
+    "HeunDiscrete].",
+)
+
+p.add_argument(
+    "--output_img_format",
+    type=str,
+    default="png",
+    help="Specify the format in which output image is save. "
+    "Supported options: jpg / png.",
+)
+
+p.add_argument(
+    "--output_dir",
+    type=str,
+    default=os.path.join(os.getcwd(), "generated_imgs"),
+    help="Directory path to save the output images and json.",
+)
+
+p.add_argument(
+    "--batch_count",
+    type=int,
+    default=1,
+    help="Number of batches to be generated with random seeds in " "single execution.",
+)
+
+p.add_argument(
+    "--repeatable_seeds",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="The seed of the first batch will be used as the rng seed to "
+    "generate the subsequent seeds for subsequent batches in that run.",
+)
+
+p.add_argument(
+    "--custom_weights",
+    type=str,
+    default="",
+    help="Path to a .safetensors or .ckpt file for SD pipeline weights.",
+)
+
+p.add_argument(
+    "--custom_vae",
+    type=str,
+    default="",
+    help="HuggingFace repo-id or path to SD model's checkpoint whose VAE "
+    "needs to be plugged in.",
+)
+
+p.add_argument(
+    "--base_model_id",
+    type=str,
+    default="stabilityai/stable-diffusion-2-1-base",
+    help="The repo-id of hugging face.",
+)
+
+p.add_argument(
+    "--low_cpu_mem_usage",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Use the accelerate package to reduce cpu memory consumption.",
+)
+
+p.add_argument(
+    "--attention_slicing",
+    type=str,
+    default="none",
+    help="Amount of attention slicing to use (one of 'max', 'auto', 'none', "
+    "or an integer).",
+)
+
+p.add_argument(
+    "--use_stencil",
+    choices=["canny", "openpose", "scribble", "zoedepth"],
+    help="Enable the stencil feature.",
+)
+
+p.add_argument(
+    "--control_mode",
+    choices=["Prompt", "Balanced", "Controlnet"],
+    default="Balanced",
+    help="How Controlnet injection should be prioritized.",
+)
+
+p.add_argument(
+    "--use_lora",
+    type=str,
+    default="",
+    help="Use standalone LoRA weight using a HF ID or a checkpoint " "file (~3 MB).",
+)
+
+p.add_argument(
+    "--use_quantize",
+    type=str,
+    default="none",
+    help="Runs the quantized version of stable diffusion model. "
+    "This is currently in experimental phase. "
+    "Currently, only runs the stable-diffusion-2-1-base model in "
+    "int8 quantization.",
+)
+
+p.add_argument(
+    "--lowvram",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Load and unload models for low VRAM.",
+)
+
+p.add_argument(
+    "--hf_auth_token",
+    type=str,
+    default=None,
+    help="Specify your own huggingface authentication tokens for models like Llama2.",
+)
+
+p.add_argument(
+    "--external_weights",
+    type=str,
+    default=None,
+    help="What type of externalized weights to use. Currently options are 'safetensors' and defaults to inlined weights.",
+)
+
+p.add_argument(
+    "--device_allocator_heap_key",
+    type=str,
+    default="",
+    # Each fragment ends with a separator; adjacent string literals are joined
+    # with nothing in between.
+    help="Specify heap key for device caching allocator. "
+    "Expected form: max_allocation_size;max_allocation_capacity;max_free_allocation_count. "
+    "Example: --device_allocator_heap_key='*;1gib' (will limit caching on device to 1 gigabyte)",
+)
+
+##############################################################################
+# IREE - Vulkan supported flags
+##############################################################################
+
+p.add_argument(
+    "--iree_vulkan_target_triple",
+    type=str,
+    default="",
+    help="Specify target triple for vulkan.",
+)
+
+p.add_argument(
+    "--iree_metal_target_platform",
+    type=str,
+    default="",
+    help="Specify target triple for metal.",
+)
+
+##############################################################################
+# Misc. Debug and Optimization flags
+##############################################################################
+
+p.add_argument(
+    "--use_compiled_scheduler",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Use the default scheduler precompiled into the model if available.",
+)
+
+p.add_argument(
+    "--local_tank_cache",
+    default="",
+    help="Specify where to save downloaded shark_tank artifacts. "
+    "If this is not set, the default is ~/.local/shark_tank/.",
+)
+
+p.add_argument(
+    "--dump_isa",
+    default=False,
+    action="store_true",
+    help="When enabled call amdllpc to get ISA dumps. " "Use with dispatch benchmarks.",
+)
+
+p.add_argument(
+    "--dispatch_benchmarks",
+    default=None,
+    help="Dispatches to return benchmark data on. "
+    'Use "All" for all, and None for none.',
+)
+
+p.add_argument(
+    "--dispatch_benchmarks_dir",
+    default="temp_dispatch_benchmarks",
+    help="Directory where you want to store dispatch data "
+    'generated with "--dispatch_benchmarks".',
+)
+
+p.add_argument(
+    "--enable_rgp",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for inserting debug frames between iterations " "for use with rgp.",
+)
+
+p.add_argument(
+    "--hide_steps",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for hiding the details of iteration/sec for each step.",
+)
+
+p.add_argument(
+    "--warmup_count",
+    type=int,
+    default=0,
+    help="Flag setting warmup count for CLIP and VAE [>= 0].",
+)
+
+p.add_argument(
+    "--clear_all",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag to clear all mlir and vmfb from common locations. "
+    "Recompiling will take several minutes.",
+)
+
+p.add_argument(
+    "--save_metadata_to_json",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for whether or not to save a generation information "
+    "json file with the image.",
+)
+
+p.add_argument(
+    "--write_metadata_to_png",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for whether or not to save generation information in "
+    "PNG chunk text to generated images.",
+)
+
+p.add_argument(
+    "--import_debug",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    # --import_mlir defaults to True in this parser, so the help text no longer
+    # describes "false" as its default.
+    help="If import_mlir is True, saves mlir via the debug option "
+    "in shark importer. Does nothing if import_mlir is false.",
+)
+
+p.add_argument(
+    "--compile_debug",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    # Trailing space after "the": adjacent string literals are joined with
+    # nothing in between.
+    help="Flag to toggle debug assert/verify flags for imported IR in the "
+    "iree-compiler. Default to false.",
+)
+
+p.add_argument(
+    "--iree_constant_folding",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Controls constant folding in iree-compile for all SD models.",
+)
+
+p.add_argument(
+    "--data_tiling",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Controls data tiling in iree-compile for all SD models.",
+)
+
+p.add_argument(
+    "--quantization",
+    type=str,
+    default="None",
+    help="Quantization to be used for api-exposed model.",
+)
+
+##############################################################################
+# Web UI flags
+##############################################################################
+p.add_argument(
+    "--defaults",
+    default="sdxl-turbo.json",
+    type=str,
+    help="Path to the default API request .json file. Works for CLI and webui.",
+)
+
+p.add_argument(
+    "--webui",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="controls whether the webui is launched.",
+)
+
+p.add_argument(
+    "--progress_bar",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for removing the progress bar animation during " "image generation.",
+)
+
+p.add_argument(
+    "--tmp_dir",
+    type=str,
+    default=os.path.join(os.getcwd(), "shark_tmp"),
+    help="Path to tmp directory",
+)
+
+p.add_argument(
+    "--config_dir",
+    type=str,
+    default=os.path.join(os.getcwd(), "configs"),
+    help="Path to config directory",
+)
+
+p.add_argument(
+    "--model_dir",
+    type=str,
+    default=os.path.join(os.getcwd(), "models"),
+    help="Path to directory where all .ckpts are stored in order to populate "
+    "them in the web UI.",
+)
+
+# TODO: replace API flag when these can be run together
+p.add_argument(
+    "--ui",
+    type=str,
+    default="app" if os.name == "nt" else "web",
+    help="One of: [api, app, web].",
+)
+
+p.add_argument(
+    "--share",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for generating a public URL.",
+)
+
+p.add_argument(
+    "--server_port",
+    type=int,
+    default=8080,
+    help="Flag for setting server port.",
+)
+
+p.add_argument(
+    "--api",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for enabling rest API.",
+)
+
+p.add_argument(
+    "--api_accept_origin",
+    # "append" collects one entry per use of the flag; the value stays None
+    # when the flag is never given.
+    action="append",
+    type=str,
+    help="An origin to be accepted by the REST api for Cross Origin "
+    "Resource Sharing (CORS). Use multiple times for multiple origins, "
+    'or use --api_accept_origin="*" to accept all origins. If no origins '
+    "are set no CORS headers will be returned by the api. Use, for "
+    "instance, if you need to access the REST api from Javascript running "
+    "in a web browser.",
+)
+
+p.add_argument(
+    "--debug",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for enabling debugging log in WebUI.",
+)
+
+p.add_argument(
+    "--output_gallery",
+    default=True,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for removing the output gallery tab, and avoid exposing "
+    "images under --output_dir in the UI.",
+)
+
+p.add_argument(
+    "--configs_path",
+    default=None,
+    type=str,
+    help="Path to .json config directory.",
+)
+
+p.add_argument(
+    "--output_gallery_followlinks",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Flag for whether the output gallery tab in the UI should "
+    "follow symlinks when listing subdirectories under --output_dir.",
+)
+
+p.add_argument(
+    "--api_log",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Enables Compatibility API logging.",
+)
+
+##############################################################################
+# SD model auto-annotation flags
+##############################################################################
+
+p.add_argument(
+    "--annotation_output",
+    type=path_expand,
+    default="./",
+    help="Directory to save the annotated mlir file.",
+)
+
+p.add_argument(
+    "--annotation_model",
+    type=str,
+    default="unet",
+    help="Options are unet and vae.",
+)
+
+p.add_argument(
+    "--save_annotation",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Save annotated mlir file.",
+)
+##############################################################################
+# SD model auto-tuner flags
+##############################################################################
+
+p.add_argument(
+    "--tuned_config_dir",
+    type=path_expand,
+    default="./",
+    help="Directory to save the tuned config file.",
+)
+
+p.add_argument(
+    "--num_iters",
+    type=int,
+    default=400,
+    help="Number of iterations for tuning.",
+)
+
+p.add_argument(
+    "--search_op",
+    type=str,
+    default="all",
+    help="Op to be optimized, options are matmul, bmm, conv and all.",
+)
+
+##############################################################################
+# DocuChat Flags
+##############################################################################
+
+p.add_argument(
+    "--run_docuchat_web",
+    default=False,
+    action=argparse.BooleanOptionalAction,
+    help="Specifies whether the docuchat's web version is running or not.",
+)
+
+##############################################################################
+# rocm Flags
+##############################################################################
+
+p.add_argument(
+    "--iree_rocm_target_chip",
+    type=str,
+    default="",
+    # The help text previously listed `hipinfo` twice.
+    help="Add the rocm device architecture, e.g. gfx1100, gfx90a, etc. Use `hipinfo` "
+    "or `iree-run-module --dump_devices=rocm` to get desired arch name",
+)
+
+# Parse at import time; arguments this parser does not know are collected in
+# `unknown` instead of causing an error.
+cmd_opts, unknown = p.parse_known_args()
+if cmd_opts.import_debug:
+    # The parser defines no `--hf_model_id`; the model repo-id is stored under
+    # `--base_model_id`, so the temp directory name is derived from that.
+    os.environ["IREE_SAVE_TEMPS"] = os.path.join(
+        os.getcwd(), cmd_opts.base_model_id.replace("/", "_")
+    )
--- a/apps/shark_studio/modules/timer.py
+++ b/apps/shark_studio/modules/timer.py
@@ -0,0 +1,106 @@
+import time
+import argparse
+
+
+class TimerSubcategory:
+    """Context manager that times a nested category of a timer.
+
+    While the block is active, the owning timer's ``base_category`` is extended
+    with ``"<category>/"`` and its ``subcategory_level`` is raised by one. Both
+    are restored on exit, and the time spent is added to the timer's records.
+    """
+
+    def __init__(self, timer, category):
+        self.timer = timer
+        self.category = category
+        self.start = None
+        # Prefix in effect at construction time; restored when the block exits.
+        self.original_base_category = timer.base_category
+
+    def __enter__(self):
+        owner = self.timer
+        self.start = time.time()
+        qualified = self.original_base_category + self.category
+        owner.base_category = qualified + "/"
+        owner.subcategory_level += 1
+
+        if owner.print_log:
+            indent = "  " * owner.subcategory_level
+            print(f"{indent}{self.category}:")
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        owner = self.timer
+        duration = time.time() - self.start
+        # Restore the prefix first: record() below builds its key from it.
+        owner.base_category = self.original_base_category
+        qualified = self.original_base_category + self.category
+        owner.add_time_to_record(qualified, duration)
+        owner.subcategory_level -= 1
+        owner.record(self.category, disable_log=True)
+
+
+class Timer:
+    """Accumulates elapsed wall-clock time under named categories.
+
+    ``record`` attributes the time since the previous measurement to a
+    category (prefixed with the current ``base_category``) and adds it to
+    ``total``. ``subcategory`` returns a context manager that nests categories
+    under ``/``-separated names.
+
+    Args:
+        print_log: when true, ``record`` prints one line per recorded category.
+    """
+
+    def __init__(self, print_log=False):
+        self.start = time.time()
+        self.records = {}
+        self.total = 0
+        self.base_category = ""
+        self.print_log = print_log
+        self.subcategory_level = 0
+
+    def elapsed(self):
+        """Return the seconds since the previous call and restart the clock."""
+        end = time.time()
+        res = end - self.start
+        self.start = end
+        return res
+
+    def add_time_to_record(self, category, amount):
+        """Add ``amount`` seconds to ``category``, creating the entry if needed."""
+        self.records[category] = self.records.get(category, 0) + amount
+
+    def record(self, category, extra_time=0, disable_log=False):
+        """Attribute the time since the last measurement to ``category``.
+
+        Args:
+            category: name, stored under ``base_category + category``.
+            extra_time: additional seconds to add on top of the measured time.
+            disable_log: suppress the log line even when ``print_log`` is set.
+        """
+        e = self.elapsed()
+
+        self.add_time_to_record(self.base_category + category, e + extra_time)
+
+        self.total += e + extra_time
+
+        if self.print_log and not disable_log:
+            print(
+                f"{'  ' * self.subcategory_level}{category}: done in {e + extra_time:.3f}s"
+            )
+
+    def subcategory(self, name):
+        """Return a context manager that times ``name`` as a nested category."""
+        # Restart the clock so time spent before the block is not attributed to it.
+        self.elapsed()
+
+        subcat = TimerSubcategory(self, name)
+        return subcat
+
+    def summary(self):
+        """Return the total plus every top-level category that took >= 0.1s."""
+        res = f"{self.total:.1f}s"
+
+        # Nested categories (names containing "/") are left out of the summary.
+        additions = [
+            (category, time_taken)
+            for category, time_taken in self.records.items()
+            if time_taken >= 0.1 and "/" not in category
+        ]
+        if not additions:
+            return res
+
+        res += " ("
+        res += ", ".join(
+            [f"{category}: {time_taken:.1f}s" for category, time_taken in additions]
+        )
+        res += ")"
+
+        return res
+
+    def dump(self):
+        """Return the total and the per-category records as a dict."""
+        return {"total": self.total, "records": self.records}
+
+    def reset(self):
+        """Clear all recorded state while keeping the ``print_log`` setting."""
+        # Calling __init__ with no arguments would silently switch logging off.
+        self.__init__(print_log=self.print_log)
+
+
+# Minimal parser that only knows --log-startup. add_help=False together with
+# parse_known_args() lets every other command-line argument pass through.
+parser = argparse.ArgumentParser(add_help=False)
+parser.add_argument(
+    "--log-startup",
+    action="store_true",
+    help="print a detailed log of what's happening at startup",
+)
+# parse_known_args() returns (namespace, leftovers); only the namespace is kept.
+args = parser.parse_known_args()[0]
+
+# Module-level timer created at import; it prints each recorded step when
+# --log-startup was passed.
+startup_timer = Timer(print_log=args.log_startup)
+
+startup_record = None
--- a/apps/shark_studio/shark_studio.spec
+++ b/apps/shark_studio/shark_studio.spec
@@ -0,0 +1,48 @@
+# -*- mode: python ; coding: utf-8 -*-
+# PyInstaller spec: analyzes web/index.py and emits an executable named
+# "nodai_shark_studio".
+from apps.shark_studio.studio_imports import pathex, datas, hiddenimports
+
+binaries = []
+
+block_cipher = None
+
+# Dependency analysis starting from the web entry script. Search paths, data
+# files and hidden imports come from studio_imports.
+a = Analysis(
+    ['web/index.py'],
+    pathex=pathex,
+    binaries=binaries,
+    datas=datas,
+    hiddenimports=hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+    module_collection_mode={
+        'gradio': 'py',  # Collect gradio package as source .py files
+    },
+)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+# The analysis outputs are handed to EXE directly; UPX and symbol stripping
+# are disabled and the program keeps a console window (console=True).
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    [],
+    name='nodai_shark_studio',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=False,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
--- a/apps/shark_studio/shark_studio_apionly.spec
+++ b/apps/shark_studio/shark_studio_apionly.spec
@@ -0,0 +1,45 @@
+# -*- mode: python ; coding: utf-8 -*-
+# PyInstaller spec for the API-only build: analyzes web/index.py using the
+# reduced dependency lists from studio_imports_apionly.
+from apps.shark_studio.studio_imports_apionly import pathex, datas, hiddenimports
+
+binaries = []
+
+block_cipher = None
+
+a = Analysis(
+    ['web/index.py'],
+    pathex=pathex,
+    binaries=binaries,
+    datas=datas,
+    hiddenimports=hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    [],
+    # The nightly workflow builds this spec and then renames/signs
+    # ./dist/nodai_shark_studio.exe, so the executable must carry that name.
+    # NOTE(review): if a distinct server binary name is wanted instead, the
+    # workflow's `mv`/`signtool` paths have to change together with this value.
+    name='nodai_shark_studio',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=False,
+    upx_exclude=[],
+    runtime_tmpdir=None,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
--- a/apps/shark_studio/studio_imports.py
+++ b/apps/shark_studio/studio_imports.py
@@ -0,0 +1,62 @@
+from PyInstaller.utils.hooks import collect_data_files
+from PyInstaller.utils.hooks import copy_metadata
+from PyInstaller.utils.hooks import collect_submodules
+
+import sys
+
+sys.setrecursionlimit(sys.getrecursionlimit() * 5)
+
+# python path for pyinstaller
+pathex = ["."]
+
+# datafiles for pyinstaller
+# Distribution metadata first, in a fixed order.
+datas = [
+    entry
+    for dist_name in (
+        "torch",
+        "tokenizers",
+        "tqdm",
+        "regex",
+        "requests",
+        "packaging",
+        "filelock",
+        "numpy",
+        "importlib_metadata",
+        "safetensors",
+        "Pillow",
+        "sentencepiece",
+        "pyyaml",
+        "huggingface-hub",
+        "gradio",
+    )
+    for entry in copy_metadata(dist_name)
+]
+# Then package data files; the flag says whether .py sources are collected too.
+# NOTE(review): "shark-turbine" looks like a distribution name rather than an
+# importable package name — confirm that this entry collects anything.
+datas += [
+    entry
+    for pkg_name, with_py_files in (
+        ("torch", False),
+        ("tokenizers", False),
+        ("diffusers", False),
+        ("transformers", False),
+        ("gradio", False),
+        ("gradio_client", False),
+        ("iree", True),
+        ("shark-turbine", True),
+        ("tqdm", False),
+        ("sentencepiece", False),
+        ("jsonschema", False),
+        ("jsonschema_specifications", False),
+        ("cpuinfo", False),
+    )
+    for entry in collect_data_files(pkg_name, include_py_files=with_py_files)
+]
+# Finally the web UI assets as (source glob, destination directory) pairs.
+datas += [
+    ("web/ui/css/*", "ui/css"),
+    ("web/ui/js/*", "ui/js"),
+    ("web/ui/logos/*", "logos"),
+]
+
+
+# hidden imports for pyinstaller
+hiddenimports = ["apps", "shark-turbine"]
+hiddenimports.extend(
+    mod for mod in collect_submodules("gradio") if "tests" not in mod
+)
+hiddenimports.extend(
+    mod for mod in collect_submodules("diffusers") if "tests" not in mod
+)
+# transformers submodules whose name contains any of these keywords are skipped.
+blacklist = ["tests", "convert"]
+hiddenimports.extend(
+    mod
+    for mod in collect_submodules("transformers")
+    if all(keyword not in mod for keyword in blacklist)
+)
+hiddenimports.extend(mod for mod in collect_submodules("iree") if "test" not in mod)
+hiddenimports.append("iree._runtime")
--- a/apps/shark_studio/studio_imports_apionly.py
+++ b/apps/shark_studio/studio_imports_apionly.py
@@ -0,0 +1,46 @@
+from PyInstaller.utils.hooks import collect_data_files
+from PyInstaller.utils.hooks import copy_metadata
+from PyInstaller.utils.hooks import collect_submodules
+
+import sys
+
+sys.setrecursionlimit(sys.getrecursionlimit() * 5)
+
+# python path for pyinstaller
+pathex = ["."]
+
+# datafiles for pyinstaller
+# Distribution metadata first, in a fixed order.
+datas = [
+    entry
+    for dist_name in (
+        "torch",
+        "tokenizers",
+        "tqdm",
+        "regex",
+        "requests",
+        "packaging",
+        "filelock",
+        "numpy",
+        "importlib_metadata",
+        "safetensors",
+        "Pillow",
+        "sentencepiece",
+        "pyyaml",
+        "huggingface-hub",
+        "gradio",
+    )
+    for entry in copy_metadata(dist_name)
+]
+# Then package data files; the flag says whether .py sources are collected too.
+datas += [
+    entry
+    for pkg_name, with_py_files in (
+        ("torch", False),
+        ("tokenizers", False),
+        ("diffusers", False),
+        ("transformers", False),
+        ("iree", True),
+        ("tqdm", False),
+        ("jsonschema", False),
+        ("jsonschema_specifications", False),
+        ("cpuinfo", False),
+    )
+    for entry in collect_data_files(pkg_name, include_py_files=with_py_files)
+]
+
+
+# hidden imports for pyinstaller
+# NOTE(review): "shark-turbine" looks like a distribution name rather than an
+# importable module name — confirm that this hidden import resolves.
+hiddenimports = ["apps", "shark-turbine"]
+hiddenimports.extend(
+    mod for mod in collect_submodules("diffusers") if "tests" not in mod
+)
+hiddenimports.extend(mod for mod in collect_submodules("iree") if "test" not in mod)
+hiddenimports.append("iree._runtime")
--- a/apps/shark_studio/tests/api_test.py
+++ b/apps/shark_studio/tests/api_test.py
@@ -6,8 +6,26 @@

 import logging
 import unittest
-from apps.shark_studio.api.llm import LanguageModel
+import json
 import gc
+from apps.shark_studio.api.llm import LanguageModel, llm_chat_api
+from apps.shark_studio.api.sd import shark_sd_fn_dict_input, view_json_file
+from apps.shark_studio.web.utils.file_utils import get_resource_path
+
+# class SDAPITest(unittest.TestCase):
+#     def testSDSimple(self):
+#         from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+#         import apps.shark_studio.web.utils.globals as global_obj
+
+#         global_obj._init()
+
+#         sd_json = view_json_file(get_resource_path("../configs/default_sd_config.json"))
+#         sd_kwargs = json.loads(sd_json)
+#         for arg in vars(cmd_opts):
+#             if arg in sd_kwargs:
+#                 sd_kwargs[arg] = getattr(cmd_opts, arg)
+#         for i in shark_sd_fn_dict_input(sd_kwargs):
+#             print(i)


 class LLMAPITest(unittest.TestCase):
@@ -18,6 +36,7 @@ class LLMAPITest(unittest.TestCase):
            device="cpu",
            precision="fp32",
            quantization="None",
+            streaming_llm=True,
        )
        count = 0
        label = "Turkishoure Turkish"
--- a/apps/shark_studio/tests/export_unet.py
+++ b/apps/shark_studio/tests/export_unet.py
@@ -0,0 +1,41 @@
+import torch
+from diffusers import (
+    UNet2DConditionModel,
+)
+from torch.fx.experimental.proxy_tensor import make_fx
+
+
+class UnetModel(torch.nn.Module):
+    """Wraps a pretrained UNet and combines its two guidance branches.
+
+    ``forward`` runs the UNet on a doubled batch, splits the result into an
+    unconditional and a text-conditioned half, and blends them with
+    ``guidance_scale``.
+    """
+
+    def __init__(self, hf_model_name):
+        super().__init__()
+        # Weights are read from the "unet" subfolder of the given model.
+        self.unet = UNet2DConditionModel.from_pretrained(
+            hf_model_name, subfolder="unet"
+        )
+
+    def forward(self, sample, timestep, encoder_hidden_states, guidance_scale):
+        # Two copies of the input along dim 0, one per guidance branch.
+        doubled = torch.cat([sample, sample])
+        prediction = self.unet.forward(
+            doubled, timestep, encoder_hidden_states, return_dict=False
+        )[0]
+        uncond, text = prediction.chunk(2)
+        guided_delta = text - uncond
+        return uncond + guidance_scale * guided_delta
+
+
+if __name__ == "__main__":
+    # Trace the wrapped UNet with make_fx and print the resulting graph.
+    hf_model_name = "CompVis/stable-diffusion-v1-4"
+    unet = UnetModel(hf_model_name)
+    # Example arguments in forward() order:
+    # (sample, timestep, encoder_hidden_states, guidance_scale).
+    inputs = (torch.randn(1, 4, 64, 64), 1, torch.randn(2, 77, 768), 7.5)
+
+    # Symbolic tracing mode with an empty decomposition table.
+    fx_g = make_fx(
+        unet,
+        decomposition_table={},
+        tracing_mode="symbolic",
+        _allow_non_fake_inputs=True,
+        _allow_fake_constant=False,
+    )(*inputs)
+
+    print(fx_g)
--- a/apps/shark_studio/tests/jupiter.png
+++ b/apps/shark_studio/tests/jupiter.png
--- a/apps/shark_studio/tests/rest_api_test.py
+++ b/apps/shark_studio/tests/rest_api_test.py
@@ -0,0 +1,45 @@
+import requests
+from PIL import Image
+import base64
+from io import BytesIO
+import json
+
+
+def llm_chat_test(verbose=False):
+    """POST one chat prompt to a locally running server and print the outcome.
+
+    The status line is always printed. On a non-200 reply the raw response
+    body is printed and the function returns; on success the generated
+    message is printed only when ``verbose`` is true.
+
+    Args:
+        verbose: print the generated message of a successful response.
+    """
+    # Define values here
+    prompt = "What is the significance of the number 42?"
+
+    url = "http://127.0.0.1:8080/v1/chat/completions"
+
+    headers = {
+        "User-Agent": "PythonTest",
+        "Accept": "*/*",
+        "Accept-Encoding": "gzip, deflate, br",
+    }
+
+    data = {
+        "model": "Trelis/Llama-2-7b-chat-hf-function-calling-v2",
+        "messages": [
+            {
+                "role": "",
+                "content": prompt,
+            }
+        ],
+        "device": "vulkan://0",
+        "max_tokens": 4096,
+    }
+
+    res = requests.post(url=url, json=data, headers=headers, timeout=1000)
+    print(f"[chat] response from server was : {res.status_code} {res.reason}")
+
+    if res.status_code != 200:
+        # An error body is not guaranteed to be JSON or to contain "choices";
+        # show it verbatim instead of failing while decoding or indexing it.
+        print(f"\n{res.text}\n")
+        return
+
+    res_dict = json.loads(res.content.decode("utf-8"))
+    if verbose:
+        print(f"\n{res_dict['choices'][0]['message']['content']}\n")
+
+
+if __name__ == "__main__":
+    # "Exercises the chatbot REST API of Shark. Make sure "
+    # "Shark is running in API mode on 127.0.0.1:8080 before running"
+    # "this script."
+
+    llm_chat_test(verbose=True)
--- a/apps/shark_studio/tools/params_prefixer.py
+++ b/apps/shark_studio/tools/params_prefixer.py
@@ -0,0 +1,20 @@
+from apps.shark_studio.modules.ckpt_processing import save_irpa
+import argparse
+import safetensors
+
+# Command-line tool: passes --input and --prefix to save_irpa and prints the
+# value it returns as the output file.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--input",
+    type=str,
+    default="",
+    help="input safetensors/irpa",
+)
+parser.add_argument(
+    "--prefix",
+    type=str,
+    default="",
+    help="prefix to add to all the keys in the irpa",
+)
+args = parser.parse_args()
+# Both options default to "" when omitted, and save_irpa is called regardless.
+output_file = save_irpa(args.input, args.prefix)
+print("saved irpa to", output_file, "with prefix", args.prefix)
--- a/apps/shark_studio/web/api/compat.py
+++ b/apps/shark_studio/web/api/compat.py
@@ -0,0 +1,220 @@
+import base64
+import io
+import os
+import time
+import datetime
+import uvicorn
+import ipaddress
+import requests
+import threading
+import collections
+import gradio as gr
+from PIL import Image, PngImagePlugin
+from threading import Lock
+from io import BytesIO
+from fastapi import APIRouter, Depends, FastAPI, Request, Response
+from fastapi.security import HTTPBasic, HTTPBasicCredentials
+from fastapi.exceptions import HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.encoders import jsonable_encoder
+
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+
+def decode_base64_to_image(encoding):
+    """Open an image given as an http(s) URL, a data URI, or raw base64 text.
+
+    Args:
+        encoding: ``http://``/``https://`` URL, ``data:image/...`` URI, or a
+            bare base64 string.
+
+    Returns:
+        The opened PIL image.
+
+    Raises:
+        HTTPException: status 500 when the image cannot be fetched, the data
+            URI is malformed, or the payload cannot be decoded or opened.
+    """
+    if encoding.startswith(("http://", "https://")):
+        # NOTE(review): this fetches a caller-supplied URL from the server with
+        # no host restrictions — confirm that is acceptable for this deployment.
+        try:
+            # The fetch is inside the try so connection errors and timeouts
+            # are reported the same way as an unreadable image.
+            response = requests.get(encoding, timeout=30, headers={})
+            return Image.open(BytesIO(response.content))
+        except Exception as e:
+            raise HTTPException(status_code=500, detail="Invalid image url") from e
+
+    try:
+        if encoding.startswith("data:image/"):
+            # Split only once per delimiter so extra ";" parameters in the
+            # header do not hide the payload; a URI missing ";" or "," is
+            # reported as an invalid image rather than an IndexError.
+            encoding = encoding.split(";", 1)[1].split(",", 1)[1]
+        return Image.open(BytesIO(base64.b64decode(encoding)))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail="Invalid encoded image") from e
+
+
+def encode_pil_to_base64(image):
+    """Serialize a PIL image to PNG and return it as base64-encoded bytes.
+
+    Entries of ``image.info`` whose key and value are both strings are written
+    into the PNG as text metadata; if there are none, no metadata is attached.
+    """
+    png_info = PngImagePlugin.PngInfo()
+    text_items = [
+        (key, value)
+        for key, value in image.info.items()
+        if isinstance(key, str) and isinstance(value, str)
+    ]
+    for key, value in text_items:
+        png_info.add_text(key, value)
+
+    with io.BytesIO() as buffer:
+        image.save(
+            buffer,
+            format="PNG",
+            pnginfo=png_info if text_items else None,
+        )
+        encoded = base64.b64encode(buffer.getvalue())
+
+    return encoded
+
+
+# reference: https://gist.github.com/vitaliyp/6d54dd76ca2c3cdfc1149d33007dc34a
+class FIFOLock(object):
+    """Lock that wakes blocked acquirers in the order they started waiting.
+
+    Usable as a context manager: ``__enter__`` is ``acquire`` and ``__exit__``
+    calls ``release``.
+    """
+
+    def __init__(self):
+        # The lock handed out to callers.
+        self._lock = threading.Lock()
+        # Guards the waiter queue and the acquire attempt / release hand-off.
+        self._inner_lock = threading.Lock()
+        # One Event per blocked acquirer, oldest first.
+        self._pending_threads = collections.deque()
+
+    def acquire(self, blocking=True):
+        """Acquire the lock.
+
+        Returns True once the lock is held, or False when ``blocking`` is
+        false and the lock is currently taken.
+        """
+        with self._inner_lock:
+            # Try without blocking first; success means nothing to queue.
+            lock_acquired = self._lock.acquire(False)
+            if lock_acquired:
+                return True
+            elif not blocking:
+                return False
+
+            # Queue an event that release() will set when it is this waiter's turn.
+            release_event = threading.Event()
+            self._pending_threads.append(release_event)
+
+        # Wait outside _inner_lock so release() can take it.
+        release_event.wait()
+        return self._lock.acquire()
+
+    def release(self):
+        """Wake the oldest waiter, if any, then release the lock."""
+        with self._inner_lock:
+            if self._pending_threads:
+                release_event = self._pending_threads.popleft()
+                release_event.set()
+
+            self._lock.release()
+
+    __enter__ = acquire
+
+    def __exit__(self, t, v, tb):
+        self.release()
+
+
+def api_middleware(app: FastAPI):
+    """Install request logging/timing and exception handling on ``app``.
+
+    Registers two HTTP middlewares and two exception handlers. Every response
+    gets an ``X-Process-Time`` header; requests under ``/sdapi`` are printed
+    when ``cmd_opts.api_log`` is set.
+    """
+    rich_available = False
+    try:
+        # Rich tracebacks are opt-in through the WEBUI_RICH_EXCEPTIONS variable.
+        if os.environ.get("WEBUI_RICH_EXCEPTIONS", None) is not None:
+            import anyio  # importing just so it can be placed on silent list
+            import starlette  # importing just so it can be placed on silent list
+            from rich.console import Console
+
+            console = Console()
+            rich_available = True
+    except Exception:
+        # Any import failure leaves rich_available False (plain printing).
+        pass
+
+    @app.middleware("http")
+    async def log_and_time(req: Request, call_next):
+        ts = time.time()
+        res: Response = await call_next(req)
+        # Elapsed seconds, rounded to 4 decimals, as a string for the header.
+        duration = str(round(time.time() - ts, 4))
+        res.headers["X-Process-Time"] = duration
+        endpoint = req.scope.get("path", "err")
+        if cmd_opts.api_log and endpoint.startswith("/sdapi"):
+            print(
+                "API {t} {code} {prot}/{ver} {method} {endpoint} {cli} {duration}".format(
+                    t=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
+                    code=res.status_code,
+                    ver=req.scope.get("http_version", "0.0"),
+                    cli=req.scope.get("client", ("0:0.0.0", 0))[0],
+                    prot=req.scope.get("scheme", "err"),
+                    method=req.scope.get("method", "err"),
+                    endpoint=endpoint,
+                    duration=duration,
+                )
+            )
+        return res
+
+    def handle_exception(request: Request, e: Exception):
+        # Build the JSON error payload from the exception's own attributes.
+        err = {
+            "error": type(e).__name__,
+            "detail": vars(e).get("detail", ""),
+            "body": vars(e).get("body", ""),
+            "errors": str(e),
+        }
+        if not isinstance(
+            e, HTTPException
+        ):  # do not print backtrace on known httpexceptions
+            message = f"API error: {request.method}: {request.url} {err}"
+            if rich_available:
+                print(message)
+                console.print_exception(
+                    show_locals=True,
+                    max_frames=2,
+                    extra_lines=1,
+                    suppress=[anyio, starlette],
+                    word_wrap=False,
+                    width=min([console.width, 200]),
+                )
+            else:
+                print(message)
+                # NOTE(review): this re-raises, so without rich the JSONResponse
+                # below is never returned for non-HTTP errors — confirm intended.
+                raise (e)
+        # Uses the exception's status_code attribute when it has one, else 500.
+        return JSONResponse(
+            status_code=vars(e).get("status_code", 500),
+            content=jsonable_encoder(err),
+        )
+
+    @app.middleware("http")
+    async def exception_handling(request: Request, call_next):
+        try:
+            return await call_next(request)
+        except Exception as e:
+            return handle_exception(request, e)
+
+    @app.exception_handler(Exception)
+    async def fastapi_exception_handler(request: Request, e: Exception):
+        return handle_exception(request, e)
+
+    @app.exception_handler(HTTPException)
+    async def http_exception_handler(request: Request, e: HTTPException):
+        return handle_exception(request, e)
+
+
+class ApiCompat:
+    """Holds a FastAPI app, installs the API middleware on it and serves it."""
+
+    def __init__(self, app: FastAPI, queue_lock: Lock):
+        self.router = APIRouter()
+        self.app = app
+        # Stored for later use; nothing in this class acquires it.
+        self.queue_lock = queue_lock
+        api_middleware(self.app)
+
+        # self.add_api_route("/sdapi/v1/txt2img", shark_sd_api, methods=["POST"])
+
+        self.default_script_arg_txt2img = []
+        self.default_script_arg_img2img = []
+
+    def add_api_route(self, path: str, endpoint, **kwargs):
+        """Register ``endpoint`` at ``path`` directly on the app (not on ``self.router``)."""
+        return self.app.add_api_route(path, endpoint, **kwargs)
+
+    def launch(self, server_name, port, root_path):
+        """Attach ``self.router`` to the app and run it with uvicorn."""
+        self.app.include_router(self.router)
+        uvicorn.run(
+            self.app,
+            host=server_name,
+            port=port,
+            root_path=root_path,
+        )
+
+    # def kill_studio(self):
+    #     restart.stop_program()
+
+    # def restart_studio(self):
+    #     if restart.is_restartable():
+    #         restart.restart_program()
+    #     return Response(status_code=501)
+
+    # def preprocess(self, args: dict):
+    #     try:
+    #         studio.state.begin(job="preprocess")
+    #         preprocess(**args)
+    #         studio.state.end()
+    #         return models.PreprocessResponse(info="preprocess complete")
+    #     except:
+    #         studio.state.end()
+
+    # def stop_studio(request):
+    #     studio.state.server_command = "stop"
+    #     return Response("Stopping.")
--- a/apps/shark_studio/web/api/sd.py
+++ b/apps/shark_studio/web/api/sd.py
@@ -0,0 +1,115 @@
+import base64
+
+from fastapi import FastAPI
+
+from io import BytesIO
+from PIL import Image
+from pydantic import BaseModel, Field
+from fastapi.exceptions import HTTPException
+
+from apps.shark_studio.api.sd import shark_sd_fn
+
+sdapi = FastAPI()
+
+
+class GenerationInputData(BaseModel):
+    """Request body for the txt2img endpoint."""
+
+    prompt: list = [""]
+    negative_prompt: list = [""]
+    # None means "use the endpoint's default model".
+    hf_model_id: str | None = None
+    # Height and width must be multiples of 8 within 128..1024.
+    height: int = Field(default=512, ge=128, le=1024, multiple_of=8)
+    width: int = Field(default=512, ge=128, le=1024, multiple_of=8)
+    sampler_name: str = "EulerDiscrete"
+    cfg_scale: float = Field(default=7.5, ge=1)
+    steps: int = Field(default=20, ge=1, le=100)
+    seed: int = Field(default=-1)
+    n_iter: int = Field(default=1)
+    # Declared nullable to match its None default, so an explicit
+    # `"config": null` in the request validates as well.
+    config: dict | None = None
+
+
+class GenerationResponseData(BaseModel):
+    # Response body used as the txt2img endpoint's response_model.
+    images: list[str] = Field(description="Generated images, Base64 encoded")
+    # Defaults to an empty dict when the endpoint does not supply it.
+    properties: dict = {}
+    info: str
+
+
+def encode_pil_to_base64(images: list[Image.Image]):
+    """Return each image serialized as PNG and base64-encoded, in input order."""
+
+    def _png_as_base64(image):
+        # A fresh in-memory buffer per image, closed as soon as it is encoded.
+        with BytesIO() as buffer:
+            image.save(buffer, format="PNG")
+            return base64.b64encode(buffer.getvalue())
+
+    return [_png_as_base64(image) for image in images]
+
+
+def decode_base64_to_image(encoding: str):
+    """Open base64 text, optionally wrapped in a ``data:image/`` URI, as a PIL image.
+
+    Raises:
+        HTTPException: status 400 when the payload cannot be decoded or opened.
+    """
+    if encoding.startswith("data:image/"):
+        # Keep only what follows the first ";" and then the first ",".
+        after_semicolon = encoding.split(";", 1)[1]
+        encoding = after_semicolon.split(",", 1)[1]
+    try:
+        raw_bytes = base64.b64decode(encoding)
+        return Image.open(BytesIO(raw_bytes))
+    except Exception as err:
+        print(err)
+        raise HTTPException(status_code=400, detail="Invalid encoded image")
+
+
+@sdapi.post(
+    "/v1/txt2img",
+    summary="Does text to image generation",
+    response_model=GenerationResponseData,
+)
+def txt2img_api(InputData: GenerationInputData):
+    """Run text-to-image generation and return the images base64 encoded.
+
+    Args:
+        InputData: validated request body. ``config`` may override any of
+            ``precision``, ``device`` and ``target_triple``; keys it omits
+            fall back to the defaults below.
+
+    Returns:
+        A dict with ``images``, ``properties`` and ``info``, matching
+        ``GenerationResponseData``.
+
+    Raises:
+        HTTPException: status 500 when generation yields no result.
+    """
+    model_id = (
+        InputData.hf_model_id or "stabilityai/stable-diffusion-3-medium-diffusers"
+    )
+    scheduler = "FlowEulerDiscrete"
+    print(
+        f"Prompt: {InputData.prompt}, "
+        f"Negative Prompt: {InputData.negative_prompt}, "
+        f"Seed: {InputData.seed},"
+        f"Model: {model_id}, "
+        f"Scheduler: {scheduler}. "
+    )
+    # Merge the caller's config over the defaults so a partial config (for
+    # example only "device") no longer fails with a KeyError.
+    config = {
+        "precision": "fp16",
+        "device": "rocm",
+        "target_triple": "gfx1150",
+        **(InputData.config or {}),
+    }
+
+    res = shark_sd_fn(
+        InputData.prompt,
+        InputData.negative_prompt,
+        None,
+        InputData.height,
+        InputData.width,
+        InputData.steps,
+        None,
+        InputData.cfg_scale,
+        InputData.seed,
+        custom_vae=None,
+        batch_count=InputData.n_iter,
+        batch_size=1,
+        scheduler=scheduler,
+        base_model_id=model_id,
+        custom_weights=None,
+        precision=config["precision"],
+        device=config["device"],
+        target_triple=config["target_triple"],
+        output_type="pil",
+        ondemand=False,
+        compiled_pipeline=False,
+        resample_type=None,
+        controlnets=[],
+        embeddings=[],
+    )
+
+    # Since we're not streaming we just want the last generator result
+    items = None
+    for items in res:
+        pass
+    if items is None:
+        # An empty generator would otherwise surface as an unbound-variable error.
+        raise HTTPException(
+            status_code=500, detail="Image generation produced no output"
+        )
+
+    return {
+        "images": encode_pil_to_base64(items[0]),
+        # Key matches the response model's field name.
+        "properties": {},
+        "info": items[1],
+    }
--- a/apps/shark_studio/web/index.py
+++ b/apps/shark_studio/web/index.py
@@ -1,22 +1,64 @@
 from multiprocessing import Process, freeze_support
+
+freeze_support()
+from PIL import Image
+
 import os
+import time
 import sys
 import logging
-from ui.chat import chat_element
+import apps.shark_studio.api.initializers as initialize
+
+
+from apps.shark_studio.modules import timer
+
+startup_timer = timer.startup_timer
+startup_timer.record("launcher")
+
+initialize.imports()

 if sys.platform == "darwin":
    os.environ["DYLD_LIBRARY_PATH"] = "/usr/local/lib"
    # import before IREE to avoid MLIR library issues
    import torch_mlir

-# import PIL, transformers, sentencepiece  # ensures inclusion in pysintaller exe generation
-# from apps.stable_diffusion.src import args, clear_all
-# import apps.stable_diffusion.web.utils.global_obj as global_obj
+
+def create_api(app):
+    """Wrap ``app`` in an ``ApiCompat`` that owns a newly created ``FIFOLock``."""
+    # Imported inside the function, so the module is loaded on first call.
+    from apps.shark_studio.web.api.compat import ApiCompat, FIFOLock
+
+    return ApiCompat(app, FIFOLock())


-def launch_app(address):
+def api_only():
+    """Start the REST API server without building the web UI.
+
+    Runs the initializers, creates a FastAPI app with the ``sdapi``
+    sub-application mounted under ``/sdapi/``, and serves it on
+    ``cmd_opts.server_port``.
+    """
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+    from apps.shark_studio.web.api.sd import sdapi
+    from fastapi import FastAPI
+
+    initialize.initialize()
+
+    app = FastAPI()
+    initialize.setup_middleware(app)
+    app.mount("/sdapi/", sdapi)
+    api = create_api(app)
+
+    # from modules import script_callbacks
+    # script_callbacks.before_ui_callback()
+    # script_callbacks.app_started_callback(None, app)
+
+    print(f"Startup time: {startup_timer.summary()}.")
+    # NOTE(review): the host is fixed to 0.0.0.0 rather than taken from a
+    # command-line option — confirm listening on all interfaces is intended.
+    api.launch(
+        server_name="0.0.0.0",
+        port=cmd_opts.server_port,
+        root_path="",
+    )
+
+
+def launch_webui(address):
    from tkinter import Tk
    import webview
+    import gradio as gr

    window = Tk()

@@ -34,138 +76,78 @@ def launch_app(address):
    webview.start(private_mode=False, storage_path=os.getcwd())


-if __name__ == "__main__":
-    # if args.debug:
-    logging.basicConfig(level=logging.DEBUG)
+def webui():
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+    from apps.shark_studio.web.ui.utils import (
+        amdicon_loc,
+        amdlogo_loc,
+    )
+
+    launch_api = cmd_opts.api
+    initialize.initialize()
+
+    # from ui.chat import chat_element
+    from ui.sd import sd_element
+    from ui.outputgallery import outputgallery_element
+
    # required to do multiprocessing in a pyinstaller freeze
    freeze_support()
-    #    if args.api or "api" in args.ui.split(","):
-    #        from apps.stable_diffusion.web.ui import (
-    #            txt2img_api,
-    #            img2img_api,
-    #            upscaler_api,
-    #            inpaint_api,
-    #            outpaint_api,
-    #            llm_chat_api,
-    #        )
-    #
-    #        from fastapi import FastAPI, APIRouter
-    #        import uvicorn
-    #
-    #        # init global sd pipeline and config
-    #        global_obj._init()
-    #
-    #        app = FastAPI()
-    #        app.add_api_route("/sdapi/v1/txt2img", txt2img_api, methods=["post"])
-    #        app.add_api_route("/sdapi/v1/img2img", img2img_api, methods=["post"])
-    #        app.add_api_route("/sdapi/v1/inpaint", inpaint_api, methods=["post"])
-    #        app.add_api_route("/sdapi/v1/outpaint", outpaint_api, methods=["post"])
-    #        app.add_api_route("/sdapi/v1/upscaler", upscaler_api, methods=["post"])
-    #
-    #        # chat APIs needed for compatibility with multiple extensions using OpenAI API
-    #        app.add_api_route(
-    #            "/v1/chat/completions", llm_chat_api, methods=["post"]
-    #        )
-    #        app.add_api_route("/v1/completions", llm_chat_api, methods=["post"])
-    #        app.add_api_route("/chat/completions", llm_chat_api, methods=["post"])
-    #        app.add_api_route("/completions", llm_chat_api, methods=["post"])
-    #        app.add_api_route(
-    #            "/v1/engines/codegen/completions", llm_chat_api, methods=["post"]
-    #        )
-    #        app.include_router(APIRouter())
-    #        uvicorn.run(app, host="0.0.0.0", port=args.server_port)
-    #        sys.exit(0)
-    #
-    # Setup to use shark_tmp for gradio's temporary image files and clear any
-    # existing temporary images there if they exist. Then we can import gradio.
-    # It has to be in this order or gradio ignores what we've set up.
-    # from apps.stable_diffusion.web.utils.gradio_configs import (
-    #    config_gradio_tmp_imgs_folder,
-    # )

-    # config_gradio_tmp_imgs_folder()
+    # if args.api or "api" in args.ui.split(","):
+    #     from apps.shark_studio.api.llm import (
+    #         chat,
+    #     )
+    #     from apps.shark_studio.web.api import sdapi
+    #
+    #     from fastapi import FastAPI, APIRouter
+    #     from fastapi.middleware.cors import CORSMiddleware
+    #     import uvicorn
+    #
+    #     # init global sd pipeline and config
+    #     global_obj._init()
+    #
+    #     api = FastAPI()
+    #     api.mount("/sdapi/", sdapi)
+    #
+    #     # chat APIs needed for compatibility with multiple extensions using OpenAI API
+    #     api.add_api_route(
+    #         "/v1/chat/completions", llm_chat_api, methods=["post"]
+    #     )
+    #     api.add_api_route("/v1/completions", llm_chat_api, methods=["post"])
+    #     api.add_api_route("/chat/completions", llm_chat_api, methods=["post"])
+    #     api.add_api_route("/completions", llm_chat_api, methods=["post"])
+    #     api.add_api_route(
+    #         "/v1/engines/codegen/completions", llm_chat_api, methods=["post"]
+    #     )
+    #     api.include_router(APIRouter())
+    #
+    #     # deal with CORS requests if CORS accept origins are set
+    #     if args.api_accept_origin:
+    #         print(
+    #             f"API Configured for CORS. Accepting origins: { args.api_accept_origin }"
+    #         )
+    #         api.add_middleware(
+    #             CORSMiddleware,
+    #             allow_origins=args.api_accept_origin,
+    #             allow_methods=["GET", "POST"],
+    #             allow_headers=["*"],
+    #         )
+    #     else:
+    #         print("API not configured for CORS")
+    #
+    #     uvicorn.run(api, host="0.0.0.0", port=args.server_port)
+    #     sys.exit(0)
    import gradio as gr

-    # Create custom models folders if they don't exist
-    # from apps.stable_diffusion.web.ui.utils import create_custom_models_folders
-
-    # create_custom_models_folders()
-
    def resource_path(relative_path):
        """Get absolute path to resource, works for dev and for PyInstaller"""
        base_path = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
        return os.path.join(base_path, relative_path)

    dark_theme = resource_path("ui/css/sd_dark_theme.css")
+    gradio_workarounds = resource_path("ui/js/sd_gradio_workarounds.js")

-    # from apps.stable_diffusion.web.ui import (
-    # txt2img_web,
-    # txt2img_custom_model,
-    # txt2img_gallery,
-    # txt2img_png_info_img,
-    # txt2img_status,
-    # txt2img_sendto_img2img,
-    # txt2img_sendto_inpaint,
-    # txt2img_sendto_outpaint,
-    # txt2img_sendto_upscaler,
-    ## h2ogpt_upload,
-    ## h2ogpt_web,
-    # img2img_web,
-    # img2img_custom_model,
-    # img2img_gallery,
-    # img2img_init_image,
-    # img2img_status,
-    # img2img_sendto_inpaint,
-    # img2img_sendto_outpaint,
-    # img2img_sendto_upscaler,
-    # inpaint_web,
-    # inpaint_custom_model,
-    # inpaint_gallery,
-    # inpaint_init_image,
-    # inpaint_status,
-    # inpaint_sendto_img2img,
-    # inpaint_sendto_outpaint,
-    # inpaint_sendto_upscaler,
-    # outpaint_web,
-    # outpaint_custom_model,
-    # outpaint_gallery,
-    # outpaint_init_image,
-    # outpaint_status,
-    # outpaint_sendto_img2img,
-    # outpaint_sendto_inpaint,
-    # outpaint_sendto_upscaler,
-    # upscaler_web,
-    # upscaler_custom_model,
-    # upscaler_gallery,
-    # upscaler_init_image,
-    # upscaler_status,
-    # upscaler_sendto_img2img,
-    # upscaler_sendto_inpaint,
-    # upscaler_sendto_outpaint,
-    ##  lora_train_web,
-    ##  model_web,
-    ##  model_config_web,
-    # hf_models,
-    # modelmanager_sendto_txt2img,
-    # modelmanager_sendto_img2img,
-    # modelmanager_sendto_inpaint,
-    # modelmanager_sendto_outpaint,
-    # modelmanager_sendto_upscaler,
-    # stablelm_chat,
-    # minigpt4_web,
-    # outputgallery_web,
-    # outputgallery_tab_select,
-    # outputgallery_watch,
-    # outputgallery_filename,
-    # outputgallery_sendto_txt2img,
-    # outputgallery_sendto_img2img,
-    # outputgallery_sendto_inpaint,
-    # outputgallery_sendto_outpaint,
-    # outputgallery_sendto_upscaler,
-    # )
-
-    # init global sd pipeline and config
-    # global_obj._init()
+    # from apps.shark_studio.web.ui import load_ui_from_script

    def register_button_click(button, selectedid, inputs, outputs):
        button.click(
@@ -177,17 +159,6 @@ if __name__ == "__main__":
            outputs,
        )

-    def register_modelmanager_button(button, selectedid, inputs, outputs):
-        button.click(
-            lambda x: (
-                "None",
-                x,
-                gr.Tabs.update(selected=selectedid),
-            ),
-            inputs,
-            outputs,
-        )
-
    def register_outputgallery_button(button, selectedid, inputs, outputs):
        button.click(
            lambda x: (
@@ -199,8 +170,19 @@ if __name__ == "__main__":
        )

    with gr.Blocks(
-        css=dark_theme, analytics_enabled=False, title="Shark Studio 2.0 Beta"
-    ) as sd_web:
+        css=dark_theme,
+        js=gradio_workarounds,
+        analytics_enabled=False,
+        title="Shark Studio 2.0",
+    ) as studio_web:
+        amd_logo = Image.open(amdlogo_loc)
+        gr.Image(
+            value=amd_logo,
+            show_label=False,
+            interactive=False,
+            elem_id="tab_bar_logo",
+            show_download_button=False,
+        )
        with gr.Tabs() as tabs:
            # NOTE: If adding, removing, or re-ordering tabs, make sure that they
            # have a unique id that doesn't clash with any of the other tabs,
@@ -211,216 +193,34 @@ if __name__ == "__main__":
            # destination of one of the 'send to' buttons. If you do have to change
            # that id, make sure you update the relevant register_button_click calls
            # further down with the new id.
-            # with gr.TabItem(label="Text-to-Image", id=0):
-            #    txt2img_web.render()
-            # with gr.TabItem(label="Image-to-Image", id=1):
-            #    img2img_web.render()
-            # with gr.TabItem(label="Inpainting", id=2):
-            #    inpaint_web.render()
-            # with gr.TabItem(label="Outpainting", id=3):
-            #    outpaint_web.render()
-            # with gr.TabItem(label="Upscaler", id=4):
-            #    upscaler_web.render()
-            # if args.output_gallery:
-            #    with gr.TabItem(label="Output Gallery", id=5) as og_tab:
-            #        outputgallery_web.render()
+            with gr.TabItem(label="Stable Diffusion", id=0):
+                sd_element.render()
+            with gr.TabItem(label="Output Gallery", id=1):
+                outputgallery_element.render()
+            # with gr.TabItem(label="Chat Bot", id=2):
+            #     chat_element.render()

-            #    # extra output gallery configuration
-            #    outputgallery_tab_select(og_tab.select)
-            #    outputgallery_watch(
-            #        [
-            #            txt2img_status,
-            #            img2img_status,
-            #            inpaint_status,
-            #            outpaint_status,
-            #            upscaler_status,
-            #        ]
-            #    )
-            ##  with gr.TabItem(label="Model Manager", id=6):
-            ##      model_web.render()
-            ##  with gr.TabItem(label="LoRA Training (Experimental)", id=7):
-            ##      lora_train_web.render()
-            with gr.TabItem(label="Chat Bot", id=0):
-                chat_element.render()
-            ##  with gr.TabItem(
-            ##      label="Generate Sharding Config (Experimental)", id=9
-            ##  ):
-            ##      model_config_web.render()
-            # with gr.TabItem(label="MultiModal (Experimental)", id=10):
-            #    minigpt4_web.render()
-            # with gr.TabItem(label="DocuChat Upload", id=11):
-            #     h2ogpt_upload.render()
-            # with gr.TabItem(label="DocuChat(Experimental)", id=12):
-            #     h2ogpt_web.render()
+    studio_web.queue()

-        # send to buttons
-        # register_button_click(
-        #    txt2img_sendto_img2img,
-        #    1,
-        #    [txt2img_gallery],
-        #    [img2img_init_image, tabs],
-        # )
-        # register_button_click(
-        #    txt2img_sendto_inpaint,
-        #    2,
-        #    [txt2img_gallery],
-        #    [inpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    txt2img_sendto_outpaint,
-        #    3,
-        #    [txt2img_gallery],
-        #    [outpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    txt2img_sendto_upscaler,
-        #    4,
-        #    [txt2img_gallery],
-        #    [upscaler_init_image, tabs],
-        # )
-        # register_button_click(
-        #    img2img_sendto_inpaint,
-        #    2,
-        #    [img2img_gallery],
-        #    [inpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    img2img_sendto_outpaint,
-        #    3,
-        #    [img2img_gallery],
-        #    [outpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    img2img_sendto_upscaler,
-        #    4,
-        #    [img2img_gallery],
-        #    [upscaler_init_image, tabs],
-        # )
-        # register_button_click(
-        #    inpaint_sendto_img2img,
-        #    1,
-        #    [inpaint_gallery],
-        #    [img2img_init_image, tabs],
-        # )
-        # register_button_click(
-        #    inpaint_sendto_outpaint,
-        #    3,
-        #    [inpaint_gallery],
-        #    [outpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    inpaint_sendto_upscaler,
-        #    4,
-        #    [inpaint_gallery],
-        #    [upscaler_init_image, tabs],
-        # )
-        # register_button_click(
-        #    outpaint_sendto_img2img,
-        #    1,
-        #    [outpaint_gallery],
-        #    [img2img_init_image, tabs],
-        # )
-        # register_button_click(
-        #    outpaint_sendto_inpaint,
-        #    2,
-        #    [outpaint_gallery],
-        #    [inpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    outpaint_sendto_upscaler,
-        #    4,
-        #    [outpaint_gallery],
-        #    [upscaler_init_image, tabs],
-        # )
-        # register_button_click(
-        #    upscaler_sendto_img2img,
-        #    1,
-        #    [upscaler_gallery],
-        #    [img2img_init_image, tabs],
-        # )
-        # register_button_click(
-        #    upscaler_sendto_inpaint,
-        #    2,
-        #    [upscaler_gallery],
-        #    [inpaint_init_image, tabs],
-        # )
-        # register_button_click(
-        #    upscaler_sendto_outpaint,
-        #    3,
-        #    [upscaler_gallery],
-        #    [outpaint_init_image, tabs],
-        # )
-        # if args.output_gallery:
-        #    register_outputgallery_button(
-        #        outputgallery_sendto_txt2img,
-        #        0,
-        #        [outputgallery_filename],
-        #        [txt2img_png_info_img, tabs],
-        #    )
-        #    register_outputgallery_button(
-        #        outputgallery_sendto_img2img,
-        #        1,
-        #        [outputgallery_filename],
-        #        [img2img_init_image, tabs],
-        #    )
-        #    register_outputgallery_button(
-        #        outputgallery_sendto_inpaint,
-        #        2,
-        #        [outputgallery_filename],
-        #        [inpaint_init_image, tabs],
-        #    )
-        #    register_outputgallery_button(
-        #        outputgallery_sendto_outpaint,
-        #        3,
-        #        [outputgallery_filename],
-        #        [outpaint_init_image, tabs],
-        #    )
-        #    register_outputgallery_button(
-        #        outputgallery_sendto_upscaler,
-        #        4,
-        #        [outputgallery_filename],
-        #        [upscaler_init_image, tabs],
-        #    )
-        # register_modelmanager_button(
-        #    modelmanager_sendto_txt2img,
-        #    0,
-        #    [hf_models],
-        #    [txt2img_custom_model, tabs],
-        # )
-        # register_modelmanager_button(
-        #    modelmanager_sendto_img2img,
-        #    1,
-        #    [hf_models],
-        #    [img2img_custom_model, tabs],
-        # )
-        # register_modelmanager_button(
-        #    modelmanager_sendto_inpaint,
-        #    2,
-        #    [hf_models],
-        #    [inpaint_custom_model, tabs],
-        # )
-        # register_modelmanager_button(
-        #    modelmanager_sendto_outpaint,
-        #    3,
-        #    [hf_models],
-        #    [outpaint_custom_model, tabs],
-        # )
-        # register_modelmanager_button(
-        #    modelmanager_sendto_upscaler,
-        #    4,
-        #    [hf_models],
-        #    [upscaler_custom_model, tabs],
-        # )
-
-    sd_web.queue()
    # if args.ui == "app":
    #    t = Process(
    #        target=launch_app, args=[f"http://localhost:{args.server_port}"]
    #    )
    #    t.start()
-    sd_web.launch(
-        share=True,
+    studio_web.launch(
+        share=cmd_opts.share,
        inbrowser=True,
        server_name="0.0.0.0",
-        server_port=11911,  # args.server_port,
+        server_port=cmd_opts.server_port,
+        favicon_path=amdicon_loc,
    )
+
+
+if __name__ == "__main__":
+    from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+    api_only()
+    # if cmd_opts.webui == False:
+    #     api_only()
+    # else:
+    #     webui()
--- a/apps/shark_studio/web/ui/chat.py
+++ b/apps/shark_studio/web/ui/chat.py
@@ -5,13 +5,16 @@ from pathlib import Path
 from datetime import datetime as dt
 import json
 import sys
-from apps.shark_studio.api.utils import (
-    get_available_devices,
-)
 from apps.shark_studio.api.llm import (
    llm_model_map,
    LanguageModel,
 )
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+import apps.shark_studio.web.utils.globals as global_obj
+
+B_SYS, E_SYS = "<s>", "</s>"
+
+B_SYS, E_SYS = "<s>", "</s>"

 B_SYS, E_SYS = "<s>", "</s>"

@@ -62,6 +65,7 @@ def chat_fn(
            external_weights="safetensors",
            use_system_prompt=prompt_prefix,
            streaming_llm=streaming_llm,
+            hf_auth_token=cmd_opts.hf_auth_token,
        )
        history[-1][-1] = "Getting the model ready... Done"
        yield history, ""
@@ -99,7 +103,7 @@ with gr.Blocks(title="Chat") as chat_element:
            choices=model_choices,
            allow_custom_value=True,
        )
-        supported_devices = get_available_devices()
+        supported_devices = global_obj.get_device_list()
        enabled = True
        if len(supported_devices) == 0:
            supported_devices = ["cpu-task"]
@@ -133,7 +137,8 @@ with gr.Blocks(title="Chat") as chat_element:
            streaming_llm = gr.Checkbox(
                label="Run in streaming mode (requires recompilation)",
                value=True,
-                interactive=True,
+                interactive=False,
+                visible=False,
            )
            prompt_prefix = gr.Checkbox(
                label="Add System Prompt",
--- a/apps/shark_studio/web/ui/common_events.py
+++ b/apps/shark_studio/web/ui/common_events.py
@@ -0,0 +1,67 @@
+from apps.shark_studio.web.ui.utils import (
+    HSLHue,
+    hsl_color,
+)
+from apps.shark_studio.modules.embeddings import get_lora_metadata
+
+
+# Returns HTML to show the most frequent tags used when a LoRA was trained,
+# taken from the metadata of its .safetensors file.
+def lora_changed(lora_files):
+    # tag frequency percentage, that gets maximum amount of the starting hue
+    TAG_COLOR_THRESHOLD = 0.55
+    # tag frequency percentage, above which a tag is displayed
+    TAG_DISPLAY_THRESHOLD = 0.65
+    # template for the html used to display a tag
+    TAG_HTML_TEMPLATE = (
+        '<span class="lora-tag" style="border: 1px solid {color};">{tag}</span>'
+    )
+    output = []
+    for lora_file in lora_files:
+        if lora_file == "":
+            output.extend(["<div><i>No LoRA selected</i></div>"])
+        elif not lora_file.lower().endswith(".safetensors"):
+            output.extend(
+                [
+                    "<div><i>Only metadata queries for .safetensors files are currently supported</i></div>"
+                ]
+            )
+        else:
+            metadata = get_lora_metadata(lora_file)
+            if metadata:
+                frequencies = metadata["frequencies"]
+                output.extend(
+                    [
+                        "".join(
+                            [
+                                f'<div class="lora-model">Trained against weights in: {metadata["model"]}</div>'
+                            ]
+                            + [
+                                TAG_HTML_TEMPLATE.format(
+                                    color=hsl_color(
+                                        (tag[1] - TAG_COLOR_THRESHOLD)
+                                        / (1 - TAG_COLOR_THRESHOLD),
+                                        start=HSLHue.RED,
+                                        end=HSLHue.GREEN,
+                                    ),
+                                    tag=tag[0],
+                                )
+                                for tag in frequencies
+                                if tag[1] > TAG_DISPLAY_THRESHOLD
+                            ],
+                        )
+                    ]
+                )
+            elif metadata is None:
+                output.extend(
+                    [
+                        "<div><i>This LoRA does not publish tag frequency metadata</i></div>"
+                    ]
+                )
+            else:
+                output.extend(
+                    [
+                        "<div><i>This LoRA has empty tag frequency metadata, or we could not parse it</i></div>"
+                    ]
+                )
+    return output
--- a/apps/shark_studio/web/ui/css/sd_dark_theme.css
+++ b/apps/shark_studio/web/ui/css/sd_dark_theme.css
@@ -0,0 +1,373 @@
+/*
+Apply Gradio dark theme to the default Gradio theme.
+Procedure to upgrade the dark theme:
+- Using your browser, visit http://localhost:8080/?__theme=dark
+- Open your browser inspector, search for the .dark css class
+- Copy .dark class declarations, apply them here into :root
+*/
+
+:root {
+    --body-background-fill: var(--background-fill-primary);
+    --body-text-color: var(--neutral-100);
+    --color-accent-soft: var(--neutral-700);
+    --background-fill-primary: var(--neutral-950);
+    --background-fill-secondary: var(--neutral-900);
+    --border-color-accent: var(--neutral-600);
+    --border-color-primary: var(--neutral-700);
+    --link-text-color-active: var(--secondary-500);
+    --link-text-color: var(--secondary-500);
+    --link-text-color-hover: var(--secondary-400);
+    --link-text-color-visited: var(--secondary-600);
+    --body-text-color-subdued: var(--neutral-400);
+    --shadow-spread: 1px;
+    --block-background-fill: var(--neutral-800);
+    --block-border-color: var(--border-color-primary);
+    --block_border_width: None;
+    --block-info-text-color: var(--body-text-color-subdued);
+    --block-label-background-fill: var(--background-fill-secondary);
+    --block-label-border-color: var(--border-color-primary);
+    --block_label_border_width: None;
+    --block-label-text-color: var(--neutral-200);
+    --block_shadow: None;
+    --block_title_background_fill: None;
+    --block_title_border_color: None;
+    --block_title_border_width: None;
+    --block-title-text-color: var(--neutral-200);
+    --panel-background-fill: var(--background-fill-secondary);
+    --panel-border-color: var(--border-color-primary);
+    --panel_border_width: None;
+    --checkbox-background-color: var(--neutral-800);
+    --checkbox-background-color-focus: var(--checkbox-background-color);
+    --checkbox-background-color-hover: var(--checkbox-background-color);
+    --checkbox-background-color-selected: var(--secondary-600);
+    --checkbox-border-color: var(--neutral-700);
+    --checkbox-border-color-focus: var(--secondary-500);
+    --checkbox-border-color-hover: var(--neutral-600);
+    --checkbox-border-color-selected: var(--secondary-600);
+    --checkbox-border-width: var(--input-border-width);
+    --checkbox-label-background-fill: linear-gradient(to top, var(--neutral-900), var(--neutral-800));
+    --checkbox-label-background-fill-hover: linear-gradient(to top, var(--neutral-900), var(--neutral-800));
+    --checkbox-label-background-fill-selected: var(--checkbox-label-background-fill);
+    --checkbox-label-border-color: var(--border-color-primary);
+    --checkbox-label-border-color-hover: var(--checkbox-label-border-color);
+    --checkbox-label-border-width: var(--input-border-width);
+    --checkbox-label-text-color: var(--body-text-color);
+    --checkbox-label-text-color-selected: var(--checkbox-label-text-color);
+    --error-background-fill: var(--background-fill-primary);
+    --error-border-color: var(--border-color-primary);
+    --error_border_width: None;
+    --error-text-color: #ef4444;
+    --input-background-fill: var(--neutral-800);
+    --input-background-fill-focus: var(--secondary-600);
+    --input-background-fill-hover: var(--input-background-fill);
+    --input-border-color: var(--border-color-primary);
+    --input-border-color-focus: var(--neutral-700);
+    --input-border-color-hover: var(--input-border-color);
+    --input_border_width: None;
+    --input-placeholder-color: var(--neutral-500);
+    --input_shadow: None;
+    --input-shadow-focus: 0 0 0 var(--shadow-spread) var(--neutral-700), var(--shadow-inset);
+    --loader_color: None;
+    --slider_color: None;
+    --stat-background-fill: linear-gradient(to right, var(--primary-400), var(--primary-600));
+    --table-border-color: var(--neutral-700);
+    --table-even-background-fill: var(--neutral-950);
+    --table-odd-background-fill: var(--neutral-900);
+    --table-row-focus: var(--color-accent-soft);
+    --button-border-width: var(--input-border-width);
+    --button-cancel-background-fill: linear-gradient(to bottom right, #dc2626, #b91c1c);
+    --button-cancel-background-fill-hover: linear-gradient(to bottom right, #dc2626, #dc2626);
+    --button-cancel-border-color: #dc2626;
+    --button-cancel-border-color-hover: var(--button-cancel-border-color);
+    --button-cancel-text-color: white;
+    --button-cancel-text-color-hover: var(--button-cancel-text-color);
+    --button-primary-background-fill: linear-gradient(to bottom right, var(--primary-500), var(--primary-600));
+    --button-primary-background-fill-hover: linear-gradient(to bottom right, var(--primary-500), var(--primary-500));
+    --button-primary-border-color: var(--primary-500);
+    --button-primary-border-color-hover: var(--button-primary-border-color);
+    --button-primary-text-color: white;
+    --button-primary-text-color-hover: var(--button-primary-text-color);
+    --button-secondary-background-fill: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-700));
+    --button-secondary-background-fill-hover: linear-gradient(to bottom right, var(--neutral-600), var(--neutral-600));
+    --button-secondary-border-color: var(--neutral-600);
+    --button-secondary-border-color-hover: var(--button-secondary-border-color);
+    --button-secondary-text-color: white;
+    --button-secondary-text-color-hover: var(--button-secondary-text-color);
+    --block-border-width: 1px;
+    --block-label-border-width: 1px;
+    --form-gap-width: 1px;
+    --error-border-width: 1px;
+    --input-border-width: 1px;
+}
+
+/* SHARK theme */
+body {
+    background-color: var(--background-fill-primary);
+}
+
+.generating.svelte-zlszon.svelte-zlszon {
+    border: none;
+}
+
+.generating {
+    border: none !important;
+}
+
+#chatbot {
+    height: 100% !important;
+}
+
+/* display in full width for desktop devices, but see below */
+@media (min-width: 1536px)
+{
+    .gradio-container {
+        max-width: var(--size-full) !important;
+    }
+}
+
+/* media rules in custom css don't appear to be applied in
+   gradio versions > 4.7, so we have to define a class which
+   we will manually need to add and remove using javascript.
+   Remove this once this is fixed in gradio.
+*/
+.gradio-container-size-full {
+    max-width: var(--size-full) !important;
+}
+
+.gradio-container .contain {
+    padding: 0 var(--size-4) !important;
+}
+
+#top_logo {
+    color: transparent;
+    background-color: transparent;
+    border-radius: 0 !important;
+    border: 0;
+}
+
+#ui_title {
+    padding: var(--size-2) 0 0 var(--size-1);
+}
+
+#demo_title_outer {
+    border-radius: 0;
+}
+
+#prompt_box_outer div:first-child {
+    border-radius: 0 !important
+}
+
+#prompt_box textarea, #negative_prompt_box textarea {
+    background-color: var(--background-fill-primary) !important;
+}
+
+#prompt_examples {
+    margin: 0 !important;
+}
+
+#prompt_examples svg {
+    display: none !important;
+}
+
+#ui_body {
+    padding: var(--size-2) !important;
+    border-radius: 0.5em !important;
+}
+
+#img_result+div {
+    display: none !important;
+}
+
+footer {
+    display: none !important;
+}
+
+#gallery + div {
+    border-radius: 0 !important;
+}
+
+/* Gallery: Remove the default square ratio thumbnail and limit images height to the container */
+#gallery .thumbnail-item.thumbnail-lg {
+    aspect-ratio: unset;
+    max-height: calc(55vh - (2 * var(--spacing-lg)));
+}
+/* fix width and height of gallery items when on very large desktop screens, but see below */
+@media (min-width: 1921px) {
+    /* Force a 768px_height + 4px_margin_height + navbar_height for the gallery */
+    #gallery .grid-wrap, #gallery .preview{
+        min-height: calc(768px + 4px + var(--size-14));
+        max-height: calc(768px + 4px + var(--size-14));
+    }
+    /* Limit height to 768px_height + 2px_margin_height for the thumbnails */
+    #gallery .thumbnail-item.thumbnail-lg {
+        max-height: 770px !important;
+    }
+}
+
+/* media rules in custom css don't appear to be applied in
+   gradio versions > 4.7, so we have to define classes which
+   we will manually need to add and remove using javascript.
+   Remove this once this is fixed in gradio.
+*/
+.gallery-force-height768 .grid-wrap, .gallery-force-height768 .preview {
+    min-height: calc(768px + 4px + var(--size-14)) !important;
+    max-height: calc(768px + 4px + var(--size-14)) !important;
+}
+.gallery-limit-height768 .thumbnail-item.thumbnail-lg {
+    max-height: 770px !important;
+}
+
+/* Don't upscale when viewing in solo image mode */
+#gallery .preview img {
+    object-fit: scale-down;
+}
+/* Navbar images in cover mode*/
+#gallery .preview .thumbnail-item img {
+    object-fit: cover;
+}
+
+/* Limit the stable diffusion text output height */
+#std_output textarea {
+    max-height: 215px;
+}
+
+/* Prevent the progress bar from blocking gallery navigation while building images (Gradio V3.19.0) */
+#gallery .wrap.default {
+    pointer-events: none;
+}
+
+/* Import Png info box */
+#txt2img_prompt_image {
+    height: var(--size-32) !important;
+}
+
+/* Hide "remove buttons" from ui dropdowns */
+#custom_model .token-remove.remove-all,
+#lora_weights .token-remove.remove-all,
+#scheduler .token-remove.remove-all,
+#device .token-remove.remove-all,
+#stencil_model .token-remove.remove-all {
+    display: none;
+}
+
+/* Hide selected items from ui dropdowns */
+#custom_model .options .item .inner-item,
+#scheduler .options .item .inner-item,
+#device .options .item .inner-item,
+#stencil_model .options .item .inner-item {
+    display:none;
+}
+
+/* workarounds for container=false not currently working for dropdowns */
+.dropdown_no_container {
+    padding: 0 !important;
+}
+
+#output_subdir_container :first-child {
+    border: none;
+}
+
+/* reduced animation load when generating */
+.generating {
+    animation-play-state: paused !important;
+}
+
+/* better clarity when progress bars are minimal */
+.meta-text {
+    background-color: var(--block-label-background-fill);
+}
+
+/* lora tag pills */
+.lora-tags {
+    border: 1px solid var(--border-color-primary);
+    color: var(--block-info-text-color) !important;
+    padding: var(--block-padding);
+}
+
+.lora-tag {
+    display: inline-block;
+    height: 2em;
+    color: rgb(212 212 212) !important;
+    margin-right: 5pt;
+    margin-bottom: 5pt;
+    padding: 2pt 5pt;
+    border-radius: 5pt;
+    white-space: nowrap;
+}
+
+.lora-model {
+    margin-bottom: var(--spacing-lg);
+    color: var(--block-info-text-color) !important;
+    line-height: var(--line-sm);
+}
+
+/* output gallery tab */
+.output_parameters_dataframe table.table {
+    /* works around a gradio bug that always shows scrollbars */
+    overflow: clip auto;
+}
+
+.output_parameters_dataframe tbody td {
+    font-size: small;
+    line-height: var(--line-xs);
+}
+
+.output_icon_button {
+    max-width: 30px;
+    align-self: end;
+    padding-bottom: 8px;
+}
+
+.outputgallery_sendto {
+    min-width: 7em !important;
+}
+
+/* output gallery should take up most of the viewport height regardless of image size/number */
+#outputgallery_gallery .fixed-height {
+    min-height: 89vh !important;
+}
+
+.sd-right-panel {
+    height: calc(100vmin - var(--size-32) - var(--size-10)) !important;
+    overflow-y: scroll;
+}
+
+.sd-right-panel .fill {
+    flex: 1;
+}
+
+/* don't stretch non-square images to be square, breaking their aspect ratio */
+#outputgallery_gallery .thumbnail-item.thumbnail-lg > img {
+    object-fit: contain !important;
+}
+
+/* centered logo for when there are no images */
+#top_logo.logo_centered {
+    height: 100%;
+    width: 100%;
+}
+
+#top_logo.logo_centered img {
+    object-fit: scale-down;
+    position: absolute;
+    width: 80%;
+    top: 50%;
+    left: 50%;
+    transform: translate(-50%, -50%);
+}
+
+#tab_bar_logo {
+    overflow: visible !important;
+    border-width: 0 !important;
+    height: 0px !important;
+    padding: 0;
+    margin: 0;
+}
+
+#tab_bar_logo .image-container {
+    object-fit: scale-down;
+    position: absolute !important;
+    top: 10px;
+    right: 0px;
+    height: 36px;
+}
--- a/apps/shark_studio/web/ui/js/sd_gradio_workarounds.js
+++ b/apps/shark_studio/web/ui/js/sd_gradio_workarounds.js
@@ -0,0 +1,49 @@
+// workaround for gradio after 4.7 not applying any @media rules from the custom .css file
+
+() => {
+    console.log(`innerWidth: ${window.innerWidth}` )
+
+    // 1536px rules
+
+    const mediaQuery1536 = window.matchMedia('(min-width: 1536px)')
+
+    function handleWidth1536(event) {
+
+        // display in full width for desktop devices
+        document.querySelectorAll(".gradio-container")
+            .forEach( (node) => {
+                if (event.matches) {
+                    node.classList.add("gradio-container-size-full");
+                } else {
+                    node.classList.remove("gradio-container-size-full")
+                }
+            });
+    }
+
+    mediaQuery1536.addEventListener("change", handleWidth1536);
+    mediaQuery1536.dispatchEvent(new MediaQueryListEvent("change", {matches: window.innerWidth >= 1536}));
+
+    // 1921px rules
+
+    const mediaQuery1921 = window.matchMedia('(min-width: 1921px)')
+
+    function handleWidth1921(event) {
+
+        /* Force a 768px_height + 4px_margin_height + navbar_height for the gallery */
+        /* Limit height to 768px_height + 2px_margin_height for the thumbnails */
+        document.querySelectorAll("#gallery")
+            .forEach( (node) => {
+                if (event.matches) {
+                    node.classList.add("gallery-force-height768");
+                    node.classList.add("gallery-limit-height768");
+                } else {
+                    node.classList.remove("gallery-force-height768");
+                    node.classList.remove("gallery-limit-height768");
+                }
+            });
+    }
+
+    mediaQuery1921.addEventListener("change", handleWidth1921);
+    mediaQuery1921.dispatchEvent(new MediaQueryListEvent("change", {matches: window.innerWidth >= 1921}));
+
+}
--- a/apps/shark_studio/web/ui/logos/amd-icon.jpg
+++ b/apps/shark_studio/web/ui/logos/amd-icon.jpg
--- a/apps/shark_studio/web/ui/logos/amd-logo.jpg
+++ b/apps/shark_studio/web/ui/logos/amd-logo.jpg
--- a/apps/shark_studio/web/ui/outputgallery.py
+++ b/apps/shark_studio/web/ui/outputgallery.py
@@ -0,0 +1,406 @@
+import glob
+import gradio as gr
+import os
+import subprocess
+import sys
+from PIL import Image
+
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.shark_studio.web.utils.file_utils import (
+    get_generated_imgs_path,
+    get_generated_imgs_todays_subdir,
+)
+from apps.shark_studio.web.ui.utils import amdlogo_loc
+from apps.shark_studio.web.utils.metadata import displayable_metadata
+
+# -- Functions for file, directory and image info querying
+
+output_dir = get_generated_imgs_path()
+
+
+def outputgallery_filenames(subdir) -> list[str]:
+    new_dir_path = os.path.join(output_dir, subdir)
+    if os.path.exists(new_dir_path):
+        filenames = [
+            glob.glob(new_dir_path + "/" + ext) for ext in ("*.png", "*.jpg", "*.jpeg")
+        ]
+
+        return sorted(sum(filenames, []), key=os.path.getmtime, reverse=True)
+    else:
+        return []
+
+
+def output_subdirs() -> list[str]:
+    # Gets a list of subdirectories of output_dir and below, as relative paths.
+    relative_paths = [
+        os.path.relpath(entry[0], output_dir)
+        for entry in os.walk(
+            output_dir, followlinks=cmd_opts.output_gallery_followlinks
+        )
+    ]
+
+    # It is less confusing to always including the subdir that will take any
+    # images generated today even if it doesn't exist yet
+    if get_generated_imgs_todays_subdir() not in relative_paths:
+        relative_paths.append(get_generated_imgs_todays_subdir())
+
+    # sort subdirectories so that the date named ones we probably
+    # created in this or previous sessions come first, sorted with the most
+    # recent first. Other subdirs are listed after.
+    generated_paths = sorted(
+        [path for path in relative_paths if path.isnumeric()], reverse=True
+    )
+    result_paths = generated_paths + sorted(
+        [path for path in relative_paths if (not path.isnumeric()) and path != "."]
+    )
+
+    return result_paths
+
+
+# --- Define UI layout for Gradio
+
+with gr.Blocks() as outputgallery_element:
+    amd_logo = Image.open(amdlogo_loc)
+
+    with gr.Row(elem_id="outputgallery_gallery"):
+        # needed to workaround gradio issue:
+        # https://github.com/gradio-app/gradio/issues/2907
+        dev_null = gr.Textbox("", visible=False)
+
+        gallery_files = gr.State(value=[])
+        subdirectory_paths = gr.State(value=[])
+
+        with gr.Column(scale=6):
+            logo = gr.Image(
+                label="Getting subdirectories...",
+                value=amd_logo,
+                interactive=False,
+                visible=True,
+                show_label=True,
+                elem_id="top_logo",
+                elem_classes="logo_centered",
+                show_download_button=False,
+            )
+
+            gallery = gr.Gallery(
+                label="",
+                value=gallery_files.value,
+                visible=False,
+                show_label=True,
+                columns=4,
+            )
+
+        with gr.Column(scale=4):
+            with gr.Group():
+                with gr.Row():
+                    with gr.Column(
+                        scale=15,
+                        min_width=160,
+                        elem_id="output_subdir_container",
+                    ):
+                        subdirectories = gr.Dropdown(
+                            label=f"Subdirectories of {output_dir}",
+                            type="value",
+                            choices=subdirectory_paths.value,
+                            value="",
+                            interactive=True,
+                            elem_classes="dropdown_no_container",
+                            allow_custom_value=True,
+                        )
+                    with gr.Column(
+                        scale=1,
+                        min_width=32,
+                        elem_classes="output_icon_button",
+                    ):
+                        open_subdir = gr.Button(
+                            variant="secondary",
+                            value="\U0001F5C1",  # unicode open folder
+                            interactive=False,
+                            size="sm",
+                        )
+                    with gr.Column(
+                        scale=1,
+                        min_width=32,
+                        elem_classes="output_icon_button",
+                    ):
+                        refresh = gr.Button(
+                            variant="secondary",
+                            value="\u21BB",  # unicode clockwise arrow circle
+                            size="sm",
+                        )
+
+            image_columns = gr.Slider(
+                label="Columns shown", value=4, minimum=1, maximum=16, step=1
+            )
+            outputgallery_filename = gr.Textbox(
+                label="Filename",
+                value="None",
+                interactive=False,
+                show_copy_button=True,
+            )
+
+            with gr.Accordion(
+                label="Parameter Information", open=False
+            ) as parameters_accordian:
+                image_parameters = gr.DataFrame(
+                    headers=["Parameter", "Value"],
+                    col_count=2,
+                    wrap=True,
+                    elem_classes="output_parameters_dataframe",
+                    value=[["Status", "No image selected"]],
+                    interactive=True,
+                )
+
+            with gr.Accordion(label="Send To", open=True):
+                with gr.Row():
+                    outputgallery_sendto_sd = gr.Button(
+                        value="Stable Diffusion",
+                        interactive=False,
+                        elem_classes="outputgallery_sendto",
+                        size="sm",
+                    )
+
+    # --- Event handlers
+
+    def on_clear_gallery():
+        return [
+            gr.Gallery(
+                value=[],
+                visible=False,
+            ),
+            gr.Image(
+                visible=True,
+            ),
+        ]
+
+    def on_image_columns_change(columns):
+        return gr.Gallery(columns=columns)
+
+    def on_select_subdir(subdir) -> list:
+        # evt.value is the subdirectory name
+        new_images = outputgallery_filenames(subdir)
+        new_label = f"{len(new_images)} images in {os.path.join(output_dir, subdir)}"
+        return [
+            new_images,
+            gr.Gallery(
+                value=new_images,
+                label=new_label,
+                visible=len(new_images) > 0,
+            ),
+            gr.Image(
+                label=new_label,
+                visible=len(new_images) == 0,
+            ),
+        ]
+
+    def on_open_subdir(subdir):
+        subdir_path = os.path.normpath(os.path.join(output_dir, subdir))
+
+        if os.path.isdir(subdir_path):
+            if sys.platform == "linux":
+                subprocess.run(["xdg-open", subdir_path])
+            elif sys.platform == "darwin":
+                subprocess.run(["open", subdir_path])
+            elif sys.platform == "win32":
+                os.startfile(subdir_path)
+
+    def on_refresh(current_subdir: str) -> list:
+        # get an up-to-date subdirectory list
+        refreshed_subdirs = output_subdirs()
+        # get the images using either the current subdirectory or the most
+        # recent valid one
+        new_subdir = (
+            current_subdir
+            if current_subdir in refreshed_subdirs
+            else refreshed_subdirs[0]
+        )
+        new_images = outputgallery_filenames(new_subdir)
+        new_label = (
+            f"{len(new_images)} images in " f"{os.path.join(output_dir, new_subdir)}"
+        )
+
+        return [
+            gr.Dropdown(
+                choices=refreshed_subdirs,
+                value=new_subdir,
+            ),
+            refreshed_subdirs,
+            new_images,
+            gr.Gallery(value=new_images, label=new_label, visible=len(new_images) > 0),
+            gr.Image(
+                label=new_label,
+                visible=len(new_images) == 0,
+            ),
+        ]
+
+    def on_new_image(subdir, subdir_paths, status) -> list:
+        # prevent error triggered when an image generates before the tab
+        # has even been selected
+        subdir_paths = (
+            subdir_paths
+            if len(subdir_paths) > 0
+            else [get_generated_imgs_todays_subdir()]
+        )
+
+        # only update if the current subdir is the most recent one as
+        # new images only go there
+        if subdir_paths[0] == subdir:
+            new_images = outputgallery_filenames(subdir)
+            new_label = (
+                f"{len(new_images)} images in "
+                f"{os.path.join(output_dir, subdir)} - {status}"
+            )
+
+            return [
+                new_images,
+                gr.Gallery(
+                    value=new_images,
+                    label=new_label,
+                    visible=len(new_images) > 0,
+                ),
+                gr.Image(
+                    label=new_label,
+                    visible=len(new_images) == 0,
+                ),
+            ]
+        else:
+            # otherwise change nothing,
+            # (only untyped gradio gr.update() does this)
+            return [gr.update(), gr.update(), gr.update()]
+
+    def on_select_image(images: list[str], evt: gr.SelectData) -> list:
+        # evt.index is an index into the full list of filenames for
+        # the current subdirectory
+        filename = images[evt.index]
+        params = displayable_metadata(filename)
+
+        if params:
+            if params["source"] == "missing":
+                return [
+                    "Could not find this image file, refresh the gallery and update the images",
+                    [["Status", "File missing"]],
+                ]
+            else:
+                return [
+                    filename,
+                    list(map(list, params["parameters"].items())),
+                ]
+
+        return [
+            filename,
+            [["Status", "No parameters found"]],
+        ]
+
+    def on_outputgallery_filename_change(filename: str) -> list:
+        exists = filename != "None" and os.path.exists(filename)
+        return [
+            # disable or enable each of the sendto button based on whether
+            # an image is selected
+            gr.Button(interactive=exists),
+        ]
+
+    # The first time our tab is selected we need to do an initial refresh
+    # to populate the subdirectory select box and the images from the most
+    # recent subdirectory.
+    #
+    # We do it at this point rather than setting this up in the controls'
+    # definitions as when you refresh the browser you always get what was
+    # *initially* set, which won't include any new subdirectories or images
+    # that might have been created since the application was started. Doing
+    # it this way means a browser refresh/reload always gets the most
+    # up-to-date data.
+    def on_select_tab(subdir_paths, request: gr.Request):
+        local_client = request.headers["host"].startswith(
+            "127.0.0.1:"
+        ) or request.headers["host"].startswith("localhost:")
+
+        if len(subdir_paths) == 0:
+            return on_refresh("") + [gr.update(interactive=local_client)]
+        else:
+            return (
+                # Change nothing, (only untyped gr.update() does this)
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+                gr.update(),
+            )
+
+    # clearing images when we need to completely change what's in the
+    # gallery avoids current images being shown replacing piecemeal and
+    # prevents weirdness and errors if the user selects an image during the
+    # replacement phase.
+    clear_gallery = dict(
+        fn=on_clear_gallery,
+        inputs=None,
+        outputs=[gallery, logo],
+        queue=False,
+    )
+
+    subdirectories.select(**clear_gallery).then(
+        on_select_subdir,
+        [subdirectories],
+        [gallery_files, gallery, logo],
+        queue=False,
+    )
+
+    open_subdir.click(on_open_subdir, inputs=[subdirectories], queue=False)
+
+    refresh.click(**clear_gallery).then(
+        on_refresh,
+        [subdirectories],
+        [subdirectories, subdirectory_paths, gallery_files, gallery, logo],
+        queue=False,
+    )
+
+    image_columns.change(
+        fn=on_image_columns_change,
+        inputs=[image_columns],
+        outputs=[gallery],
+        queue=False,
+    )
+
+    gallery.select(
+        on_select_image,
+        [gallery_files],
+        [outputgallery_filename, image_parameters],
+        queue=False,
+    )
+
+    outputgallery_filename.change(
+        on_outputgallery_filename_change,
+        [outputgallery_filename],
+        [
+            outputgallery_sendto_sd,
+        ],
+        queue=False,
+    )
+
+    # We should have been given the .select function for our tab, so set it up
+    def outputgallery_tab_select(select):
+        select(
+            fn=on_select_tab,
+            inputs=[subdirectory_paths],
+            outputs=[
+                subdirectories,
+                subdirectory_paths,
+                gallery_files,
+                gallery,
+                logo,
+                open_subdir,
+            ],
+            queue=False,
+        )
+
+    # We should have been passed a list of components on other tabs that update
+    # when a new image has generated on that tab, so set things up so the user
+    # will see that new image if they are looking at today's subdirectory
+    def outputgallery_watch(components: gr.Textbox):
+        for component in components:
+            component.change(
+                on_new_image,
+                inputs=[subdirectories, subdirectory_paths, component],
+                outputs=[gallery_files, gallery, logo],
+                queue=False,
+            )
--- a/apps/shark_studio/web/ui/sd.py
+++ b/apps/shark_studio/web/ui/sd.py
@@ -0,0 +1,866 @@
+import os
+import json
+import gradio as gr
+import numpy as np
+from inspect import signature
+from PIL import Image
+from pathlib import Path
+from datetime import datetime as dt
+from gradio.components.image_editor import (
+    EditorValue,
+)
+from apps.shark_studio.web.utils.file_utils import (
+    get_generated_imgs_path,
+    get_checkpoints_path,
+    get_checkpoints,
+    get_configs_path,
+    get_configs,
+    write_default_sd_configs,
+)
+from apps.shark_studio.api.sd import (
+    shark_sd_fn_dict_input,
+    cancel_sd,
+    unload_sd,
+)
+from apps.shark_studio.api.controlnet import (
+    cnet_preview,
+)
+from apps.shark_studio.modules.schedulers import (
+    scheduler_model_map,
+)
+from apps.shark_studio.modules.img_processing import (
+    resampler_list,
+    resize_stencil,
+)
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+from apps.shark_studio.web.ui.utils import (
+    amdlogo_loc,
+    none_to_str_none,
+    str_none_to_none,
+)
+from apps.shark_studio.web.utils.state import (
+    status_label,
+)
+from apps.shark_studio.web.ui.common_events import lora_changed
+from apps.shark_studio.modules import logger
+import apps.shark_studio.web.utils.globals as global_obj
+
+# Disabled some models for demo purposes
+sd_default_models = [
+    # "runwayml/stable-diffusion-v1-5",
+    # "stabilityai/stable-diffusion-2-1-base",
+    # "stabilityai/stable-diffusion-2-1",
+    # "stabilityai/stable-diffusion-xl-base-1.0",
+    # "stabilityai/sdxl-turbo",
+]
+sd_default_models.extend(get_checkpoints(model_type="scripts"))
+
+
+def view_json_file(file_path):
+    content = ""
+    with open(file_path, "r") as fopen:
+        content = fopen.read()
+    return content
+
+
+def submit_to_cnet_config(
+    stencil: str,
+    preprocessed_hint: str,
+    cnet_strength: int,
+    control_mode: str,
+    curr_config: dict,
+):
+    if any(i in [None, ""] for i in [stencil, preprocessed_hint]):
+        return gr.update()
+    if curr_config is not None:
+        if "controlnets" in curr_config:
+            curr_config["controlnets"]["control_mode"] = control_mode
+            curr_config["controlnets"]["model"].append(stencil)
+            curr_config["controlnets"]["hint"].append(preprocessed_hint)
+            curr_config["controlnets"]["strength"].append(cnet_strength)
+            return curr_config
+
+    cnet_map = {}
+    cnet_map["controlnets"] = {
+        "control_mode": control_mode,
+        "model": [stencil],
+        "hint": [preprocessed_hint],
+        "strength": [cnet_strength],
+    }
+    return cnet_map
+
+
+def update_embeddings_json(embedding):
+    return {"embeddings": [embedding]}
+
+
+def submit_to_main_config(input_cfg: dict, main_cfg: dict):
+    if main_cfg in [None, "", {}]:
+        return input_cfg
+
+    for base_key in input_cfg:
+        main_cfg[base_key] = input_cfg[base_key]
+    return main_cfg
+
+
+def pull_sd_configs(
+    prompt,
+    negative_prompt,
+    sd_init_image,
+    height,
+    width,
+    steps,
+    strength,
+    guidance_scale,
+    seed,
+    batch_count,
+    batch_size,
+    scheduler,
+    base_model_id,
+    custom_weights,
+    custom_vae,
+    precision,
+    device,
+    target_triple,
+    ondemand,
+    compiled_pipeline,
+    resample_type,
+    controlnets,
+    embeddings,
+):
+    sd_args = str_none_to_none(locals())
+    sd_cfg = {}
+    for arg in sd_args:
+        if arg in [
+            "prompt",
+            "negative_prompt",
+            "sd_init_image",
+        ]:
+            sd_cfg[arg] = [sd_args[arg]]
+        elif arg in ["controlnets", "embeddings"]:
+            if isinstance(arg, dict):
+                sd_cfg[arg] = json.loads(sd_args[arg])
+            else:
+                sd_cfg[arg] = {}
+        else:
+            sd_cfg[arg] = sd_args[arg]
+
+    return json.dumps(sd_cfg)
+
+
+def load_sd_cfg(sd_json: dict, load_sd_config: str):
+    if os.path.exists(load_sd_config):
+        config = load_sd_config
+    elif os.path.exists(os.path.join(get_configs_path(), load_sd_config)):
+        config = os.path.join(get_configs_path(), load_sd_config)
+    else:
+        print(
+            "Default config not found as absolute path or in configs folder. Using sdxl-turbo as default config."
+        )
+        config = sd_json
+    new_sd_config = none_to_str_none(json.loads(view_json_file(config)))
+    if sd_json:
+        for key in new_sd_config:
+            sd_json[key] = new_sd_config[key]
+    else:
+        sd_json = new_sd_config
+    for i in sd_json["sd_init_image"]:
+        if i is not None:
+            if os.path.isfile(i):
+                sd_image = [Image.open(i, mode="r")]
+    else:
+        sd_image = None
+    if not sd_json["device"]:
+        sd_json["device"] = gr.update()
+
+    return [
+        sd_json["prompt"][0],
+        sd_json["negative_prompt"][0],
+        sd_image,
+        sd_json["height"],
+        sd_json["width"],
+        gr.update(),
+        sd_json["strength"],
+        sd_json["guidance_scale"],
+        sd_json["seed"],
+        sd_json["batch_count"],
+        sd_json["batch_size"],
+        sd_json["scheduler"],
+        sd_json["base_model_id"],
+        sd_json["custom_weights"],
+        sd_json["custom_vae"],
+        sd_json["precision"],
+        sd_json["device"],
+        sd_json["target_triple"],
+        sd_json["ondemand"],
+        sd_json["compiled_pipeline"],
+        sd_json["resample_type"],
+        sd_json["controlnets"],
+        sd_json["embeddings"],
+        sd_json,
+    ]
+
+
+def save_sd_cfg(config: dict, save_name: str):
+    if os.path.exists(save_name):
+        filepath = save_name
+    elif cmd_opts.configs_path:
+        filepath = os.path.join(cmd_opts.configs_path, save_name)
+    else:
+        filepath = os.path.join(get_configs_path(), save_name)
+    if ".json" not in filepath:
+        filepath += ".json"
+    with open(filepath, mode="w") as f:
+        f.write(json.dumps(config))
+    return save_name
+
+
+def create_canvas(width, height):
+    data = Image.fromarray(
+        np.zeros(
+            shape=(height, width, 3),
+            dtype=np.uint8,
+        )
+        + 255
+    )
+    img_dict = {
+        "background": data,
+        "layers": [],
+        "composite": None,
+    }
+    return EditorValue(img_dict)
+
+
+def import_original(original_img, width, height):
+    if original_img is None:
+        resized_img = create_canvas(width, height)
+        return resized_img
+    else:
+        resized_img, _, _ = resize_stencil(original_img, width, height)
+        img_dict = {
+            "background": resized_img,
+            "layers": [],
+            "composite": None,
+        }
+        return EditorValue(img_dict)
+
+
+def base_model_changed(base_model_id):
+    new_choices = get_checkpoints(
+        os.path.join("checkpoints", os.path.basename(str(base_model_id)))
+    ) + get_checkpoints(model_type="checkpoints")
+    if "turbo" in base_model_id:
+        new_steps = gr.Dropdown(
+            value=2,
+            choices=[1, 2],
+            label="\U0001F3C3\U0000FE0F Steps",
+            allow_custom_value=True,
+        )
+    if "stable-diffusion-xl-base-1.0" in base_model_id:
+        new_steps = gr.Dropdown(
+            value=40,
+            choices=[20, 25, 30, 35, 40, 45, 50],
+            label="\U0001F3C3\U0000FE0F Steps",
+            allow_custom_value=True,
+        )
+    elif ".py" in base_model_id:
+        new_steps = gr.Dropdown(
+            value=20,
+            choices=[10, 15, 20],
+            label="\U0001F3C3\U0000FE0F Steps",
+            allow_custom_value=True,
+        )
+    else:
+        new_steps = gr.Dropdown(
+            value=20,
+            choices=[10, 20, 30, 40, 50],
+            label="\U0001F3C3\U0000FE0F Steps",
+            allow_custom_value=True,
+        )
+
+    return [
+        gr.Dropdown(
+            value=new_choices[0] if len(new_choices) > 0 else "None",
+            choices=["None"] + new_choices,
+        ),
+        new_steps,
+    ]
+
+
+init_config = global_obj.get_init_config()
+init_config = none_to_str_none(json.loads(view_json_file(init_config)))
+
+with gr.Blocks(title="Stable Diffusion") as sd_element:
+    with gr.Column(elem_id="ui_body"):
+        with gr.Row():
+            with gr.Column(scale=2, min_width=600):
+                with gr.Group(elem_id="prompt_box_outer"):
+                    prompt = gr.Textbox(
+                        label="\U00002795\U0000FE0F Prompt",
+                        value=init_config["prompt"][0],
+                        lines=4,
+                        elem_id="prompt_box",
+                        show_copy_button=True,
+                    )
+                    negative_prompt = gr.Textbox(
+                        label="\U00002796\U0000FE0F Negative Prompt",
+                        value=init_config["negative_prompt"][0],
+                        lines=4,
+                        elem_id="negative_prompt_box",
+                        show_copy_button=True,
+                    )
+                with gr.Accordion(
+                    label="\U0001F4D0\U0000FE0F Advanced Settings", open=False
+                ):
+                    with gr.Accordion(label="Device Settings", open=False):
+                        device = gr.Dropdown(
+                            elem_id="device",
+                            label="Device",
+                            value=(
+                                init_config["device"]
+                                if init_config["device"]
+                                else "rocm"
+                            ),
+                            choices=global_obj.get_device_list(),
+                            allow_custom_value=True,
+                        )
+                        target_triple = gr.Textbox(
+                            elem_id="target_triple",
+                            label="Architecture",
+                            value=init_config["target_triple"],
+                        )
+                        with gr.Row():
+                            ondemand = gr.Checkbox(
+                                value=init_config["ondemand"],
+                                label="Low VRAM",
+                                interactive=True,
+                                visible=False,
+                            )
+                            precision = gr.Radio(
+                                label="Precision",
+                                value=init_config["precision"],
+                                choices=[
+                                    "fp16",
+                                    "fp32",
+                                ],
+                                visible=False,
+                            )
+                    with gr.Row():
+                        height = gr.Slider(
+                            512,
+                            1024,
+                            value=512,
+                            step=512,
+                            label="\U00002195\U0000FE0F Height",
+                            interactive=False,  # DEMO
+                            visible=False,  # DEMO
+                        )
+                        width = gr.Slider(
+                            512,
+                            1024,
+                            value=512,
+                            step=512,
+                            label="\U00002194\U0000FE0F Width",
+                            interactive=False,  # DEMO
+                            visible=False,  # DEMO
+                        )
+
+                    with gr.Accordion(
+                        label="\U0001F9EA\U0000FE0F Input Image Processing",
+                        open=False,
+                        visible=False,
+                    ):
+                        strength = gr.Slider(
+                            0,
+                            1,
+                            value=init_config["strength"],
+                            step=0.01,
+                            label="Denoising Strength",
+                        )
+                        resample_type = gr.Dropdown(
+                            value=init_config["resample_type"],
+                            choices=resampler_list,
+                            label="Resample Type",
+                            allow_custom_value=True,
+                        )
+                    with gr.Row():
+                        sd_model_info = (
+                            f"Checkpoint Path: {str(get_checkpoints_path())}"
+                        )
+                        base_model_id = gr.Dropdown(
+                            label="\U000026F0\U0000FE0F Base Model",
+                            info="Select or enter HF model ID",
+                            elem_id="custom_model",
+                            value=init_config["base_model_id"],
+                            choices=sd_default_models,
+                            allow_custom_value=True,
+                        )  # base_model_id
+                    with gr.Row(equal_height=True):
+                        seed = gr.Textbox(
+                            value=init_config["seed"],
+                            label="\U0001F331\U0000FE0F Seed",
+                            info="An integer, -1 for random",
+                            show_copy_button=True,
+                        )
+                        scheduler = gr.Dropdown(
+                            elem_id="scheduler",
+                            label="\U0001F4C5\U0000FE0F Scheduler",
+                            info="\U000E0020",  # forces same height as seed
+                            value=init_config["scheduler"],
+                            choices=scheduler_model_map.keys(),
+                            allow_custom_value=False,
+                            visible=False,
+                        )
+                    with gr.Row():
+                        steps = gr.Dropdown(
+                            value=20,
+                            choices=[10, 15, 20],
+                            label="\U0001F3C3\U0000FE0F Steps",
+                            allow_custom_value=True,
+                        )
+                        guidance_scale = gr.Slider(
+                            0,
+                            5,  # DEMO
+                            value=4,
+                            step=0.1,
+                            label="\U0001F5C3\U0000FE0F CFG Scale",
+                            visible=False,
+                        )
+                    with gr.Row():
+                        batch_count = gr.Slider(
+                            1,
+                            100,
+                            value=init_config["batch_count"],
+                            step=1,
+                            label="Batch Count",
+                            interactive=True,
+                            visible=False,
+                        )
+                        batch_size = gr.Slider(
+                            1,
+                            4,
+                            value=init_config["batch_size"],
+                            step=1,
+                            label="Batch Size",
+                            interactive=False,  # DEMO
+                            visible=False,
+                        )
+                        compiled_pipeline = gr.Checkbox(
+                            value=init_config["compiled_pipeline"],
+                            label="Faster txt2img (SDXL only)",
+                            visible=False,  # DEMO
+                        )
+                    with gr.Row(elem_classes=["fill"], visible=False):
+                        Path(get_configs_path()).mkdir(parents=True, exist_ok=True)
+                        write_default_sd_configs(get_configs_path())
+                        default_config_file = global_obj.get_init_config()
+                        sd_json = gr.JSON(
+                            elem_classes=["fill"],
+                            value=view_json_file(default_config_file),
+                        )
+                    with gr.Row(visible=False):
+                        with gr.Row():
+                            load_sd_config = gr.Dropdown(
+                                label="Load Config",
+                                value=cmd_opts.defaults,
+                                choices=get_configs(),
+                                allow_custom_value=True,
+                                visible=False,
+                            )
+                        with gr.Row():
+                            save_sd_config = gr.Button(value="Save Config", size="sm")
+                            clear_sd_config = gr.ClearButton(
+                                value="Clear Config",
+                                size="sm",
+                                components=sd_json,
+                            )
+                            # with gr.Row():
+                            sd_config_name = gr.Textbox(
+                                value="Config Name",
+                                info="Name of the file this config will be saved to.",
+                                interactive=True,
+                                show_label=False,
+                            )
+                with gr.Accordion(
+                    label="\U00002696\U0000FE0F Model Weights",
+                    open=False,
+                    visible=False,  # DEMO
+                ):
+                    with gr.Column():
+                        custom_weights = gr.Dropdown(
+                            label="Checkpoint Weights",
+                            info="Select or enter HF model ID",
+                            elem_id="custom_model",
+                            value=init_config["custom_weights"],
+                            allow_custom_value=True,
+                            choices=["None"]
+                            + get_checkpoints(os.path.basename(str(base_model_id))),
+                        )  # custom_weights
+                        sd_vae_info = (str(get_checkpoints_path("vae"))).replace(
+                            "\\", "\n\\"
+                        )
+                        sd_vae_info = f"VAE Path: {sd_vae_info}"
+                        custom_vae = gr.Dropdown(
+                            label=f"VAE Model",
+                            info=sd_vae_info,
+                            elem_id="custom_model",
+                            value=init_config["custom_vae"],
+                            choices=["None"] + get_checkpoints("vae"),
+                            allow_custom_value=True,
+                            scale=1,
+                        )
+                        sd_lora_info = (str(get_checkpoints_path("loras"))).replace(
+                            "\\", "\n\\"
+                        )
+                        lora_opt = gr.Dropdown(
+                            allow_custom_value=True,
+                            label=f"Standalone LoRA Weights",
+                            info=sd_lora_info,
+                            elem_id="lora_weights",
+                            value=(
+                                init_config["embeddings"][0]
+                                if (len(init_config["embeddings"].keys()) > 1)
+                                else "None"
+                            ),
+                            multiselect=True,
+                            choices=[] + get_checkpoints("lora"),
+                            scale=2,
+                        )
+                        lora_tags = gr.HTML(
+                            value="<div><i>No LoRA selected</i></div>",
+                            elem_classes="lora-tags",
+                        )
+                        embeddings_config = gr.JSON(
+                            label="Embeddings Options", min_width=50, scale=1
+                        )
+                        gr.on(
+                            triggers=[lora_opt.change],
+                            fn=lora_changed,
+                            inputs=[lora_opt],
+                            outputs=[lora_tags],
+                            queue=True,
+                            show_progress=False,
+                        ).then(
+                            fn=update_embeddings_json,
+                            inputs=[lora_opt],
+                            outputs=[embeddings_config],
+                            show_progress=False,
+                        )
+                with gr.Accordion(
+                    label="Controlnet Options",
+                    open=False,
+                    visible=False,
+                ):
+                    preprocessed_hints = gr.State([])
+                    with gr.Column():
+                        sd_cnet_info = (
+                            str(get_checkpoints_path("controlnet"))
+                        ).replace("\\", "\n\\")
+                    with gr.Row():
+                        cnet_config = gr.JSON()
+                        with gr.Column():
+                            clear_config = gr.ClearButton(
+                                value="Clear Controlnet Config",
+                                size="sm",
+                                components=cnet_config,
+                            )
+                            control_mode = gr.Radio(
+                                choices=["Prompt", "Balanced", "Controlnet"],
+                                value="Balanced",
+                                label="Control Mode",
+                            )
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            cnet_model = gr.Dropdown(
+                                allow_custom_value=True,
+                                label=f"Controlnet Model",
+                                info=sd_cnet_info,
+                                value="None",
+                                choices=[
+                                    "None",
+                                    "canny",
+                                    "openpose",
+                                    "scribble",
+                                    "zoedepth",
+                                ]
+                                + get_checkpoints("controlnet"),
+                            )
+                            cnet_strength = gr.Slider(
+                                label="Controlnet Strength",
+                                minimum=0,
+                                maximum=100,
+                                value=50,
+                                step=1,
+                            )
+                            with gr.Row():
+                                canvas_width = gr.Slider(
+                                    label="Canvas Width",
+                                    minimum=512,
+                                    maximum=1024,
+                                    value=512,
+                                    step=512,
+                                )
+                                canvas_height = gr.Slider(
+                                    label="Canvas Height",
+                                    minimum=512,
+                                    maximum=1024,
+                                    value=512,
+                                    step=512,
+                                )
+                            make_canvas = gr.Button(
+                                value="Make Canvas!",
+                            )
+                            use_input_img = gr.Button(
+                                value="Use Original Image",
+                                size="sm",
+                            )
+                        cnet_input = gr.Image(
+                            value=None,
+                            type="pil",
+                            image_mode="RGB",
+                            interactive=True,
+                        )
+                        with gr.Column(scale=1):
+                            cnet_output = gr.Image(
+                                value=None,
+                                visible=True,
+                                label="Preprocessed Hint",
+                                interactive=False,
+                                show_label=True,
+                            )
+                            cnet_gen = gr.Button(
+                                value="Preprocess controlnet input",
+                            )
+                            use_result = gr.Button(
+                                "Submit",
+                                size="sm",
+                            )
+                        make_canvas.click(
+                            fn=create_canvas,
+                            inputs=[canvas_width, canvas_height],
+                            outputs=[cnet_input],
+                            queue=False,
+                        )
+                        cnet_gen.click(
+                            fn=cnet_preview,
+                            inputs=[
+                                cnet_model,
+                                cnet_input,
+                            ],
+                            outputs=[
+                                cnet_output,
+                                preprocessed_hints,
+                            ],
+                        )
+                        use_result.click(
+                            fn=submit_to_cnet_config,
+                            inputs=[
+                                cnet_model,
+                                cnet_output,
+                                cnet_strength,
+                                control_mode,
+                                cnet_config,
+                            ],
+                            outputs=[
+                                cnet_config,
+                            ],
+                            queue=False,
+                        )
+            with gr.Column(scale=3, min_width=600):
+                with gr.Tabs() as sd_tabs:
+                    sd_element.load(
+                        # Workaround for Gradio issue #7085
+                        # TODO: revert to setting selected= in gr.Tabs declaration
+                        # once this is resolved in Gradio
+                        lambda: gr.Tabs(selected=101),
+                        outputs=[sd_tabs],
+                    )
+                    with gr.Tab(
+                        label="Input Image", id=100, visible=False
+                    ) as sd_tab_init_image:  # DEMO
+                        with gr.Column(elem_classes=["sd-right-panel"]):
+                            with gr.Row(elem_classes=["fill"]):
+                                # TODO: make this import image prompt info if it exists
+                                sd_init_image = gr.Image(
+                                    type="pil",
+                                    interactive=True,
+                                    show_label=False,
+                                )
+                                use_input_img.click(
+                                    fn=import_original,
+                                    inputs=[
+                                        sd_init_image,
+                                        canvas_width,
+                                        canvas_height,
+                                    ],
+                                    outputs=[cnet_input],
+                                    queue=False,
+                                )
+                    with gr.Tab(label="Generate Images", id=101) as sd_tab_gallery:
+                        with gr.Column(elem_classes=["sd-right-panel"]):
+                            with gr.Row(elem_classes=["fill"]):
+                                sd_gallery = gr.Gallery(
+                                    label="Generated images",
+                                    show_label=False,
+                                    elem_id="gallery",
+                                    columns=2,
+                                    object_fit="fit",
+                                    preview=True,
+                                )
+                            with gr.Row():
+                                stable_diffusion = gr.Button("Start")
+                                unload = gr.Button("Unload Models")
+                                unload.click(
+                                    fn=unload_sd,
+                                    queue=False,
+                                    show_progress=False,
+                                )
+                                stop_batch = gr.Button("Stop", visible=False)
+                    # with gr.Tab(label="Config", id=102) as sd_tab_config:
+                    #     with gr.Group():#elem_classes=["sd-right-panel"]):
+                    #         with gr.Row(elem_classes=["fill"], visible=False):
+                    #             Path(get_configs_path()).mkdir(
+                    #                 parents=True, exist_ok=True
+                    #             )
+                    #             write_default_sd_configs(get_configs_path())
+                    #             default_config_file = global_obj.get_init_config()
+                    #             sd_json = gr.JSON(
+                    #                 elem_classes=["fill"],
+                    #                 value=view_json_file(default_config_file),
+                    #             )
+                    #         with gr.Row():
+                    #             with gr.Row():
+                    #                 load_sd_config = gr.Dropdown(
+                    #                     label="Load Config",
+                    #                     value=cmd_opts.defaults,
+                    #                     choices=get_configs(),
+                    #                     allow_custom_value=True,
+                    #                 )
+                    #             with gr.Row():
+                    #                 save_sd_config = gr.Button(
+                    #                     value="Save Config", size="sm"
+                    #                 )
+                    #                 clear_sd_config = gr.ClearButton(
+                    #                     value="Clear Config",
+                    #                     size="sm",
+                    #                     components=sd_json,
+                    #                 )
+                    #                 # with gr.Row():
+                    #                 sd_config_name = gr.Textbox(
+                    #                     value="Config Name",
+                    #                     info="Name of the file this config will be saved to.",
+                    #                     interactive=True,
+                    #                     show_label=False,
+                    #                 )
+                    with gr.Tab(label="Log", id=103, visible=False) as sd_tab_log:
+                        with gr.Row():
+                            std_output = gr.Textbox(
+                                value=f"{sd_model_info}\n"
+                                f"Images will be saved at "
+                                f"{get_generated_imgs_path()}",
+                                lines=2,
+                                elem_id="std_output",
+                                show_label=True,
+                                label="Log",
+                                show_copy_button=True,
+                            )
+                            sd_element.load(
+                                logger.read_sd_logs, None, std_output, every=1
+                            )
+                            sd_status = gr.Textbox(visible=False)
+    base_model_id.change(
+        fn=base_model_changed,
+        inputs=[base_model_id],
+        outputs=[custom_weights, steps],
+    )
+    load_sd_config.change(
+        fn=load_sd_cfg,
+        inputs=[sd_json, load_sd_config],
+        outputs=[
+            prompt,
+            negative_prompt,
+            sd_init_image,
+            height,
+            width,
+            steps,
+            strength,
+            guidance_scale,
+            seed,
+            batch_count,
+            batch_size,
+            scheduler,
+            base_model_id,
+            custom_weights,
+            custom_vae,
+            precision,
+            device,
+            target_triple,
+            ondemand,
+            compiled_pipeline,
+            resample_type,
+            cnet_config,
+            embeddings_config,
+            sd_json,
+        ],
+    )
+    save_sd_config.click(
+        fn=save_sd_cfg,
+        inputs=[sd_json, sd_config_name],
+        outputs=[sd_config_name],
+    )
+    pull_kwargs = dict(
+        fn=pull_sd_configs,
+        inputs=[
+            prompt,
+            negative_prompt,
+            sd_init_image,
+            height,
+            width,
+            steps,
+            strength,
+            guidance_scale,
+            seed,
+            batch_count,
+            batch_size,
+            scheduler,
+            base_model_id,
+            custom_weights,
+            custom_vae,
+            precision,
+            device,
+            target_triple,
+            ondemand,
+            compiled_pipeline,
+            resample_type,
+            cnet_config,
+            embeddings_config,
+        ],
+        outputs=[
+            sd_json,
+        ],
+    )
+
+    status_kwargs = dict(
+        fn=lambda bc, bs: status_label("Stable Diffusion", 0, bc, bs),
+        inputs=[batch_count, batch_size],
+        outputs=sd_status,
+    )
+
+    gen_kwargs = dict(
+        fn=shark_sd_fn_dict_input,
+        inputs=[sd_json],
+        outputs=[
+            sd_gallery,
+            sd_status,
+        ],
+    )
+
+    prompt_submit = prompt.submit(**status_kwargs).then(**pull_kwargs)
+    neg_prompt_submit = negative_prompt.submit(**status_kwargs).then(**pull_kwargs)
+    generate_click = (
+        stable_diffusion.click(**status_kwargs).then(**pull_kwargs).then(**gen_kwargs)
+    )
+    stop_batch.click(
+        fn=cancel_sd,
+        cancels=[prompt_submit, neg_prompt_submit, generate_click],
+    )
--- a/apps/shark_studio/web/ui/utils.py
+++ b/apps/shark_studio/web/ui/utils.py
@@ -0,0 +1,43 @@
+from enum import IntEnum
+import math
+import sys
+import os
+
+
+def resource_path(relative_path):
+    """Join ``relative_path`` onto the application's resource root.
+
+    The root is ``sys._MEIPASS`` when that attribute exists (PyInstaller
+    sets it inside a bundle); otherwise it is the directory holding this file.
+    """
+    module_dir = os.path.dirname(os.path.abspath(__file__))
+    root = getattr(sys, "_MEIPASS", module_dir)
+    return os.path.join(root, relative_path)
+
+
+# Logo image locations, resolved through resource_path once at import time.
+amdlogo_loc = resource_path("logos/amd-logo.jpg")
+amdicon_loc = resource_path("logos/amd-icon.jpg")
+
+
+class HSLHue(IntEnum):
+    """Named integer hue constants, spaced 60 apart from 0 to 300.
+
+    NOTE(review): presumably degrees on the HSL colour wheel, intended as the
+    ``start``/``end`` arguments of ``hsl_color`` -- confirm against callers.
+    """
+    RED = 0
+    YELLOW = 60
+    GREEN = 120
+    CYAN = 180
+    BLUE = 240
+    MAGENTA = 300
+
+
+def hsl_color(alpha: float, start, end):
+    """Interpolate between two hue values and format the result as CSS HSL.
+
+    ``alpha`` is the interpolation fraction; values at or below zero are
+    treated as zero, so the result is ``start``. Values above one are not
+    clamped. The hue is floored to an integer and combined with a fixed 80%
+    saturation and 35% lightness.
+    """
+    fraction = alpha if alpha > 0 else 0
+    hue = (end - start) * fraction + start
+    return f"hsl({math.floor(hue)}, 80%, 35%)"
+
+
+def none_to_str_none(props: dict):
+    """Replace every ``None`` value in ``props`` with the string ``"None"``.
+
+    The dict is modified in place and also returned. Identity (``is None``)
+    is used rather than ``==`` so that values with a custom ``__eq__`` are
+    never mistaken for ``None``.
+    """
+    for key, value in props.items():
+        if value is None:
+            props[key] = "None"
+    return props
+
+
+def str_none_to_none(props: dict):
+    """Replace every value equal to the string ``"None"`` with ``None``.
+
+    The dict is modified in place and also returned.
+    """
+    for key, value in props.items():
+        if value == "None":
+            props[key] = None
+    return props
--- a/apps/shark_studio/web/utils/init.py
+++ b/apps/shark_studio/web/utils/init.py
--- a/apps/shark_studio/web/utils/default_configs.py
+++ b/apps/shark_studio/web/utils/default_configs.py
@@ -0,0 +1,95 @@
+default_sd_config = r"""{
+  "prompt": [
+    "a photo taken of the front of a super-car drifting on a road near mountains at high speeds with smoke coming off the tires, front angle, front point of view, trees in the mountains of the background, ((sharp focus))"
+  ],
+  "negative_prompt": [
+    "watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
+  ],
+  "sd_init_image": [null],
+  "height": 512,
+  "width": 512,
+  "steps": 50,
+  "strength": 0.8,
+  "guidance_scale": 7.5,
+  "seed": "-1",
+  "batch_count": 1,
+  "batch_size": 1,
+  "scheduler": "EulerDiscrete",
+  "base_model_id": "stabilityai/stable-diffusion-2-1-base",
+  "custom_weights": null,
+  "custom_vae": null,
+  "precision": "fp16",
+  "device": "",
+  "target_triple": "",
+  "ondemand": false,
+  "compiled_pipeline": false,
+  "resample_type": "Nearest Neighbor",
+  "controlnets": {},
+  "embeddings": {}
+}"""
+
+sdxl_30steps = r"""{
+  "prompt": [
+    "a cat under the snow with blue eyes, covered by snow, cinematic style, medium shot, professional photo, animal"
+  ],
+  "negative_prompt": [
+    "watermark, signature, logo, text, lowres, ((monochrome, grayscale)), blurry, ugly, blur, oversaturated, cropped"
+  ],
+  "sd_init_image": [null],
+  "height": 1024,
+  "width": 1024,
+  "steps": 30,
+  "strength": 0.8,
+  "guidance_scale": 7.5,
+  "seed": "-1",
+  "batch_count": 1,
+  "batch_size": 1,
+  "scheduler": "EulerDiscrete",
+  "base_model_id": "stabilityai/stable-diffusion-xl-base-1.0",
+  "custom_weights": null,
+  "custom_vae": null,
+  "precision": "fp16",
+  "device": "",
+  "target_triple": "",
+  "ondemand": false,
+  "compiled_pipeline": true,
+  "resample_type": "Nearest Neighbor",
+  "controlnets": {},
+  "embeddings": {}
+}"""
+
+sdxl_turbo = r"""{
+  "prompt": [
+    "A cat wearing a hat that says 'TURBO' on it. The cat is sitting on a skateboard."
+  ],
+  "negative_prompt": [
+    ""
+  ],
+  "sd_init_image": [null],
+  "height": 512,
+  "width": 512,
+  "steps": 2,
+  "strength": 0.8,
+  "guidance_scale": 0,
+  "seed": "-1",
+  "batch_count": 1,
+  "batch_size": 1,
+  "scheduler": "EulerAncestralDiscrete",
+  "base_model_id": "stabilityai/sdxl-turbo",
+  "custom_weights": null,
+  "custom_vae": null,
+  "precision": "fp16",
+  "device": "",
+  "target_triple": "",
+  "ondemand": false,
+  "compiled_pipeline": true,
+  "resample_type": "Nearest Neighbor",
+  "controlnets": {},
+  "embeddings": {}
+}"""
+
+# Maps a config file name to the JSON text stored under that name.
+# Only the sdxl-turbo entry is active; the other two are commented out.
+default_sd_configs = {
+    # "default_sd_config.json": sdxl_turbo,
+    # "sdxl-30steps.json": sdxl_30steps,
+    "sdxl-turbo.json": sdxl_turbo,
+}
--- a/apps/shark_studio/web/utils/file_utils.py
+++ b/apps/shark_studio/web/utils/file_utils.py
@@ -0,0 +1,115 @@
+import os
+import sys
+import glob
+from datetime import datetime as dt
+from pathlib import Path
+
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+# Glob patterns for checkpoint files; get_checkpoints starts from this tuple.
+checkpoints_filetypes = (
+    "*.ckpt",
+    "*.safetensors",
+)
+
+from apps.shark_studio.web.utils.default_configs import default_sd_configs
+
+
+def write_default_sd_configs(path):
+    """Write each entry of ``default_sd_configs`` into the folder ``path``.
+
+    Each key is used as a file name and its value as the file contents.
+    Files that already exist are left untouched.
+    """
+    for filename, contents in default_sd_configs.items():
+        target = os.path.join(path, filename)
+        if os.path.exists(target):
+            continue
+        with open(target, "w") as f:
+            f.write(contents)
+
+
+def safe_name(name):
+    """Return the part of ``name`` after the last ``/`` with ``-`` replaced by ``_``."""
+    tail = name.rsplit("/", 1)[-1]
+    return tail.replace("-", "_")
+
+
+def get_path_stem(path):
+    """Return the final path component of ``path`` without its last suffix."""
+    return Path(path).stem
+
+
+def get_resource_path(path):
+    """Get absolute path to resource, works for dev and for PyInstaller.
+
+    An absolute ``path`` is returned unchanged (same object, same type).
+    A relative ``path`` is joined onto ``sys._MEIPASS`` when that attribute
+    exists, otherwise onto this file's directory, and returned as a resolved
+    ``Path`` (the target does not have to exist).
+    """
+    if not os.path.isabs(path):
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        root = getattr(sys, "_MEIPASS", module_dir)
+        return Path(os.path.join(root, path)).resolve(strict=False)
+    return path
+
+
+def get_configs_path() -> Path:
+    """Return the configs folder as a ``Path``, creating it when missing.
+
+    The location is ``cmd_opts.config_dir`` passed through
+    ``get_resource_path``.
+    """
+    configs = get_resource_path(cmd_opts.config_dir)
+    if not os.path.exists(configs):
+        # makedirs creates missing parent folders too, and exist_ok tolerates
+        # another caller creating the folder between the check and this call.
+        os.makedirs(configs, exist_ok=True)
+    return Path(configs)
+
+
+def get_generated_imgs_path() -> Path:
+    """Return the output folder as a ``Path``, creating it when missing.
+
+    The location is ``cmd_opts.output_dir`` passed through
+    ``get_resource_path``.
+    """
+    outputs = get_resource_path(cmd_opts.output_dir)
+    if not os.path.exists(outputs):
+        # makedirs creates missing parent folders too, and exist_ok tolerates
+        # another caller creating the folder between the check and this call.
+        os.makedirs(outputs, exist_ok=True)
+    return Path(outputs)
+
+
+def get_tmp_path() -> Path:
+    """Return ``cmd_opts.model_dir`` as a ``Path``, creating it when missing.
+
+    The location is passed through ``get_resource_path``.
+    NOTE(review): despite the name, this resolves the model directory rather
+    than a dedicated temporary folder -- confirm that is intended.
+    """
+    tmpdir = get_resource_path(cmd_opts.model_dir)
+    if not os.path.exists(tmpdir):
+        # makedirs creates missing parent folders too, and exist_ok tolerates
+        # another caller creating the folder between the check and this call.
+        os.makedirs(tmpdir, exist_ok=True)
+    return Path(tmpdir)
+
+
+def get_generated_imgs_todays_subdir() -> str:
+    """Return the current local date formatted as ``YYYYMMDD``."""
+    today = dt.now()
+    return today.strftime("%Y%m%d")
+
+
+def create_model_folders():
+    """Ensure ``cmd_opts.model_dir`` and its standard sub-folders exist.
+
+    If the model directory is missing and cannot be created, the process
+    exits with an error message. Afterwards the ``checkpoints``, ``vae``,
+    ``lora`` and ``vmfb`` folders are created under the path returned by
+    ``get_checkpoints_path``.
+    """
+    model_dir_present = os.path.isdir(cmd_opts.model_dir)
+    if not model_dir_present:
+        try:
+            os.makedirs(cmd_opts.model_dir)
+        except OSError:
+            sys.exit(
+                f"Invalid --model_dir argument, "
+                f"{cmd_opts.model_dir} folder does not exist, and cannot be created."
+            )
+
+    for subfolder in ("checkpoints", "vae", "lora", "vmfb"):
+        target = Path(get_checkpoints_path(subfolder))
+        target.mkdir(parents=True, exist_ok=True)
+
+
+def get_checkpoints_path(model_type=""):
+    """Return the resource path of ``model_type`` inside ``cmd_opts.model_dir``.
+
+    With the default empty ``model_type`` this is the model directory itself.
+    """
+    relative = os.path.join(cmd_opts.model_dir, model_type)
+    return get_resource_path(relative)
+
+
+def get_checkpoints(model_type="checkpoints"):
+    """List the file names in a model folder that match the known patterns.
+
+    The folder is ``get_checkpoints_path(model_type)``. The patterns are
+    ``checkpoints_filetypes`` by default, with ``*.pt`` and ``*.bin`` added
+    for ``"lora"``, and only ``shark_*.py`` for ``"scripts"``.
+
+    Returns the base names sorted case-insensitively.
+    """
+    if model_type == "scripts":
+        file_types = ("shark_*.py",)
+    elif model_type == "lora":
+        file_types = checkpoints_filetypes + ("*.pt", "*.bin")
+    else:
+        file_types = checkpoints_filetypes
+
+    folder = get_checkpoints_path(model_type)
+    ckpt_files = []
+    for extn in file_types:
+        # Collect inside the loop: gathering only after the loop would keep
+        # just the matches of the last pattern.
+        ckpt_files.extend(
+            os.path.basename(x) for x in glob.glob(os.path.join(folder, extn))
+        )
+    return sorted(ckpt_files, key=str.casefold)
+
+
+def get_configs():
+    """Return the ``*.json`` file names in the configs folder, sorted case-insensitively."""
+    pattern = os.path.join(get_configs_path(), "*.json")
+    names = [os.path.basename(match) for match in glob.glob(pattern)]
+    names.sort(key=str.casefold)
+    return names
+
+
+def get_checkpoint_pathfile(checkpoint_name, model_type="checkpoints"):
+    """Join ``checkpoint_name`` onto the folder returned by ``get_checkpoints_path(model_type)``."""
+    folder = get_checkpoints_path(model_type)
+    return os.path.join(folder, checkpoint_name)
--- a/apps/shark_studio/web/utils/globals.py
+++ b/apps/shark_studio/web/utils/globals.py
@@ -0,0 +1,158 @@
+import gc
+from ...api.utils import get_available_devices
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+import os
+from apps.shark_studio.web.utils.file_utils import get_configs_path
+
+"""
+The global objects include SD pipeline and config.
+Maintaining the global objects would avoid creating extra pipeline objects when switching modes.
+Also we could avoid memory leak when switching models by clearing the cache.
+"""
+
+
+def view_json_file(file_path):
+    """Return the full text of the file at ``file_path`` (the text is not parsed)."""
+    with open(file_path, "r") as handle:
+        return handle.read()
+
+
+def _init():
+    """Bind every module-level slot to ``None``, then populate the device list."""
+    global _sd_obj, _llm_obj, _devices
+    global _pipe_kwargs, _prep_kwargs, _gen_kwargs, _schedulers
+    _sd_obj = _llm_obj = _devices = None
+    _pipe_kwargs = _prep_kwargs = _gen_kwargs = None
+    _schedulers = None
+    set_devices()
+
+
+def set_sd_obj(value):
+    """Store ``value`` as the SD object and clear the LLM object."""
+    global _sd_obj, _llm_obj
+    _llm_obj, _sd_obj = None, value
+
+
+def set_llm_obj(value):
+    """Store ``value`` as the LLM object and clear the SD object."""
+    global _sd_obj, _llm_obj
+    _llm_obj, _sd_obj = value, None
+
+
+def set_devices():
+    """Refresh the stored device list from ``get_available_devices()``."""
+    global _devices
+    detected = get_available_devices()
+    _devices = detected
+
+
+def set_sd_scheduler(key):
+    """Set the SD object's ``scheduler`` attribute to the stored scheduler under ``key``."""
+    chosen = _schedulers[key]
+    _sd_obj.scheduler = chosen
+
+
+def set_sd_status(value):
+    """Set the SD object's ``status`` attribute to ``value``."""
+    setattr(_sd_obj, "status", value)
+
+
+def set_pipe_kwargs(value):
+    """Store ``value`` in the module-level pipe-kwargs slot."""
+    global _pipe_kwargs
+    _pipe_kwargs = value
+
+
+def set_prep_kwargs(value):
+    """Store ``value`` in the module-level prep-kwargs slot."""
+    global _prep_kwargs
+    _prep_kwargs = value
+
+
+def set_gen_kwargs(value):
+    """Store ``value`` in the module-level gen-kwargs slot."""
+    global _gen_kwargs
+    _gen_kwargs = value
+
+
+def set_schedulers(value):
+    """Store ``value`` in the module-level schedulers slot (indexed by key in ``get_scheduler``)."""
+    global _schedulers
+    _schedulers = value
+
+
+def get_sd_obj():
+    """Return the stored SD object."""
+    return _sd_obj
+
+
+def get_llm_obj():
+    """Return the stored LLM object."""
+    return _llm_obj
+
+
+def get_device_list():
+    """Return the device list stored by ``set_devices``."""
+    return _devices
+
+
+def get_init_config():
+    """Pick the initial config file path, store it in ``_init_config`` and return it.
+
+    Candidates are tried in order: ``cmd_opts.defaults`` as given, then that
+    name inside the configs folder, then ``sdxl-turbo.json`` in the configs
+    folder (after printing a notice). The last fallback is not checked for
+    existence.
+    """
+    global _init_config
+    if os.path.exists(cmd_opts.defaults):
+        _init_config = cmd_opts.defaults
+        return _init_config
+
+    in_configs_folder = os.path.join(get_configs_path(), cmd_opts.defaults)
+    if os.path.exists(in_configs_folder):
+        _init_config = in_configs_folder
+    else:
+        print(
+            "Default config not found as absolute path or in configs folder. Using sdxl-turbo as default config."
+        )
+        _init_config = os.path.join(get_configs_path(), "sdxl-turbo.json")
+    return _init_config
+
+
+def get_sd_status():
+    """Return the ``status`` attribute of the stored SD object."""
+    return _sd_obj.status
+
+
+def get_pipe_kwargs():
+    """Return the value stored by ``set_pipe_kwargs``."""
+    return _pipe_kwargs
+
+
+def get_prep_kwargs():
+    """Return the value stored by ``set_prep_kwargs``."""
+    return _prep_kwargs
+
+
+def get_gen_kwargs():
+    """Return the value stored by ``set_gen_kwargs``."""
+    return _gen_kwargs
+
+
+def get_scheduler(key):
+    """Return the entry stored under ``key`` in the schedulers slot."""
+    return _schedulers[key]
+
+
+def clear_cache():
+    """Release the SD/LLM objects and schedulers, then reset the slots to ``None``.
+
+    The three names are unbound before ``gc.collect()`` runs and rebound to
+    ``None`` afterwards, together with the three kwargs slots. ``del`` raises
+    ``NameError`` if one of those names was never bound.
+    """
+    global _sd_obj, _llm_obj
+    global _pipe_kwargs, _prep_kwargs, _gen_kwargs, _schedulers
+    del _sd_obj, _llm_obj, _schedulers
+    gc.collect()
+    _sd_obj = _llm_obj = None
+    _pipe_kwargs = _prep_kwargs = _gen_kwargs = None
+    _schedulers = None
--- a/apps/shark_studio/web/utils/metadata/init.py
+++ b/apps/shark_studio/web/utils/metadata/init.py
@@ -0,0 +1,6 @@
+from .png_metadata import (
+    import_png_metadata,
+)
+from .display import (
+    displayable_metadata,
+)
--- a/apps/shark_studio/web/utils/metadata/csv_metadata.py
+++ b/apps/shark_studio/web/utils/metadata/csv_metadata.py
@@ -0,0 +1,43 @@
+import csv
+import os
+from .format import humanize, humanizable
+
+
+def csv_path(image_filename: str):
+    """Return the path of ``imgs_details.csv`` in the same folder as ``image_filename``."""
+    folder = os.path.dirname(image_filename)
+    return os.path.join(folder, "imgs_details.csv")
+
+
+def has_csv(image_filename: str) -> bool:
+    """Report whether the details CSV located by ``csv_path`` exists for this image."""
+    candidate = csv_path(image_filename)
+    return os.path.exists(candidate)
+
+
+def matching_filename(image_filename: str, row):
+    """Report whether ``row`` refers to the image ``image_filename``.
+
+    The base name of ``image_filename`` is looked up with ``in`` inside the
+    row's recorded output value, so for string values it is a substring test.
+    """
+    wanted = os.path.basename(image_filename)
+    # For a list row we assume the final column holds the original filename
+    # with its full path. Otherwise we assume a dict and take the value of the
+    # OUTPUT key.
+    if isinstance(row, list):
+        recorded = row[-1]
+    else:
+        recorded = row["OUTPUT"]
+    return wanted in recorded
+
+
+def parse_csv(image_filename: str):
+    """Look up the row for ``image_filename`` in its folder's details CSV.
+
+    Returns the humanized form of the first matching row, or ``{}`` when no
+    row matches.
+    """
+    with open(csv_path(image_filename), "r", newline="") as csv_file:
+        # Sniff the first 2048 characters to decide whether the file has a
+        # header row, then rewind. With headers we read dict rows, which means
+        # less guessing of the format; without them we read plain list rows.
+        has_header = csv.Sniffer().has_header(csv_file.read(2048))
+        csv_file.seek(0)
+
+        if has_header:
+            reader = csv.DictReader(csv_file)
+        else:
+            reader = csv.reader(csv_file)
+
+        matches = []
+        for row in reader:
+            if not row:
+                continue
+            # Header-less rows are only usable when humanizable() accepts them.
+            if not has_header and not humanizable(row):
+                continue
+            if matching_filename(image_filename, row):
+                # humanize works out the parsing of the individual .csv row
+                matches.append(humanize(row))
+
+    return matches[0] if matches else {}
--- a/apps/shark_studio/web/utils/metadata/display.py
+++ b/apps/shark_studio/web/utils/metadata/display.py
@@ -0,0 +1,53 @@
+import json
+import os
+from PIL import Image
+from .png_metadata import parse_generation_parameters
+from .exif_metadata import has_exif, parse_exif
+from .csv_metadata import has_csv, parse_csv
+from .format import compact, humanize
+
+
+def displayable_metadata(image_filename: str) -> dict:
+    """Collect displayable metadata for an image from the best available source.
+
+    Sources are tried in priority order: the PNG ``parameters`` text, a
+    ``.json`` file with the same base name, the details CSV found by
+    ``has_csv``/``parse_csv``, and finally EXIF data. The first that applies
+    wins.
+
+    Returns a dict with a ``"source"`` key (``"missing"``, ``"png"``,
+    ``"json"``, ``"csv"`` or ``"exif"``) and a ``"parameters"`` key. Despite
+    the annotation, ``None`` is returned when the file exists but none of the
+    sources apply.
+    """
+    if not os.path.isfile(image_filename):
+        return {"source": "missing", "parameters": {}}
+
+    # Open through a context manager so the image's file handle is released on
+    # every return path instead of whenever the object is garbage collected.
+    with Image.open(image_filename) as pil_image:
+        # we have PNG generation parameters (preferred, as it's what the txt2img dropzone reads,
+        # and we go via that for SendTo, and is directly tied to the image)
+        if "parameters" in pil_image.info:
+            return {
+                "source": "png",
+                "parameters": compact(
+                    parse_generation_parameters(pil_image.info["parameters"])
+                ),
+            }
+
+        # we have a matching json file (next most likely to be accurate when it's there)
+        json_path = os.path.splitext(image_filename)[0] + ".json"
+        if os.path.isfile(json_path):
+            with open(json_path) as params_file:
+                return {
+                    "source": "json",
+                    "parameters": compact(
+                        humanize(json.load(params_file), includes_filename=False)
+                    ),
+                }
+
+        # we have a CSV file so try that (can be different shapes, and it usually has no
+        # headers/param names so of the things we *know* have parameters, it's the
+        # last resort)
+        if has_csv(image_filename):
+            params = parse_csv(image_filename)
+            if params:  # we might not have found the filename in the csv
+                return {
+                    "source": "csv",
+                    "parameters": compact(params),  # already humanized
+                }
+
+        # EXIF data, probably a .jpeg, may well not include parameters, but at least it's *something*
+        if has_exif(image_filename):
+            return {"source": "exif", "parameters": parse_exif(pil_image)}
+
+    # we've got nothing
+    return None
--- a/apps/shark_studio/web/utils/metadata/exif_metadata.py
+++ b/apps/shark_studio/web/utils/metadata/exif_metadata.py
@@ -0,0 +1,54 @@
+from PIL import Image
+from PIL.ExifTags import Base as EXIFKeys, TAGS, IFD, GPSTAGS
+
+
+def has_exif(image_filename: str) -> bool:
+    # use a context manager so that the image file handle is always closed again
+    with Image.open(image_filename) as image:
+        return bool(image.getexif())
+
+
+def parse_exif(pil_image: Image.Image) -> dict:
+    # Flattens the image's base EXIF tags, the tags of its other IFDs and its GPS tags
+    # into one dict of tag name -> str(value), skipping unknown ids and unusable values
+    img_exif = pil_image.getexif()
+
+    # See this stackoverflow answer for where most of this comes from: https://stackoverflow.com/a/75357594
+    # I did try to use the exif library but it broke just as much as my initial attempt at this
+    # (albeit I was probably using it wrong) so I reverted back to using PIL with more filtering
+    # and saved a dependency
+
+    def displayable(val):
+        # keep only non-empty values that are not raw bytes and whose text is not
+        # purely whitespace
+        return val and (not isinstance(val, bytes)) and (not str(val).isspace())
+
+    def try_get_ifd(ifd_id):
+        try:
+            return img_exif.get_ifd(ifd_id).items()
+        except KeyError:
+            return {}
+
+    exif_tags = {
+        TAGS.get(key, key): str(val)
+        for (key, val) in img_exif.items()
+        if key in TAGS
+        and key not in (EXIFKeys.ExifOffset, EXIFKeys.GPSInfo)
+        and displayable(val)
+    }
+
+    ifd_tags = {
+        TAGS.get(key, key): str(val)
+        for ifd_id in IFD
+        if ifd_id != IFD.GPSInfo  # skipped before fetching; read with GPSTAGS below
+        for (key, val) in try_get_ifd(ifd_id)
+        if key in TAGS and displayable(val)
+    }
+
+    gps_tags = {
+        GPSTAGS.get(key, key): str(val)
+        for (key, val) in try_get_ifd(IFD.GPSInfo)
+        if key in GPSTAGS and displayable(val)
+    }
+
+    return {**exif_tags, **ifd_tags, **gps_tags}
--- a/apps/shark_studio/web/utils/metadata/format.py
+++ b/apps/shark_studio/web/utils/metadata/format.py
@@ -0,0 +1,139 @@
+# As SHARK has evolved more columns have been added to images_details.csv. However, since
+# no version of the CSV has any headers (yet) we don't actually have anything within the
+# file that tells us which parameter each column is for. So this is a list of known patterns
+# indexed by length which is what we're going to have to use to guess which columns are the
+# right ones for the file we're looking at.
+
+# The same ordering is used for JSON, but these do have key names, however they are not very
+# human friendly, nor do they match up with what is written to the .png headers
+
+# So these are functions to try and get something consistent out of the raw input from all
+# these sources
+
+PARAMS_FORMATS = {
+    9: {
+        "VARIANT": "Model",
+        "SCHEDULER": "Sampler",
+        "PROMPT": "Prompt",
+        "NEG_PROMPT": "Negative prompt",
+        "SEED": "Seed",
+        "CFG_SCALE": "CFG scale",
+        "PRECISION": "Precision",
+        "STEPS": "Steps",
+        "OUTPUT": "Filename",
+    },
+    10: {
+        "MODEL": "Model",
+        "VARIANT": "Variant",
+        "SCHEDULER": "Sampler",
+        "PROMPT": "Prompt",
+        "NEG_PROMPT": "Negative prompt",
+        "SEED": "Seed",
+        "CFG_SCALE": "CFG scale",
+        "PRECISION": "Precision",
+        "STEPS": "Steps",
+        "OUTPUT": "Filename",
+    },
+    12: {
+        "VARIANT": "Model",
+        "SCHEDULER": "Sampler",
+        "PROMPT": "Prompt",
+        "NEG_PROMPT": "Negative prompt",
+        "SEED": "Seed",
+        "CFG_SCALE": "CFG scale",
+        "PRECISION": "Precision",
+        "STEPS": "Steps",
+        "HEIGHT": "Height",
+        "WIDTH": "Width",
+        "MAX_LENGTH": "Max Length",
+        "OUTPUT": "Filename",
+    },
+}
+
+PARAMS_FORMAT_CURRENT = {
+    "VARIANT": "Model",
+    "VAE": "VAE",
+    "LORA": "LoRA",
+    "SCHEDULER": "Sampler",
+    "PROMPT": "Prompt",
+    "NEG_PROMPT": "Negative prompt",
+    "SEED": "Seed",
+    "CFG_SCALE": "CFG scale",
+    "PRECISION": "Precision",
+    "STEPS": "Steps",
+    "HEIGHT": "Height",
+    "WIDTH": "Width",
+    "MAX_LENGTH": "Max Length",
+    "OUTPUT": "Filename",
+}
+
+
+def compact(metadata: dict) -> dict:
+    # Returns a trimmed copy of the metadata for display: the filename is dropped,
+    # paired size entries are merged into one, and empty VAE/LoRA entries are removed
+    result = dict(metadata)  # we don't want to alter the original dictionary
+
+    # discard the filename because we should already have it
+    result.pop("Filename", None)
+
+    # make showing the sizes more compact by using only one line each; both halves of
+    # a pair must be present, otherwise popping the absent one would raise a KeyError
+    if {"Size-1", "Size-2"} <= result.keys():
+        result["Size"] = f"{result.pop('Size-1')}x{result.pop('Size-2')}"
+    elif {"Height", "Width"} <= result.keys():
+        result["Size"] = f"{result.pop('Height')}x{result.pop('Width')}"
+
+    if {"Hires resize-1", "Hires resize-2"} <= result.keys():
+        hires_y = result.pop("Hires resize-1")
+        hires_x = result.pop("Hires resize-2")
+
+        if hires_x == 0 and hires_y == 0:
+            result["Hires resize"] = "None"
+        else:
+            result["Hires resize"] = f"{hires_y}x{hires_x}"
+
+    # remove VAE and LoRA if they exist and are empty
+    for optional_key in ("VAE", "LoRA"):
+        if optional_key in result:
+            value = result[optional_key]
+            if not value or value == "None":
+                result.pop(optional_key)
+
+    return result
+
+
+def humanizable(metadata: dict | list[str], includes_filename=True) -> bool:
+    lookup_key = len(metadata) + (0 if includes_filename else 1)  # count as if the filename were included
+    return lookup_key in PARAMS_FORMATS.keys()  # True when a format with that many entries is known
+
+
+def humanize(metadata: dict | list[str], includes_filename=True) -> dict:
+    # Converts raw parameters (a positional list, or a dict with raw key names) into
+    # a dict keyed by the friendly names held in the parameter formats
+    lookup_key = len(metadata) + (0 if includes_filename else 1)
+    # None when no known format has this many entries
+    known_format = PARAMS_FORMATS.get(lookup_key)
+
+    # For lists we can only work based on the length, we have no other information
+    if isinstance(metadata, list):
+        if known_format is None:
+            raise KeyError(
+                f"Humanize could not find the format for a parameter list of length {len(metadata)}"
+            )
+        return dict(zip(known_format.values(), metadata))
+
+    # For dictionaries we try to use the matching length parameter format if
+    # available, otherwise we just use the current format which is assumed to
+    # have everything currently known about. Then we swap keys in the metadata
+    # that match keys in the format for the friendlier name that we have set
+    # in the format value
+    if isinstance(metadata, dict):
+        names = PARAMS_FORMAT_CURRENT if known_format is None else known_format
+
+        return {
+            friendly: metadata[raw_key]
+            for raw_key, friendly in names.items()
+            if raw_key in metadata and metadata[raw_key]
+        }
+
+    raise TypeError("Can only humanize parameter lists or dictionaries")
--- a/apps/shark_studio/web/utils/metadata/png_metadata.py
+++ b/apps/shark_studio/web/utils/metadata/png_metadata.py
@@ -0,0 +1,216 @@
+import re
+from pathlib import Path
+from apps.shark_studio.web.utils.file_utils import (
+    get_checkpoint_pathfile,
+)
+from apps.shark_studio.api.sd import EMPTY_SD_MAP as sd_model_map
+
+from apps.shark_studio.modules.schedulers import (
+    scheduler_model_map,
+)
+
+re_param_code = r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)'
+re_param = re.compile(re_param_code)
+re_imagesize = re.compile(r"^(\d+)x(\d+)$")
+
+
+def parse_generation_parameters(x: str):
+    """Parse PNG "parameters" text into a dict of prompt, negative prompt and settings."""
+    res = {}
+    prompt = ""
+    negative_prompt = ""
+    done_with_prompt = False
+
+    *lines, lastline = x.strip().split("\n")
+    if len(re_param.findall(lastline)) < 3:  # too few pairs: treat as prompt text
+        lines.append(lastline)
+        lastline = ""
+
+    for line in lines:
+        line = line.strip()
+        if line.startswith("Negative prompt:"):
+            done_with_prompt = True
+            line = line[16:].strip()
+        if done_with_prompt:
+            negative_prompt += ("" if negative_prompt == "" else "\n") + line
+        else:
+            prompt += ("" if prompt == "" else "\n") + line
+
+    res["Prompt"] = prompt
+    res["Negative prompt"] = negative_prompt
+
+    for k, v in re_param.findall(lastline):
+        # startswith/endswith, as v[0] raises IndexError on an empty value ("Key: ,")
+        v = v[1:-1] if v.startswith('"') and v.endswith('"') else v
+        m = re_imagesize.match(v)
+        if m is not None:
+            res[k + "-1"] = m.group(1)
+            res[k + "-2"] = m.group(2)
+        else:
+            res[k] = v
+
+    # Missing CLIP skip means it was set to 1 (the default)
+    if "Clip skip" not in res:
+        res["Clip skip"] = "1"
+
+    hypernet = res.get("Hypernet", None)
+    if hypernet is not None:
+        strength = res.get("Hypernet strength", "1.0")
+        res["Prompt"] += f"<hypernet:{hypernet}:{strength}>"
+
+    if "Hires resize-1" not in res:
+        res["Hires resize-1"] = 0
+        res["Hires resize-2"] = 0
+
+    return res
+
+
+def try_find_model_base_from_png_metadata(file: str, folder: str = "models") -> str:
+    """Return the local checkpoint file name matching `file` in `folder`, or ""."""
+    # Remove extension from file info
+    if file.endswith((".safetensors", ".ckpt")):
+        file = Path(file).stem
+
+    # Check for the file name match with one of the local ckpt or safetensors files;
+    # .safetensors is checked last so that it wins when both exist
+    custom = ""
+    for extension in (".ckpt", ".safetensors"):
+        if Path(get_checkpoint_pathfile(file + extension, folder)).is_file():
+            custom = file + extension
+    return custom
+
+
+def find_model_from_png_metadata(
+    key: str, metadata: dict[str, str | int]
+) -> tuple[str, str]:
+    # Resolves metadata[key] to (local or default model name, hf model id); at most
+    # one of the two is non-empty, and both are "" when the key is absent
+    if key not in metadata:
+        return "", ""
+
+    model_file = metadata[key]
+    # A default model list entry (ex: "Linaqruf/anything-v3.0") overrides a local file
+    local_match = try_find_model_base_from_png_metadata(model_file)
+    png_custom = model_file if model_file in sd_model_map else local_match
+    if png_custom:
+        return png_custom, ""
+
+    # If nothing had matched, check vendor/hf_model_id
+    if model_file.count("/"):
+        return "", model_file
+
+    # No matching model was found
+    print("Import PNG info: Unable to find a matching model for %s" % model_file)
+    return "", ""
+
+
+def find_vae_from_png_metadata(key: str, metadata: dict[str, str | int]) -> str:
+    # Resolves metadata[key] through the "vae" folder lookup; "" if absent or unmatched
+
+    # VAE input is optional, should not print or throw an error if missing
+    if key not in metadata:
+        return ""
+
+    vae_file = metadata[key]
+    vae_custom = try_find_model_base_from_png_metadata(vae_file, "vae")
+    return vae_custom
+
+
+def find_lora_from_png_metadata(
+    key: str, metadata: dict[str, str | int]
+) -> tuple[str, str]:
+    # Resolves metadata[key] to (local LoRA file name, hf LoRA id); whichever one
+    # does not apply is returned as ""
+
+    # LoRA input is optional, should not print or throw an error if missing
+    if key not in metadata:
+        return "", ""
+
+    lora_file = metadata[key]
+    lora_custom = try_find_model_base_from_png_metadata(lora_file, "lora")
+    if lora_custom:
+        return lora_custom, ""
+    # If nothing had matched, check vendor/hf_model_id
+    return "", (lora_file if lora_file.count("/") else "")
+
+
+def import_png_metadata(
+    pil_data,
+    prompt,
+    negative_prompt,
+    steps,
+    sampler,
+    cfg_scale,
+    seed,
+    width,
+    height,
+    custom_model,
+    custom_lora,
+    hf_lora_id,
+    custom_vae,
+):  # every argument after pil_data is a current value, returned unchanged unless overridden below
+    try:  # any failure keeps whatever was assigned so far and falls through to the return
+        png_info = pil_data.info["parameters"]  # raises if the image carries no parameters text
+        metadata = parse_generation_parameters(png_info)
+
+        (png_custom_model, png_hf_model_id) = find_model_from_png_metadata(
+            "Model", metadata
+        )
+        (lora_custom_model, lora_hf_model_id) = find_lora_from_png_metadata(
+            "LoRA", metadata
+        )
+        vae_custom_model = find_vae_from_png_metadata("VAE", metadata)
+
+        negative_prompt = metadata["Negative prompt"]
+        steps = int(metadata["Steps"])  # a missing key raises and aborts the remaining imports
+        cfg_scale = float(metadata["CFG scale"])
+        seed = int(metadata["Seed"])
+        width = float(metadata["Size-1"])  # the parser splits "Size: AxB" into Size-1 / Size-2
+        height = float(metadata["Size-2"])
+
+        if "Model" in metadata and png_custom_model:  # a local/default match wins over an hf id
+            custom_model = png_custom_model
+        elif "Model" in metadata and png_hf_model_id:
+            custom_model = png_hf_model_id
+
+        if "LoRA" in metadata and lora_custom_model:
+            custom_lora = lora_custom_model
+            hf_lora_id = ""
+        if "LoRA" in metadata and lora_hf_model_id:
+            custom_lora = "None"
+            hf_lora_id = lora_hf_model_id
+
+        if "VAE" in metadata and vae_custom_model:
+            custom_vae = vae_custom_model
+
+        if "Prompt" in metadata:
+            prompt = metadata["Prompt"]
+        if "Sampler" in metadata:
+            if metadata["Sampler"] in scheduler_model_map:  # only accept schedulers known to the map
+                sampler = metadata["Sampler"]
+            else:
+                print(
+                    "Import PNG info: Unable to find a scheduler for %s"
+                    % metadata["Sampler"]
+                )
+
+    except Exception as ex:
+        if pil_data and pil_data.info.get("parameters"):  # stay quiet when there was nothing to parse
+            print("import_png_metadata failed with %s" % ex)
+        pass
+
+    return (
+        None,  # the first element is always None
+        prompt,
+        negative_prompt,
+        steps,
+        sampler,
+        cfg_scale,
+        seed,
+        width,
+        height,
+        custom_model,
+        custom_lora,
+        hf_lora_id,
+        custom_vae,
+    )
--- a/apps/shark_studio/web/utils/state.py
+++ b/apps/shark_studio/web/utils/state.py
@@ -0,0 +1,39 @@
+import apps.shark_studio.web.utils.globals as global_obj
+import gc
+
+
+def status_label(tab_name, batch_index=0, batch_count=1, batch_size=1):
+    # "<tab> generating i/n[xsize]" while batches remain, otherwise "<tab> complete"
+    if batch_index >= batch_count:
+        return f"{tab_name} complete"
+    size_suffix = f"x{batch_size}" if batch_size > 1 else ""
+    return f"{tab_name} generating {batch_index+1}/{batch_count}{size_suffix}"
+
+
+def get_generation_text_info(seeds, device):
+    # Builds the multi-line text summary of the current generation settings.
+    # NOTE(review): assumes get_config_dict() returns a mapping of setting -> value
+    cfg_dump = dict(global_obj.get_config_dict())
+    text_output = f"prompt={cfg_dump['prompts']}"
+    text_output += f"\nnegative prompt={cfg_dump['negative_prompts']}"
+    text_output += (
+        f"\nmodel_id={cfg_dump['hf_model_id']}, " f"ckpt_loc={cfg_dump['ckpt_loc']}"
+    )
+    text_output += f"\nscheduler={cfg_dump['scheduler']}, " f"device={device}"
+    text_output += (
+        f"\nsteps={cfg_dump['steps']}, "
+        f"guidance_scale={cfg_dump['guidance_scale']}, "
+        f"seed={seeds}"
+    )
+    text_output += (
+        f"\nsize={cfg_dump['height']}x{cfg_dump['width']}, "
+        if not cfg_dump.get("use_hiresfix")
+        else f"\nsize={cfg_dump['hiresfix_height']}x{cfg_dump['hiresfix_width']}, "
+    )
+    text_output += (
+        f"batch_count={cfg_dump['batch_count']}, "
+        f"batch_size={cfg_dump['batch_size']}, "
+        f"max_length={cfg_dump['max_length']}"
+    )
+
+    return text_output
--- a/apps/shark_studio/web/utils/tmp_configs.py
+++ b/apps/shark_studio/web/utils/tmp_configs.py
@@ -0,0 +1,75 @@
+import os
+import shutil
+from time import time
+
+from apps.shark_studio.modules.shared_cmd_opts import cmd_opts
+
+shark_tmp = cmd_opts.tmp_dir  # os.path.join(os.getcwd(), "shark_tmp/")
+
+
+def clear_tmp_mlir():
+    # Removes the .mlir files found directly inside shark_tmp and reports how long
+    # the clean up took
+    cleanup_start = time()
+    print("Clearing .mlir temporary files from a prior run. This may take some time...")
+
+    for filename in os.listdir(shark_tmp):
+        candidate = os.path.join(shark_tmp, filename)
+        if os.path.isfile(candidate) and filename.endswith(".mlir"):
+            os.remove(candidate)
+
+    print(f"Clearing .mlir temporary files took {time() - cleanup_start:.4f} seconds.")
+
+
+def clear_tmp_imgs():
+    # Points gradio at a temp directory under shark_tmp (unless GRADIO_TEMP_DIR is already
+    # set), then clears the temporary images left behind by a previous session
+    if "GRADIO_TEMP_DIR" not in os.environ:
+        os.environ["GRADIO_TEMP_DIR"] = os.path.join(shark_tmp, "gradio")
+
+    print(
+        f"gradio temporary image cache located at {os.environ['GRADIO_TEMP_DIR']}. "
+        + "You may change this by setting the GRADIO_TEMP_DIR environment variable."
+    )
+
+    # Clear all gradio tmp images from the last session
+    if os.path.exists(os.environ["GRADIO_TEMP_DIR"]):
+        cleanup_start = time()
+        print(
+            "Clearing gradio UI temporary image files from a prior run. This may take some time..."
+        )
+        shutil.rmtree(os.environ["GRADIO_TEMP_DIR"], ignore_errors=True)
+        print(
+            f"Clearing gradio UI temporary image files took {time() - cleanup_start:.4f} seconds."
+        )
+    # older SHARK versions had to workaround gradio bugs and stored things differently
+    else:
+        image_files = [
+            filename
+            for filename in os.listdir(shark_tmp)
+            if os.path.isfile(os.path.join(shark_tmp, filename))
+            and filename.startswith("tmp")
+            and filename.endswith(".png")
+        ]
+        if len(image_files) > 0:
+            print(
+                "Clearing temporary image files of a prior run of a previous SHARK version. This may take some time..."
+            )
+            cleanup_start = time()
+            for filename in image_files:
+                # join the path: shark_tmp is not guaranteed to end in a separator
+                os.remove(os.path.join(shark_tmp, filename))
+            print(
+                f"Clearing temporary image files took {time() - cleanup_start:.4f} seconds."
+            )
+        else:
+            print("No temporary images files to clear.")
+
+
+def config_tmp():
+    # create shark_tmp if it does not exist; makedirs also creates missing parent
+    # directories, and exist_ok avoids failing if the directory appears meanwhile
+    os.makedirs(shark_tmp, exist_ok=True)
+
+    clear_tmp_mlir()
+    clear_tmp_imgs()
--- a/benchmarks/hf_model_benchmark.py
+++ b/benchmarks/hf_model_benchmark.py
@@ -1,22 +0,0 @@
-import torch
-from shark.parser import parser
-from benchmarks.hf_transformer import SharkHFBenchmarkRunner
-
-parser.add_argument(
-    "--model_name",
-    type=str,
-    required=True,
-    help='Specifies name of HF model to benchmark. (For exmaple "microsoft/MiniLM-L12-H384-uncased"',
-)
-load_args, unknown = parser.parse_known_args()
-
-if __name__ == "__main__":
-    model_name = load_args.model_name
-    test_input = torch.randint(2, (1, 128))
-    shark_module = SharkHFBenchmarkRunner(
-        model_name, (test_input,), jit_trace=True
-    )
-    shark_module.benchmark_c()
-    shark_module.benchmark_python((test_input,))
-    shark_module.benchmark_torch(test_input)
-    shark_module.benchmark_onnx(test_input)
--- a/benchmarks/hf_transformer.py
+++ b/benchmarks/hf_transformer.py
@@ -1,181 +0,0 @@
-import torch
-from shark.shark_benchmark_runner import SharkBenchmarkRunner
-from shark.parser import shark_args
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from onnxruntime.transformers.benchmark import (
-    run_pytorch,
-    run_tensorflow,
-    run_onnxruntime,
-)
-from onnxruntime.transformers.huggingface_models import MODELS
-from onnxruntime.transformers.benchmark_helper import ConfigModifier, Precision
-import os
-import psutil
-
-
-class OnnxFusionOptions(object):
-    def __init__(self):
-        self.disable_gelu = False
-        self.disable_layer_norm = False
-        self.disable_attention = False
-        self.disable_skip_layer_norm = False
-        self.disable_embed_layer_norm = False
-        self.disable_bias_skip_layer_norm = False
-        self.disable_bias_gelu = False
-        self.enable_gelu_approximation = False
-        self.use_mask_index = False
-        self.no_attention_mask = False
-
-
-class HuggingFaceLanguage(torch.nn.Module):
-    def __init__(self, hf_model_name):
-        super().__init__()
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            hf_model_name,  # The pretrained model.
-            num_labels=2,  # The number of output labels--2 for binary classification.
-            output_attentions=False,  # Whether the model returns attentions weights.
-            output_hidden_states=False,  # Whether the model returns all hidden-states.
-            torchscript=True,
-        )
-
-    def forward(self, tokens):
-        return self.model.forward(tokens)[0]
-
-
-class SharkHFBenchmarkRunner(SharkBenchmarkRunner):
-    # SharkRunner derived class with Benchmarking capabilities.
-    def __init__(
-        self,
-        model_name: str,
-        input: tuple,
-        dynamic: bool = False,
-        device: str = None,
-        jit_trace: bool = False,
-        from_aot: bool = False,
-        frontend: str = "torch",
-    ):
-        self.device = device if device is not None else shark_args.device
-        if self.device == "gpu":
-            raise ValueError(
-                "Currently GPU Benchmarking is not supported due to OOM from ORT."
-            )
-        self.model_name = model_name
-        model = HuggingFaceLanguage(model_name)
-        SharkBenchmarkRunner.__init__(
-            self,
-            model,
-            input,
-            dynamic,
-            self.device,
-            jit_trace,
-            from_aot,
-            frontend,
-        )
-
-    def benchmark_torch(self, inputs):
-        use_gpu = self.device == "gpu"
-        # Set set the model's layer number to automatic.
-        config_modifier = ConfigModifier(None)
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        verbose = False
-        result = run_pytorch(
-            use_gpu,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            False,
-            cache_dir,
-            verbose,
-        )
-        print(
-            f"ONNX Pytorch-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-
-    # TODO: Currently non-functional due to TF runtime error. There might be some issue with, initializing TF.
-    def benchmark_tf(self, inputs):
-        use_gpu = self.device == "gpu"
-        # Set set the model's layer number to automatic.
-        config_modifier = ConfigModifier(None)
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        verbose = False
-        result = run_tensorflow(
-            use_gpu,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            cache_dir,
-            verbose,
-        )
-        print(
-            f"ONNX TF-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
-
-    def benchmark_onnx(self, inputs):
-        if self.model_name not in MODELS:
-            print(
-                f"{self.model_name} is currently not supported in ORT's HF. Check \
-https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/huggingface_models.py \
-for currently supported models. Exiting benchmark ONNX."
-            )
-            return
-        use_gpu = self.device == "gpu"
-        num_threads = psutil.cpu_count(logical=False)
-        batch_sizes = [inputs.shape[0]]
-        sequence_lengths = [inputs.shape[-1]]
-        cache_dir = os.path.join(".", "cache_models")
-        onnx_dir = os.path.join(".", "onnx_models")
-        verbose = False
-        input_counts = [1]
-        optimize_onnx = True
-        validate_onnx = False
-        disable_ort_io_binding = False
-        use_raw_attention_mask = True
-        model_fusion_statistics = {}
-        overwrite = False
-        model_source = "pt"  # Either "pt" or "tf"
-        provider = None
-        config_modifier = ConfigModifier(None)
-        onnx_args = OnnxFusionOptions()
-        result = run_onnxruntime(
-            use_gpu,
-            provider,
-            [self.model_name],
-            None,
-            config_modifier,
-            Precision.FLOAT32,
-            num_threads,
-            batch_sizes,
-            sequence_lengths,
-            shark_args.num_iterations,
-            input_counts,
-            optimize_onnx,
-            validate_onnx,
-            cache_dir,
-            onnx_dir,
-            verbose,
-            overwrite,
-            disable_ort_io_binding,
-            use_raw_attention_mask,
-            model_fusion_statistics,
-            model_source,
-            onnx_args,
-        )
-        print(
-            f"ONNX ORT-benchmark:{result[0]['QPS']} iter/second, Total Iterations:{shark_args.num_iterations}"
-        )
--- a/benchmarks/tests/test_benchmark.py
+++ b/benchmarks/tests/test_benchmark.py
@@ -1,231 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.iree_utils._common import check_device_drivers
-
-import torch
-import tensorflow as tf
-import numpy as np
-import torchvision.models as models
-from transformers import (
-    AutoModelForSequenceClassification,
-    BertTokenizer,
-    TFBertModel,
-)
-import importlib
-import pytest
-import unittest
-
-torch.manual_seed(0)
-gpus = tf.config.experimental.list_physical_devices("GPU")
-for gpu in gpus:
-    tf.config.experimental.set_memory_growth(gpu, True)
-
-##################### Tensorflow Hugging Face LM Models ###################################
-MAX_SEQUENCE_LENGTH = 512
-BATCH_SIZE = 1
-
-# Create a set of 2-dimensional inputs
-tf_bert_input = [
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-]
-
-
-class TFHuggingFaceLanguage(tf.Module):
-    def __init__(self, hf_model_name):
-        super(TFHuggingFaceLanguage, self).__init__()
-        # Create a BERT trainer with the created network.
-        self.m = TFBertModel.from_pretrained(hf_model_name, from_pt=True)
-
-        # Invoke the trainer model on the inputs. This causes the layer to be built.
-        self.m.predict = lambda x, y, z: self.m.call(
-            input_ids=x, attention_mask=y, token_type_ids=z, training=False
-        )
-
-    @tf.function(input_signature=tf_bert_input, jit_compile=True)
-    def forward(self, input_ids, attention_mask, token_type_ids):
-        return self.m.predict(input_ids, attention_mask, token_type_ids)
-
-
-def get_TFhf_model(name):
-    model = TFHuggingFaceLanguage(name)
-    tokenizer = BertTokenizer.from_pretrained(name)
-    text = "Replace me by any text you'd like."
-    encoded_input = tokenizer(
-        text,
-        padding="max_length",
-        truncation=True,
-        max_length=MAX_SEQUENCE_LENGTH,
-    )
-    for key in encoded_input:
-        encoded_input[key] = tf.expand_dims(
-            tf.convert_to_tensor(encoded_input[key]), 0
-        )
-    test_input = (
-        encoded_input["input_ids"],
-        encoded_input["attention_mask"],
-        encoded_input["token_type_ids"],
-    )
-    actual_out = model.forward(*test_input)
-    return model, test_input, actual_out
-
-
-##################### Hugging Face LM Models ###################################
-
-
-class HuggingFaceLanguage(torch.nn.Module):
-    def __init__(self, hf_model_name):
-        super().__init__()
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            hf_model_name,  # The pretrained model.
-            num_labels=2,  # The number of output labels--2 for binary classification.
-            output_attentions=False,  # Whether the model returns attentions weights.
-            output_hidden_states=False,  # Whether the model returns all hidden-states.
-            torchscript=True,
-        )
-
-    def forward(self, tokens):
-        return self.model.forward(tokens)[0]
-
-
-def get_hf_model(name):
-    model = HuggingFaceLanguage(name)
-    # TODO: Currently the test input is set to (1,128)
-    test_input = torch.randint(2, (1, 128))
-    actual_out = model(test_input)
-    return model, test_input, actual_out
-
-
-################################################################################
-
-##################### Torch Vision Models    ###################################
-
-
-class VisionModule(torch.nn.Module):
-    def __init__(self, model):
-        super().__init__()
-        self.model = model
-        self.train(False)
-
-    def forward(self, input):
-        return self.model.forward(input)
-
-
-def get_vision_model(torch_model):
-    model = VisionModule(torch_model)
-    # TODO: Currently the test input is set to (1,128)
-    test_input = torch.randn(1, 3, 224, 224)
-    actual_out = model(test_input)
-    return model, test_input, actual_out
-
-
-#############################   Benchmark Tests ####################################
-
-pytest_benchmark_param = pytest.mark.parametrize(
-    ("dynamic", "device"),
-    [
-        pytest.param(False, "cpu"),
-        # TODO: Language models are failing for dynamic case..
-        pytest.param(True, "cpu", marks=pytest.mark.skip),
-        pytest.param(
-            False,
-            "cuda",
-            marks=pytest.mark.skipif(
-                check_device_drivers("cuda"), reason="nvidia-smi not found"
-            ),
-        ),
-        pytest.param(True, "cuda", marks=pytest.mark.skip),
-        pytest.param(
-            False,
-            "vulkan",
-            marks=pytest.mark.skipif(
-                check_device_drivers("vulkan"),
-                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
-            ),
-        ),
-        pytest.param(
-            True,
-            "vulkan",
-            marks=pytest.mark.skipif(
-                check_device_drivers("vulkan"),
-                reason="vulkaninfo not found, install from https://github.com/KhronosGroup/MoltenVK/releases",
-            ),
-        ),
-    ],
-)
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("iree.tools") is None,
-    reason="Cannot find tools to import TF",
-)
-@pytest_benchmark_param
-def test_bench_minilm_torch(dynamic, device):
-    model, test_input, act_out = get_hf_model(
-        "microsoft/MiniLM-L12-H384-uncased"
-    )
-    shark_module = SharkInference(
-        model,
-        (test_input,),
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.compile()
-        shark_module.benchmark_all((test_input,))
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("iree.tools") is None,
-    reason="Cannot find tools to import TF",
-)
-@pytest_benchmark_param
-def test_bench_distilbert(dynamic, device):
-    model, test_input, act_out = get_TFhf_model("distilbert-base-uncased")
-    shark_module = SharkInference(
-        model,
-        test_input,
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.set_frontend("tensorflow")
-        shark_module.compile()
-        shark_module.benchmark_all(test_input)
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
-
-
-@pytest.mark.skip(reason="XLM Roberta too large to test.")
-@pytest_benchmark_param
-def test_bench_xlm_roberta(dynamic, device):
-    model, test_input, act_out = get_TFhf_model("xlm-roberta-base")
-    shark_module = SharkInference(
-        model,
-        test_input,
-        device=device,
-        dynamic=dynamic,
-        jit_trace=True,
-        benchmark_mode=True,
-    )
-    try:
-        # If becnhmarking succesful, assert success/True.
-        shark_module.set_frontend("tensorflow")
-        shark_module.compile()
-        shark_module.benchmark_all(test_input)
-        assert True
-    except Exception as e:
-        # If anything happen during benchmarking, assert False/failure.
-        assert False
--- a/benchmarks/tests/test_hf_benchmark.py
+++ b/benchmarks/tests/test_hf_benchmark.py
@@ -1,45 +0,0 @@
-import torch
-from benchmarks.hf_transformer import SharkHFBenchmarkRunner
-import importlib
-import pytest
-
-torch.manual_seed(0)
-
-############################# HF Benchmark Tests ####################################
-
-# Test running benchmark module without failing.
-pytest_benchmark_param = pytest.mark.parametrize(
-    ("dynamic", "device"),
-    [
-        pytest.param(False, "cpu"),
-        # TODO: Language models are failing for dynamic case..
-        pytest.param(True, "cpu", marks=pytest.mark.skip),
-    ],
-)
-
-
-@pytest.mark.skipif(
-    importlib.util.find_spec("onnxruntime") is None,
-    reason="Cannot find ONNXRUNTIME.",
-)
-@pytest_benchmark_param
-def test_HFbench_minilm_torch(dynamic, device):
-    """Smoke-test SharkHFBenchmarkRunner on the "bert-base-uncased" model.
-
-    Runs benchmark_c, benchmark_python, benchmark_torch and benchmark_onnx in
-    turn; the test passes when none of them raises.
-    """
-    model_name = "bert-base-uncased"
-    test_input = torch.randint(2, (1, 128))
-    # No try/except here: a bare `assert False` would hide the root cause.
-    shark_module = SharkHFBenchmarkRunner(
-        model_name,
-        (test_input,),
-        jit_trace=True,
-        dynamic=dynamic,
-        device=device,
-    )
-    shark_module.benchmark_c()
-    shark_module.benchmark_python((test_input,))
-    shark_module.benchmark_torch(test_input)
-    shark_module.benchmark_onnx(test_input)
--- a/cpp/.gitignore
+++ b/cpp/.gitignore
@@ -1,3 +0,0 @@
-*.mlir
-*.vmfb
-*.ini
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,52 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-cmake_minimum_required(VERSION 3.21...3.23)
-
-#-------------------------------------------------------------------------------
-# Project configuration
-#-------------------------------------------------------------------------------
-
-project(iree-samples C CXX)
-set(CMAKE_C_STANDARD 11)
-set(CMAKE_CXX_STANDARD 17)
-set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
-#-------------------------------------------------------------------------------
-# Core project dependency
-#-------------------------------------------------------------------------------
-
-message(STATUS "Fetching core IREE repo (this may take a few minutes)...")
-# Note: for log output, set -DFETCHCONTENT_QUIET=OFF,
-# see https://gitlab.kitware.com/cmake/cmake/-/issues/18238#note_440475
-
-include(FetchContent)
-
-FetchContent_Declare(
-  iree
-  GIT_REPOSITORY https://github.com/nod-ai/srt.git
-  GIT_TAG shark 
-  GIT_SUBMODULES_RECURSE OFF
-  GIT_SHALLOW OFF
-  GIT_PROGRESS ON
-  USES_TERMINAL_DOWNLOAD ON
-)
-
-# Extend module path to find MLIR CMake modules.
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_BINARY_DIR}/lib/cmake/mlir")
-
-# Disable core project features not needed for these out of tree samples.
-set(IREE_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-set(IREE_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
-
-FetchContent_MakeAvailable(iree)
-FetchContent_GetProperties(iree SOURCE_DIR IREE_SOURCE_DIR)
-
-#-------------------------------------------------------------------------------
-# Individual samples
-#-------------------------------------------------------------------------------
-
-add_subdirectory(vulkan_gui)
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -1,82 +0,0 @@
-# SHARK C/C++ Samples
-
-These C/C++ samples can be built using CMake. The samples depend on the main
-SHARK-Runtime project's C/C++ sources, including both the runtime and the compiler. 
-
-Individual samples may require additional dependencies. Watch CMake's output
-for information about which you are missing for individual samples.
-
-On Windows we recommend using https://github.com/microsoft/vcpkg to download packages for
-your system. The general setup flow looks like
-
-*Install and activate SHARK*
-
-```bash
-source shark.venv/bin/activate #follow main repo instructions to setup your venv
-```
-
-*Install Dependencies*
-
-```bash
-vcpkg install [library] --triplet [your platform]
-vcpkg integrate install
-
-# Then pass `-DCMAKE_TOOLCHAIN_FILE=[check logs for path]` when configuring CMake
-```
-
-In Ubuntu Linux you can install
-
-```bash
-sudo apt install libsdl2-dev
-```
-
-*Build*
-```bash
-cd cpp
-cmake -GNinja -B build/
-cmake --build build/
-```
-
-*Prepare the model*
-```bash
-wget https://storage.googleapis.com/shark_tank/latest/resnet50_tf/resnet50_tf.mlir
-iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --iree-llvmcpu-embedded-linker-path=`python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'`/iree/compiler/tools/../_mlir_libs/iree-lld --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --mlir-pass-pipeline-crash-reproducer=ist/core-reproducer.mlir --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux  resnet50_tf.mlir -o resnet50_tf.vmfb
-```
-*Prepare the input*
-
-```bash
-python save_img.py
-```
-Note that this requires tensorflow, e.g.
-```bash
-python -m pip install tensorflow
-```
-
-*Run the vulkan_gui*
-```bash
-./build/vulkan_gui/iree-samples-resnet-vulkan-gui
-```
-
-## Other models
-A tool for benchmarking other models is built and can be invoked with a command like the following
-```bash
-./build/vulkan_gui/iree-vulkan-gui --module-file=path/to/.vmfb --function_input=...
-```
-See `./build/vulkan_gui/iree-vulkan-gui --help` for an explanation of the function input. For example, stable diffusion unet can be tested with the following commands:
-```bash
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/stable_diff_tf.mlir
-iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux  stable_diff_tf.mlir -o stable_diff_tf.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=stable_diff_tf.vmfb --function_input=2x4x64x64xf32 --function_input=1xf32 --function_input=2x77x768xf32
-```
-VAE and Autoencoder are also available
-```bash
-# VAE
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/vae_tf/vae.mlir
-iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux  vae.mlir -o vae.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=vae.vmfb --function_input=1x4x64x64xf32
-
-# CLIP Autoencoder
-wget https://storage.googleapis.com/shark_tank/quinn/stable_diff_tf/clip_tf/clip_autoencoder.mlir
-iree-compile --iree-input-type=auto --iree-vm-bytecode-module-output-format=flatbuffer-binary --iree-hal-target-backends=vulkan --mlir-print-debuginfo --mlir-print-op-on-diagnostic=false --iree-llvmcpu-target-cpu-features=host -iree-vulkan-target-triple=rdna2-unknown-linux  clip_autoencoder.mlir -o clip_autoencoder.vmfb
-./build/vulkan_gui/iree-vulkan-gui --module-file=clip_autoencoder.vmfb --function_input=1x77xi32 --function_input=1x77xi32
-```
--- a/cpp/dog_imagenet.jpg
+++ b/cpp/dog_imagenet.jpg
--- a/cpp/save_img.py
+++ b/cpp/save_img.py
@@ -1,18 +0,0 @@
-import numpy as np
-import tensorflow as tf
-from shark.shark_inference import SharkInference
-
-
-def load_and_preprocess_image(fname: str):
-    image = tf.io.read_file(fname)
-    image = tf.image.decode_image(image, channels=3)
-    image = tf.image.resize(image, (224, 224))
-    image = image[tf.newaxis, :]
-    # preprocessing pipeline
-    input_tensor = tf.keras.applications.resnet50.preprocess_input(image)
-    return input_tensor
-
-
-data = load_and_preprocess_image("dog_imagenet.jpg").numpy()
-
-data.tofile("dog.bin")
--- a/cpp/vision_inference/CMakeLists.txt
+++ b/cpp/vision_inference/CMakeLists.txt
@@ -1,84 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-if(NOT IREE_TARGET_BACKEND_LLVM_CPU OR
-   NOT IREE_HAL_EXECUTABLE_LOADER_EMBEDDED_ELF)
-  message(STATUS "Missing LLVM backend and/or embeddded elf loader, skipping vision_inference sample")
-  return()
-endif()
-
-# vcpkg install stb
-#   tested with version 2021-09-10
-find_package(Stb)
-if(NOT Stb_FOUND)
-  message(STATUS "Could not find Stb, skipping vision inference sample")
-  return()
-endif()
-
-# Compile mnist.mlir to mnist.vmfb.
-set(_COMPILE_TOOL_EXECUTABLE $<TARGET_FILE:iree-compile>)
-set(_COMPILE_ARGS)
-list(APPEND _COMPILE_ARGS "--iree-input-type=auto")
-list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu")
-list(APPEND _COMPILE_ARGS "${IREE_SOURCE_DIR}/samples/models/mnist.mlir")
-list(APPEND _COMPILE_ARGS "-o")
-list(APPEND _COMPILE_ARGS "mnist.vmfb")
-add_custom_command(
-  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
-  COMMAND ${_COMPILE_TOOL_EXECUTABLE} ${_COMPILE_ARGS}
-  DEPENDS ${_COMPILE_TOOL_EXECUTABLE} "${IREE_SOURCE_DIR}/samples/models/mnist.mlir"
-)
-# Embed mnist.vmfb into a C file as mnist_bytecode_module_c.[h/c]
-set(_EMBED_DATA_EXECUTABLE $<TARGET_FILE:generate_embed_data>)
-set(_EMBED_ARGS)
-list(APPEND _EMBED_ARGS "--output_header=mnist_bytecode_module_c.h")
-list(APPEND _EMBED_ARGS "--output_impl=mnist_bytecode_module_c.c")
-list(APPEND _EMBED_ARGS "--identifier=iree_samples_vision_inference_mnist_bytecode_module")
-list(APPEND _EMBED_ARGS "--flatten")
-list(APPEND _EMBED_ARGS "${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb")
-add_custom_command(
-  OUTPUT "mnist_bytecode_module_c.h" "mnist_bytecode_module_c.c"
-  COMMAND ${_EMBED_DATA_EXECUTABLE} ${_EMBED_ARGS}
-  DEPENDS ${_EMBED_DATA_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/mnist.vmfb
-)
-# Define a library target for mnist_bytecode_module_c.
-add_library(iree_samples_vision_inference_mnist_bytecode_module_c OBJECT)
-target_sources(iree_samples_vision_inference_mnist_bytecode_module_c
-  PRIVATE
-    mnist_bytecode_module_c.h
-    mnist_bytecode_module_c.c
-)
-
-# Define the sample executable.
-set(_NAME "iree-run-mnist-module")
-add_executable(${_NAME} "")
-target_sources(${_NAME}
-  PRIVATE
-    "image_util.h"
-    "image_util.c"
-    "iree-run-mnist-module.c"
-)
-set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "iree-run-mnist-module")
-target_include_directories(${_NAME} PUBLIC
-    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
-)
-target_include_directories(${_NAME} PRIVATE
-    ${Stb_INCLUDE_DIR}
-)
-target_link_libraries(${_NAME}
-  iree_base_base
-  iree_base_tracing
-  iree_hal_hal
-  iree_runtime_runtime
-  iree_samples_vision_inference_mnist_bytecode_module_c
-)
-
-# Define a target that copies the test image into the build directory.
-add_custom_target(iree_samples_vision_inference_test_image
-  COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/mnist_test.png" "${CMAKE_CURRENT_BINARY_DIR}/mnist_test.png")
-add_dependencies(${_NAME} iree_samples_vision_inference_test_image)
-
-message(STATUS "Configured vision_inference sample successfully")
--- a/cpp/vision_inference/README.md
+++ b/cpp/vision_inference/README.md
@@ -1,8 +0,0 @@
-# Vision Inference Sample (C code)
-
-This sample demonstrates how to run a MNIST handwritten digit detection vision
-model on an image using IREE's C API.
-
-A similar sample is implemented using a Python script and IREE's command line
-tools over in the primary iree repository at
-https://github.com/iree-org/iree/tree/main/samples/vision_inference
--- a/cpp/vision_inference/image_util.c
+++ b/cpp/vision_inference/image_util.c
@@ -1,224 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "image_util.h"
-
-#include <math.h>
-
-#include "iree/base/internal/flags.h"
-#include "iree/base/tracing.h"
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
-
-iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
-    const uint8_t* pixel_data, iree_host_size_t buffer_length,
-    const float* input_range, iree_host_size_t range_length,
-    float* out_buffer) {
-  // Rescales uint8 pixels from [0, 255] into the two-element |input_range|.
-  IREE_TRACE_ZONE_BEGIN(z0);
-  if (range_length != 2) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "range defined as 2-element [min, max] array.");
-  }
-  float input_scale = fabsf(input_range[1] - input_range[0]) / 2.0f;
-  float input_offset = (input_range[0] + input_range[1]) / 2.0f;
-  const float kUint8Mean = 127.5f;
-  for (iree_host_size_t i = 0; i < buffer_length; ++i) {  // same type as length
-    const float centered = ((float)pixel_data[i] - kUint8Mean) / kUint8Mean;
-    out_buffer[i] = centered * input_scale + input_offset;
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
-}
-
-iree_status_t iree_tools_utils_load_pixel_data_impl(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
-  int img_dims[3];
-  if (stbi_info(filename.data, img_dims, &(img_dims[1]), &(img_dims[2])) == 0) {
-    return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
-                            (int)filename.size, filename.data);
-  }
-  if (!(element_type == IREE_HAL_ELEMENT_TYPE_FLOAT_32 ||
-        element_type == IREE_HAL_ELEMENT_TYPE_SINT_8 ||
-        element_type == IREE_HAL_ELEMENT_TYPE_UINT_8)) {
-    char element_type_str[16];
-    IREE_RETURN_IF_ERROR(iree_hal_format_element_type(
-        element_type, sizeof(element_type_str), element_type_str, NULL));
-    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                            "element type %s not supported", element_type_str);
-  }
-  switch (shape_rank) {
-    case 2: {  // Assume tensor <height x width>
-      if (img_dims[2] != 1 || (shape[0] != img_dims[1]) ||
-          (shape[1] != img_dims[0])) {
-        return iree_make_status(
-            IREE_STATUS_INVALID_ARGUMENT,
-            "image size: %dx%dx%d, expected: %" PRIdim "x%" PRIdim, img_dims[0],
-            img_dims[1], img_dims[2], shape[1], shape[0]);
-      }
-      break;
-    }
-    case 3: {  // Assume tensor <height x width x channel>
-      if (shape[0] != img_dims[1] || shape[1] != img_dims[0] ||
-          shape[2] != img_dims[2]) {
-        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                "image size: %dx%dx%d, expected: %" PRIdim
-                                "x%" PRIdim "x%" PRIdim,
-                                img_dims[0], img_dims[1], img_dims[2], shape[1],
-                                shape[0], shape[2]);
-      }
-      break;
-    }
-    case 4: {  // Assume tensor <batch x height x width x channel>
-      if (shape[1] != img_dims[1] || shape[2] != img_dims[0] ||
-          shape[3] != img_dims[2]) {
-        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                "image size: %dx%dx%d, expected: %" PRIdim
-                                "x%" PRIdim "x%" PRIdim,
-                                img_dims[0], img_dims[1], img_dims[2], shape[2],
-                                shape[1], shape[3]);
-      }
-      break;
-    }
-    default:
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "Input buffer shape rank %" PRIhsz " not supported", shape_rank);
-  }
-  // Drop the alpha channel if present.
-  int req_ch = (img_dims[2] >= 3) ? 3 : 0;
-  *out_pixel_data = stbi_load(filename.data, img_dims, &(img_dims[1]),
-                              &(img_dims[2]), req_ch);
-  if (*out_pixel_data == NULL) {
-    return iree_make_status(IREE_STATUS_NOT_FOUND, "can't load image %.*s",
-                            (int)filename.size, filename.data);
-  }
-  *out_buffer_length =
-      img_dims[0] * img_dims[1] * (img_dims[2] > 3 ? 3 : img_dims[2]);
-  return iree_ok_status();
-}
-
-iree_status_t iree_tools_utils_load_pixel_data(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  iree_status_t result = iree_tools_utils_load_pixel_data_impl(
-      filename, shape, shape_rank, element_type, out_pixel_data,
-      out_buffer_length);
-  IREE_TRACE_ZONE_END(z0);
-  return result;
-}
-
-iree_status_t iree_tools_utils_buffer_view_from_image(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  *out_buffer_view = NULL;
-  if (element_type != IREE_HAL_ELEMENT_TYPE_SINT_8 &&
-      element_type != IREE_HAL_ELEMENT_TYPE_UINT_8) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "element type should be i8 or u8");
-  }
-
-  iree_status_t result;
-  uint8_t* pixel_data = NULL;
-  iree_host_size_t buffer_length;
-  result = iree_tools_utils_load_pixel_data(
-      filename, shape, shape_rank, element_type, &pixel_data, &buffer_length);
-  if (iree_status_is_ok(result)) {
-    iree_host_size_t element_byte =
-        iree_hal_element_dense_byte_count(element_type);
-    // SINT_8 and UINT_8 perform direct buffer wrap.
-    result = iree_hal_buffer_view_allocate_buffer(
-        allocator, shape_rank, shape, element_type,
-        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
-        (iree_hal_buffer_params_t){
-            .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
-            .access = IREE_HAL_MEMORY_ACCESS_READ,
-            .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
-                     IREE_HAL_BUFFER_USAGE_TRANSFER,
-        },
-        iree_make_const_byte_span(pixel_data, element_byte * buffer_length),
-        out_buffer_view);
-  }
-  stbi_image_free(pixel_data);
-  IREE_TRACE_ZONE_END(z0);
-  return result;
-}
-
-typedef struct iree_tools_utils_buffer_view_load_params_t {
-  const uint8_t* pixel_data;
-  iree_host_size_t pixel_data_length;
-  const float* input_range;
-  iree_host_size_t input_range_length;
-} iree_tools_utils_buffer_view_load_params_t;
-static iree_status_t iree_tools_utils_buffer_view_load_image_rescaled(
-    iree_hal_buffer_mapping_t* mapping, void* user_data) {
-  iree_tools_utils_buffer_view_load_params_t* params =
-      (iree_tools_utils_buffer_view_load_params_t*)user_data;
-  return iree_tools_utils_pixel_rescaled_to_buffer(
-      params->pixel_data, params->pixel_data_length, params->input_range,
-      params->input_range_length, (float*)mapping->contents.data);
-}
-
-iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, const float* input_range,
-    iree_host_size_t input_range_length,
-    iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  *out_buffer_view = NULL;
-  if (element_type != IREE_HAL_ELEMENT_TYPE_FLOAT_32) {
-    IREE_TRACE_ZONE_END(z0);
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "element type should be f32");
-  }
-
-  // Classic row-major image layout.
-  iree_hal_encoding_type_t encoding_type =
-      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR;
-
-  // Load pixel data from the file into a new host memory allocation (the only
-  // interface stb_image provides). A real application would want to use the
-  // generation callback to directly decode the image into the target mapped
-  // device buffer.
-  uint8_t* pixel_data = NULL;
-  iree_host_size_t buffer_length = 0;
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_tools_utils_load_pixel_data(filename, shape, shape_rank,
-                                           element_type, &pixel_data,
-                                           &buffer_length));
-
-  iree_tools_utils_buffer_view_load_params_t params = {
-      .pixel_data = pixel_data,
-      .pixel_data_length = buffer_length,
-      .input_range = input_range,
-      .input_range_length = input_range_length,
-  };
-  iree_status_t status = iree_hal_buffer_view_generate_buffer(
-      allocator, shape_rank, shape, element_type, encoding_type,
-      (iree_hal_buffer_params_t){
-          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
-                  IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-          .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE |
-                   IREE_HAL_BUFFER_USAGE_TRANSFER |
-                   IREE_HAL_BUFFER_USAGE_MAPPING,
-      },
-      iree_tools_utils_buffer_view_load_image_rescaled, &params,
-      out_buffer_view);
-
-  stbi_image_free(pixel_data);
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
--- a/cpp/vision_inference/image_util.h
+++ b/cpp/vision_inference/image_util.h
@@ -1,77 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
-#define IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
-
-#include "iree/base/api.h"
-#include "iree/hal/api.h"
-#include "iree/hal/buffer_view.h"
-
-#if __cplusplus
-extern "C" {
-#endif  // __cplusplus
-
-// Loads the image at |filename| into |out_pixel_data| and sets
-// |out_buffer_length| to its length.
-//
-// The image dimensions must match the width, height, and channel in |shape|,
-// while 2 <= |shape_rank| <= 4 to match the image tensor format.
-//
-// The file must be in a format supported by stb_image.h.
-// The returned |out_pixel_data| buffer must be released by the caller.
-iree_status_t iree_tools_utils_load_pixel_data(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    uint8_t** out_pixel_data, iree_host_size_t* out_buffer_length);
-
-// Parse the content in an image file in |filename| into a HAL buffer view
-// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
-// |shape_rank|, and |element_type|, while being allocated by |allocator|.
-//
-// The |element_type| has to be SINT_8 or UINT_8. For FLOAT_32, use
-// |iree_tools_utils_buffer_view_from_image_rescaled| instead.
-//
-// The returned |out_buffer_view| must be released by the caller.
-iree_status_t iree_tools_utils_buffer_view_from_image(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, iree_hal_buffer_view_t** out_buffer_view);
-
-// Parse the content in an image file in |filename| into a HAL buffer view
-// |out_buffer_view|. |out_buffer_view| properties are defined by |shape|,
-// |shape_rank|, and |element_type|, while being allocated by |allocator|.
-// The value in |out_buffer_view| is rescaled with |input_range|.
-//
-// The |element_type| has to be FLOAT_32. For SINT_8 or UINT_8, use
-// |iree_tools_utils_buffer_view_from_image| instead.
-//
-// The returned |out_buffer_view| must be released by the caller.
-iree_status_t iree_tools_utils_buffer_view_from_image_rescaled(
-    const iree_string_view_t filename, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_allocator_t* allocator, const float* input_range,
-    iree_host_size_t input_range_length,
-    iree_hal_buffer_view_t** out_buffer_view);
-
-// Normalize uint8_t |pixel_data| of the size |buffer_length| to float buffer
-// |out_buffer| with the range |input_range|.
-//
-// float32_x = (uint8_x - 127.5) / 127.5 * input_scale + input_offset, where
-// input_scale = abs(|input_range[1]| - |input_range[0]|) / 2
-// input_offset = (|input_range[0]| + |input_range[1]|) / 2
-//
-// |out_buffer| needs to be allocated before the call.
-iree_status_t iree_tools_utils_pixel_rescaled_to_buffer(
-    const uint8_t* pixel_data, iree_host_size_t pixel_count,
-    const float* input_range, iree_host_size_t input_range_length,
-    float* out_buffer);
-
-#if __cplusplus
-}
-#endif  // __cplusplus
-
-#endif  // IREE_SAMPLES_VISION_INFERENCE_IMAGE_UTIL_H_
--- a/cpp/vision_inference/iree-run-mnist-module.c
+++ b/cpp/vision_inference/iree-run-mnist-module.c
@@ -1,121 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// This sample uses image_util to load a hand-written image as an
-// iree_hal_buffer_view_t then passes it to the bytecode module built from
-// mnist.mlir on the CPU backend with the local-task driver.
-
-#include <float.h>
-
-#include "image_util.h"
-#include "iree/runtime/api.h"
-#include "mnist_bytecode_module_c.h"
-
-// Runs the embedded MNIST module on |image_path| and prints the top digit.
-iree_status_t Run(const iree_string_view_t image_path) {
-  iree_runtime_instance_options_t instance_options;
-  iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
-                                           &instance_options);
-  iree_runtime_instance_options_use_all_available_drivers(&instance_options);
-  iree_runtime_instance_t* instance = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_instance_create(
-      &instance_options, iree_allocator_system(), &instance));
-
-  // TODO(#5724): move device selection into the compiled modules.
-  iree_hal_device_t* device = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_instance_try_create_default_device(
-      instance, iree_make_cstring_view("local-task"), &device));
-
-  // Create one session per loaded module to hold the module state.
-  iree_runtime_session_options_t session_options;
-  iree_runtime_session_options_initialize(&session_options);
-  iree_runtime_session_t* session = NULL;
-  IREE_RETURN_IF_ERROR(iree_runtime_session_create_with_device(
-      instance, &session_options, device,
-      iree_runtime_instance_host_allocator(instance), &session));
-  iree_hal_device_release(device);
-
-  const struct iree_file_toc_t* module_file =
-      iree_samples_vision_inference_mnist_bytecode_module_create();
-
-  IREE_RETURN_IF_ERROR(iree_runtime_session_append_bytecode_module_from_memory(
-      session, iree_make_const_byte_span(module_file->data, module_file->size),
-      iree_allocator_null()));
-
-  iree_runtime_call_t call;
-  IREE_RETURN_IF_ERROR(iree_runtime_call_initialize_by_name(
-      session, iree_make_cstring_view("module.predict"), &call));
-
-  // Load the image as the model input, a tensor<1x28x28x1xf32> with pixels in
-  // [0.0, 1.0]; allocate via the session since |device| was released above.
-  iree_hal_buffer_view_t* buffer_view = NULL;
-  iree_hal_dim_t buffer_shape[] = {1, 28, 28, 1};
-  iree_hal_element_type_t hal_element_type = IREE_HAL_ELEMENT_TYPE_FLOAT_32;
-  float input_range[2] = {0.0f, 1.0f};
-  IREE_RETURN_IF_ERROR(
-      iree_tools_utils_buffer_view_from_image_rescaled(
-          image_path, buffer_shape, IREE_ARRAYSIZE(buffer_shape),
-          hal_element_type, iree_runtime_session_device_allocator(session),
-          input_range, IREE_ARRAYSIZE(input_range), &buffer_view),
-      "load image");
-  IREE_RETURN_IF_ERROR(
-      iree_runtime_call_inputs_push_back_buffer_view(&call, buffer_view));
-  iree_hal_buffer_view_release(buffer_view);
-
-  IREE_RETURN_IF_ERROR(iree_runtime_call_invoke(&call, /*flags=*/0));
-
-  // Get the result buffers from the invocation.
-  iree_hal_buffer_view_t* ret_buffer_view = NULL;
-  IREE_RETURN_IF_ERROR(
-      iree_runtime_call_outputs_pop_front_buffer_view(&call, &ret_buffer_view));
-
-  // Read back the results. The output of the mnist model is a 1x10 prediction
-  // confidence values for each digit in [0, 9].
-  float predictions[1 * 10] = {0.0f};
-  IREE_RETURN_IF_ERROR(iree_hal_device_transfer_d2h(
-      iree_runtime_session_device(session),
-      iree_hal_buffer_view_buffer(ret_buffer_view), 0, predictions,
-      sizeof(predictions), IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
-      iree_infinite_timeout()));
-  iree_hal_buffer_view_release(ret_buffer_view);
-
-  // Argmax over the scores. Seed with -FLT_MAX: FLT_MIN is a positive value.
-  float result_val = -FLT_MAX;
-  int result_idx = 0;
-  for (iree_host_size_t i = 0; i < IREE_ARRAYSIZE(predictions); ++i) {
-    if (predictions[i] > result_val) {
-      result_val = predictions[i];
-      result_idx = (int)i;
-    }
-  }
-  fprintf(stdout, "Detected number: %d\n", result_idx);
-
-  iree_runtime_call_deinitialize(&call);
-  iree_runtime_session_release(session);
-  iree_runtime_instance_release(instance);
-  return iree_ok_status();
-}
-
-int main(int argc, char** argv) {
-  if (argc > 2) {
-    fprintf(stderr, "Usage: iree-run-mnist-module <image file>\n");
-    return -1;
-  }
-  iree_string_view_t image_path;
-  if (argc == 1) {
-    image_path = iree_make_cstring_view("mnist_test.png");
-  } else {
-    image_path = iree_make_cstring_view(argv[1]);
-  }
-  iree_status_t result = Run(image_path);
-  if (!iree_status_is_ok(result)) {
-    iree_status_fprint(stderr, result);
-    iree_status_ignore(result);
-    return -1;
-  }
-  iree_status_ignore(result);
-  return 0;
-}
--- a/cpp/vision_inference/mnist_test.png
+++ b/cpp/vision_inference/mnist_test.png
--- a/cpp/vulkan_gui/CMakeLists.txt
+++ b/cpp/vulkan_gui/CMakeLists.txt
@@ -1,116 +0,0 @@
-# Copyright 2022 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-if(NOT IREE_TARGET_BACKEND_VULKAN_SPIRV OR
-   NOT IREE_HAL_DRIVER_VULKAN)
-  message(STATUS "Missing Vulkan backend and/or driver, skipping vulkan_gui sample")
-  return()
-endif()
-
-# This target statically links against Vulkan.
-# One way to achieve this is by installing the Vulkan SDK from
-# https://vulkan.lunarg.com/.
-include(FindVulkan)
-if(NOT Vulkan_FOUND)
-  message(STATUS "Could not find Vulkan, skipping vulkan_gui sample")
-  return()
-endif()
-
-# vcpkg install sdl2[vulkan]
-#   tested with versions 2.0.14#4 - 2.0.22#1
-find_package(SDL2)
-if(NOT SDL2_FOUND)
-  message(STATUS "Could not find SDL2, skipping vulkan_gui sample")
-  return()
-endif()
-
-FetchContent_Declare(
-  imgui
-  GIT_REPOSITORY https://github.com/ocornut/imgui
-  GIT_TAG        v1.89.1  # pinned: imgui_impl_sdl.cpp is gone from master
-)
-
-FetchContent_MakeAvailable(imgui)
-
-# Dear ImGui
-set(IMGUI_DIR ${CMAKE_BINARY_DIR}/_deps/imgui-src)
-message("Looking for Imgui in ${IMGUI_DIR}")
-include_directories(${IMGUI_DIR} ${IMGUI_DIR}/backends ..)
-
-
-function(iree_vulkan_sample)
-
-  cmake_parse_arguments(
-    _RULE
-    ""
-    "NAME"
-    "SRCS"
-    ${ARGN}
-  )
-
-
-  # Define the sample executable.
-  set(_NAME "${_RULE_NAME}")
-  set(SRCS "${_RULE_SRCS}")
-  add_executable(${_NAME} "")
-  target_sources(${_NAME}
-    PRIVATE
-      ${SRCS}
-      "${IMGUI_DIR}/backends/imgui_impl_sdl.cpp"
-      "${IMGUI_DIR}/backends/imgui_impl_vulkan.cpp"
-      "${IMGUI_DIR}/imgui.cpp"
-      "${IMGUI_DIR}/imgui_draw.cpp"
-      "${IMGUI_DIR}/imgui_demo.cpp"
-      "${IMGUI_DIR}/imgui_tables.cpp"
-      "${IMGUI_DIR}/imgui_widgets.cpp"
-  )
-  set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "${_NAME}")
-  target_include_directories(${_NAME} PUBLIC
-      $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
-  )
-  target_link_libraries(${_NAME}
-    SDL2::SDL2
-    Vulkan::Vulkan
-    iree_runtime_runtime
-    iree_base_internal_main
-    iree_hal_drivers_vulkan_registration_registration
-    iree_modules_hal_hal
-    iree_vm_vm
-    iree_vm_bytecode_module
-    iree_vm_cc
-    iree_tooling_vm_util_cc
-    iree_tooling_context_util
-  )
-
-  if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
-    set(_GUI_LINKOPTS "-SUBSYSTEM:CONSOLE")
-  else()
-    set(_GUI_LINKOPTS "")
-  endif()
-
-  target_link_options(${_NAME}
-    PRIVATE
-      ${_GUI_LINKOPTS}
-  )
-endfunction()
-
-iree_vulkan_sample(
-    NAME
-      iree-samples-resnet-vulkan-gui
-
-    SRCS
-      vulkan_resnet_inference_gui.cc
-)
-
-iree_vulkan_sample(
-    NAME
-      iree-vulkan-gui
-
-    SRCS
-      vulkan_inference_gui.cc
-)
-
-message(STATUS "Configured vulkan_gui sample successfully")
--- a/cpp/vulkan_gui/simple_mul.mlir
+++ b/cpp/vulkan_gui/simple_mul.mlir
@@ -1,4 +0,0 @@
-// Multiplies two 4-element f32 tensors element by element.
-func.func @simple_mul(%lhs: tensor<4xf32>, %rhs: tensor<4xf32>) -> tensor<4xf32> {
-  %product = arith.mulf %lhs, %rhs : tensor<4xf32>
-  return %product : tensor<4xf32>
-}
--- a/cpp/vulkan_gui/snail_imagenet.jpg
+++ b/cpp/vulkan_gui/snail_imagenet.jpg
--- a/cpp/vulkan_gui/stb_image.h
+++ b/cpp/vulkan_gui/stb_image.h
--- a/cpp/vulkan_gui/vulkan_inference_gui.cc
+++ b/cpp/vulkan_gui/vulkan_inference_gui.cc
@@ -1,957 +0,0 @@
-// Copyright 2019 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// Vulkan Graphics + IREE API Integration Sample.
-
-#include <SDL.h>
-#include <SDL_vulkan.h>
-#include <imgui.h>
-#include <imgui_impl_sdl.h>
-#include <imgui_impl_vulkan.h>
-#include <vulkan/vulkan.h>
-
-
-#include <cstring>
-#include <set>
-#include <vector>
-#include <fstream>
-#include <array>
-#include <cstdio>
-#include <cstdlib>
-#include <iterator>
-#include <string>
-#include <utility>
-
-#include "iree/hal/drivers/vulkan/api.h"
-
-// IREE's C API:
-#include "iree/base/api.h"
-#include "iree/hal/api.h"
-#include "iree/hal/drivers/vulkan/registration/driver_module.h"
-#include "iree/modules/hal/module.h"
-#include "iree/vm/api.h"
-#include "iree/vm/bytecode_module.h"
-#include "iree/vm/ref_cc.h"
-
-// iree-run-module
-#include "iree/base/internal/flags.h"
-#include "iree/base/status_cc.h"
-#include "iree/base/tracing.h"
-#include "iree/modules/hal/types.h"
-#include "iree/tooling/comparison.h"
-#include "iree/tooling/context_util.h"
-#include "iree/tooling/vm_util_cc.h"
-
-// Other dependencies (helpers, etc.)
-#include "iree/base/internal/main.h"
-
-#define IMGUI_UNLIMITED_FRAME_RATE
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
-
-// Declares the string flag `entry_function`; the third macro argument ("")
-// is presumably its default value.
-// NOTE(review): the visible part of iree_main resolves the hard-coded name
-// "module.forward"; confirm whether this flag is still consulted anywhere.
-IREE_FLAG(string, entry_function, "",
-          "Name of a function contained in the module specified by module_file "
-          "to run.");
-
-// TODO(benvanik): move --function_input= flag into a util.
-// Flag-parse callback: appends |value| as a new string to the
-// std::vector<std::string> that |storage| points to and returns OK.
-// |flag_name| is not used.
-static iree_status_t parse_function_io(iree_string_view_t flag_name,
-                                       void* storage,
-                                       iree_string_view_t value) {
-  auto& inputs = *static_cast<std::vector<std::string>*>(storage);
-  inputs.emplace_back(value.data, value.size);
-  return iree_ok_status();
-}
-// Flag-print callback: writes one `--<flag>="<value>"` line to |file| for each
-// string stored in the std::vector<std::string> behind |storage|, or a single
-// commented-out `# --<flag>=` line when the list is empty.
-static void print_function_io(iree_string_view_t flag_name, void* storage,
-                              FILE* file) {
-  const auto& inputs = *static_cast<std::vector<std::string>*>(storage);
-  const int name_length = (int)flag_name.size;
-  if (inputs.empty()) {
-    fprintf(file, "# --%.*s=\n", name_length, flag_name.data);
-    return;
-  }
-  for (const std::string& input : inputs) {
-    fprintf(file, "--%.*s=\"%s\"\n", name_length, flag_name.data,
-            input.c_str());
-  }
-}
-// Backing storage for the function_input flag: parse_function_io appends one
-// entry per parsed value and print_function_io dumps the list. iree_main hands
-// the collected strings to ParseToVariantList.
-static std::vector<std::string> FLAG_function_inputs;
-// Registers `function_input` as a callback flag using the parse/print helpers
-// above with FLAG_function_inputs as their storage.
-IREE_FLAG_CALLBACK(
-    parse_function_io, print_function_io, &FLAG_function_inputs, function_input,
-    "An input (a) value or (b) buffer of the format:\n"
-    "  (a) scalar value\n"
-    "     value\n"
-    "     e.g.: --function_input=\"3.14\"\n"
-    "  (b) buffer:\n"
-    "     [shape]xtype=[value]\n"
-    "     e.g.: --function_input=\"2x2xi32=1 2 3 4\"\n"
-    "Optionally, brackets may be used to separate the element values:\n"
-    "  2x2xi32=[[1 2][3 4]]\n"
-    "Raw binary files can be read to provide buffer contents:\n"
-    "  2x2xi32=@some/file.bin\n"
-    "numpy npy files (from numpy.save) can be read to provide 1+ values:\n"
-    "  @some.npy\n"
-    "Each occurrence of the flag indicates an input in the order they were\n"
-    "specified on the command line.");
-
-// Describes one file held in memory: its name, a pointer to its bytes and
-// its length.
-// NOTE(review): in the visible code this type is only referenced from
-// commented-out module-loading code in iree_main.
-typedef struct iree_file_toc_t {
-  const char* name;             // the file's original name
-  char* data;             // beginning of the file
-  size_t size;                  // length of the file
-} iree_file_toc_t;
-
-// Reads the entire contents of |filename| into a newly malloc()'d buffer.
-//
-// On success returns true, stores the buffer in |*pOut| (the caller must
-// free() it) and its length in bytes in |*pSize|. On any failure returns false
-// with |*pOut| == NULL and |*pSize| == 0, and nothing is leaked. An empty file
-// is reported as a failure, matching the previous behavior.
-bool load_file(const char* filename, char** pOut, size_t* pSize)
-{
-    *pOut = NULL;
-    *pSize = 0;
-
-    FILE* f = fopen(filename, "rb");
-    if (f == NULL)
-    {
-        fprintf(stderr, "Can't open %s\n", filename);
-        return false;
-    }
-
-    // ftell() reports errors as -1L; keep the value signed until it has been
-    // validated so it can never become a huge size_t allocation request.
-    long length = -1;
-    if (fseek(f, 0L, SEEK_END) == 0)
-    {
-        length = ftell(f);
-    }
-    if (length <= 0 || fseek(f, 0L, SEEK_SET) != 0)
-    {
-        fclose(f);
-        return false;
-    }
-
-    char* buffer = (char*)malloc((size_t)length);
-    if (buffer == NULL)
-    {
-        fclose(f);
-        return false;
-    }
-
-    // Read the file as a single item so that a short read yields 0 items.
-    size_t items_read = fread(buffer, (size_t)length, 1, f);
-    fclose(f);
-    if (items_read != 1)
-    {
-        free(buffer);
-        return false;
-    }
-
-    *pOut = buffer;
-    *pSize = (size_t)length;
-    return true;
-}
-
-// Vulkan objects shared by setup, rendering and cleanup. They are filled in by
-// SetupVulkan (called from iree_main) and released by CleanupVulkan.
-static VkAllocationCallbacks* g_Allocator = NULL;
-static VkInstance g_Instance = VK_NULL_HANDLE;
-static VkPhysicalDevice g_PhysicalDevice = VK_NULL_HANDLE;
-static VkDevice g_Device = VK_NULL_HANDLE;
-// (uint32_t)-1 marks "no queue family selected yet"; SetupVulkan asserts that
-// the value changed after its search.
-static uint32_t g_QueueFamily = (uint32_t)-1;
-static VkQueue g_Queue = VK_NULL_HANDLE;
-// NOTE(review): never assigned in the visible code, so ImGui receives
-// VK_NULL_HANDLE as its pipeline cache - confirm this is intended.
-static VkPipelineCache g_PipelineCache = VK_NULL_HANDLE;
-static VkDescriptorPool g_DescriptorPool = VK_NULL_HANDLE;
-
-// ImGui helper window state plus swapchain bookkeeping.
-static ImGui_ImplVulkanH_Window g_MainWindowData;
-// SetupVulkanWindow asserts that the image count passed to it is >= 2.
-static uint32_t g_MinImageCount = 2;
-static bool g_SwapChainRebuild = false;
-static int g_SwapChainResizeWidth = 0;
-static int g_SwapChainResizeHeight = 0;
-
-// Aborts the process after printing the numeric code when |err| is anything
-// other than VK_SUCCESS (0); returns silently on success.
-static void check_vk_result(VkResult err) {
-  if (err != 0) {
-    fprintf(stderr, "VkResult: %d\n", err);
-    abort();
-  }
-}
-
-// Returns the names of the Vulkan layers used for the given IREE
-// |extensibility_set| and |features|.
-//
-// Uses the usual two-pass query: the first call only asks for the number of
-// entries, the second fills a vector of exactly that size.
-std::vector<const char*> GetIreeLayers(
-    iree_hal_vulkan_extensibility_set_t extensibility_set,
-    iree_hal_vulkan_features_t features) {
-  // Zero-initialized so a failed count query yields an empty list instead of
-  // an indeterminate vector size.
-  iree_host_size_t required_count = 0;
-  // The count-only query may report a capacity error by design; release the
-  // status explicitly instead of silently dropping it.
-  iree_status_ignore(iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, /*string_capacity=*/0, &required_count,
-      /*out_string_values=*/NULL));
-  std::vector<const char*> layers(required_count);
-  // With sufficient capacity the query is expected to succeed; abort like the
-  // rest of this sample does if it does not.
-  IREE_CHECK_OK(iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, layers.size(), &required_count,
-      layers.data()));
-  return layers;
-}
-
-// Returns the names of the Vulkan extensions used for the given IREE
-// |extensibility_set| and |features|.
-//
-// Uses the usual two-pass query: the first call only asks for the number of
-// entries, the second fills a vector of exactly that size.
-std::vector<const char*> GetIreeExtensions(
-    iree_hal_vulkan_extensibility_set_t extensibility_set,
-    iree_hal_vulkan_features_t features) {
-  // Zero-initialized so a failed count query yields an empty list instead of
-  // an indeterminate vector size.
-  iree_host_size_t required_count = 0;
-  // The count-only query may report a capacity error by design; release the
-  // status explicitly instead of silently dropping it.
-  iree_status_ignore(iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, /*string_capacity=*/0, &required_count,
-      /*out_string_values=*/NULL));
-  std::vector<const char*> extensions(required_count);
-  // With sufficient capacity the query is expected to succeed; abort like the
-  // rest of this sample does if it does not.
-  IREE_CHECK_OK(iree_hal_vulkan_query_extensibility_set(
-      features, extensibility_set, extensions.size(), &required_count,
-      extensions.data()));
-  return extensions;
-}
-
-// Returns the device extensions to enable on |physical_device| for the given
-// IREE |vulkan_features|: VK_KHR_swapchain, every extension IREE requires, and
-// each IREE-optional extension that the device actually reports.
-//
-// Names are de-duplicated by string content, so the result is ordered by name
-// and never contains the same extension twice.
-std::vector<const char*> GetDeviceExtensions(
-    VkPhysicalDevice physical_device,
-    iree_hal_vulkan_features_t vulkan_features) {
-  std::vector<const char*> iree_required_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED,
-      vulkan_features);
-  std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
-      vulkan_features);
-
-  uint32_t extension_count = 0;
-  check_vk_result(vkEnumerateDeviceExtensionProperties(
-      physical_device, nullptr, &extension_count, nullptr));
-  std::vector<VkExtensionProperties> extension_properties(extension_count);
-  check_vk_result(vkEnumerateDeviceExtensionProperties(
-      physical_device, nullptr, &extension_count, extension_properties.data()));
-
-  // Order and compare by string content. A plain std::set<const char*>
-  // compares pointer values, so the same name coming from two different
-  // string literals would be kept twice.
-  struct CStringLess {
-    bool operator()(const char* lhs, const char* rhs) const {
-      return std::strcmp(lhs, rhs) < 0;
-    }
-  };
-
-  // Merge extensions lists, including optional and required for simplicity.
-  std::set<const char*, CStringLess> ext_set;
-  ext_set.insert("VK_KHR_swapchain");
-  ext_set.insert(iree_required_extensions.begin(),
-                 iree_required_extensions.end());
-  for (const char* optional_extension : iree_optional_extensions) {
-    // Only request optional extensions that the device supports.
-    for (const VkExtensionProperties& available : extension_properties) {
-      if (std::strcmp(optional_extension, available.extensionName) == 0) {
-        ext_set.insert(optional_extension);
-        break;
-      }
-    }
-  }
-  std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
-  return extensions;
-}
-
-// Returns the instance layers to enable for |vulkan_features|: every layer
-// IREE requires (aborting if one is not installed) followed by each optional
-// layer that is available.
-std::vector<const char*> GetInstanceLayers(
-    iree_hal_vulkan_features_t vulkan_features) {
-  // Layers IREE asks for.
-  std::vector<const char*> required_layers = GetIreeLayers(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_REQUIRED, vulkan_features);
-  std::vector<const char*> optional_layers = GetIreeLayers(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_LAYERS_OPTIONAL, vulkan_features);
-
-  // Layers the Vulkan implementation reports as available.
-  uint32_t layer_property_count = 0;
-  check_vk_result(
-      vkEnumerateInstanceLayerProperties(&layer_property_count, NULL));
-  std::vector<VkLayerProperties> layer_properties(layer_property_count);
-  check_vk_result(vkEnumerateInstanceLayerProperties(&layer_property_count,
-                                                     layer_properties.data()));
-
-  // True when a layer called |layer_name| is among the available ones.
-  const auto is_available = [&layer_properties](const char* layer_name) {
-    for (const VkLayerProperties& candidate : layer_properties) {
-      if (std::strcmp(layer_name, candidate.layerName) == 0) return true;
-    }
-    return false;
-  };
-
-  std::vector<const char*> layers;
-  // Required layers must all be present.
-  for (const char* layer_name : required_layers) {
-    if (!is_available(layer_name)) {
-      fprintf(stderr, "Required layer %s not available\n", layer_name);
-      abort();
-    }
-    layers.push_back(layer_name);
-  }
-  // Optional layers are added only when present.
-  for (const char* layer_name : optional_layers) {
-    if (is_available(layer_name)) layers.push_back(layer_name);
-  }
-
-  return layers;
-}
-
-// Returns the instance extensions to enable for |window| and
-// |vulkan_features|: everything SDL needs to present to the window, every
-// extension IREE requires, and each IREE-optional extension that the Vulkan
-// implementation reports as available.
-//
-// Names are de-duplicated by string content, so the result is ordered by name
-// and never contains the same extension twice. Aborts if SDL cannot report
-// its extension list.
-std::vector<const char*> GetInstanceExtensions(
-    SDL_Window* window, iree_hal_vulkan_features_t vulkan_features) {
-  // Ask SDL for its list of required instance extensions.
-  uint32_t sdl_extensions_count = 0;
-  if (!SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count, NULL)) {
-    fprintf(stderr, "SDL_Vulkan_GetInstanceExtensions failed: %s\n",
-            SDL_GetError());
-    abort();
-  }
-  std::vector<const char*> sdl_extensions(sdl_extensions_count);
-  if (!SDL_Vulkan_GetInstanceExtensions(window, &sdl_extensions_count,
-                                        sdl_extensions.data())) {
-    fprintf(stderr, "SDL_Vulkan_GetInstanceExtensions failed: %s\n",
-            SDL_GetError());
-    abort();
-  }
-
-  std::vector<const char*> iree_required_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_REQUIRED,
-      vulkan_features);
-  std::vector<const char*> iree_optional_extensions = GetIreeExtensions(
-      IREE_HAL_VULKAN_EXTENSIBILITY_INSTANCE_EXTENSIONS_OPTIONAL,
-      vulkan_features);
-
-  // Query what the Vulkan implementation offers so that optional extensions
-  // are only requested when present; asking for a missing one would make
-  // vkCreateInstance fail.
-  uint32_t available_count = 0;
-  check_vk_result(vkEnumerateInstanceExtensionProperties(
-      nullptr, &available_count, nullptr));
-  std::vector<VkExtensionProperties> available_extensions(available_count);
-  check_vk_result(vkEnumerateInstanceExtensionProperties(
-      nullptr, &available_count, available_extensions.data()));
-
-  // Order and compare by string content. A plain std::set<const char*>
-  // compares pointer values, so the same name coming from SDL and from IREE
-  // would be kept twice.
-  struct CStringLess {
-    bool operator()(const char* lhs, const char* rhs) const {
-      return std::strcmp(lhs, rhs) < 0;
-    }
-  };
-
-  // Merge the SDL, required and (available) optional extension lists.
-  std::set<const char*, CStringLess> ext_set;
-  ext_set.insert(sdl_extensions.begin(), sdl_extensions.end());
-  ext_set.insert(iree_required_extensions.begin(),
-                 iree_required_extensions.end());
-  for (const char* optional_extension : iree_optional_extensions) {
-    for (const VkExtensionProperties& available : available_extensions) {
-      if (std::strcmp(optional_extension, available.extensionName) == 0) {
-        ext_set.insert(optional_extension);
-        break;
-      }
-    }
-  }
-  std::vector<const char*> extensions(ext_set.begin(), ext_set.end());
-  return extensions;
-}
-
-// Creates the core Vulkan objects shared by the GUI and IREE.
-//
-// Inputs:
-//   vulkan_features: IREE feature set used to pick device extensions.
-//   instance_layers / instance_extensions (+ counts): enabled on the instance.
-//   allocator: Vulkan allocation callbacks forwarded to every create call.
-// Outputs (all written on return):
-//   instance, physical_device (the first GPU reported), queue_family_index (a
-//   family supporting both graphics and compute), device, queue (queue 0 of
-//   that family) and descriptor_pool.
-//
-// Any Vulkan error, a machine without GPUs, or the absence of a combined
-// graphics+compute queue family aborts the process.
-void SetupVulkan(iree_hal_vulkan_features_t vulkan_features,
-                 const char** instance_layers, uint32_t instance_layers_count,
-                 const char** instance_extensions,
-                 uint32_t instance_extensions_count,
-                 const VkAllocationCallbacks* allocator, VkInstance* instance,
-                 uint32_t* queue_family_index,
-                 VkPhysicalDevice* physical_device, VkQueue* queue,
-                 VkDevice* device, VkDescriptorPool* descriptor_pool) {
-  VkResult err;
-
-  // Create Vulkan Instance
-  {
-    VkInstanceCreateInfo create_info = {};
-    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    create_info.enabledLayerCount = instance_layers_count;
-    create_info.ppEnabledLayerNames = instance_layers;
-    create_info.enabledExtensionCount = instance_extensions_count;
-    create_info.ppEnabledExtensionNames = instance_extensions;
-    err = vkCreateInstance(&create_info, allocator, instance);
-    check_vk_result(err);
-  }
-
-  // Select GPU
-  {
-    uint32_t gpu_count = 0;
-    err = vkEnumeratePhysicalDevices(*instance, &gpu_count, NULL);
-    check_vk_result(err);
-    // Checked at runtime (not only via assert) because gpus[0] is read below.
-    if (gpu_count == 0) {
-      fprintf(stderr, "No Vulkan physical devices found\n");
-      abort();
-    }
-
-    std::vector<VkPhysicalDevice> gpus(gpu_count);
-    err = vkEnumeratePhysicalDevices(*instance, &gpu_count, gpus.data());
-    check_vk_result(err);
-
-    // Use the first reported GPU for simplicity.
-    *physical_device = gpus[0];
-
-    VkPhysicalDeviceProperties properties;
-    vkGetPhysicalDeviceProperties(*physical_device, &properties);
-    fprintf(stdout, "Selected Vulkan device: '%s'\n", properties.deviceName);
-  }
-
-  // Select queue family. We want a single queue with graphics and compute for
-  // simplicity, but we could also discover and use separate queues for each.
-  {
-    uint32_t count = 0;
-    vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count, NULL);
-    std::vector<VkQueueFamilyProperties> queues(count);
-    vkGetPhysicalDeviceQueueFamilyProperties(*physical_device, &count,
-                                             queues.data());
-    // Both bits must be set: masking alone would also accept a compute-only or
-    // graphics-only family, which cannot serve rendering and IREE together.
-    const VkQueueFlags required_flags =
-        VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
-    bool found = false;
-    for (uint32_t i = 0; i < count; i++) {
-      if ((queues[i].queueFlags & required_flags) == required_flags) {
-        *queue_family_index = i;
-        found = true;
-        break;
-      }
-    }
-    if (!found) {
-      fprintf(stderr, "No queue family with graphics and compute support\n");
-      abort();
-    }
-  }
-
-  // Create Logical Device (with 1 queue)
-  {
-    std::vector<const char*> device_extensions =
-        GetDeviceExtensions(*physical_device, vulkan_features);
-    const float queue_priority[] = {1.0f};
-    VkDeviceQueueCreateInfo queue_info = {};
-    queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
-    queue_info.queueFamilyIndex = *queue_family_index;
-    queue_info.queueCount = 1;
-    queue_info.pQueuePriorities = queue_priority;
-    VkDeviceCreateInfo create_info = {};
-    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    create_info.queueCreateInfoCount = 1;
-    create_info.pQueueCreateInfos = &queue_info;
-    create_info.enabledExtensionCount =
-        static_cast<uint32_t>(device_extensions.size());
-    create_info.ppEnabledExtensionNames = device_extensions.data();
-
-    // Enable timeline semaphores.
-    // pNext chain: create_info -> features2 -> semaphore_features.
-    VkPhysicalDeviceFeatures2 features2;
-    memset(&features2, 0, sizeof(features2));
-    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
-    create_info.pNext = &features2;
-    VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
-    memset(&semaphore_features, 0, sizeof(semaphore_features));
-    semaphore_features.sType =
-        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES;
-    semaphore_features.pNext = features2.pNext;
-    features2.pNext = &semaphore_features;
-    semaphore_features.timelineSemaphore = VK_TRUE;
-
-    err = vkCreateDevice(*physical_device, &create_info, allocator, device);
-    check_vk_result(err);
-    vkGetDeviceQueue(*device, *queue_family_index, 0, queue);
-  }
-
-  // Create Descriptor Pool
-  {
-    VkDescriptorPoolSize pool_sizes[] = {
-        {VK_DESCRIPTOR_TYPE_SAMPLER, 1000},
-        {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1000},
-        {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1000},
-        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1000},
-        {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 1000},
-        {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1000}};
-    VkDescriptorPoolCreateInfo pool_info = {};
-    pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
-    pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
-    pool_info.maxSets = 1000 * IREE_ARRAYSIZE(pool_sizes);
-    pool_info.poolSizeCount = (uint32_t)IREE_ARRAYSIZE(pool_sizes);
-    pool_info.pPoolSizes = pool_sizes;
-    err =
-        vkCreateDescriptorPool(*device, &pool_info, allocator, descriptor_pool);
-    check_vk_result(err);
-  }
-}
-
-// Prepares |wd| for rendering to |surface|: verifies that the queue family can
-// present to it, picks a surface format and present mode, creates the
-// swapchain and related objects through the ImGui helper, and sets the clear
-// color.
-//
-// Exits the process if presentation is not supported; aborts if the support
-// query itself fails. |min_image_count| must be at least 2.
-void SetupVulkanWindow(ImGui_ImplVulkanH_Window* wd,
-                       const VkAllocationCallbacks* allocator,
-                       VkInstance instance, uint32_t queue_family_index,
-                       VkPhysicalDevice physical_device, VkDevice device,
-                       VkSurfaceKHR surface, int width, int height,
-                       uint32_t min_image_count) {
-  wd->Surface = surface;
-
-  // Check for WSI support
-  // |res| starts as VK_FALSE and the query result is checked, so a failed
-  // query can never be mistaken for "supported".
-  VkBool32 res = VK_FALSE;
-  check_vk_result(vkGetPhysicalDeviceSurfaceSupportKHR(
-      physical_device, queue_family_index, wd->Surface, &res));
-  if (res != VK_TRUE) {
-    fprintf(stderr, "Error no WSI support on physical device 0\n");
-    exit(-1);
-  }
-
-  // Select Surface Format
-  const VkFormat requestSurfaceImageFormat[] = {
-      VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM,
-      VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM};
-  const VkColorSpaceKHR requestSurfaceColorSpace =
-      VK_COLORSPACE_SRGB_NONLINEAR_KHR;
-  wd->SurfaceFormat = ImGui_ImplVulkanH_SelectSurfaceFormat(
-      physical_device, wd->Surface, requestSurfaceImageFormat,
-      (size_t)IREE_ARRAYSIZE(requestSurfaceImageFormat),
-      requestSurfaceColorSpace);
-
-  // Select Present Mode
-#ifdef IMGUI_UNLIMITED_FRAME_RATE
-  VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_MAILBOX_KHR,
-                                      VK_PRESENT_MODE_IMMEDIATE_KHR,
-                                      VK_PRESENT_MODE_FIFO_KHR};
-#else
-  VkPresentModeKHR present_modes[] = {VK_PRESENT_MODE_FIFO_KHR};
-#endif
-  wd->PresentMode = ImGui_ImplVulkanH_SelectPresentMode(
-      physical_device, wd->Surface, &present_modes[0],
-      IREE_ARRAYSIZE(present_modes));
-
-  // Create SwapChain, RenderPass, Framebuffer, etc.
-  IM_ASSERT(min_image_count >= 2);
-  ImGui_ImplVulkanH_CreateOrResizeWindow(instance, physical_device, device, wd,
-                                         queue_family_index, allocator, width,
-                                         height, min_image_count);
-
-  // Set clear color.
-  ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
-  memcpy(&wd->ClearValue.color.float32[0], &clear_color, 4 * sizeof(float));
-}
-
-// Records and submits one frame of ImGui rendering for |wd| on |queue|.
-//
-// Sequence: acquire the next swapchain image, wait for and reset that frame's
-// fence, reset its command pool and begin its command buffer, begin the render
-// pass, record the ImGui draw data, end the pass and buffer, then submit. The
-// submit waits on the image-acquired semaphore and signals the
-// render-complete semaphore and the frame fence.
-//
-// Every VkResult goes through check_vk_result, which aborts on any non-zero
-// value.
-// NOTE(review): that includes non-fatal acquire results such as
-// VK_SUBOPTIMAL_KHR / VK_ERROR_OUT_OF_DATE_KHR (e.g. on resize) - confirm that
-// the caller rebuilds the swapchain before this can happen.
-void RenderFrame(ImGui_ImplVulkanH_Window* wd, VkDevice device, VkQueue queue) {
-  VkResult err;
-
-  // Semaphore pair for the current slot; PresentFrame advances SemaphoreIndex.
-  VkSemaphore image_acquired_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].ImageAcquiredSemaphore;
-  VkSemaphore render_complete_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
-  err = vkAcquireNextImageKHR(device, wd->Swapchain, UINT64_MAX,
-                              image_acquired_semaphore, VK_NULL_HANDLE,
-                              &wd->FrameIndex);
-  check_vk_result(err);
-
-  // Per-image frame resources selected by the index just acquired.
-  ImGui_ImplVulkanH_Frame* fd = &wd->Frames[wd->FrameIndex];
-  {
-    err = vkWaitForFences(
-        device, 1, &fd->Fence, VK_TRUE,
-        UINT64_MAX);  // wait indefinitely instead of periodically checking
-    check_vk_result(err);
-
-    err = vkResetFences(device, 1, &fd->Fence);
-    check_vk_result(err);
-  }
-  {
-    // Begin a fresh one-time-submit command buffer for this frame.
-    err = vkResetCommandPool(device, fd->CommandPool, 0);
-    check_vk_result(err);
-    VkCommandBufferBeginInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    err = vkBeginCommandBuffer(fd->CommandBuffer, &info);
-    check_vk_result(err);
-  }
-  {
-    // Render pass over the whole window, cleared with wd->ClearValue.
-    VkRenderPassBeginInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
-    info.renderPass = wd->RenderPass;
-    info.framebuffer = fd->Framebuffer;
-    info.renderArea.extent.width = wd->Width;
-    info.renderArea.extent.height = wd->Height;
-    info.clearValueCount = 1;
-    info.pClearValues = &wd->ClearValue;
-    vkCmdBeginRenderPass(fd->CommandBuffer, &info, VK_SUBPASS_CONTENTS_INLINE);
-  }
-
-  // Record Imgui Draw Data and draw funcs into command buffer
-  ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), fd->CommandBuffer);
-
-  // Submit command buffer
-  vkCmdEndRenderPass(fd->CommandBuffer);
-  {
-    VkPipelineStageFlags wait_stage =
-        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-    VkSubmitInfo info = {};
-    info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    info.waitSemaphoreCount = 1;
-    info.pWaitSemaphores = &image_acquired_semaphore;
-    info.pWaitDstStageMask = &wait_stage;
-    info.commandBufferCount = 1;
-    info.pCommandBuffers = &fd->CommandBuffer;
-    info.signalSemaphoreCount = 1;
-    info.pSignalSemaphores = &render_complete_semaphore;
-
-    err = vkEndCommandBuffer(fd->CommandBuffer);
-    check_vk_result(err);
-    // The fence is signaled when this submission finishes; the wait at the
-    // top of this function uses it the next time the frame is reused.
-    err = vkQueueSubmit(queue, 1, &info, fd->Fence);
-    check_vk_result(err);
-  }
-}
-
-// Presents the image at wd->FrameIndex on |queue|, waiting on the current
-// slot's render-complete semaphore, then advances wd->SemaphoreIndex modulo
-// wd->ImageCount.
-//
-// The present result goes through check_vk_result, which aborts on any
-// non-zero value.
-// NOTE(review): that includes VK_SUBOPTIMAL_KHR / VK_ERROR_OUT_OF_DATE_KHR -
-// confirm the caller handles resizes before presenting.
-void PresentFrame(ImGui_ImplVulkanH_Window* wd, VkQueue queue) {
-  VkSemaphore render_complete_semaphore =
-      wd->FrameSemaphores[wd->SemaphoreIndex].RenderCompleteSemaphore;
-  VkPresentInfoKHR info = {};
-  info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
-  info.waitSemaphoreCount = 1;
-  info.pWaitSemaphores = &render_complete_semaphore;
-  info.swapchainCount = 1;
-  info.pSwapchains = &wd->Swapchain;
-  info.pImageIndices = &wd->FrameIndex;
-  VkResult err = vkQueuePresentKHR(queue, &info);
-  check_vk_result(err);
-  wd->SemaphoreIndex =
-      (wd->SemaphoreIndex + 1) %
-      wd->ImageCount;  // Now we can use the next set of semaphores
-}
-
-// Destroys the global descriptor pool, device and instance, in that order,
-// using g_Allocator for each call.
-static void CleanupVulkan() {
-  vkDestroyDescriptorPool(g_Device, g_DescriptorPool, g_Allocator);
-
-  vkDestroyDevice(g_Device, g_Allocator);
-  vkDestroyInstance(g_Instance, g_Allocator);
-}
-
-// Hands g_MainWindowData to the ImGui helper for destruction, together with
-// the global instance, device and allocator.
-static void CleanupVulkanWindow() {
-  ImGui_ImplVulkanH_DestroyWindow(g_Instance, g_Device, &g_MainWindowData,
-                                  g_Allocator);
-}
-
-namespace iree {
-
-extern "C" int iree_main(int argc, char** argv) {
-
-  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
-  if (argc > 1) {
-    // Avoid iree-run-module spinning endlessly on stdin if the user uses single
-    // dashes for flags.
-    printf(
-        "[ERROR] unexpected positional argument (expected none)."
-        " Did you use pass a flag with a single dash ('-')?"
-        " Use '--' instead.\n");
-    return 1;
-  }
-
-  // --------------------------------------------------------------------------
-  // Create a window.
-  if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER) != 0) {
-    fprintf(stderr, "Failed to initialize SDL\n");
-    abort();
-    return 1;
-  }
-
-  // Setup window
-  // clang-format off
-  SDL_WindowFlags window_flags = (SDL_WindowFlags)(
-      SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
-  // clang-format on
-  SDL_Window* window = SDL_CreateWindow(
-      "IREE Samples - Vulkan Inference GUI", SDL_WINDOWPOS_CENTERED,
-      SDL_WINDOWPOS_CENTERED, 1280, 720, window_flags);
-  if (window == nullptr)
-  {
-    const char* sdl_err = SDL_GetError();
-    fprintf(stderr, "Error, SDL_CreateWindow returned: %s\n", sdl_err);
-    abort();
-    return 1;
-  }
-
-  // Setup Vulkan
-  iree_hal_vulkan_features_t iree_vulkan_features =
-      static_cast<iree_hal_vulkan_features_t>(
-          IREE_HAL_VULKAN_FEATURE_ENABLE_VALIDATION_LAYERS |
-          IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
-  std::vector<const char*> layers = GetInstanceLayers(iree_vulkan_features);
-  std::vector<const char*> extensions =
-      GetInstanceExtensions(window, iree_vulkan_features);
-  SetupVulkan(iree_vulkan_features, layers.data(),
-              static_cast<uint32_t>(layers.size()), extensions.data(),
-              static_cast<uint32_t>(extensions.size()), g_Allocator,
-              &g_Instance, &g_QueueFamily, &g_PhysicalDevice, &g_Queue,
-              &g_Device, &g_DescriptorPool);
-
-  // Create Window Surface
-  VkSurfaceKHR surface;
-  VkResult err;
-  if (SDL_Vulkan_CreateSurface(window, g_Instance, &surface) == 0) {
-    fprintf(stderr, "Failed to create Vulkan surface.\n");
-    abort();
-    return 1;
-  }
-
-  // Create Framebuffers
-  int w, h;
-  SDL_GetWindowSize(window, &w, &h);
-  ImGui_ImplVulkanH_Window* wd = &g_MainWindowData;
-  SetupVulkanWindow(wd, g_Allocator, g_Instance, g_QueueFamily,
-                    g_PhysicalDevice, g_Device, surface, w, h, g_MinImageCount);
-
-  // Setup Dear ImGui context
-  IMGUI_CHECKVERSION();
-  ImGui::CreateContext();
-  ImGuiIO& io = ImGui::GetIO();
-  (void)io;
-
-  ImGui::StyleColorsDark();
-
-  // Setup Platform/Renderer bindings
-  ImGui_ImplSDL2_InitForVulkan(window);
-  ImGui_ImplVulkan_InitInfo init_info = {};
-  init_info.Instance = g_Instance;
-  init_info.PhysicalDevice = g_PhysicalDevice;
-  init_info.Device = g_Device;
-  init_info.QueueFamily = g_QueueFamily;
-  init_info.Queue = g_Queue;
-  init_info.PipelineCache = g_PipelineCache;
-  init_info.DescriptorPool = g_DescriptorPool;
-  init_info.Allocator = g_Allocator;
-  init_info.MinImageCount = g_MinImageCount;
-  init_info.ImageCount = wd->ImageCount;
-  init_info.CheckVkResultFn = check_vk_result;
-  ImGui_ImplVulkan_Init(&init_info, wd->RenderPass);
-
-  // Upload Fonts
-  {
-    // Use any command queue
-    VkCommandPool command_pool = wd->Frames[wd->FrameIndex].CommandPool;
-    VkCommandBuffer command_buffer = wd->Frames[wd->FrameIndex].CommandBuffer;
-
-    err = vkResetCommandPool(g_Device, command_pool, 0);
-    check_vk_result(err);
-    VkCommandBufferBeginInfo begin_info = {};
-    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-    begin_info.flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-    err = vkBeginCommandBuffer(command_buffer, &begin_info);
-    check_vk_result(err);
-
-    ImGui_ImplVulkan_CreateFontsTexture(command_buffer);
-
-    VkSubmitInfo end_info = {};
-    end_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-    end_info.commandBufferCount = 1;
-    end_info.pCommandBuffers = &command_buffer;
-    err = vkEndCommandBuffer(command_buffer);
-    check_vk_result(err);
-    err = vkQueueSubmit(g_Queue, 1, &end_info, VK_NULL_HANDLE);
-    check_vk_result(err);
-
-    err = vkDeviceWaitIdle(g_Device);
-    check_vk_result(err);
-    ImGui_ImplVulkan_DestroyFontUploadObjects();
-  }
-
-  // Demo state.
-  bool show_iree_window = true;
-  // --------------------------------------------------------------------------
-  // Setup IREE.
-
-  // Check API version.
-  iree_api_version_t actual_version;
-  iree_status_t status =
-      iree_api_version_check(IREE_API_VERSION_LATEST, &actual_version);
-  if (iree_status_is_ok(status)) {
-    fprintf(stdout, "IREE runtime API version: %d\n", actual_version);
-  } else {
-    fprintf(stderr, "Unsupported runtime API version: %d\n", actual_version);
-    abort();
-  }
-
-  // Create a runtime Instance.
-  iree_vm_instance_t* iree_instance = nullptr;
-  IREE_CHECK_OK(
-      iree_vm_instance_create(iree_allocator_system(), &iree_instance));
-
-  // Register HAL drivers and VM module types.
-  IREE_CHECK_OK(iree_hal_vulkan_driver_module_register(
-      iree_hal_driver_registry_default()));
-  IREE_CHECK_OK(iree_hal_module_register_all_types(iree_instance));
-
-  // Create IREE Vulkan Driver and Device, sharing our VkInstance/VkDevice.
-  fprintf(stdout, "Creating Vulkan driver/device\n");
-  // Load symbols from our static `vkGetInstanceProcAddr` for IREE to use.
-  iree_hal_vulkan_syms_t* iree_vk_syms = nullptr;
-  IREE_CHECK_OK(iree_hal_vulkan_syms_create(
-      reinterpret_cast<void*>(&vkGetInstanceProcAddr), iree_allocator_system(),
-      &iree_vk_syms));
-  // Create the driver sharing our VkInstance.
-  iree_hal_driver_t* iree_vk_driver = nullptr;
-  iree_string_view_t driver_identifier = iree_make_cstring_view("vulkan");
-  iree_hal_vulkan_driver_options_t driver_options;
-  driver_options.api_version = VK_API_VERSION_1_0;
-  driver_options.requested_features = static_cast<iree_hal_vulkan_features_t>(
-      IREE_HAL_VULKAN_FEATURE_ENABLE_DEBUG_UTILS);
-  IREE_CHECK_OK(iree_hal_vulkan_driver_create_using_instance(
-      driver_identifier, &driver_options, iree_vk_syms, g_Instance,
-      iree_allocator_system(), &iree_vk_driver));
-  // Create a device sharing our VkDevice and queue.
-  // We could also create a separate (possibly low priority) compute queue for
-  // IREE, and/or provide a dedicated transfer queue.
-  iree_string_view_t device_identifier = iree_make_cstring_view("vulkan");
-  iree_hal_vulkan_queue_set_t compute_queue_set;
-  compute_queue_set.queue_family_index = g_QueueFamily;
-  compute_queue_set.queue_indices = 1 << 0;
-  iree_hal_vulkan_queue_set_t transfer_queue_set;
-  transfer_queue_set.queue_indices = 0;
-  iree_hal_device_t* iree_vk_device = nullptr;
-  IREE_CHECK_OK(iree_hal_vulkan_wrap_device(
-      device_identifier, &driver_options.device_options, iree_vk_syms,
-      g_Instance, g_PhysicalDevice, g_Device, &compute_queue_set,
-      &transfer_queue_set, iree_allocator_system(), &iree_vk_device));
-  // Create a HAL module using the HAL device.
-  iree_vm_module_t* hal_module = nullptr;
-  IREE_CHECK_OK(iree_hal_module_create(iree_instance, iree_vk_device,
-                                       IREE_HAL_MODULE_FLAG_NONE,
-                                       iree_allocator_system(), &hal_module));
-
-
-  // Load bytecode module
-  //iree_file_toc_t module_file_toc;
-  //const char network_model[] = "resnet50_tf.vmfb";
-  //fprintf(stdout, "Loading: %s\n", network_model);
-  //if (load_file(network_model, &module_file_toc.data, &module_file_toc.size) == false)
-  //{
-  //    abort();
-  //    return 1;
-  //}
-  //fprintf(stdout, "module size: %zu\n", module_file_toc.size);
-
-  iree_vm_module_t* bytecode_module = nullptr;
-  iree_status_t module_status = iree_tooling_load_module_from_flags(
-      iree_instance, iree_allocator_system(), &bytecode_module);
-  if (!iree_status_is_ok(module_status))
-    return -1;
-  //IREE_CHECK_OK(iree_vm_bytecode_module_create(
-  //    iree_instance,
-  //    iree_const_byte_span_t{
-  //        reinterpret_cast<const uint8_t*>(module_file_toc.data),
-  //        module_file_toc.size},
-  //    iree_allocator_null(), iree_allocator_system(), &bytecode_module));
-  //// Query for details about what is in the loaded module.
-  //iree_vm_module_signature_t bytecode_module_signature =
-  //    iree_vm_module_signature(bytecode_module);
-  //fprintf(stdout, "Module loaded, have <%" PRIhsz "> exported functions:\n",
-  //        bytecode_module_signature.export_function_count);
-  //for (int i = 0; i < bytecode_module_signature.export_function_count; ++i) {
-  //  iree_vm_function_t function;
-  //  IREE_CHECK_OK(iree_vm_module_lookup_function_by_ordinal(
-  //      bytecode_module, IREE_VM_FUNCTION_LINKAGE_EXPORT, i, &function));
-  //  auto function_name = iree_vm_function_name(&function);
-  //  auto function_signature = iree_vm_function_signature(&function);
-
-  //  fprintf(stdout, "  %d: '%.*s' with calling convention '%.*s'\n", i,
-  //          (int)function_name.size, function_name.data,
-  //          (int)function_signature.calling_convention.size,
-  //          function_signature.calling_convention.data);
-  //}
-
-  // Allocate a context that will hold the module state across invocations.
-  iree_vm_context_t* iree_context = nullptr;
-  std::vector<iree_vm_module_t*> modules = {hal_module, bytecode_module};
-  IREE_CHECK_OK(iree_vm_context_create_with_modules(
-      iree_instance, IREE_VM_CONTEXT_FLAG_NONE, modules.size(), modules.data(),
-      iree_allocator_system(), &iree_context));
-  fprintf(stdout, "Context with modules is ready for use\n");
-
-  // Lookup the entry point function.
-  iree_vm_function_t main_function;
-  const char kMainFunctionName[] = "module.forward";
-  IREE_CHECK_OK(iree_vm_context_resolve_function(
-      iree_context,
-      iree_string_view_t{kMainFunctionName, sizeof(kMainFunctionName) - 1},
-      &main_function));
-  iree_string_view_t main_function_name = iree_vm_function_name(&main_function);
-  fprintf(stdout, "Resolved main function named '%.*s'\n",
-          (int)main_function_name.size, main_function_name.data);
-
-  // --------------------------------------------------------------------------
-
-        // Write inputs into mappable buffers.
-        iree_hal_allocator_t* allocator =
-            iree_hal_device_allocator(iree_vk_device);
-        //iree_hal_memory_type_t input_memory_type =
-        //    static_cast<iree_hal_memory_type_t>(
-        //        IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-        //        IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
-        //iree_hal_buffer_usage_t input_buffer_usage =
-        //    static_cast<iree_hal_buffer_usage_t>(IREE_HAL_BUFFER_USAGE_DEFAULT);
-        //iree_hal_buffer_params_t buffer_params;
-        //buffer_params.type = input_memory_type;
-        //buffer_params.usage = input_buffer_usage;
-        //buffer_params.access = IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE;
-
-       // Wrap input buffers in buffer views.
-
-        vm::ref<iree_vm_list_t> inputs;
-        iree_status_t input_status = ParseToVariantList(
-            allocator,
-            iree::span<const std::string>{FLAG_function_inputs.data(),
-                                          FLAG_function_inputs.size()},
-            iree_allocator_system(), &inputs);
-        if (!iree_status_is_ok(input_status))
-            return -1;
-        //vm::ref<iree_vm_list_t> inputs;
-        //IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, 6, iree_allocator_system(), &inputs));
-
-        //iree_hal_buffer_view_t* input0_buffer_view = nullptr;
-        //constexpr iree_hal_dim_t input_buffer_shape[] = {1, 224, 224, 3};
-        //IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
-        //    allocator,
-        //    /*shape_rank=*/4, /*shape=*/input_buffer_shape,
-        //    IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-        //    IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, buffer_params,
-        //    iree_make_const_byte_span(&input_res50, sizeof(input_res50)),
-        //    &input0_buffer_view));
-
-        //auto input0_buffer_view_ref = iree_hal_buffer_view_move_ref(input0_buffer_view);
-        //IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs.get(), &input0_buffer_view_ref));
-
-        // Prepare outputs list to accept results from the invocation.
-
-        vm::ref<iree_vm_list_t> outputs;
-        constexpr iree_hal_dim_t kOutputCount = 1000;
-        IREE_CHECK_OK(iree_vm_list_create(/*element_type=*/nullptr, kOutputCount * sizeof(float), iree_allocator_system(), &outputs));
-
-  // --------------------------------------------------------------------------
-
-  // Main loop.
-  bool done = false;
-  while (!done) {
-    SDL_Event event;
-
-    while (SDL_PollEvent(&event)) {
-      if (event.type == SDL_QUIT) {
-        done = true;
-      }
-
-      ImGui_ImplSDL2_ProcessEvent(&event);
-      if (event.type == SDL_QUIT) done = true;
-      if (event.type == SDL_WINDOWEVENT &&
-          event.window.event == SDL_WINDOWEVENT_RESIZED &&
-          event.window.windowID == SDL_GetWindowID(window)) {
-        g_SwapChainResizeWidth = (int)event.window.data1;
-        g_SwapChainResizeHeight = (int)event.window.data2;
-        g_SwapChainRebuild = true;
-      }
-    }
-
-    if (g_SwapChainRebuild) {
-      g_SwapChainRebuild = false;
-      ImGui_ImplVulkan_SetMinImageCount(g_MinImageCount);
-      ImGui_ImplVulkanH_CreateOrResizeWindow(
-          g_Instance, g_PhysicalDevice, g_Device, &g_MainWindowData,
-          g_QueueFamily, g_Allocator, g_SwapChainResizeWidth,
-          g_SwapChainResizeHeight, g_MinImageCount);
-      g_MainWindowData.FrameIndex = 0;
-    }
-
-    // Start the Dear ImGui frame
-    ImGui_ImplVulkan_NewFrame();
-    ImGui_ImplSDL2_NewFrame(window);
-    ImGui::NewFrame();
-
-    // Custom window.
-    {
-      ImGui::Begin("IREE Vulkan Integration Demo", &show_iree_window);
-
-      ImGui::Separator();
-
-      // ImGui Inputs for two input tensors.
-      // Run computation whenever any of the values changes.
-      static bool dirty = true;
-      if (dirty) {
-
-        // Synchronously invoke the function.
-        IREE_CHECK_OK(iree_vm_invoke(iree_context, main_function,
-                                     IREE_VM_INVOCATION_FLAG_NONE,
-                                     /*policy=*/nullptr, inputs.get(),
-                                     outputs.get(), iree_allocator_system()));
-
-
-        // we want to run continuously so we can use tools like RenderDoc, RGP, etc...
-        dirty = true;
-      }
-
-      // Framerate counter.
-      ImGui::Text("Application average %.3f ms/frame (%.1f FPS)",
-                  1000.0f / ImGui::GetIO().Framerate, ImGui::GetIO().Framerate);
-
-      ImGui::End();
-    }
-
-    // Rendering
-    ImGui::Render();
-    RenderFrame(wd, g_Device, g_Queue);
-
-    PresentFrame(wd, g_Queue);
-  }
-  // --------------------------------------------------------------------------
-
-  // --------------------------------------------------------------------------
-  // Cleanup
-  iree_vm_module_release(hal_module);
-  iree_vm_module_release(bytecode_module);
-  iree_vm_context_release(iree_context);
-  iree_hal_device_release(iree_vk_device);
-  iree_hal_allocator_release(allocator);
-  iree_hal_driver_release(iree_vk_driver);
-  iree_hal_vulkan_syms_release(iree_vk_syms);
-  iree_vm_instance_release(iree_instance);
-
-  err = vkDeviceWaitIdle(g_Device);
-  check_vk_result(err);
-  ImGui_ImplVulkan_Shutdown();
-  ImGui_ImplSDL2_Shutdown();
-  ImGui::DestroyContext();
-
-  CleanupVulkanWindow();
-  CleanupVulkan();
-
-  SDL_DestroyWindow(window);
-  SDL_Quit();
-  // --------------------------------------------------------------------------
-
-  return 0;
-}
-
-}  // namespace iree
--- a/cpp/vulkan_gui/vulkan_resnet_inference_gui.cc
+++ b/cpp/vulkan_gui/vulkan_resnet_inference_gui.cc
--- a/dataset/annotation_tool.py
+++ b/dataset/annotation_tool.py
@@ -10,7 +10,7 @@ from utils import get_datasets

 shark_root = Path(__file__).parent.parent
 demo_css = shark_root.joinpath("web/demo.css").resolve()
-nodlogo_loc = shark_root.joinpath("web/models/stable_diffusion/logos/nod-logo.png")
+nodlogo_loc = shark_root.joinpath("web/models/stable_diffusion/logos/amd-logo.jpg")


 with gr.Blocks(title="Dataset Annotation Tool", css=demo_css) as shark_web:
--- a/process_skipfiles.py
+++ b/process_skipfiles.py
@@ -5,6 +5,7 @@
 from distutils.sysconfig import get_python_lib
 import fileinput
 from pathlib import Path
+import os

 # Temporary workaround for transformers/__init__.py.
 path_to_transformers_hook = Path(
@@ -16,51 +17,24 @@ else:
    with open(path_to_transformers_hook, "w") as f:
        f.write("module_collection_mode = 'pyz+py'")
 
-path_to_skipfiles = Path(get_python_lib() + "/torch/_dynamo/skipfiles.py")
+# Candidate torch._dynamo files to patch; any that is missing is skipped below.
+paths_to_skipfiles = [Path(get_python_lib() + "/torch/_dynamo/skipfiles.py"), Path(get_python_lib() + "/torch/_dynamo/trace_rules.py")]
 
-modules_to_comment = ["abc,", "os,", "posixpath,", "_collections_abc,"]
-startMonitoring = 0
-for line in fileinput.input(path_to_skipfiles, inplace=True):
-    if "SKIP_DIRS = " in line:
-        startMonitoring = 1
-        print(line, end="")
-    elif startMonitoring in [1, 2]:
-        if "]" in line:
-            startMonitoring += 1
+for path in paths_to_skipfiles:
+    if not os.path.isfile(path):
+        continue
+    # Files patched by an earlier run are left alone. The elif branch below
+    # emits its addition on a separate line, so its per-line marker check
+    # cannot detect a previous patch and would append a duplicate each run.
+    if "x.__name__ for x in BUILTIN_SKIPLIST" in path.read_text(encoding="utf-8", errors="ignore"):
+        continue
+    for line in fileinput.input(path, inplace=True):
+        # List form: append the module names to the same expression.
+        if "[_module_dir(m) for m in BUILTIN_SKIPLIST]" in line and "x.__name__ for x in BUILTIN_SKIPLIST" not in line:
+            print(f"{line.rstrip()} + [x.__name__ for x in BUILTIN_SKIPLIST]")
+        # Generator form: keep the line and add a second extend() after it.
+        elif "(_module_dir(m) for m in BUILTIN_SKIPLIST)" in line and "x.__name__ for x in BUILTIN_SKIPLIST" not in line:
            print(line, end="")
+            print("SKIP_DIRS.extend(filter(None, (x.__name__ for x in BUILTIN_SKIPLIST)))")
        else:
-            flag = True
-            for module in modules_to_comment:
-                if module in line:
-                    if not line.startswith("#"):
-                        print(f"#{line}", end="")
-                    else:
-                        print(f"{line[1:]}", end="")
-                    flag = False
-                    break
-            if flag:
-                print(line, end="")
-    else:
-        print(line, end="")
-
-# For getting around scikit-image's packaging, laze_loader has had a patch merged but yet to be released.
-# Refer: https://github.com/scientific-python/lazy_loader
-path_to_lazy_loader = Path(get_python_lib() + "/lazy_loader/__init__.py")
-
-for line in fileinput.input(path_to_lazy_loader, inplace=True):
-    if 'stubfile = filename if filename.endswith("i")' in line:
-        print(
-            '    stubfile = (filename if filename.endswith("i") else f"{os.path.splitext(filename)[0]}.pyi")',
-            end="",
-        )
-    else:
-        print(line, end="")
-
-# For getting around timm's packaging.
-# Refer: https://github.com/pyinstaller/pyinstaller/issues/5673#issuecomment-808731505
-path_to_timm_activations = Path(get_python_lib() + "/timm/layers/activations_jit.py")
-for line in fileinput.input(path_to_timm_activations, inplace=True):
-    if "@torch.jit.script" in line:
-        print("@torch.jit._script_if_tracing", end="\n")
-    else:
-        print(line, end="")
+            print(line, end="")
--- a/requirements-importer-macos.txt
+++ b/requirements-importer-macos.txt
@@ -1,34 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/
--pre
-
-numpy
-torch
-torchvision
-
-tqdm
-
-#iree-compiler  | iree-runtime should already be installed
-
-transformers
-#jax[cpu]
-
-# tflitehub dependencies.
-Pillow
-
-# web dependecies.
-gradio
-altair
-
-# Testing and support.
-#lit
-#pyyaml
-
-#ONNX and ORT for benchmarking
-#--extra-index-url https://test.pypi.org/simple/
-#protobuf
-#coloredlogs
-#flatbuffers
-#sympy
-#psutil
-#onnx-weekly
-#ort-nightly
--- a/requirements-importer.txt
+++ b/requirements-importer.txt
@@ -1,41 +0,0 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
--pre
-
-numpy>1.22.4
-pytorch-triton
-torchvision 
-tabulate
-
-tqdm
-
-#iree-compiler  | iree-runtime should already be installed
-iree-tools-xla
-
-# Modelling and JAX.
-gin-config
-transformers
-diffusers
-#jax[cpu]
-Pillow
-
-# Testing and support.
-lit
-pyyaml
-python-dateutil
-sacremoses
-sentencepiece
-
-# web dependecies.
-gradio==3.44.3
-altair
-scipy
-
-#ONNX and ORT for benchmarking
-#--extra-index-url https://test.pypi.org/simple/
-#protobuf
-#coloredlogs
-#flatbuffers
-#sympy
-#psutil
-#onnx-weekly
-#ort-nightly
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,54 +1,26 @@
-f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-f https://openxla.github.io/iree/pip-release-links.html
+-r https://raw.githubusercontent.com/llvm/torch-mlir/main/requirements.txt
+-r https://raw.githubusercontent.com/llvm/torch-mlir/main/torchvision-requirements.txt
+-f https://download.pytorch.org/whl/nightly/cpu
+-f https://iree.dev/pip-release-links.html
 --pre

 setuptools
 wheel

-shark-turbine @ git+https://github.com/nod-ai/SHARK-Turbine#egg=shark-turbine&subdirectory=core
-turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine#egg=turbine-models&subdirectory=models
-
-# SHARK Runner
-tqdm
-
-# SHARK Downloader
-google-cloud-storage
+shark-turbine @ git+https://github.com/iree-org/iree-turbine.git@main
+turbine-models @ git+https://github.com/nod-ai/SHARK-Turbine.git@merge_punet_sdxl#subdirectory=models
+diffusers @ git+https://github.com/nod-ai/diffusers@0.29.0.dev0-shark
+Pillow
+transformers==4.43.3
+ftfy
+safetensors
+py-cpuinfo
+pydantic==2.4.1 # pin until pyinstaller-hooks-contrib works with beta versions
+mpmath==1.3.0

 # Testing
 pytest
-pytest-xdist
-pytest-forked
-Pillow
-parameterized
-
-# Add transformers, diffusers and scipy since it most commonly used
-#accelerate is now required for diffusers import from ckpt.
-accelerate
-scipy
-ftfy
-gradio==4.8.0
-altair
-omegaconf
-# 0.3.2 doesn't have binaries for arm64
-safetensors==0.3.1
-opencv-python
-scikit-image
-pytorch_lightning # for runwayml models
-tk
-pywebview
-sentencepiece
-py-cpuinfo
-tiktoken # for codegen
-joblib # for langchain
-timm # for MiniGPT4
-langchain
-einops # for zoedepth
-pydantic==2.4.1 # pin until pyinstaller-hooks-contrib works with beta versions

 # Keep PyInstaller at the end. Sometimes Windows Defender flags it but most folks can continue even if it errors
 pefile
 pyinstaller
-
-# For quantized GPTQ models
-optimum
-auto_gptq
--- a/rest_api_tests/sd3api_test.py
+++ b/rest_api_tests/sd3api_test.py
@@ -0,0 +1,96 @@
+"""Manual smoke test for the txt2img REST endpoint.
+
+Running this module builds a GenerationInputData payload and POSTs it to
+api_url; request failures are printed rather than raised.
+"""
+
+import requests
+from pydantic import BaseModel, Field
+import json
+
+
+def view_json_file(file_path):
+    """Return the entire text content of file_path."""
+    with open(file_path, "r") as fopen:
+        return fopen.read()
+
+
+# Define the URL of the REST API endpoint
+api_url = "http://127.0.0.1:8080/sdapi/v1/txt2img/"  # Replace with your actual API URL
+
+
+class GenerationInputData(BaseModel):
+    """Fields of the JSON body posted to the txt2img endpoint."""
+
+    prompt: list = [""]
+    negative_prompt: list = [""]
+    hf_model_id: str | None = None
+    height: int = Field(default=512, ge=128, le=1024, multiple_of=8)
+    width: int = Field(default=512, ge=128, le=1024, multiple_of=8)
+    sampler_name: str = "EulerDiscrete"
+    cfg_scale: float = Field(default=7.5, ge=1)
+    steps: int = Field(default=20, ge=1, le=100)
+    seed: int = Field(default=-1)
+    n_iter: int = Field(default=1)
+    # None is the default, so the annotation has to allow it.
+    config: dict | None = None
+
+
+# Create an instance of GenerationInputData with example arguments
+data = GenerationInputData(
+    prompt=[
+        "A phoenix made of diamond, black background, dream sequence, rising from coals"
+    ],
+    negative_prompt=[
+        "cropped, cartoon, lowres, low quality, black and white, bad scan, pixelated"
+    ],
+    hf_model_id="shark_sd3.py",
+    height=512,
+    width=512,
+    sampler_name="EulerDiscrete",
+    cfg_scale=7.5,
+    steps=20,
+    seed=-1,
+    n_iter=1,
+    # Relative path: resolved against the current working directory.
+    config=json.loads(view_json_file("../configs/sd3_phoenix_npu.json")),
+)
+
+# Convert the data to a dictionary. model_dump() is the pydantic v2 API;
+# BaseModel.dict() is deprecated there.
+data_dict = data.model_dump()
+
+# Optional: Define headers if needed (e.g., for authentication)
+headers = {
+    "User-Agent": "PythonTest",
+    "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br",
+}
+
+
+def test_post_request(url, data, headers=None):
+    """POST data as JSON to url and print the response status code.
+
+    Args:
+        url: Endpoint to post to.
+        data: JSON-serializable payload.
+        headers: Optional HTTP headers.
+
+    A requests.RequestException is caught and reported on stdout.
+    """
+    try:
+        # Send a POST request to the API endpoint
+        response = requests.post(url, json=data, headers=headers)
+
+        # Print the status code; dumping the body is left disabled below
+        print(f"Status Code: {response.status_code}")
+        print("Response Content:")
+        # print(response.json())  # Print the JSON response
+
+    except requests.RequestException as e:
+        # Handle any exceptions that occur during the request
+        print(f"An error occurred: {e}")
+
+
+# Run the test
+test_post_request(api_url, data_dict, headers)
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ import glob
 with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

-PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "0.0.5"
+PACKAGE_VERSION = os.environ.get("SHARK_PACKAGE_VERSION") or "2.0.0"
 backend_deps = []

 setup(
--- a/setup_venv.ps1
+++ b/setup_venv.ps1
@@ -7,13 +7,13 @@
  It checks the Python version installed and installs any required build
  dependencies into a Python virtual environment.
  If that environment does not exist, it creates it.
-  
+
 .PARAMETER update-src
  git pulls latest version

 .PARAMETER force
  removes and recreates venv to force update of all dependencies
-  
+
 .EXAMPLE
  .\setup_venv.ps1 --force

@@ -39,7 +39,7 @@ if ($arguments -eq "--force"){
        Write-Host "deactivating..."
        Deactivate
    }
-    
+
    if (Test-Path .\shark.venv\) {
        Write-Host "removing and recreating venv..."
        Remove-Item .\shark.venv -Force -Recurse
@@ -87,11 +87,8 @@ if ($NULL -ne $PyVer) {py -3.11 -m venv .\shark.venv\}
 else {python -m venv .\shark.venv\}
 .\shark.venv\Scripts\activate
 python -m pip install --upgrade pip
-pip install wheel
-pip install -r requirements.txt
-pip install --pre torch-mlir torchvision torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu -f https://llvm.github.io/torch-mlir/package-index/
-pip install --upgrade -f https://nod-ai.github.io/SRT/pip-release-links.html iree-compiler iree-runtime
-Write-Host "Building SHARK..."
-pip install -e . -f https://llvm.github.io/torch-mlir/package-index/ -f https://nod-ai.github.io/SRT/pip-release-links.html
-Write-Host "Build and installation completed successfully"
+pip install https://github.com/nod-ai/SRT/releases/download/candidate-20240619.291/iree_compiler-20240619.291-cp311-cp311-win_amd64.whl https://github.com/nod-ai/SRT/releases/download/candidate-20240619.291/iree_runtime-20240619.291-cp311-cp311-win_amd64.whl
+pip install --pre -r requirements.txt
+pip install -e .
+
 Write-Host "Source your venv with ./shark.venv/Scripts/activate"
--- a/setup_venv.sh
+++ b/setup_venv.sh
@@ -49,58 +49,20 @@ Red=`tput setaf 1`
 Green=`tput setaf 2`
 Yellow=`tput setaf 3`

-# Assume no binary torch-mlir.
-# Currently available for macOS m1&intel (3.11) and Linux(3.8,3.10,3.11)
-torch_mlir_bin=false
-if [[ $(uname -s) = 'Darwin' ]]; then
-  echo "${Yellow}Apple macOS detected"
-  if [[ $(uname -m) == 'arm64' ]]; then
-    echo "${Yellow}Apple M1 Detected"
-    hash rustc 2>/dev/null
-    if [ $? -eq 0 ];then
-      echo "${Green}rustc found to compile HF tokenizers"
-    else
-      echo "${Red}Could not find rustc" >&2
-      echo "${Red}Please run:"
-      echo "${Red}curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh"
-      exit 1
-    fi
-  fi
-  echo "${Yellow}Run the following commands to setup your SSL certs for your Python version if you see SSL errors with tests"
-  echo "${Yellow}/Applications/Python\ 3.XX/Install\ Certificates.command"
-  if [ "$PYTHON_VERSION_X_Y" == "3.11" ]; then
-    torch_mlir_bin=true
-  fi
-elif [[ $(uname -s) = 'Linux' ]]; then
-  echo "${Yellow}Linux detected"
-  if [ "$PYTHON_VERSION_X_Y" == "3.8" ]  || [ "$PYTHON_VERSION_X_Y" == "3.10" ] || [ "$PYTHON_VERSION_X_Y" == "3.11" ] ; then
-    torch_mlir_bin=true
-  fi
-else
-  echo "${Red}OS not detected. Pray and Play"
-fi
-
 # Upgrade pip and install requirements.
 $PYTHON -m pip install --upgrade pip || die "Could not upgrade pip"
 $PYTHON -m pip install --upgrade -r "$TD/requirements.txt"
-if [ "$torch_mlir_bin" = true ]; then
-  if [[ $(uname -s) = 'Darwin' ]]; then
-    echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
-    $PYTHON -m pip uninstall -y timm #TEMP FIX FOR MAC
-    $PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
-  else
-    $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
-    if [ $? -eq 0 ];then
-      echo "Successfully Installed torch-mlir"
-    else
-      echo "Could not install torch-mlir" >&2
-    fi
-  fi
+if [[ $(uname -s) = 'Darwin' ]]; then
+  echo "MacOS detected. Installing torch-mlir from .whl, to avoid dependency problems with torch."
+  $PYTHON -m pip uninstall -y timm #TEMP FIX FOR MAC
+  $PYTHON -m pip install --pre --no-cache-dir torch-mlir -f https://llvm.github.io/torch-mlir/package-index/ -f https://download.pytorch.org/whl/nightly/torch/
 else
-  echo "${Red}No binaries found for Python $PYTHON_VERSION_X_Y on $(uname -s)"
-  echo "${Yello}Python 3.11 supported on macOS and 3.8,3.10 and 3.11 on Linux"
-  echo "${Red}Please build torch-mlir from source in your environment"
-  exit 1
+  $PYTHON -m pip install --pre torch-mlir -f https://llvm.github.io/torch-mlir/package-index/
+  if [ $? -eq 0 ];then
+    echo "Successfully Installed torch-mlir"
+  else
+    echo "Could not install torch-mlir" >&2
+  fi
 fi
 if [[ -z "${USE_IREE}" ]]; then
  rm .use-iree
@@ -116,40 +78,13 @@ else
  echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
 fi

-if [[ ! -z "${IMPORTER}" ]]; then
-  echo "${Yellow}Installing importer tools.."
-  if [[ $(uname -s) = 'Linux' ]]; then
-    echo "${Yellow}Linux detected.. installing Linux importer tools"
-    #Always get the importer tools from upstream IREE
-    $PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer.txt" -f https://openxla.github.io/iree/pip-release-links.html --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-  elif [[ $(uname -s) = 'Darwin' ]]; then
-    echo "${Yellow}macOS detected.. installing macOS importer tools"
-    #Conda seems to have some problems installing these packages and hope they get resolved upstream.
-    $PYTHON -m pip install --no-warn-conflicts --upgrade -r "$TD/requirements-importer-macos.txt" -f ${RUNTIME} --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-  fi
-fi
-
 if [[ $(uname -s) = 'Darwin' ]]; then
  PYTORCH_URL=https://download.pytorch.org/whl/nightly/torch/
 else
  PYTORCH_URL=https://download.pytorch.org/whl/nightly/cpu/
 fi

-$PYTHON -m pip install --no-warn-conflicts -e . -f https://llvm.github.io/torch-mlir/package-index/ -f ${RUNTIME} -f ${PYTORCH_URL}
-
-if [[ $(uname -s) = 'Linux' && ! -z "${IMPORTER}" ]]; then
-  T_VER=$($PYTHON -m pip show torch | grep Version)
-  T_VER_MIN=${T_VER:14:12}
-  TV_VER=$($PYTHON -m pip show torchvision | grep Version)
-  TV_VER_MAJ=${TV_VER:9:6}
-  $PYTHON -m pip uninstall -y torchvision
-  $PYTHON -m pip install torchvision==${TV_VER_MAJ}${T_VER_MIN} --no-deps -f https://download.pytorch.org/whl/nightly/cpu/torchvision/
-  if [ $? -eq 0 ];then
-    echo "Successfully Installed torch + cu118."
-  else
-    echo "Could not install torch + cu118." >&2
-  fi
-fi
+$PYTHON -m pip install --no-warn-conflicts -e . -f ${RUNTIME} -f ${PYTORCH_URL}

 if [[ -z "${NO_BREVITAS}" ]]; then
  $PYTHON -m pip install git+https://github.com/Xilinx/brevitas.git@dev
--- a/shark/init.py
+++ b/shark/init.py
@@ -1,28 +0,0 @@
-import importlib
-import logging
-
-from torch._dynamo import register_backend
-
-log = logging.getLogger(__name__)
-
-
-@register_backend
-def shark(model, inputs, *, options):
-    try:
-        from shark.dynamo_backend.utils import SharkBackend
-    except ImportError:
-        log.exception(
-            "Unable to import SHARK - High Performance Machine Learning Distribution"
-            "Please install the right version of SHARK that matches the PyTorch version being used. "
-            "Refer to https://github.com/nod-ai/SHARK/ for details."
-        )
-        raise
-    return SharkBackend(model, inputs, options)
-
-
-def has_shark():
-    try:
-        importlib.import_module("shark")
-        return True
-    except ImportError:
-        return False
--- a/shark/backward_makefx.py
+++ b/shark/backward_makefx.py
@@ -1,78 +0,0 @@
-# Copyright 2020 The Nod Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from torch._decomp import get_decompositions
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch.nn.utils import stateless
-
-from torch import fx
-import tempfile
-
-
-class MakeFxModule:
-    def __init__(self, model, inputs, labels=None, custom_inference_fn=None):
-        self.model = model
-        self.inputs = inputs
-        self.custom_inference_fn = custom_inference_fn
-        self.training_graph = None
-
-    # Doesn't replace the None type.
-    def change_fx_graph_return_to_tuple(self, fx_g: fx.GraphModule):
-        for node in fx_g.graph.nodes:
-            if node.op == "output":
-                # output nodes always have one argument
-                node_arg = node.args[0]
-                out_nodes = []
-                if isinstance(node_arg, list):
-                    # Don't return NoneType elements.
-                    for out_node in node_arg:
-                        if not isinstance(out_node, type(None)):
-                            out_nodes.append(out_node)
-                    # If there is a single tensor/element to be returned don't
-                    # a tuple for it.
-                    if len(out_nodes) == 1:
-                        node.args = out_nodes
-                    else:
-                        node.args = (tuple(out_nodes),)
-        fx_g.graph.lint()
-        fx_g.recompile()
-        return fx_g
-
-    def generate_graph(self):
-        fx_g = make_fx(
-            self.custom_inference_fn,
-            decomposition_table=get_decompositions(
-                [
-                    torch.ops.aten.embedding_dense_backward,
-                    torch.ops.aten.native_layer_norm_backward,
-                    torch.ops.aten.slice_backward,
-                    torch.ops.aten.select_backward,
-                ]
-            ),
-        )(
-            dict(self.model.named_parameters()),
-            dict(self.model.named_buffers()),
-            self.inputs,
-        )
-        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
-        fx_g.recompile()
-        fx_g = self.change_fx_graph_return_to_tuple(fx_g)
-        ts_g = torch.jit.script(fx_g)
-        temp = tempfile.NamedTemporaryFile(
-            suffix="_shark_ts", prefix="temp_ts_"
-        )
-        ts_g.save(temp.name)
-        new_ts = torch.jit.load(temp.name)
-        self.training_graph = new_ts
--- a/shark/dynamo_backend/init.py
+++ b/shark/dynamo_backend/init.py
--- a/shark/dynamo_backend/utils.py
+++ b/shark/dynamo_backend/utils.py
@@ -1,154 +0,0 @@
-import functools
-from typing import List, Optional
-import torch
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch._functorch.compile_utils import strip_overloads
-from shark.shark_inference import SharkInference
-from torch._decomp import get_decompositions
-from torch.func import functionalize
-import io
-import torch_mlir
-
-
-# TODO: Control decompositions.
-def default_decompositions():
-    return get_decompositions(
-        [
-            torch.ops.aten.embedding_dense_backward,
-            torch.ops.aten.native_layer_norm_backward,
-            torch.ops.aten.slice_backward,
-            torch.ops.aten.select_backward,
-            torch.ops.aten.norm.ScalarOpt_dim,
-            torch.ops.aten.native_group_norm,
-            torch.ops.aten.upsample_bilinear2d.vec,
-            torch.ops.aten.split.Tensor,
-            torch.ops.aten.split_with_sizes,
-            torch.ops.aten.native_layer_norm,
-            torch.ops.aten.masked_fill.Tensor,
-            torch.ops.aten.masked_fill.Scalar,
-        ]
-    )
-
-
-def _remove_nones(fx_g: torch.fx.GraphModule) -> List[int]:
-    removed_indexes = []
-    for node in fx_g.graph.nodes:
-        if node.op == "output":
-            assert (
-                len(node.args) == 1
-            ), "Output node must have a single argument"
-            node_arg = node.args[0]
-            if isinstance(node_arg, (list, tuple)):
-                node_arg = list(node_arg)
-                node_args_len = len(node_arg)
-                for i in range(node_args_len):
-                    curr_index = node_args_len - (i + 1)
-                    if node_arg[curr_index] is None:
-                        removed_indexes.append(curr_index)
-                        node_arg.pop(curr_index)
-                node.args = (tuple(node_arg),)
-                break
-
-    if len(removed_indexes) > 0:
-        fx_g.graph.lint()
-        fx_g.graph.eliminate_dead_code()
-        fx_g.recompile()
-    removed_indexes.sort()
-    return removed_indexes
-
-
-def _returns_nothing(fx_g: torch.fx.GraphModule) -> bool:
-    for node in fx_g.graph.nodes:
-        if node.op == "output":
-            assert (
-                len(node.args) == 1
-            ), "Output node must have a single argument"
-            node_arg = node.args[0]
-            if isinstance(node_arg, tuple):
-                return len(node_arg) == 0
-    return False
-
-
-def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule) -> bool:
-    """
-    Replace tuple with tuple element in functions that return one-element tuples.
-    Returns true if an unwrapping took place, and false otherwise.
-    """
-    unwrapped_tuple = False
-    for node in fx_g.graph.nodes:
-        if node.op == "output":
-            assert (
-                len(node.args) == 1
-            ), "Output node must have a single argument"
-            node_arg = node.args[0]
-            if isinstance(node_arg, tuple):
-                if len(node_arg) == 1:
-                    node.args = (node_arg[0],)
-                    unwrapped_tuple = True
-                    break
-
-    if unwrapped_tuple:
-        fx_g.graph.lint()
-        fx_g.recompile()
-    return unwrapped_tuple
-
-
-class SharkBackend:
-    def __init__(
-        self, fx_g: torch.fx.GraphModule, inputs: tuple, options: dict
-    ):
-        self.fx_g = fx_g
-        self.inputs = inputs
-        self.shark_module = None
-        self.device: str = options.get("device", "cpu")
-        self.was_unwrapped: bool = False
-        self.none_indices: list = []
-        self._modify_fx_g()
-        self.compile()
-
-    def _modify_fx_g(self):
-        self.none_indices = _remove_nones(self.fx_g)
-        self.was_unwrapped = _unwrap_single_tuple_return(self.fx_g)
-
-    def compile(self):
-        gm = make_fx(
-            functionalize(self.fx_g),
-            decomposition_table=default_decompositions(),
-        )(*self.inputs)
-        gm.graph.set_codegen(torch.fx.graph.CodeGen())
-        gm.recompile()
-        strip_overloads(gm)
-        ts_g = torch.jit.script(gm)
-        mlir_module = torch_mlir.compile(
-            ts_g, self.inputs, output_type="linalg-on-tensors"
-        )
-        bytecode_stream = io.BytesIO()
-        mlir_module.operation.write_bytecode(bytecode_stream)
-        bytecode = bytecode_stream.getvalue()
-        from shark.shark_inference import SharkInference
-
-        shark_module = SharkInference(
-            mlir_module=bytecode,
-            device=self.device,
-            mlir_dialect="tm_tensor",
-        )
-        shark_module.compile(extra_args=[])
-        self.shark_module = shark_module
-
-    def __call__(self, *inputs):
-        np_inputs = [x.contiguous().detach().cpu().numpy() for x in inputs]
-        np_outs = self.shark_module("forward", np_inputs)
-        if self.was_unwrapped:
-            np_outs = [
-                np_outs,
-            ]
-
-        if not isinstance(np_outs, list):
-            res = torch.from_numpy(np_outs)
-            return res
-
-        result = [torch.from_numpy(x) for x in np_outs]
-        for r_in in self.none_indices:
-            result.insert(r_in, None)
-        result = tuple(result)
-        return result
--- a/shark/examples/shark_dynamo/basic_examples.py
+++ b/shark/examples/shark_dynamo/basic_examples.py
@@ -1,25 +0,0 @@
-import torch
-import shark
-
-
-def foo(x, a):
-    if x.shape[0] > 3:
-        return x + a
-    else:
-        return x + 3
-
-
-shark_options = {"device": "cpu"}
-compiled = torch.compile(foo, backend="shark", options=shark_options)
-
-input = torch.ones(4)
-
-x = compiled(input, input)
-
-print(x)
-
-input = torch.ones(3)
-
-x = compiled(input, input)
-
-print(x)
--- a/shark/examples/shark_eager/dynamo_demo.ipynb
+++ b/shark/examples/shark_eager/dynamo_demo.ipynb
@@ -1,309 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# standard imports\n",
-    "import torch\n",
-    "from shark.iree_utils import get_iree_compiled_module"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "outputs": [],
-   "source": [
-    "# torch dynamo related imports\n",
-    "try:\n",
-    "    import torchdynamo\n",
-    "    from torchdynamo.optimizations.backends import create_backend\n",
-    "    from torchdynamo.optimizations.subgraph import SubGraph\n",
-    "except ModuleNotFoundError:\n",
-    "    print(\n",
-    "        \"Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo\"\n",
-    "    )\n",
-    "    exit()\n",
-    "\n",
-    "# torch-mlir imports for compiling\n",
-    "from torch_mlir import compile, OutputType"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "[TorchDynamo](https://github.com/pytorch/torchdynamo) is a compiler for PyTorch programs that uses the [frame evaluation API](https://www.python.org/dev/peps/pep-0523/) in CPython to dynamically modify Python bytecode right before it is executed. It creates this FX Graph through bytecode analysis and is designed to mix Python execution with compiled backends."
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "outputs": [],
-   "source": [
-    "def toy_example(*args):\n",
-    "    a, b = args\n",
-    "\n",
-    "    x = a / (torch.abs(a) + 1)\n",
-    "    if b.sum() < 0:\n",
-    "        b = b * -1\n",
-    "    return x * b"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "outputs": [],
-   "source": [
-    "# compiler that lowers fx_graph to through MLIR\n",
-    "def __torch_mlir(fx_graph, *args, **kwargs):\n",
-    "    assert isinstance(\n",
-    "        fx_graph, torch.fx.GraphModule\n",
-    "    ), \"Model must be an FX GraphModule.\"\n",
-    "\n",
-    "    def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):\n",
-    "        \"\"\"Replace tuple with tuple element in functions that return one-element tuples.\"\"\"\n",
-    "\n",
-    "        for node in fx_g.graph.nodes:\n",
-    "            if node.op == \"output\":\n",
-    "                assert (\n",
-    "                    len(node.args) == 1\n",
-    "                ), \"Output node must have a single argument\"\n",
-    "                node_arg = node.args[0]\n",
-    "                if isinstance(node_arg, tuple) and len(node_arg) == 1:\n",
-    "                    node.args = (node_arg[0],)\n",
-    "        fx_g.graph.lint()\n",
-    "        fx_g.recompile()\n",
-    "        return fx_g\n",
-    "\n",
-    "    fx_graph = _unwrap_single_tuple_return(fx_graph)\n",
-    "    ts_graph = torch.jit.script(fx_graph)\n",
-    "\n",
-    "    # torchdynamo does munges the args differently depending on whether you use\n",
-    "    # the @torchdynamo.optimize decorator or the context manager\n",
-    "    if isinstance(args, tuple):\n",
-    "        args = list(args)\n",
-    "    assert isinstance(args, list)\n",
-    "    if len(args) == 1 and isinstance(args[0], list):\n",
-    "        args = args[0]\n",
-    "\n",
-    "    linalg_module = compile(\n",
-    "        ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS\n",
-    "    )\n",
-    "    callable, _ = get_iree_compiled_module(\n",
-    "        linalg_module, \"cuda\", func_name=\"forward\"\n",
-    "    )\n",
-    "\n",
-    "    def forward(*inputs):\n",
-    "        return callable(*inputs)\n",
-    "\n",
-    "    return forward"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "Simplest way to use TorchDynamo with the `torchdynamo.optimize` context manager:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 1 device(s).\n",
-      "Device: 0\n",
-      "  Name: NVIDIA GeForce RTX 3080\n",
-      "  Compute Capability: 8.6\n",
-      "[-0.40066046 -0.4210303   0.03225489 -0.44849953  0.10370405 -0.04422468\n",
-      "  0.33262825 -0.20109026  0.02102537 -0.24882983]\n",
-      "[-0.07824923 -0.17004533  0.06439921 -0.06163602  0.26633525 -1.1560082\n",
-      " -0.06660341  0.24227881  0.1462235  -0.32055548]\n",
-      "[-0.01464001  0.442209   -0.0607936  -0.5477967  -0.25226554 -0.08588809\n",
-      " -0.30497575  0.00061084 -0.50069696  0.2317973 ]\n",
-      "[ 0.25726247  0.39388427 -0.24093066  0.12316308 -0.01981307  0.5661146\n",
-      "  0.26199922  0.8123446  -0.01576749  0.30846444]\n",
-      "[ 0.7878203  -0.45975062 -0.29956317 -0.07032048 -0.55817443 -0.62506855\n",
-      " -1.6837492  -0.38442805  0.28220773 -1.5325156 ]\n",
-      "[ 0.07975311  0.67754704 -0.30927914  0.00347631 -0.07326564  0.01893554\n",
-      " -0.7518105  -0.03078967 -0.07623022  0.38865626]\n",
-      "[-0.7751679  -0.5841397  -0.6622711   0.18574935 -0.6049372   0.02844244\n",
-      " -0.20471913  0.3337415  -0.3619432  -0.35087156]\n",
-      "[-0.08569919 -0.10775139 -0.02338934  0.21933547 -0.46712473  0.00062137\n",
-      " -0.58207744  0.06457533  0.18276742  0.03866556]\n",
-      "[-0.2311981  -0.43036282  0.20561649 -0.10363232 -0.13248594  0.02885137\n",
-      " -0.31241602 -0.36907142  0.08861586  0.2331427 ]\n",
-      "[-0.07273526 -0.31246194 -0.24218291 -0.24145737  0.0364486   0.14382267\n",
-      " -0.00531162  0.15447603 -0.5220248  -0.09016377]\n"
-     ]
-    }
-   ],
-   "source": [
-    "with torchdynamo.optimize(__torch_mlir):\n",
-    "    for _ in range(10):\n",
-    "        print(toy_example(torch.randn(10), torch.randn(10)))"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "It can also be used through a decorator:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "outputs": [],
-   "source": [
-    "@create_backend\n",
-    "def torch_mlir(subgraph, *args, **kwargs):\n",
-    "    assert isinstance(subgraph, SubGraph), \"Model must be a dynamo SubGraph.\"\n",
-    "    return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))\n",
-    "\n",
-    "\n",
-    "@torchdynamo.optimize(\"torch_mlir\")\n",
-    "def toy_example2(*args):\n",
-    "    a, b = args\n",
-    "\n",
-    "    x = a / (torch.abs(a) + 1)\n",
-    "    if b.sum() < 0:\n",
-    "        b = b * -1\n",
-    "    return x * b"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 1 device(s).\n",
-      "Device: 0\n",
-      "  Name: NVIDIA GeForce RTX 3080\n",
-      "  Compute Capability: 8.6\n",
-      "[-0.35494277  0.03409214 -0.02271946  0.7335942   0.03122527 -0.41881397\n",
-      " -0.6609761  -0.6418614   0.29336175 -0.01973678]\n",
-      "[-2.7246824e-01 -3.5543957e-01  6.0087401e-01 -7.4570496e-03\n",
-      " -4.2481605e-02 -5.0296803e-04  7.2928613e-01 -1.4673788e-03\n",
-      " -2.7621329e-01 -6.0995776e-02]\n",
-      "[-0.03165906  0.3889693   0.24052973  0.27279532 -0.02773128 -0.12602475\n",
-      " -1.0124422   0.5720256  -0.35437614 -0.20992722]\n",
-      "[-0.41831446  0.5525326  -0.29749998 -0.17044766  0.11804754 -0.05210691\n",
-      " -0.46145165 -0.8776549   0.10090438  0.17463352]\n",
-      "[ 0.02194221  0.20959911  0.26973712  0.12551276 -0.0020404   0.1490246\n",
-      " -0.04456685  1.1100804   0.8105744   0.6676846 ]\n",
-      "[ 0.06528181 -0.13591261  0.5370964  -0.4398162  -0.03372452  0.9691372\n",
-      " -0.01120087  0.2947028   0.4804801  -0.3324341 ]\n",
-      "[ 0.33549032 -0.23001772 -0.08681437  0.16490957 -0.11223086  0.09168988\n",
-      "  0.02403045  0.17344482  0.46406478 -0.00129451]\n",
-      "[-0.27475086  0.42384806  1.9090122  -0.41147137 -0.6888369   0.08435658\n",
-      " -0.26628923 -0.17436793 -0.8058869  -0.02582378]\n",
-      "[-0.10109414  0.08681287 -0.10055986  0.6858881   0.29267687 -0.02797117\n",
-      " -0.01425194  0.4882803   0.3551982  -0.858935  ]\n",
-      "[-0.22086617  0.524994    0.17721705 -0.03813264 -0.54570735 -0.4421502\n",
-      "  0.11938014 -0.01122053  0.39294165 -0.61770755]\n"
-     ]
-    }
-   ],
-   "source": [
-    "for _ in range(10):\n",
-    "    print(toy_example2(torch.randn(10), torch.randn(10)))"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
--- a/shark/examples/shark_eager/dynamo_demo.py
+++ b/shark/examples/shark_eager/dynamo_demo.py
@@ -1,92 +0,0 @@
-import torch
-from torch_mlir import compile, OutputType
-
-from shark.iree_utils import get_iree_compiled_module
-
-try:
-    import torchdynamo
-    from torchdynamo.optimizations.backends import create_backend
-    from torchdynamo.optimizations.subgraph import SubGraph
-except ModuleNotFoundError:
-    print(
-        "Please install TorchDynamo using pip install git+https://github.com/pytorch/torchdynamo"
-    )
-    exit()
-
-NUM_ITERS = 10
-
-
-def __torch_mlir(fx_graph, *args, **kwargs):
-    assert isinstance(
-        fx_graph, torch.fx.GraphModule
-    ), "Model must be an FX GraphModule."
-
-    def _unwrap_single_tuple_return(fx_g: torch.fx.GraphModule):
-        """Replace tuple with tuple element in functions that return one-element tuples."""
-
-        for node in fx_g.graph.nodes:
-            if node.op == "output":
-                assert (
-                    len(node.args) == 1
-                ), "Output node must have a single argument"
-                node_arg = node.args[0]
-                if isinstance(node_arg, tuple) and len(node_arg) == 1:
-                    node.args = (node_arg[0],)
-        fx_g.graph.lint()
-        fx_g.recompile()
-        return fx_g
-
-    fx_graph = _unwrap_single_tuple_return(fx_graph)
-    ts_graph = torch.jit.script(fx_graph)
-
-    if isinstance(args, tuple):
-        args = list(args)
-    assert isinstance(args, list)
-    if len(args) == 1 and isinstance(args[0], list):
-        args = args[0]
-
-    linalg_module = compile(
-        ts_graph, args, output_type=OutputType.LINALG_ON_TENSORS
-    )
-    callable, _ = get_iree_compiled_module(
-        linalg_module, "cuda", func_name="forward"
-    )
-
-    def forward(*inputs):
-        return callable(*inputs)
-
-    return forward
-
-
-def toy_example(*args):
-    a, b = args
-
-    x = a / (torch.abs(a) + 1)
-    if b.sum() < 0:
-        b = b * -1
-    return x * b
-
-
-with torchdynamo.optimize(__torch_mlir):
-    for _ in range(10):
-        print(toy_example(torch.randn(10), torch.randn(10)))
-
-
-@create_backend
-def torch_mlir(subgraph, *args, **kwargs):
-    assert isinstance(subgraph, SubGraph), "Model must be a dynamo SubGraph."
-    return __torch_mlir(subgraph.model, *list(subgraph.example_inputs))
-
-
-@torchdynamo.optimize("torch_mlir")
-def toy_example2(*args):
-    a, b = args
-
-    x = a / (torch.abs(a) + 1)
-    if b.sum() < 0:
-        b = b * -1
-    return x * b
-
-
-for _ in range(10):
-    print(toy_example2(torch.randn(10), torch.randn(10)))
--- a/shark/examples/shark_eager/eager_mode.ipynb
+++ b/shark/examples/shark_eager/eager_mode.ipynb
@@ -1,805 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/mlevental/miniconda3/envs/torch-mlir/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# standard imports\n",
-    "import torch\n",
-    "from torch_mlir.eager_mode import torch_mlir_tensor"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "outputs": [],
-   "source": [
-    "# eager mode imports\n",
-    "from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor\n",
-    "from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "The simplest way of using Eager Mode (through IREE) requires setting a \"backend\":"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "outputs": [],
-   "source": [
-    "torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"cpu\")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "and wrapping all your `torch.Tensor`s:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n"
-     ]
-    }
-   ],
-   "source": [
-    "NUM_ITERS = 10\n",
-    "\n",
-    "t = torch.ones((10, 10))\n",
-    "u = 2 * torch.ones((10, 10))\n",
-    "\n",
-    "tt = TorchMLIRTensor(t)\n",
-    "print(tt)\n",
-    "uu = TorchMLIRTensor(u)\n",
-    "print(uu)"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "`TorchMLIRTensor` is a \"tensor wrapper subclass\" (more info [here](https://github.com/albanD/subclass_zoo)) that keeps the IREE `DeviceArray` in a field `elem`:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "for i in range(NUM_ITERS):\n",
-    "    yy = tt + uu\n",
-    "    print(type(yy))\n",
-    "    print(yy.elem.to_host())\n",
-    "    yy = tt * uu\n",
-    "    print(type(yy))\n",
-    "    print(yy.elem.to_host())"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "If you have a GPU (and CUDA installed) that works too (you can verify by having `watch -n1 nvidia-smi` up in a terminal while running the next cell):"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "[[3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]\n",
-      " [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend(\"gpu\")\n",
-    "\n",
-    "t = torch.ones((10, 10))\n",
-    "u = 2 * torch.ones((10, 10))\n",
-    "\n",
-    "tt = TorchMLIRTensor(t)\n",
-    "print(tt)\n",
-    "uu = TorchMLIRTensor(u)\n",
-    "print(uu)\n",
-    "\n",
-    "yy = tt + uu\n",
-    "print(yy.elem.to_host())\n",
-    "yy = tt * uu\n",
-    "print(yy.elem.to_host())"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "There is a convenience class `SharkEagerMode` that will handle both the installation of the backend and the wrapping of `torch.Tensor`s:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# eager mode RAII\n",
-    "from shark.shark_runner import SharkEagerMode\n",
-    "\n",
-    "shark_eager_mode = SharkEagerMode(\"cpu\")\n",
-    "\n",
-    "t = torch.ones((10, 10))\n",
-    "u = torch.ones((10, 10))\n",
-    "\n",
-    "print(t)\n",
-    "print(u)\n",
-    "\n",
-    "for i in range(NUM_ITERS):\n",
-    "    yy = t + u\n",
-    "    print(type(yy))\n",
-    "    print(yy.elem.to_host())\n",
-    "    yy = t * u\n",
-    "    print(type(yy))\n",
-    "    print(yy.elem.to_host())"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  },
-  {
-   "cell_type": "markdown",
-   "source": [
-    "The `SharkEagerMode` class is a hacky take on [RAII](https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization) that defines a \"deleter\" that runs when an instantiation (of `SharkEagerMode`) is garbage collected. Takeaway is that if you want to turn off `SharkEagerMode`, or switch backends, you need to `del` the instance:"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%% md\n"
-    }
-   }
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "TorchMLIRTensor(<IREE DeviceArray: shape=[10, 10], dtype=float32>, backend=EagerModeIREELinalgOnTensorsBackend)\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]\n",
-      " [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]]\n",
-      "<class 'torch_mlir.eager_mode.torch_mlir_tensor.TorchMLIRTensor'>\n",
-      "[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]\n",
-      " [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "del shark_eager_mode\n",
-    "shark_eager_mode = SharkEagerMode(\"cuda\")\n",
-    "\n",
-    "t = torch.ones((10, 10))\n",
-    "u = torch.ones((10, 10))\n",
-    "\n",
-    "print(t)\n",
-    "print(u)\n",
-    "\n",
-    "yy = t + u\n",
-    "print(type(yy))\n",
-    "print(yy.elem.to_host())\n",
-    "yy = t * u\n",
-    "print(type(yy))\n",
-    "print(yy.elem.to_host())"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
--- a/shark/examples/shark_eager/eager_mode.py
+++ b/shark/examples/shark_eager/eager_mode.py
@@ -1,148 +0,0 @@
-# Copyright 2020 The Nod Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from torch.utils.cpp_extension import load_inline, include_paths
-from torch_mlir.eager_mode import torch_mlir_tensor
-from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor
-
-from shark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
-from shark.shark_runner import SharkEagerMode
-
-
-def test_cpu():
-    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
-
-    t = torch.ones((10, 10), device="cpu")
-    u = 2 * torch.ones((10, 10), device="cpu")
-
-    tt = TorchMLIRTensor(t)
-    print(tt)
-    uu = TorchMLIRTensor(u)
-    print(uu)
-
-    for i in range(NUM_ITERS):
-        yy = tt + uu
-        print(type(yy))
-        print(yy.elem.to_host())
-        yy = tt * uu
-        print(type(yy))
-        print(yy.elem.to_host())
-
-
-def test_gpu():
-    source = """
-    #include <iostream>
-    #include "cuda.h"
-    #include "cuda_runtime_api.h"
-
-    using namespace std;
-
-    void print_free_mem() {
-        int num_gpus;
-        size_t free, total;
-        cudaSetDevice(0);
-        int id;
-        cudaGetDevice(&id);
-        cudaMemGetInfo(&free, &total);
-        cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
-    }
-    """
-    gpu_stats = load_inline(
-        name="inline_extension",
-        cpp_sources=[source],
-        extra_include_paths=include_paths(cuda=True),
-        functions=["print_free_mem"],
-    )
-    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")
-
-    t = torch.ones((10, 10), device="cpu")
-    u = 2 * torch.ones((10, 10), device="cpu")
-
-    tt = TorchMLIRTensor(t)
-    print(tt)
-    uu = TorchMLIRTensor(u)
-    print(uu)
-
-    for i in range(NUM_ITERS):
-        yy = tt + uu
-        print(yy.elem.to_host())
-        yy = tt * uu
-        print(yy.elem.to_host())
-        gpu_stats.print_free_mem()
-
-
-def test_python_mode_ref_backend():
-    # hide this wherever you want?
-    _ = SharkEagerMode("refbackend")
-
-    t = torch.ones((10, 10), device="cpu")
-    u = torch.ones((10, 10), device="cpu")
-
-    print(t)
-    print(u)
-
-    for i in range(NUM_ITERS):
-        print(i)
-        yy = t + u
-        print(yy.elem)
-        yy = t * u
-        print(yy.elem)
-
-
-def test_python_mode_iree_cpu():
-    # hide this wherever you want?
-    _ = SharkEagerMode("cpu")
-
-    t = torch.ones((10, 10), device="cpu")
-    u = torch.ones((10, 10), device="cpu")
-
-    print(t)
-    print(u)
-
-    for i in range(NUM_ITERS):
-        yy = t + u
-        print(type(yy))
-        print(yy.elem.to_host())
-        yy = t * u
-        print(type(yy))
-        print(yy.elem.to_host())
-
-
-def test_python_mode_iree_gpu():
-    _ = SharkEagerMode("gpu")
-
-    t = torch.ones((10, 10), device="cpu")
-    u = torch.ones((10, 10), device="cpu")
-
-    print(t)
-    print(u)
-
-    for i in range(NUM_ITERS):
-        yy = t + u
-        print(type(yy))
-        print(yy.elem.to_host())
-        yy = t * u
-        print(type(yy))
-        print(yy.elem.to_host())
-
-
-if __name__ == "__main__":
-    NUM_ITERS = 10
-    test_cpu()
-    if torch.cuda.is_available():
-        test_gpu()
-    test_python_mode_ref_backend()
-    test_python_mode_iree_cpu()
-    test_python_mode_iree_gpu()
--- a/shark/examples/shark_eager/squeezenet_lockstep.py
+++ b/shark/examples/shark_eager/squeezenet_lockstep.py
@@ -1,73 +0,0 @@
-import torch
-import numpy as np
-
-model = torch.hub.load(
-    "pytorch/vision:v0.10.0", "squeezenet1_0", pretrained=True
-)
-model.eval()
-
-# from PIL import Image
-# from torchvision import transforms
-# import urllib
-#
-# url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
-# try: urllib.URLopener().retrieve(url, filename)
-# except: urllib.request.urlretrieve(url, filename)
-#
-#
-# input_image = Image.open(filename)
-# preprocess = transforms.Compose([
-#     transforms.Resize(256),
-#     transforms.CenterCrop(224),
-#     transforms.ToTensor(),
-#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-# ])
-# input_tensor = preprocess(input_image)
-# input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model
-# print(input_batch.shape) # size = [1, 3, 224, 224]
-
-# The above is code for generating sample inputs from an image. We can just use
-# random values for accuracy testing though
-input_batch = torch.randn(1, 3, 224, 224)
-
-
-# Focus on CPU for now
-if False and torch.cuda.is_available():
-    input_batch = input_batch.to("cuda")
-    model.to("cuda")
-
-with torch.no_grad():
-    output = model(input_batch)
-# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
-golden_confidences = output[0]
-# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
-golden_probabilities = torch.nn.functional.softmax(
-    golden_confidences, dim=0
-).numpy()
-
-golden_confidences = golden_confidences.numpy()
-
-from shark.torch_mlir_lockstep_tensor import TorchMLIRLockstepTensor
-
-input_detached_clone = input_batch.clone()
-eager_input_batch = TorchMLIRLockstepTensor(input_detached_clone)
-
-print("getting torch-mlir result")
-
-output = model(eager_input_batch)
-
-static_output = output.elem
-confidences = static_output[0]
-probabilities = torch.nn.functional.softmax(
-    torch.from_numpy(confidences), dim=0
-).numpy()
-
-print("The obtained result via shark is: ", confidences)
-print("The golden result is:", golden_confidences)
-
-np.testing.assert_allclose(
-    golden_confidences, confidences, rtol=1e-02, atol=1e-03
-)
-np.testing.assert_allclose(
-    golden_probabilities, probabilities, rtol=1e-02, atol=1e-03
-)
--- a/shark/examples/shark_inference/CLIPModel_tf.py
+++ b/shark/examples/shark_inference/CLIPModel_tf.py
@@ -1,65 +0,0 @@
-from PIL import Image
-import requests
-
-from transformers import CLIPProcessor, TFCLIPModel
-import tensorflow as tf
-from shark.shark_inference import SharkInference
-
-# Create a set of inputs
-clip_vit_inputs = [
-    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
-    tf.TensorSpec(shape=[2, 7], dtype=tf.int32),
-    tf.TensorSpec(shape=[1, 3, 224, 224], dtype=tf.float32),
-]
-
-
-class CLIPModule(tf.Module):
-    def __init__(self):
-        super(CLIPModule, self).__init__()
-        self.m = TFCLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-
-        self.m.predict = lambda x, y, z: self.m(
-            input_ids=x, attention_mask=y, pixel_values=z
-        )
-
-    @tf.function(input_signature=clip_vit_inputs, jit_compile=True)
-    def forward(self, input_ids, attention_mask, pixel_values):
-        return self.m.predict(
-            input_ids, attention_mask, pixel_values
-        ).logits_per_image
-
-
-if __name__ == "__main__":
-    # Prepping Data
-    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-
-    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-    image = Image.open(requests.get(url, stream=True).raw)
-
-    inputs = processor(
-        text=["a photo of a cat", "a photo of a dog"],
-        images=image,
-        return_tensors="tf",
-        padding=True,
-    )
-
-    shark_module = SharkInference(
-        CLIPModule(),
-        (
-            inputs["input_ids"],
-            inputs["attention_mask"],
-            inputs["pixel_values"],
-        ),
-    )
-    shark_module.set_frontend("tensorflow")
-    shark_module.compile()
-
-    print(
-        shark_module.forward(
-            (
-                inputs["input_ids"],
-                inputs["attention_mask"],
-                inputs["pixel_values"],
-            )
-        )
-    )
--- a/shark/examples/shark_inference/ESRGAN/README.md
+++ b/shark/examples/shark_inference/ESRGAN/README.md
@@ -1,15 +0,0 @@
-## Running ESRGAN
-
-```
-1. pip install numpy opencv-python
-2. mkdir InputImages
-   (this is where all the input images will reside in)
-3. mkdir OutputImages
-   (this is where the model will generate all the images)
-4. mkdir models
-   (save the .pth checkpoint file here)
-5. python esrgan.py
-```
-
- Download [RRDB_ESRGAN_x4.pth](https://drive.google.com/drive/u/0/folders/17VYV_SoZZesU6mbxz2dMAIccSSlqLecY) and place it in the `models` directory as mentioned above in step 4.
- Credits : [ESRGAN](https://github.com/xinntao/ESRGAN)
--- a/shark/examples/shark_inference/ESRGAN/esrgan.py
+++ b/shark/examples/shark_inference/ESRGAN/esrgan.py
@@ -1,239 +0,0 @@
-from ast import arg
-import os.path as osp
-import glob
-import cv2
-import numpy as np
-import torch
-
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch._decomp import get_decompositions
-from shark.shark_inference import SharkInference
-import torch_mlir
-import tempfile
-import functools
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-def make_layer(block, n_layers):
-    layers = []
-    for _ in range(n_layers):
-        layers.append(block())
-    return nn.Sequential(*layers)
-
-
-class ResidualDenseBlock_5C(nn.Module):
-    def __init__(self, nf=64, gc=32, bias=True):
-        super(ResidualDenseBlock_5C, self).__init__()
-        # gc: growth channel, i.e. intermediate channels
-        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
-        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
-        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
-        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
-        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-
-        # initialization
-        # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
-
-    def forward(self, x):
-        x1 = self.lrelu(self.conv1(x))
-        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
-        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
-        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
-        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
-        return x5 * 0.2 + x
-
-
-class RRDB(nn.Module):
-    """Residual in Residual Dense Block"""
-
-    def __init__(self, nf, gc=32):
-        super(RRDB, self).__init__()
-        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
-        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
-        self.RDB3 = ResidualDenseBlock_5C(nf, gc)
-
-    def forward(self, x):
-        out = self.RDB1(x)
-        out = self.RDB2(out)
-        out = self.RDB3(out)
-        return out * 0.2 + x
-
-
-class RRDBNet(nn.Module):
-    def __init__(self, in_nc, out_nc, nf, nb, gc=32):
-        super(RRDBNet, self).__init__()
-        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
-
-        self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
-        self.RRDB_trunk = make_layer(RRDB_block_f, nb)
-        self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        #### upsampling
-        self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
-        self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
-
-        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
-
-    def forward(self, x):
-        fea = self.conv_first(x)
-        trunk = self.trunk_conv(self.RRDB_trunk(fea))
-        fea = fea + trunk
-
-        fea = self.lrelu(
-            self.upconv1(F.interpolate(fea, scale_factor=2, mode="nearest"))
-        )
-        fea = self.lrelu(
-            self.upconv2(F.interpolate(fea, scale_factor=2, mode="nearest"))
-        )
-        out = self.conv_last(self.lrelu(self.HRconv(fea)))
-
-        return out
-
-
-############### Parsing args #####################
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-p.add_argument("--device", type=str, default="cpu", help="the device to use")
-p.add_argument(
-    "--mlir_loc",
-    type=str,
-    default=None,
-    help="location of the model's mlir file",
-)
-args = p.parse_args()
-###################################################
-
-
-def inference(input_m):
-    return model(input_m)
-
-
-def load_mlir(mlir_loc):
-    import os
-
-    if mlir_loc == None:
-        return None
-    print(f"Trying to load the model from {mlir_loc}.")
-    with open(os.path.join(mlir_loc)) as f:
-        mlir_module = f.read()
-    return mlir_module
-
-
-def compile_through_fx(model, inputs, mlir_loc=None):
-    module = load_mlir(mlir_loc)
-    if module == None:
-        fx_g = make_fx(
-            model,
-            decomposition_table=get_decompositions(
-                [
-                    torch.ops.aten.embedding_dense_backward,
-                    torch.ops.aten.native_layer_norm_backward,
-                    torch.ops.aten.slice_backward,
-                    torch.ops.aten.select_backward,
-                    torch.ops.aten.norm.ScalarOpt_dim,
-                    torch.ops.aten.native_group_norm,
-                    torch.ops.aten.upsample_bilinear2d.vec,
-                    torch.ops.aten.split.Tensor,
-                    torch.ops.aten.split_with_sizes,
-                ]
-            ),
-        )(inputs)
-
-        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
-        fx_g.recompile()
-
-        def strip_overloads(gm):
-            """
-            Modifies the target of graph nodes in :attr:`gm` to strip overloads.
-            Args:
-                gm(fx.GraphModule): The input Fx graph module to be modified
-            """
-            for node in gm.graph.nodes:
-                if isinstance(node.target, torch._ops.OpOverload):
-                    node.target = node.target.overloadpacket
-            gm.recompile()
-
-        strip_overloads(fx_g)
-
-        ts_g = torch.jit.script(fx_g)
-
-        print("Torchscript graph generated successfully")
-        module = torch_mlir.compile(
-            ts_g,
-            inputs,
-            torch_mlir.OutputType.LINALG_ON_TENSORS,
-            use_tracing=False,
-            verbose=False,
-        )
-
-    mlir_model = str(module)
-    func_name = "forward"
-    shark_module = SharkInference(
-        mlir_model, device=args.device, mlir_dialect="linalg"
-    )
-    shark_module.compile()
-
-    return shark_module
-
-
-model_path = "models/RRDB_ESRGAN_x4.pth"  # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
-# device = torch.device('cuda')  # if you want to run on CPU, change 'cuda' -> cpu
-device = torch.device("cpu")
-
-test_img_folder = "InputImages/*"
-
-model = RRDBNet(3, 3, 64, 23, gc=32)
-model.load_state_dict(torch.load(model_path), strict=True)
-model.eval()
-model = model.to(device)
-
-print("Model path {:s}. \nTesting...".format(model_path))
-
-if __name__ == "__main__":
-    idx = 0
-    for path in glob.glob(test_img_folder):
-        idx += 1
-        base = osp.splitext(osp.basename(path))[0]
-        print(idx, base)
-        # read images
-        img = cv2.imread(path, cv2.IMREAD_COLOR)
-        img = img * 1.0 / 255
-        img = torch.from_numpy(
-            np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))
-        ).float()
-        img_LR = img.unsqueeze(0)
-        img_LR = img_LR.to(device)
-
-        with torch.no_grad():
-            shark_module = compile_through_fx(inference, img_LR)
-            shark_output = shark_module.forward((img_LR,))
-            shark_output = torch.from_numpy(shark_output)
-            shark_output = (
-                shark_output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
-            )
-            esrgan_output = (
-                model(img_LR).data.squeeze().float().cpu().clamp_(0, 1).numpy()
-            )
-        # SHARK OUTPUT
-        shark_output = np.transpose(shark_output[[2, 1, 0], :, :], (1, 2, 0))
-        shark_output = (shark_output * 255.0).round()
-        cv2.imwrite(
-            "OutputImages/{:s}_rlt_shark_output.png".format(base), shark_output
-        )
-        print("Generated SHARK's output")
-        # ESRGAN OUTPUT
-        esrgan_output = np.transpose(esrgan_output[[2, 1, 0], :, :], (1, 2, 0))
-        esrgan_output = (esrgan_output * 255.0).round()
-        cv2.imwrite(
-            "OutputImages/{:s}_rlt_esrgan_output.png".format(base),
-            esrgan_output,
-        )
-        print("Generated ESRGAN's output")
--- a/shark/examples/shark_inference/albert_maskfill_pt.py
+++ b/shark/examples/shark_inference/albert_maskfill_pt.py
@@ -1,86 +0,0 @@
-from transformers import AutoModelForMaskedLM, AutoTokenizer
-import torch
-from shark.shark_inference import SharkInference
-from shark.shark_importer import SharkImporter
-from iree.compiler import compile_str
-from iree import runtime as ireert
-import os
-import numpy as np
-
-MAX_SEQUENCE_LENGTH = 512
-BATCH_SIZE = 1
-
-
-class AlbertModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
-        self.model.eval()
-
-    def forward(self, input_ids, attention_mask):
-        return self.model(
-            input_ids=input_ids, attention_mask=attention_mask
-        ).logits
-
-
-if __name__ == "__main__":
-    # Prepping Data
-    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
-    text = "This [MASK] is very tasty."
-    encoded_inputs = tokenizer(
-        text,
-        padding="max_length",
-        truncation=True,
-        max_length=MAX_SEQUENCE_LENGTH,
-        return_tensors="pt",
-    )
-    inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
-    mlir_importer = SharkImporter(
-        AlbertModule(),
-        inputs,
-        frontend="torch",
-    )
-    minilm_mlir, func_name = mlir_importer.import_mlir(
-        is_dynamic=False, tracing_required=True
-    )
-    shark_module = SharkInference(minilm_mlir)
-    shark_module.compile()
-    token_logits = torch.tensor(shark_module.forward(inputs))
-    mask_id = torch.where(
-        encoded_inputs["input_ids"] == tokenizer.mask_token_id
-    )[1]
-    mask_token_logits = token_logits[0, mask_id, :]
-    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
-    for token in top_5_tokens:
-        print(
-            f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
-        )
-    while True:
-        try:
-            new_text = input("Give me a sentence with [MASK] to fill: ")
-            encoded_inputs = tokenizer(
-                new_text,
-                padding="max_length",
-                truncation=True,
-                max_length=MAX_SEQUENCE_LENGTH,
-                return_tensors="pt",
-            )
-            inputs = (
-                encoded_inputs["input_ids"],
-                encoded_inputs["attention_mask"],
-            )
-            token_logits = torch.tensor(shark_module.forward(inputs))
-            mask_id = torch.where(
-                encoded_inputs["input_ids"] == tokenizer.mask_token_id
-            )[1]
-            mask_token_logits = token_logits[0, mask_id, :]
-            top_5_tokens = (
-                torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
-            )
-            for token in top_5_tokens:
-                print(
-                    f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
-                )
-        except KeyboardInterrupt:
-            print("Exiting program.")
-            break
--- a/shark/examples/shark_inference/albert_maskfill_tf.py
+++ b/shark/examples/shark_inference/albert_maskfill_tf.py
@@ -1,100 +0,0 @@
-from PIL import Image
-import requests
-
-from transformers import TFAutoModelForMaskedLM, AutoTokenizer
-import tensorflow as tf
-from shark.shark_inference import SharkInference
-from shark.shark_importer import SharkImporter
-from iree.compiler import tf as tfc
-from iree.compiler import compile_str
-from iree import runtime as ireert
-import os
-import numpy as np
-import sys
-
-MAX_SEQUENCE_LENGTH = 512
-BATCH_SIZE = 1
-
-# Create a set of inputs
-t5_inputs = [
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-    tf.TensorSpec(shape=[BATCH_SIZE, MAX_SEQUENCE_LENGTH], dtype=tf.int32),
-]
-
-
-class AlbertModule(tf.Module):
-    def __init__(self):
-        super(AlbertModule, self).__init__()
-        self.m = TFAutoModelForMaskedLM.from_pretrained("albert-base-v2")
-        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
-
-    @tf.function(input_signature=t5_inputs, jit_compile=True)
-    def forward(self, input_ids, attention_mask):
-        return self.m.predict(input_ids, attention_mask)
-
-
-if __name__ == "__main__":
-    # Prepping Data
-    tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")
-    # text = "This is a great [MASK]."
-    text = "This [MASK] is very tasty."
-    encoded_inputs = tokenizer(
-        text,
-        padding="max_length",
-        truncation=True,
-        max_length=MAX_SEQUENCE_LENGTH,
-        return_tensors="tf",
-    )
-    inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
-    mlir_importer = SharkImporter(
-        AlbertModule(),
-        inputs,
-        frontend="tf",
-    )
-    minilm_mlir, func_name = mlir_importer.import_mlir(
-        is_dynamic=False, tracing_required=False
-    )
-    shark_module = SharkInference(minilm_mlir, mlir_dialect="mhlo")
-    shark_module.compile()
-    output_idx = 0
-    data_idx = 1
-    token_logits = shark_module.forward(inputs)[output_idx][data_idx]
-    mask_id = np.where(
-        tf.squeeze(encoded_inputs["input_ids"]) == tokenizer.mask_token_id
-    )
-    mask_token_logits = token_logits[0, mask_id, :]
-    top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[0:5]
-    for token in top_5_tokens:
-        print(
-            f"'>>> Sample/Warmup output: {text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
-        )
-    while True:
-        try:
-            new_text = input("Give me a sentence with [MASK] to fill: ")
-            encoded_inputs = tokenizer(
-                new_text,
-                padding="max_length",
-                truncation=True,
-                max_length=MAX_SEQUENCE_LENGTH,
-                return_tensors="tf",
-            )
-            inputs = (
-                encoded_inputs["input_ids"],
-                encoded_inputs["attention_mask"],
-            )
-            token_logits = shark_module.forward(inputs)[output_idx][data_idx]
-            mask_id = np.where(
-                tf.squeeze(encoded_inputs["input_ids"])
-                == tokenizer.mask_token_id
-            )
-            mask_token_logits = token_logits[0, mask_id, :]
-            top_5_tokens = np.flip(np.argsort(mask_token_logits)).squeeze()[
-                0:5
-            ]
-            for token in top_5_tokens:
-                print(
-                    f"'>>> {new_text.replace(tokenizer.mask_token, tokenizer.decode(token))}'"
-                )
-        except KeyboardInterrupt:
-            print("Exiting program.")
-            sys.exit()
--- a/shark/examples/shark_inference/bloom_tank.py
+++ b/shark/examples/shark_inference/bloom_tank.py
@@ -1,14 +0,0 @@
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
-
-mlir_model, func_name, inputs, golden_out = download_model(
-    "bloom", frontend="torch"
-)
-
-shark_module = SharkInference(
-    mlir_model, device="cpu", mlir_dialect="tm_tensor"
-)
-shark_module.compile()
-result = shark_module.forward(inputs)
-print("The obtained result via shark is: ", result)
-print("The golden result is:", golden_out)
--- a/shark/examples/shark_inference/gpt2_tf.py
+++ b/shark/examples/shark_inference/gpt2_tf.py
@@ -1,40 +0,0 @@
-from PIL import Image
-import requests
-
-from transformers import GPT2Tokenizer, TFGPT2Model
-import tensorflow as tf
-from shark.shark_inference import SharkInference
-
-# Create a set of inputs
-gpt2_inputs = [
-    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
-    tf.TensorSpec(shape=[1, 8], dtype=tf.int32),
-]
-
-
-class GPT2Module(tf.Module):
-    def __init__(self):
-        super(GPT2Module, self).__init__()
-        self.m = TFGPT2Model.from_pretrained("distilgpt2")
-
-        self.m.predict = lambda x, y: self.m(input_ids=x, attention_mask=y)
-
-    @tf.function(input_signature=gpt2_inputs, jit_compile=True)
-    def forward(self, input_ids, attention_mask):
-        return self.m.predict(input_ids, attention_mask)
-
-
-if __name__ == "__main__":
-    # Prepping Data
-    tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
-    text = "I love the distilled version of models."
-
-    inputs = tokenizer(text, return_tensors="tf")
-    shark_module = SharkInference(
-        GPT2Module(), (inputs["input_ids"], inputs["attention_mask"])
-    )
-    shark_module.set_frontend("tensorflow")
-    shark_module.compile()
-    print(
-        shark_module.forward((inputs["input_ids"], inputs["attention_mask"]))
-    )
--- a/shark/examples/shark_inference/llama/README.md
+++ b/shark/examples/shark_inference/llama/README.md
@@ -1,18 +0,0 @@
-# SHARK LLaMA
-
-## TORCH-MLIR Version
-
-```
-https://github.com/nod-ai/torch-mlir.git
-```
-Then check out the `complex` branch and `git submodule update --init` and then build with `.\build_tools\python_deploy\build_windows.ps1`
-
-### Setup & Run
-```
-git clone https://github.com/nod-ai/llama.git
-```
-Then in this repository
-```
-pip install -e .
-python llama/shark_model.py
-```
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ean Garvey	dea405285e	Revert python version change and update actions/setup-python to v5	2024-08-08 16:50:15 -05:00
Ean Garvey	b1d2cb3bad	Change python version in nightly .yml to 3.11.9	2024-08-08 16:48:41 -05:00
Ean Garvey	4759e808f2	Rest API support and cleanup	2024-08-08 11:37:53 -05:00
Ean Garvey	d5f37eaf20	Bump requirements	2024-06-17 18:16:44 -05:00
Ean Garvey	84bc1437a7	Strip sdxl-turbo options	2024-06-17 17:57:40 -05:00
Ean Garvey	83f424a83e	Close advanced setting by default.	2024-06-06 13:22:57 -05:00
Ean Garvey	5b3b262359	Simplify ui further, add CLI option to load a default config	2024-06-06 13:21:25 -05:00
Ean Garvey	67b438eb9f	take all ireert calls out of studio flow	2024-06-04 01:46:48 -05:00
Ean Garvey	4aa2d8b2a2	Purge shark/ directory, minimal ireert api usage for dynamically loaded plugins	2024-06-04 00:53:07 -05:00
Ean Garvey	dac7a29eef	Purge unused code and patch out iree runtime handling from init	2024-06-03 18:00:05 -05:00
Ean Garvey	59600456be	seed fixes	2024-06-02 16:25:16 -05:00
Ean Garvey	e514910202	Remove sdxl 30step config	2024-06-02 14:28:58 -05:00
Ean Garvey	33f6c312d9	limit steps to 2 (gives best results)	2024-06-02 14:25:29 -05:00
Ean Garvey	ab06047108	set a default	2024-06-02 14:23:40 -05:00
Ean Garvey	ac48b843e7	actually reduce steps	2024-06-02 14:00:45 -05:00
Ean Garvey	5f1b5e58d6	igpu dont error on device parse fail	2024-06-02 12:56:44 -05:00
Ean Garvey	6adae49d9b	igpu restrictions	2024-06-02 12:51:53 -05:00
Ean Garvey	6abd9ff5cf	Reduce available step options for turbo.	2024-06-02 11:41:23 -05:00
Ean Garvey	9957c96014	More noticeable seed changes	2024-06-02 11:39:21 -05:00
Ean Garvey	36b8c2fd6d	disable pndm	2024-06-02 11:30:18 -05:00
Ean Garvey	9163c1fc50	small fixes	2024-06-02 11:28:37 -05:00
Ean Garvey	349e9f70fb	Progress indicators	2024-06-02 10:18:09 -05:00
Ean Garvey	64e63e7130	znver4 device handling	2024-06-02 10:08:00 -05:00
Ean Garvey	ea8738fb1a	Update SRT links	2024-06-02 09:50:09 -05:00
Ean Garvey	2a5bec3c4f	Fixes for seed.	2024-06-02 09:46:22 -05:00
Ean Garvey	bb58b01d75	Switch to fixed steps, tweak config loading to prevent race condition	2024-06-01 20:15:53 -05:00
Ean Garvey	02285b33a4	More fixes for demo.	2024-06-01 19:46:52 -05:00
Ean Garvey	f9a1d35b59	Hide chatbot.	2024-06-01 14:24:37 -05:00
Ean Garvey	b1ca19a6e6	Cleanup for demo.	2024-06-01 13:42:51 -05:00
Ean Garvey	b5dea85808	Reduce UI for demos.	2024-06-01 12:00:22 -05:00
Ean Garvey	e75f96f2d7	fixup conditional	2024-06-01 12:00:11 -05:00
Ean Garvey	bf67e2aa3b	Formatting	2024-06-01 11:59:10 -05:00
Ean Garvey	c088247aa1	Fix default configs, config loading, and add warnings/early returns for bad configs.	2024-06-01 11:58:51 -05:00
Ean Garvey	42abc6787d	Small tweaks to ckpt processing, add tool to prefix params keys	2024-06-01 11:53:40 -05:00
Ean Garvey	26f80ccbbb	Fixes to UI config defaults, config loading, and warnings. (#2153 )	2024-05-31 18:14:27 -04:00
Ean Garvey	d2c3752dc7	Fix batch count and tweaks to chatbot. (#2151 ) * Fix batch count * Add button to unload models manually. * Add compiled pipeline option * Add brevitas to requirements * Tweaks to chatbot * Change script loading trigger	2024-05-31 18:48:28 +05:30
Ean Garvey	4505c4549f	Force inlined weights on igpu for now, small fixes to chatbot (#2149 ) * Add igpu and custom triple support. * Small fixes to igpu, SDXL-turbo * custom pipe loading * formatting * Remove old nodlogo import.	2024-05-30 11:40:42 -05:00
Gaurav Shukla	793495c9c6	[ui] Add AMD logo in shark studio Signed-Off-by: Gaurav Shukla <gaurav.shukla@amd.com>	2024-05-30 21:43:15 +05:30
Ean Garvey	13e1d8d98a	Add igpu and custom triple support. (#2148 )	2024-05-29 17:39:36 -05:00
Ean Garvey	2074df40ad	Point to nod fork of diffusers. (#2146 )	2024-05-29 00:56:21 -05:00
Ean Garvey	7b30582408	Point to SRT links for windows. (#2145 )	2024-05-29 01:20:30 -04:00
Ean Garvey	151195ab74	Add a few requirements for ensured parity with turbine-models requirements. (#2142 ) * Add scipy to requirements. Adds diffusers req and a note for torchsde.	2024-05-28 15:37:31 -05:00
Ean Garvey	8146f0bd2f	Remove leftover merge conflict line from setup script. (#2141 )	2024-05-28 11:04:45 -07:00
Ean Garvey	68e9281778	(Studio2) Refactors SD pipeline to rely on turbine-models pipeline, fixes to LLM, gitignore (#2129 ) * Shark Studio SDXL support, HIP driver support, simpler device info, small fixes * Fixups to llm API/UI and ignore user config files. * Small fixes for unifying pipelines. * Update requirements.txt for iree-turbine (#2130) * Fix Llama2 on CPU (#2133) * Filesystem cleanup and custom model fixes (#2127) * Fix some formatting issues * Remove IREE pin (fixes exe issue) (#2126) * Update find links for IREE packages (#2136) * Shark Studio SDXL support, HIP driver support, simpler device info, small fixes * Abstract out SD pipelines from Studio Webui (WIP) * Switch from pin to minimum torch version and fix index url * Fix device parsing. * Fix linux setup * Fix custom weights. --------- Co-authored-by: saienduri <77521230+saienduri@users.noreply.github.com> Co-authored-by: gpetters-amd <159576198+gpetters-amd@users.noreply.github.com> Co-authored-by: gpetters94 <gpetters@protonmail.com>	2024-05-28 13:18:31 -04:00
Ean Garvey	fd07cae991	Update find links for IREE packages (#2136 )	2024-05-13 11:43:17 -05:00
gpetters94	6cb86a843e	Remove IREE pin (fixes exe issue) (#2126 ) * Diagnose a build issue * Remove IREE pin * Revert the build on pull request change	2024-04-30 12:27:30 -05:00
gpetters-amd	7db1612a5c	Filesystem cleanup and custom model fixes (#2127 ) * Initial filesystem cleanup * More filesystem cleanup * Fix some formatting issues * Address comments	2024-04-30 11:18:33 -05:00
gpetters-amd	81d6e059ac	Fix Llama2 on CPU (#2133 )	2024-04-29 12:18:16 -05:00
saienduri	e003d0abe8	Update requirements.txt for iree-turbine (#2130 ) * Update requirements.txt to iree-turbine creation * Update requirements.txt * Update requirements.txt * Update requirements.txt	2024-04-29 12:28:14 -04:00
Quinn Dawkins	cf2513e7b1	Update IREE discord link (#2118 ) Discord links for IREE were purged, so update the link on the readme.	2024-04-15 12:54:27 -07:00
Ean Garvey	60d8591e95	Change shark-turbine requirement target branch to main. (#2116 )	2024-04-11 19:31:39 -04:00
gpetters-amd	ff91982168	Remove target env (#2114 )	2024-04-08 16:52:45 -05:00
powderluv	a6a9e524c1	Drop linux nightly for now	2024-04-05 12:04:36 -07:00
powderluv	732df2e263	Updated signtool key	2024-04-05 12:01:42 -07:00
gpetters-amd	1ee16bd256	Fix the nightly build (#2111 )	2024-04-05 19:22:33 +05:30
gpetters-amd	752d775fbd	Fix a typo in the nightly build script (#2110 )	2024-03-30 17:31:51 -07:00
gpetters-amd	4d1a6a204d	Fix builder issue (#2109 )	2024-03-30 16:21:55 -07:00
Ean Garvey	0eff62a468	(Studio 2.0) add Stable Diffusion features (#2037 ) * (WIP): Studio2 app infra and SD API UI/app structure and utility implementation. - Initializers for webui/API launch - Schedulers file for SD scheduling utilities - Additions to API-level utilities - Added embeddings module for LoRA, Lycoris, yada yada - Added image_processing module for resamplers, resize tools, transforms, and any image annotation (PNG metadata) - shared_cmd_opts module -- sorry, this is stable_args.py. It lives on. We still want to have some global control over the app exclusively from the command-line. At least we will be free from shark_args. - Moving around some utility pieces. - Try to make api+webui concurrency possible in index.py - SD UI -- this is just img2imgUI but hopefully a little better. - UI utilities for your nod logos and your gradio temps. Enable UI / bugfixes / tweaks * Studio2/SD: Use more correct LoRA alpha calculation (#2034) * Updates ProcessLoRA to use both embedded LoRA alpha, and lora_strength optional parameter (default 1.0) when applying LoRA weights. * Updates ProcessLoRA to cover more dim cases. * This bring ProcessLoRA into line with PR #2015 against Studio1 * Studio2: Remove duplications from api/utils.py (#2035) * Remove duplicate os import * Remove duplicate parse_seed_input function Migrating to JSON requests in SD UI More UI and app flow improvements, logging, shared device cache Model loading Complete SD pipeline. Tweaks to VAE, pipeline states Pipeline tweaks, add cmd_opts parsing to sd api * Add test for SD * Small cleanup * Shark2/SD/UI: Respect ckpt_dir, share and server_port args (#2070) * Takes whether to generate a gradio live link from the existing --share command line parameter, rather than hardcoding as True. * Takes server port from existing --server_port command line parameter, rather than hardcoding as 11911. 
* Default --ckpt_dir parameter to '../models' * Use --ckpt_dir rather than hardcoding ../models as the base directory for checkpoints, vae, and lora, etc * Add a 'checkpoints' directory below --ckpt_dir to match ComfyUI folder structure. Read custom_weights choices from there, and/or subfolders below there matching the selected base model. * Fix --ckpt_dir possibly not working correctly when an absolute rather than relative path is specified. * Relabel "Custom Weights" to "Custom Weights Checkpoint" in the UI * Add StreamingLLM support to studio2 chat (#2060) * Streaming LLM * Update precision and add gpu support * (studio2) Separate weights generation for quantization support * Adapt prompt changes to studio flow * Remove outdated flag from llm compile flags. * (studio2) use turbine vmfbRunner * tweaks to prompts * Update CPU path and llm api test. * Change device in test to cpu. * Fixes to runner, device names, vmfb mgmt * Use small test without external weights. * HF-Reference LLM mode + Update test result to match latest Turbine. (#2080) * HF-Reference LLM mode. * Fixup test to match current output from Turbine. * lint * Fix test error message + Only initialize HF torch model when used. * Remove redundant format_out change. * Add rest API endpoint from LanguageModel API * Add StreamingLLM support to studio2 chat (#2060) * Streaming LLM * Update precision and add gpu support * (studio2) Separate weights generation for quantization support * Adapt prompt changes to studio flow * Remove outdated flag from llm compile flags. * (studio2) use turbine vmfbRunner * tweaks to prompts * Update CPU path and llm api test. * Change device in test to cpu. * Fixes to runner, device names, vmfb mgmt * Use small test without external weights. * Formatting and init files. * Remove unused import. * Small fixes * Studio2/SD/UI: Improve various parts of the UI for Stable Diffusion (#2074) * Studio2/SD/UI: Improve various parts of the UI of Shark 2 * Update Gradio pin to 4.15.0. 
* Port workarounds for Gradio >4.8.0 main container sizing from Shark 1.0. * Move nod Logo out of the SD tab and onto the top right of the main tab bar. * Set nod logo icon as the favicon (as current Shark 1.0). * Create a tabbed right hand panel within the SD UI sized to the viewport height. * Make Input Image tab 1 in the right hand panel. * Make output images, generation log, and generation buttons, tab 2 in the right hand panel * Make config JSON display, with config load, save and clear, tab 3 in the right hand panel * Make gallery area of the Output tab take up all vertical space the other controls on the tab do not. * Tidy up the controls on the Config tab somewhat. * Studio2/SD/UI: Reorganise inputs on Left Panel of SD tab * Rename previously added Right Panel Output tab to 'Generate'. * Move Batch Count, Batch Size, and Repeatable Seeds, off of Left Panel and onto 'Generate' Tab. * On 'Generate' tab, rename 'Generate Image(s)' button to 'Start', and 'Stop Batch' button to 'Stop'. They are now below the Batch inputs on a Generate tab so don't need the specificity. * Move Device, Low VRAM, and Precision inputs into their own 'Device Settings' Accordion control. (starts closed) * Rename 'Custom Weights Checkpoint' to 'Checkpoint Weights' * Move Checkpoint Weights, VAE Model, Standalone Lora Weights, and Embeddings Options controls, into their own 'Model Weights' Accordion control. (starts closed) * Move Denoising Strength, and Resample Type controls into their own 'Input Image Processing' Accordion. (starts closed) * Move any remaining controls in the 'Advanced Options' Accorion directly onto the left panel, and remove then Accordion. * Enable the copy button for all text boxes on the SD tab. * Add emoji/unicode glphs to all top level controls and Accordions on the SD Left Panel. * Start with the 'Generate' as the initially selected tab in the SD Right Panel, working around Gradio issue #7805 * Tweaks to SD Right Tab Panel vertical height. 
* Studio2/SD/UI: Sizing tweaks for Right Panel, and >1920 width * Set height of right panel using vmin rather than vh, with explicit affordances for fixed areas above and below. * Port >1920 width Gradio >4.8 CSS workaround from Shark 1.0. * Studio2/SD: Fix sd pipeline up to "Windows not supported" (#2082) * Studio2/SD: Fix sd pipeline up to "Windows not supported" A number of fixes to the SD pipeline as run from the UI, up until the point that dynamo complains "Windows not yet supported for torch.compile". * Remove separate install of iree-runtime and iree-compile in setup_venv.ps1, and rely on the versions installed via the Turbine requirements.txt. Fixes #2063 for me. * Replace any "None" strings with python None when pulling the config in the UI. * Add 'hf_auth_token' param to api StableDiffusion class, defaulting to None, and then pass that in to the various Models where it is required and wasn't already being done before. * Fix clip custom_weight_params being passed to export_clip_model as "external_weight_file" rather than "external_weights" * Don't pass non-existing "custom_vae" parameter to the Turbine Vae Model, instead pass custom_vae as the "hf_model_id" if it is set. (this may be wrong in the custom vae cast, but stops the code always breaking). * Studio2/SD/UI: Improve UI config None handling * When populating the UI from a JSON Config set controls to "None" for null/None values. * When generating a JSON Config from the UI set props to null/None for controls set to "None". * Use null rather string 'None' in the default config --------- Co-authored-by: Ean Garvey <87458719+monorimet@users.noreply.github.com> * Studio2/SD/UI: Further sd ui pipeline fixes (#2091) On Windows, this gets us all the way failing in iree compile of the with SD 2.1 base. - Fix merge errors with sd right pane config UI tab. - Remove non-requirement.txt install/build of torch/mlir/iree/SRT in setup_venv.ps1, fixing "torch.compile not supported on Windows" error. 
- Fix gradio deprecation warning for `root=` FileExplorer kwarg. - Comment out `precision` and `max_length` kwargs being passed to unet, as not yet supported on main Turbine branch. Avoids keyword argument error. * Tweak compile-time flags for SD submodels. * Small fixes to sd, pin mpmath * Add pyinstaller spec and imports script. * Fix the .exe (#2101) * Fix _IREE_TARGET_MAP (#2103) (#2108) - Change target passed to iree for vulkan from 'vulkan' to 'vulkan-spriv', as 'vulkan' is not a valid value for --iree-hal-target-backends with the current iree compiler. Co-authored-by: Stefan Kapusniak <121311569+one-lithe-rune@users.noreply.github.com> * Cleanup sd model map. * Update dependencies. * Studio2/SD/UI: Update gradio to 4.19.2 (sd-studio2) (#2097) - Move pin for gradio from 4.15 -> 4.19.2 on the sd-studio2 branch * fix formatting and disable explicit vulkan env settings. --------- Co-authored-by: Stefan Kapusniak <121311569+one-lithe-rune@users.noreply.github.com> Co-authored-by: Stanley Winata <68087699+raikonenfnu@users.noreply.github.com> Co-authored-by: gpetters-amd <159576198+gpetters-amd@users.noreply.github.com> Co-authored-by: gpetters94 <gpetters@protonmail.com>	2024-03-29 18:13:21 -04:00