Drop old cli and webui (#911)

2026-04-03 03:00:17 -04:00 · 2023-02-01 13:13:46 -08:00
parent 6a32a4e26c
commit e26a70aa4f
50 changed files with 0 additions and 5895 deletions
--- a/shark/examples/shark_inference/stable_diffusion/profiling_with_iree.md
+++ b/shark/examples/shark_inference/stable_diffusion/profiling_with_iree.md
--- a/apps/stable_diffusion/scripts/telegram_bot.py
+++ b/apps/stable_diffusion/scripts/telegram_bot.py
--- a/shark/examples/shark_inference/stable_diffusion/stable_diffusion_amd.md
+++ b/shark/examples/shark_inference/stable_diffusion/stable_diffusion_amd.md
--- a/shark/examples/shark_inference/stable_diffusion/stable_diffusion_telegram_bot.md
+++ b/shark/examples/shark_inference/stable_diffusion/stable_diffusion_telegram_bot.md
--- a/shark/examples/shark_inference/stable_diff.py
+++ b/shark/examples/shark_inference/stable_diff.py
@@ -1,269 +0,0 @@
-from transformers import CLIPTextModel, CLIPTokenizer
-from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler
-import torch
-from PIL import Image
-from diffusers import LMSDiscreteScheduler
-from tqdm.auto import tqdm
-from shark.shark_inference import SharkInference
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch._decomp import get_decompositions
-import torch_mlir
-import tempfile
-import numpy as np
-
-# pip install diffusers
-# pip install scipy
-
-############### Parsing args #####################
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-p.add_argument(
-    "--prompt",
-    type=str,
-    default="a photograph of an astronaut riding a horse",
-    help="the text prompt to use",
-)
-p.add_argument("--device", type=str, default="cpu", help="the device to use")
-p.add_argument("--steps", type=int, default=10, help="the device to use")
-p.add_argument("--mlir_loc", type=str, default=None, help="the device to use")
-p.add_argument("--vae_loc", type=str, default=None, help="the device to use")
-args = p.parse_args()
-
-#####################################################
-
-
-def load_mlir(mlir_loc):
-    import os
-
-    if mlir_loc == None:
-        return None
-    print(f"Trying to load the model from {mlir_loc}.")
-    with open(os.path.join(mlir_loc)) as f:
-        mlir_module = f.read()
-    return mlir_module
-
-
-def compile_through_fx(model, inputs, mlir_loc=None, extra_args=[]):
-    module = load_mlir(mlir_loc)
-    if mlir_loc == None:
-        fx_g = make_fx(
-            model,
-            decomposition_table=get_decompositions(
-                [
-                    torch.ops.aten.embedding_dense_backward,
-                    torch.ops.aten.native_layer_norm_backward,
-                    torch.ops.aten.slice_backward,
-                    torch.ops.aten.select_backward,
-                    torch.ops.aten.norm.ScalarOpt_dim,
-                    torch.ops.aten.native_group_norm,
-                    torch.ops.aten.upsample_bilinear2d.vec,
-                    torch.ops.aten.split.Tensor,
-                    torch.ops.aten.split_with_sizes,
-                ]
-            ),
-        )(*inputs)
-
-        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
-        fx_g.recompile()
-
-        def strip_overloads(gm):
-            """
-            Modifies the target of graph nodes in :attr:`gm` to strip overloads.
-            Args:
-                gm(fx.GraphModule): The input Fx graph module to be modified
-            """
-            for node in gm.graph.nodes:
-                if isinstance(node.target, torch._ops.OpOverload):
-                    node.target = node.target.overloadpacket
-            gm.recompile()
-
-        strip_overloads(fx_g)
-
-        ts_g = torch.jit.script(fx_g)
-
-        module = torch_mlir.compile(
-            ts_g,
-            inputs,
-            torch_mlir.OutputType.LINALG_ON_TENSORS,
-            use_tracing=False,
-            verbose=False,
-        )
-
-    mlir_model = module
-    func_name = "forward"
-
-    shark_module = SharkInference(
-        mlir_model,
-        func_name,
-        device=args.device,
-        mlir_dialect="tm_tensor",
-    )
-    shark_module.compile(extra_args)
-
-    return shark_module
-
-
-if __name__ == "__main__":
-    YOUR_TOKEN = "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"
-
-    # 1. Load the autoencoder model which will be used to decode the latents into image space.
-    vae = AutoencoderKL.from_pretrained(
-        "CompVis/stable-diffusion-v1-4",
-        subfolder="vae",
-        use_auth_token=YOUR_TOKEN,
-    )
-
-    # 2. Load the tokenizer and text encoder to tokenize and encode the text.
-    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-    text_encoder = CLIPTextModel.from_pretrained(
-        "openai/clip-vit-large-patch14"
-    )
-
-    class VaeModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.vae = AutoencoderKL.from_pretrained(
-                "CompVis/stable-diffusion-v1-4",
-                subfolder="vae",
-                use_auth_token=YOUR_TOKEN,
-            )
-
-        def forward(self, input):
-            return self.vae.decode(input, return_dict=False)[0]
-
-    vae = VaeModel()
-    vae_input = torch.rand(1, 4, 64, 64)
-    shark_vae = compile_through_fx(vae, (vae_input,), args.vae_loc)
-
-    # Wrap the unet model to return tuples.
-    class UnetModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.unet = UNet2DConditionModel.from_pretrained(
-                "CompVis/stable-diffusion-v1-4",
-                subfolder="unet",
-                use_auth_token=YOUR_TOKEN,
-            )
-            self.in_channels = self.unet.in_channels
-            self.train(False)
-
-        def forward(self, x, y, z):
-            return self.unet.forward(x, y, z, return_dict=False)[0]
-
-    # 3. The UNet model for generating the latents.
-    unet = UnetModel()
-    latent_model_input = torch.rand([2, 4, 64, 64])
-    text_embeddings = torch.rand([2, 77, 768])
-    shark_unet = compile_through_fx(
-        unet,
-        (latent_model_input, torch.tensor([1.0]), text_embeddings),
-        args.mlir_loc,
-        ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-    )
-
-    # torch.jit.script(unet)
-
-    scheduler = LMSDiscreteScheduler(
-        beta_start=0.00085,
-        beta_end=0.012,
-        beta_schedule="scaled_linear",
-        num_train_timesteps=1000,
-    )
-
-    prompt = [args.prompt]
-
-    height = 512  # default height of Stable Diffusion
-    width = 512  # default width of Stable Diffusion
-
-    num_inference_steps = args.steps  # Number of denoising steps
-
-    guidance_scale = 7.5  # Scale for classifier-free guidance
-
-    generator = torch.manual_seed(
-        42
-    )  # Seed generator to create the initial latent noise
-
-    batch_size = len(prompt)
-
-    text_input = tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
-
-    text_embeddings = text_encoder(text_input.input_ids)[0]
-
-    max_length = text_input.input_ids.shape[-1]
-    uncond_input = tokenizer(
-        [""] * batch_size,
-        padding="max_length",
-        max_length=max_length,
-        return_tensors="pt",
-    )
-    uncond_embeddings = text_encoder(uncond_input.input_ids)[0]
-
-    text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
-
-    latents = torch.randn(
-        (batch_size, unet.in_channels, height // 8, width // 8),
-        generator=generator,
-    )
-    # latents = latents.to(torch_device)
-
-    scheduler.set_timesteps(num_inference_steps)
-
-    latents = latents * scheduler.sigmas[0]
-    # print(latents, latents.shape)
-
-    for i, t in tqdm(enumerate(scheduler.timesteps)):
-        print(f"i = {i} t = {t}")
-        # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-        latent_model_input = torch.cat([latents] * 2)
-        sigma = scheduler.sigmas[i]
-        latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
-
-        # predict the noise residual
-
-        # with torch.no_grad():
-        # noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)
-
-        latent_model_input_numpy = latent_model_input.detach().numpy()
-        text_embeddings_numpy = text_embeddings.detach().numpy()
-
-        noise_pred = shark_unet.forward(
-            (
-                latent_model_input_numpy,
-                np.array([t]).astype(np.float32),
-                text_embeddings_numpy,
-            )
-        )
-        noise_pred = torch.from_numpy(noise_pred)
-
-        # perform guidance
-        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-        noise_pred = noise_pred_uncond + guidance_scale * (
-            noise_pred_text - noise_pred_uncond
-        )
-
-        # compute the previous noisy sample x_t -> x_t-1
-        latents = scheduler.step(noise_pred, i, latents)["prev_sample"]
-
-    # print("Latents shape : ", latents.shape)
-
-    # scale and decode the image latents with vae
-    latents = 1 / 0.18215 * latents
-    latents_numpy = latents.detach().numpy()
-    image = shark_vae.forward((latents_numpy,))
-    image = torch.from_numpy(image)
-
-    image = (image / 2 + 0.5).clamp(0, 1)
-    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
-    images = (image * 255).round().astype("uint8")
-    pil_images = [Image.fromarray(image) for image in images]
-    pil_images[0].save("astro.jpg")
--- a/shark/examples/shark_inference/stable_diff_f16.py
+++ b/shark/examples/shark_inference/stable_diff_f16.py
@@ -1,277 +0,0 @@
-from transformers import CLIPTextModel, CLIPTokenizer
-from diffusers import AutoencoderKL, UNet2DConditionModel, PNDMScheduler
-import torch
-from PIL import Image
-from diffusers import LMSDiscreteScheduler
-from tqdm.auto import tqdm
-from shark.shark_inference import SharkInference
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch._decomp import get_decompositions
-import torch_mlir
-import tempfile
-import numpy as np
-
-# pip install diffusers
-# pip install scipy
-
-############### Parsing args #####################
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-p.add_argument(
-    "--prompt",
-    type=str,
-    default="a photograph of an astronaut riding a horse",
-    help="the text prompt to use",
-)
-p.add_argument("--device", type=str, default="cpu", help="the device to use")
-p.add_argument("--steps", type=int, default=50, help="the device to use")
-p.add_argument("--mlir_loc", type=str, default=None, help="the device to use")
-p.add_argument("--vae_loc", type=str, default=None, help="the device to use")
-args = p.parse_args()
-
-#####################################################
-
-
-def fp16_unet():
-    from shark.shark_downloader import download_model
-
-    mlir_model, func_name, inputs, golden_out = download_model(
-        "stable_diff_f16_18_OCT",
-        tank_url="gs://shark_tank/prashant_nod",
-        frontend="torch",
-    )
-    shark_module = SharkInference(
-        mlir_model, func_name, device=args.device, mlir_dialect="linalg"
-    )
-    shark_module.compile()
-    return shark_module
-
-
-def load_mlir(mlir_loc):
-    import os
-
-    if mlir_loc == None:
-        return None
-    print(f"Trying to load the model from {mlir_loc}.")
-    with open(os.path.join(mlir_loc)) as f:
-        mlir_module = f.read()
-    return mlir_module
-
-
-def compile_through_fx(model, inputs, mlir_loc=None):
-    module = load_mlir(mlir_loc)
-    if mlir_loc == None:
-        fx_g = make_fx(
-            model,
-            decomposition_table=get_decompositions(
-                [
-                    torch.ops.aten.embedding_dense_backward,
-                    torch.ops.aten.native_layer_norm_backward,
-                    torch.ops.aten.slice_backward,
-                    torch.ops.aten.select_backward,
-                    torch.ops.aten.norm.ScalarOpt_dim,
-                    torch.ops.aten.native_group_norm,
-                    torch.ops.aten.upsample_bilinear2d.vec,
-                    torch.ops.aten.split.Tensor,
-                    torch.ops.aten.split_with_sizes,
-                ]
-            ),
-        )(*inputs)
-
-        fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
-        fx_g.recompile()
-
-        def strip_overloads(gm):
-            """
-            Modifies the target of graph nodes in :attr:`gm` to strip overloads.
-            Args:
-                gm(fx.GraphModule): The input Fx graph module to be modified
-            """
-            for node in gm.graph.nodes:
-                if isinstance(node.target, torch._ops.OpOverload):
-                    node.target = node.target.overloadpacket
-            gm.recompile()
-
-        strip_overloads(fx_g)
-
-        ts_g = torch.jit.script(fx_g)
-
-        module = torch_mlir.compile(
-            ts_g,
-            inputs,
-            torch_mlir.OutputType.LINALG_ON_TENSORS,
-            use_tracing=False,
-            verbose=False,
-        )
-
-    mlir_model = module
-    func_name = "forward"
-
-    shark_module = SharkInference(
-        mlir_model, func_name, device=args.device, mlir_dialect="linalg"
-    )
-    shark_module.compile()
-
-    return shark_module
-
-
-if __name__ == "__main__":
-    YOUR_TOKEN = "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"
-
-    # 1. Load the autoencoder model which will be used to decode the latents into image space.
-    vae = AutoencoderKL.from_pretrained(
-        "CompVis/stable-diffusion-v1-4",
-        subfolder="vae",
-        use_auth_token=YOUR_TOKEN,
-    )
-
-    # 2. Load the tokenizer and text encoder to tokenize and encode the text.
-    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-    text_encoder = CLIPTextModel.from_pretrained(
-        "openai/clip-vit-large-patch14"
-    )
-
-    class VaeModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.vae = AutoencoderKL.from_pretrained(
-                "CompVis/stable-diffusion-v1-4",
-                subfolder="vae",
-                use_auth_token=YOUR_TOKEN,
-            )
-
-        def forward(self, input):
-            return self.vae.decode(input, return_dict=False)[0]
-
-    vae = VaeModel()
-    vae_input = torch.rand(1, 4, 64, 64)
-    shark_vae = compile_through_fx(vae, (vae_input,), args.vae_loc)
-
-    # Wrap the unet model to return tuples.
-    class UnetModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.unet = UNet2DConditionModel.from_pretrained(
-                "CompVis/stable-diffusion-v1-4",
-                subfolder="unet",
-                use_auth_token=YOUR_TOKEN,
-            )
-            self.in_channels = self.unet.in_channels
-            self.train(False)
-
-    def forward(self, x, y, z):
-        return self.unet.forward(x, y, z, return_dict=False)[0]
-
-    # # 3. The UNet model for generating the latents.
-    unet = UnetModel()
-
-    shark_unet = fp16_unet()
-
-    scheduler = LMSDiscreteScheduler(
-        beta_start=0.00085,
-        beta_end=0.012,
-        beta_schedule="scaled_linear",
-        num_train_timesteps=1000,
-    )
-
-    prompt = [args.prompt]
-
-    height = 512  # default height of Stable Diffusion
-    width = 512  # default width of Stable Diffusion
-
-    num_inference_steps = args.steps  # Number of denoising steps
-
-    guidance_scale = 7.5  # Scale for classifier-free guidance
-
-    generator = torch.manual_seed(
-        42
-    )  # Seed generator to create the initial latent noise
-
-    batch_size = len(prompt)
-
-    text_input = tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
-
-    text_embeddings = text_encoder(text_input.input_ids)[0]
-
-    max_length = text_input.input_ids.shape[-1]
-    uncond_input = tokenizer(
-        [""] * batch_size,
-        padding="max_length",
-        max_length=max_length,
-        return_tensors="pt",
-    )
-    uncond_embeddings = text_encoder(uncond_input.input_ids)[0]
-
-    text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
-
-    latents = torch.randn(
-        (batch_size, unet.in_channels, height // 8, width // 8),
-        generator=generator,
-    )
-    # latents = latents.to(torch_device)
-
-    scheduler.set_timesteps(num_inference_steps)
-
-    latents = latents * scheduler.sigmas[0]
-    # print(latents, latents.shape)
-
-    for i, t in tqdm(enumerate(scheduler.timesteps)):
-        print(f"i = {i} t = {t}")
-        # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-        latent_model_input = torch.cat([latents] * 2)
-        sigma = scheduler.sigmas[i]
-        latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
-
-        # predict the noise residual
-
-        # with torch.no_grad():
-        # noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)
-
-        latent_model_input_numpy = (
-            latent_model_input.detach().numpy().astype(np.half)
-        )
-        text_embeddings_numpy = (
-            text_embeddings.detach().numpy().astype(np.half)
-        )
-
-        noise_pred = shark_unet.forward(
-            (
-                latent_model_input_numpy,
-                np.array([t]).astype(np.half),
-                text_embeddings_numpy,
-            )
-        )
-        noise_pred = torch.from_numpy(noise_pred).to(torch.float32)
-
-        # perform guidance
-        noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
-        noise_pred = noise_pred_uncond + guidance_scale * (
-            noise_pred_text - noise_pred_uncond
-        )
-
-        # compute the previous noisy sample x_t -> x_t-1
-        latents = scheduler.step(noise_pred, i, latents)["prev_sample"]
-
-    # print("Latents shape : ", latents.shape)
-
-    # scale and decode the image latents with vae
-    latents = 1 / 0.18215 * latents
-    latents_numpy = latents.detach().numpy()
-    image = shark_vae.forward((latents_numpy,))
-    image = torch.from_numpy(image)
-
-    image = (image / 2 + 0.5).clamp(0, 1)
-    image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
-    images = (image * 255).round().astype("uint8")
-    pil_images = [Image.fromarray(image) for image in images]
-    pil_images[0].save("astro.jpg")
--- a/shark/examples/shark_inference/stable_diff_tf.py
+++ b/shark/examples/shark_inference/stable_diff_tf.py
@@ -1,313 +0,0 @@
-import math
-import numpy as np
-import tensorflow as tf
-from tensorflow import keras
-from keras_cv.models.generative.stable_diffusion.clip_tokenizer import (
-    SimpleTokenizer,
-)
-from keras_cv.models.generative.stable_diffusion.constants import (
-    _ALPHAS_CUMPROD,
-)
-from keras_cv.models.generative.stable_diffusion.constants import (
-    _UNCONDITIONAL_TOKENS,
-)
-from keras_cv.models.generative.stable_diffusion.decoder import Decoder
-from keras_cv.models.generative.stable_diffusion.text_encoder import (
-    TextEncoder,
-)
-
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
-from PIL import Image
-
-# pip install "git+https://github.com/keras-team/keras-cv.git"
-# pip install tensorflow_dataset
-
-############### Parsing args #####################
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-p.add_argument(
-    "--prompt",
-    type=str,
-    default="a photograph of an astronaut riding a horse",
-    help="the text prompt to use",
-)
-p.add_argument("--device", type=str, default="cpu", help="the device to use")
-p.add_argument(
-    "--steps", type=int, default=10, help="the number of steps to use"
-)
-p.add_argument(
-    "--save_path",
-    type=str,
-    default=None,
-    help="the file to save the resulting image to. (default to <input prompt>.jpg)",
-)
-args = p.parse_args()
-
-#####################################################
-
-MAX_PROMPT_LENGTH = 77
-
-
-class SharkStableDiffusion:
-    """Shark implementation of Stable Diffusion based on model from keras_cv.
-    Stable Diffusion is a powerful image generation model that can be used,
-    among other things, to generate pictures according to a short text description
-    (called a "prompt").
-    Arguments:
-        device: Device to use with SHARK. Default: cpu
-        jit_compile: Whether to compile the underlying models to XLA.
-            This can lead to a significant speedup on some systems. Default: True.
-    References:
-    - [About Stable Diffusion](https://stability.ai/blog/stable-diffusion-announcement)
-    - [Original implementation](https://github.com/CompVis/stable-diffusion)
-    """
-
-    def __init__(self, device="cpu", jit_compile=True):
-        self.img_height = 512
-        self.img_width = 512
-        self.tokenizer = SimpleTokenizer()
-
-        # Create models
-        self.text_encoder = TextEncoder(MAX_PROMPT_LENGTH)
-
-        mlir_model, func_name, inputs, golden_out = download_model(
-            "stable_diff", tank_url="gs://shark_tank/quinn", frontend="tf"
-        )
-        shark_module = SharkInference(
-            mlir_model, func_name, device=device, mlir_dialect="mhlo"
-        )
-        shark_module.compile()
-        self.diffusion_model = shark_module
-        self.decoder = Decoder(self.img_height, self.img_width)
-        if jit_compile:
-            self.text_encoder.compile(jit_compile=True)
-            self.decoder.compile(jit_compile=True)
-
-        print(
-            "By using this model checkpoint, you acknowledge that its usage is "
-            "subject to the terms of the CreativeML Open RAIL-M license at "
-            "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/LICENSE"
-        )
-        # Load weights
-        text_encoder_weights_fpath = keras.utils.get_file(
-            origin="https://huggingface.co/fchollet/stable-diffusion/resolve/main/kcv_encoder.h5",
-            file_hash="4789e63e07c0e54d6a34a29b45ce81ece27060c499a709d556c7755b42bb0dc4",
-        )
-        decoder_weights_fpath = keras.utils.get_file(
-            origin="https://huggingface.co/fchollet/stable-diffusion/resolve/main/kcv_decoder.h5",
-            file_hash="ad350a65cc8bc4a80c8103367e039a3329b4231c2469a1093869a345f55b1962",
-        )
-        self.text_encoder.load_weights(text_encoder_weights_fpath)
-        self.decoder.load_weights(decoder_weights_fpath)
-
-    def text_to_image(
-        self,
-        prompt,
-        batch_size=1,
-        num_steps=25,
-        unconditional_guidance_scale=7.5,
-        seed=None,
-    ):
-        encoded_text = self.encode_text(prompt)
-
-        return self.generate_image(
-            encoded_text,
-            batch_size=batch_size,
-            num_steps=num_steps,
-            unconditional_guidance_scale=unconditional_guidance_scale,
-            seed=seed,
-        )
-
-    def encode_text(self, prompt):
-        """Encodes a prompt into a latent text encoding.
-        The encoding produced by this method should be used as the
-        `encoded_text` parameter of `StableDiffusion.generate_image`. Encoding
-        text separately from generating an image can be used to arbitrarily
-        modify the text encoding prior to image generation, e.g. for walking
-        between two prompts.
-        Args:
-            prompt: a string to encode, must be 77 tokens or shorter.
-        Example:
-        ```python
-        from keras_cv.models import StableDiffusion
-        model = StableDiffusion(img_height=512, img_width=512, jit_compile=True)
-        encoded_text  = model.encode_text("Tacos at dawn")
-        img = model.generate_image(encoded_text)
-        ```
-        """
-        # Tokenize prompt (i.e. starting context)
-        inputs = self.tokenizer.encode(prompt)
-        if len(inputs) > MAX_PROMPT_LENGTH:
-            raise ValueError(
-                f"Prompt is too long (should be <= {MAX_PROMPT_LENGTH} tokens)"
-            )
-        phrase = inputs + [49407] * (MAX_PROMPT_LENGTH - len(inputs))
-        phrase = tf.convert_to_tensor([phrase], dtype=tf.int32)
-
-        context = self.text_encoder.predict_on_batch(
-            [phrase, self._get_pos_ids()]
-        )
-
-        return context
-
-    def generate_image(
-        self,
-        encoded_text,
-        batch_size=1,
-        num_steps=25,
-        unconditional_guidance_scale=7.5,
-        diffusion_noise=None,
-        seed=None,
-    ):
-        """Generates an image based on encoded text.
-        The encoding passed to this method should be derived from
-        `StableDiffusion.encode_text`.
-        Args:
-            encoded_text: Tensor of shape (`batch_size`, 77, 768), or a Tensor
-            of shape (77, 768). When the batch axis is omitted, the same encoded
-            text will be used to produce every generated image.
-            batch_size: number of images to generate. Default: 1.
-            num_steps: number of diffusion steps (controls image quality).
-                Default: 25.
-            unconditional_guidance_scale: float controlling how closely the image
-                should adhere to the prompt. Larger values result in more
-                closely adhering to the prompt, but will make the image noisier.
-                Default: 7.5.
-            diffusion_noise: Tensor of shape (`batch_size`, img_height // 8,
-                img_width // 8, 4), or a Tensor of shape (img_height // 8,
-                img_width // 8, 4). Optional custom noise to seed the diffusion
-                process. When the batch axis is omitted, the same noise will be
-                used to seed diffusion for every generated image.
-            seed: integer which is used to seed the random generation of
-                diffusion noise, only to be specified if `diffusion_noise` is
-                None.
-        Example:
-        ```python
-        from keras_cv.models import StableDiffusion
-        batch_size = 8
-        model = StableDiffusion(img_height=512, img_width=512, jit_compile=True)
-        e_tacos = model.encode_text("Tacos at dawn")
-        e_watermelons = model.encode_text("Watermelons at dusk")
-        e_interpolated = tf.linspace(e_tacos, e_watermelons, batch_size)
-        images = model.generate_image(e_interpolated, batch_size=batch_size)
-        ```
-        """
-        if diffusion_noise is not None and seed is not None:
-            raise ValueError(
-                "`diffusion_noise` and `seed` should not both be passed to "
-                "`generate_image`. `seed` is only used to generate diffusion "
-                "noise when it's not already user-specified."
-            )
-
-        encoded_text = tf.squeeze(encoded_text)
-        if encoded_text.shape.rank == 2:
-            encoded_text = tf.repeat(
-                tf.expand_dims(encoded_text, axis=0), batch_size, axis=0
-            )
-
-        context = encoded_text
-        unconditional_context = tf.repeat(
-            self._get_unconditional_context(), batch_size, axis=0
-        )
-        context = tf.concat([context, unconditional_context], 0)
-
-        if diffusion_noise is not None:
-            diffusion_noise = tf.squeeze(diffusion_noise)
-            if diffusion_noise.shape.rank == 3:
-                diffusion_noise = tf.repeat(
-                    tf.expand_dims(diffusion_noise, axis=0), batch_size, axis=0
-                )
-            latent = diffusion_noise
-        else:
-            latent = self._get_initial_diffusion_noise(batch_size, seed)
-
-        # Iterative reverse diffusion stage
-        timesteps = tf.range(1, 1000, 1000 // num_steps)
-        alphas, alphas_prev = self._get_initial_alphas(timesteps)
-        progbar = keras.utils.Progbar(len(timesteps))
-        iteration = 0
-        for index, timestep in list(enumerate(timesteps))[::-1]:
-            latent_prev = latent  # Set aside the previous latent vector
-            t_emb = self._get_timestep_embedding(timestep, batch_size)
-
-            # Prepare the latent and unconditional latent to be run with a single forward call
-            latent = tf.concat([latent, latent], 0)
-            t_emb = tf.concat([t_emb, t_emb], 0)
-            latent_numpy = self.diffusion_model.forward(
-                [latent.numpy(), t_emb.numpy(), context.numpy()]
-            )
-            latent = tf.convert_to_tensor(latent_numpy, dtype=tf.float32)
-            latent, unconditional_latent = tf.split(latent, 2)
-
-            latent = unconditional_latent + unconditional_guidance_scale * (
-                latent - unconditional_latent
-            )
-            a_t, a_prev = alphas[index], alphas_prev[index]
-            pred_x0 = (latent_prev - math.sqrt(1 - a_t) * latent) / math.sqrt(
-                a_t
-            )
-            latent = (
-                latent * math.sqrt(1.0 - a_prev) + math.sqrt(a_prev) * pred_x0
-            )
-            iteration += 1
-            progbar.update(iteration)
-
-        # Decoding stage
-        decoded = self.decoder.predict_on_batch(latent)
-        decoded = ((decoded + 1) / 2) * 255
-        return np.clip(decoded, 0, 255).astype("uint8")
-
-    def _get_unconditional_context(self):
-        unconditional_tokens = tf.convert_to_tensor(
-            [_UNCONDITIONAL_TOKENS], dtype=tf.int32
-        )
-        unconditional_context = self.text_encoder.predict_on_batch(
-            [unconditional_tokens, self._get_pos_ids()]
-        )
-
-        return unconditional_context
-
-    def _get_timestep_embedding(
-        self, timestep, batch_size, dim=320, max_period=10000
-    ):
-        half = dim // 2
-        freqs = tf.math.exp(
-            -math.log(max_period) * tf.range(0, half, dtype=tf.float32) / half
-        )
-        args = tf.convert_to_tensor([timestep], dtype=tf.float32) * freqs
-        embedding = tf.concat([tf.math.cos(args), tf.math.sin(args)], 0)
-        embedding = tf.reshape(embedding, [1, -1])
-        return tf.repeat(embedding, batch_size, axis=0)
-
-    def _get_initial_alphas(self, timesteps):
-        alphas = [_ALPHAS_CUMPROD[t] for t in timesteps]
-        alphas_prev = [1.0] + alphas[:-1]
-
-        return alphas, alphas_prev
-
-    def _get_initial_diffusion_noise(self, batch_size, seed):
-        return tf.random.normal(
-            (batch_size, self.img_height // 8, self.img_width // 8, 4),
-            seed=seed,
-        )
-
-    @staticmethod
-    def _get_pos_ids():
-        return tf.convert_to_tensor(
-            [list(range(MAX_PROMPT_LENGTH))], dtype=tf.int32
-        )
-
-
-if __name__ == "__main__":
-    SD = SharkStableDiffusion(device=args.device)
-    images = SD.text_to_image(args.prompt, num_steps=args.steps)
-    pil_images = [Image.fromarray(image) for image in images]
-    save_fname = args.prompt + ".jpg"
-    if args.save_path is not None:
-        save_fname = args.save_path
-    pil_images[0].save(save_fname)
--- a/shark/examples/shark_inference/stable_diffusion/.gitignore
+++ b/shark/examples/shark_inference/stable_diffusion/.gitignore
@@ -1,2 +0,0 @@
-*.vmfb
-*.jpg
--- a/shark/examples/shark_inference/stable_diffusion/README.md
+++ b/shark/examples/shark_inference/stable_diffusion/README.md
@@ -1,106 +0,0 @@
-# STABLE DIFFUSION
-
-## Installation
-
-Follow setup instructions in the main [README.md](https://github.com/nod-ai/SHARK#readme) for regular usage. 
-
- 
-## Using other supported Stable Diffusion variants with SHARK:
-
-Currently we support fine-tuned versions of Stable Diffusion such as:
- [AnythingV3](https://huggingface.co/Linaqruf/anything-v3.0)
- [Analog Diffusion](https://huggingface.co/wavymulder/Analog-Diffusion)
-
-Use the `--hf_model_id=` flag to specify the repo-id of the model to be used.
-
-```shell
-python .\shark\examples\shark_inference\stable_diffusion\main.py --hf_model_id="Linaqruf/anything-v3.0" --max_length=77 --prompt="1girl, brown hair, green eyes, colorful, autumn, cumulonimbus clouds, lighting, blue sky, falling leaves, garden" --no-use_tuned
-```
-
-## Run a custom model using a `.ckpt` / `.safetensors` checkpoint file:
-* Ensure there is no `.yaml` file in the root directory of SHARK. The safest approach is to be on the latest `main` branch and pass `--clear_all` the first time you run the inference command.
-* Install `pytorch_lightning` by running:
-```shell
-pip install pytorch_lightning
-```
-NOTE: This is needed to process [ckpt file of runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned.ckpt).
-* Download a [.ckpt](https://huggingface.co/andite/anything-v4.0/resolve/main/anything-v4.0-pruned-fp32.ckpt) file in case you don't have a locally generated `.ckpt` file for StableDiffusion.
-
-* Now pass the above `.ckpt` file to the `ckpt_loc` command-line argument as follows:
-```shell
-python3.10 main.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd" --max_length=64 --import_mlir --ckpt_loc="/path/to/.ckpt/file" --no-use_tuned
-```
-* This feature relies on a combination of two flags: `import_mlir` and `ckpt_loc`.
-* If `ckpt_loc` is NOT specified, a [default](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) HuggingFace repo-id is run via `hf_model_id`. So there are two ways to use `import_mlir`:
- With `hf_model_id` to run HuggingFace's StableDiffusion variants.
- With `ckpt_loc` to run a StableDiffusion variant with a `.ckpt` or `.safetensors` checkpoint file
-
-* Use custom model `.ckpt` files from [HuggingFace-StableDiffusion](https://huggingface.co/models?other=stable-diffusion) to generate images.
-* You may also try out [.safetensors file of Protogen x3.4 of civitai.com](https://civitai.com/models/3666/protogen-x34-photorealism-official-release) and provide the `.safetensors` path to `ckpt_loc` flag.
-* NOTE: Ensure that the `.ckpt` or `.safetensors` file name is part of the path passed to the `ckpt_loc` flag, e.g. `--ckpt_loc="/path/to/checkpoint/file/name_of_checkpoint.ckpt"` or `--ckpt_loc="/path/to/checkpoint/file/name_of_checkpoint.safetensors"`. Also ensure that you pass the `--no-use_tuned` flag in your run command.
-
-
-## Running the model for a `batch_size` and for a set of `runs`:
-We currently support batch size in the range `[1, 3]`.
-You can specify batch size using `batch_size` flag (defaults to `1`) and the number of times you want to run the model using `runs` flag (defaults to `1`).
-In total, you'll be able to generate `batch_size * runs` number of images.
- Usage 1: Using the same prompt -
-```shell
-python3.10 main.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd" --max_length=64 --import_mlir --hf_model_id="runwayml/stable-diffusion-v1-5" --batch_size=3 --no-use_tuned
-```
-The example above generates `3` different images in total with the same prompt `tajmahal, oil on canvas, sunflowers, 4k, uhd`.
- Usage 2: Using different prompts -
-```shell
-python3.10 main.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd" --max_length=64 --import_mlir --hf_model_id="runwayml/stable-diffusion-v1-5" --batch_size=3 -p="batman riding a horse, oil on canvas, 4k, uhd" -p="superman riding a horse, oil on canvas, 4k, uhd" --no-use_tuned
-```
-The example above generates `1` image for each different prompt, thus generating `3` images in total.
- Usage 3: Using `runs` -
-```shell
-python3.10 main.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd" --max_length=64 --import_mlir --hf_model_id="runwayml/stable-diffusion-v1-5" --batch_size=2 --runs=3 --no-use_tuned
-```
-The example above generates `6` different images in total: `2` images for each of the `3` runs.
-
-</details>
-  <details>
-  <summary>Debug Commands</summary>
-
-## Debug commands and other advanced usage follow.
-
-```shell
-python main.py --precision="fp32"|"fp16" --device="cpu"|"cuda"|"vulkan" --import_mlir|--no-import_mlir --prompt "enter the text" 
-
-```
-
-## Dump all dispatch .spv and ISA using amdllpc
-
-```shell
-python main.py --precision="fp16" --device="vulkan" --iree-vulkan-target-triple=rdna3-unknown-linux --no-load_vmfb --dispatch_benchmarks="all" --dispatch_benchmarks_dir="SD_dispatches" --dump_isa
-```
-
-## Compile and save the .vmfb (using vulkan fp16 as an example):
-
-```shell
-python shark/examples/shark_inference/stable_diffusion/main.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb
-```
-
-## Capture an RGP trace
-
-```shell
-python shark/examples/shark_inference/stable_diffusion/main.py --precision=fp16 --device=vulkan --steps=50 --save_vmfb --enable_rgp
-```
-
-## Run the vae module with iree-benchmark-module (NCHW, fp16, vulkan, for example):
-
-```shell
-iree-benchmark-module --module_file=/path/to/output/vmfb --entry_function=forward --device=vulkan --function_input=1x4x64x64xf16  
-```
-
-## Run the unet module with iree-benchmark-module (same config as above):
-```shell
-##if you want to use .npz inputs:
-unzip ~/.local/shark_tank/<your unet>/inputs.npz
-
-iree-benchmark-module --module_file=/path/to/output/vmfb --entry_function=forward --function_input=@arr_0.npy --function_input=1xf16 --function_input=@arr_2.npy --function_input=@arr_3.npy --function_input=@arr_4.npy  
-```
-
-</details>
--- a/shark/examples/shark_inference/stable_diffusion/download_hf_models.py
+++ b/shark/examples/shark_inference/stable_diffusion/download_hf_models.py
@@ -1,25 +0,0 @@
-from PIL import Image
-import requests
-
-from transformers import CLIPProcessor, CLIPModel
-
-model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
-processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
-
-url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-image = Image.open(requests.get(url, stream=True).raw)
-
-inputs = processor(
-    text=["a photo of a cat", "a photo of a dog"],
-    images=image,
-    return_tensors="pt",
-    padding=True,
-)
-
-outputs = model(**inputs)
-logits_per_image = (
-    outputs.logits_per_image
-)  # this is the image-text similarity score
-probs = logits_per_image.softmax(
-    dim=1
-)  # we can take the softmax to get the label probabilities
--- a/shark/examples/shark_inference/stable_diffusion/main.py
+++ b/shark/examples/shark_inference/stable_diffusion/main.py
@@ -1,355 +0,0 @@
-import os
-import sys
-
-if "AMD_ENABLE_LLPC" not in os.environ:
-    os.environ["AMD_ENABLE_LLPC"] = "1"
-
-if sys.platform == "darwin":
-    os.environ["DYLD_LIBRARY_PATH"] = "/usr/local/lib"
-
-from transformers import CLIPTextModel, CLIPTokenizer
-import torch
-from PIL import Image, PngImagePlugin
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    EulerDiscreteScheduler,
-)
-from tqdm.auto import tqdm
-import numpy as np
-from random import randint
-from stable_args import args
-from datetime import datetime as dt
-import json
-import re
-from pathlib import Path
-from model_wrappers import SharkifyStableDiffusionModel
-
-# This has to come before importing cache objects
-if args.clear_all:
-    print("CLEARING ALL, EXPECT SEVERAL MINUTES TO RECOMPILE")
-    from glob import glob
-    import shutil
-
-    vmfbs = glob(os.path.join(os.getcwd(), "*.vmfb"))
-    for vmfb in vmfbs:
-        if os.path.exists(vmfb):
-            os.remove(vmfb)
-    # Temporary workaround of deleting yaml files to incorporate diffusers' pipeline.
-    # TODO: Remove this once we have better weight updation logic.
-    inference_yaml = ["v2-inference-v.yaml", "v1-inference.yaml"]
-    for yaml in inference_yaml:
-        if os.path.exists(yaml):
-            os.remove(yaml)
-    home = os.path.expanduser("~")
-    if os.name == "nt":  # Windows
-        appdata = os.getenv("LOCALAPPDATA")
-        shutil.rmtree(os.path.join(appdata, "AMD/VkCache"), ignore_errors=True)
-        shutil.rmtree(os.path.join(home, "shark_tank"), ignore_errors=True)
-    elif os.name == "unix":
-        shutil.rmtree(os.path.join(home, ".cache/AMD/VkCache"))
-        shutil.rmtree(os.path.join(home, ".local/shark_tank"))
-
-
-from utils import set_init_device_flags, disk_space_check, preprocessCKPT
-
-from schedulers import (
-    SharkEulerDiscreteScheduler,
-)
-import time
-from shark.iree_utils.compile_utils import dump_isas
-
-
-# Helper function to profile the vulkan device.
-def start_profiling(file_path="foo.rdc", profiling_mode="queue"):
-    if args.vulkan_debug_utils and "vulkan" in args.device:
-        import iree
-
-        print(f"Profiling and saving to {file_path}.")
-        vulkan_device = iree.runtime.get_device(args.device)
-        vulkan_device.begin_profiling(mode=profiling_mode, file_path=file_path)
-        return vulkan_device
-    return None
-
-
-def end_profiling(device):
-    if device:
-        return device.end_profiling()
-
-
-if __name__ == "__main__":
-    dtype = torch.float32 if args.precision == "fp32" else torch.half
-
-    # Make it as default prompt
-    if len(args.prompts) == 0:
-        args.prompts = ["cyberpunk forest by Salvador Dali"]
-
-    prompt = args.prompts
-    neg_prompt = args.negative_prompts
-    height = args.height
-    width = args.width
-    num_inference_steps = args.steps  # Number of denoising steps
-
-    # Scale for classifier-free guidance
-    guidance_scale = torch.tensor(args.guidance_scale).to(torch.float32)
-
-    batch_size = args.batch_size
-    prompt = prompt * batch_size if len(prompt) == 1 else prompt
-    len_of_prompt = len(prompt)
-    assert (
-        len_of_prompt == batch_size
-    ), f"no. of prompts ({len_of_prompt}) is not equal to batch_size ({batch_size})"
-    print("Running StableDiffusion with the following config :-")
-    print(f"Batch size : {batch_size}")
-    print(f"Prompts : {prompt}")
-    print(f"Runs : {args.runs}")
-
-    # Try to make neg_prompt equal to batch_size by appending blank strings.
-    for i in range(batch_size - len(neg_prompt)):
-        neg_prompt.append("")
-
-    set_init_device_flags()
-    disk_space_check(Path.cwd())
-
-    if not args.import_mlir:
-        from opt_params import get_unet, get_vae, get_clip
-
-        clip = get_clip()
-        unet = get_unet()
-        vae = get_vae()
-    else:
-        if args.ckpt_loc != "":
-            assert args.ckpt_loc.lower().endswith(
-                (".ckpt", ".safetensors")
-            ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
-            preprocessCKPT()
-        mlir_import = SharkifyStableDiffusionModel(
-            args.hf_model_id,
-            args.ckpt_loc,
-            args.precision,
-            max_len=args.max_length,
-            batch_size=batch_size,
-            height=height,
-            width=width,
-            use_base_vae=args.use_base_vae,
-            use_tuned=args.use_tuned,
-        )
-        clip, unet, vae = mlir_import()
-
-    if args.dump_isa:
-        dump_isas(args.dispatch_benchmarks_dir)
-
-    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-    scheduler = DPMSolverMultistepScheduler.from_pretrained(
-        "CompVis/stable-diffusion-v1-4",
-        subfolder="scheduler",
-    )
-    cpu_scheduling = True
-    if args.hf_model_id == "stabilityai/stable-diffusion-2-1":
-        tokenizer = CLIPTokenizer.from_pretrained(
-            "stabilityai/stable-diffusion-2-1", subfolder="tokenizer"
-        )
-
-        scheduler = DPMSolverMultistepScheduler.from_pretrained(
-            "stabilityai/stable-diffusion-2-1",
-            subfolder="scheduler",
-        )
-
-    if args.hf_model_id == "stabilityai/stable-diffusion-2-1-base":
-        tokenizer = CLIPTokenizer.from_pretrained(
-            "stabilityai/stable-diffusion-2-1-base", subfolder="tokenizer"
-        )
-
-        if args.use_compiled_scheduler:
-            scheduler = SharkEulerDiscreteScheduler.from_pretrained(
-                "stabilityai/stable-diffusion-2-1-base",
-                subfolder="scheduler",
-            )
-            scheduler.compile()
-            cpu_scheduling = False
-        else:
-            scheduler = EulerDiscreteScheduler.from_pretrained(
-                "stabilityai/stable-diffusion-2-1-base",
-                subfolder="scheduler",
-            )
-    for run in range(args.runs):
-        # Handle out of range seeds.
-        uint32_info = np.iinfo(np.uint32)
-        uint32_min, uint32_max = uint32_info.min, uint32_info.max
-        seed = args.seed
-        if run >= 1 or seed < uint32_min or seed >= uint32_max:
-            seed = randint(uint32_min, uint32_max)
-        generator = torch.manual_seed(
-            seed
-        )  # Seed generator to create the inital latent noise
-
-        # create a random initial latent.
-        latents = torch.randn(
-            (batch_size, 4, height // 8, width // 8),
-            generator=generator,
-            dtype=torch.float32,
-        ).to(dtype)
-        if run == 0:
-            # Warmup phase to improve performance.
-            if args.warmup_count >= 1:
-                vae_warmup_input = torch.clone(latents).detach().numpy()
-                clip_warmup_input = torch.randint(1, 2, (2, args.max_length))
-            for i in range(args.warmup_count):
-                vae("forward", (vae_warmup_input,))
-                clip("forward", (clip_warmup_input,))
-
-        start = time.time()
-        if run == 0:
-            text_input = tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=args.max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            max_length = text_input.input_ids.shape[-1]
-            uncond_input = tokenizer(
-                neg_prompt,
-                padding="max_length",
-                max_length=max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            text_input = torch.cat(
-                [uncond_input.input_ids, text_input.input_ids]
-            )
-
-            clip_inf_start = time.time()
-            text_embeddings = clip("forward", (text_input,))
-            clip_inf_end = time.time()
-            text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
-            text_embeddings_numpy = text_embeddings.detach().numpy()
-
-            scheduler.set_timesteps(num_inference_steps)
-            scheduler.is_scale_input_called = True
-
-        latents = latents * scheduler.init_noise_sigma
-
-        avg_ms = 0
-        for i, t in tqdm(
-            enumerate(scheduler.timesteps), disable=args.hide_steps
-        ):
-            step_start = time.time()
-            if not args.hide_steps:
-                print(f"i = {i} t = {t}", end="")
-            timestep = torch.tensor([t]).to(dtype).detach().numpy()
-            latent_model_input = scheduler.scale_model_input(latents, t)
-            if cpu_scheduling:
-                latent_model_input = latent_model_input.detach().numpy()
-
-            profile_device = start_profiling(file_path="unet.rdc")
-
-            noise_pred = unet(
-                "forward",
-                (
-                    latent_model_input,
-                    timestep,
-                    text_embeddings_numpy,
-                    guidance_scale,
-                ),
-                send_to_host=False,
-            )
-
-            end_profiling(profile_device)
-
-            if cpu_scheduling:
-                noise_pred = torch.from_numpy(noise_pred.to_host())
-                latents = scheduler.step(noise_pred, t, latents).prev_sample
-            else:
-                latents = scheduler.step(noise_pred, t, latents)
-            step_time = time.time() - step_start
-            avg_ms += step_time
-            step_ms = int((step_time) * 1000)
-            if not args.hide_steps:
-                print(f" ({step_ms}ms)")
-
-        # scale and decode the image latents with vae
-        if args.use_base_vae:
-            latents = 1 / 0.18215 * latents
-        latents_numpy = latents
-        if cpu_scheduling:
-            latents_numpy = latents.detach().numpy()
-        profile_device = start_profiling(file_path="vae.rdc")
-        vae_start = time.time()
-        images = vae("forward", (latents_numpy,))
-        vae_end = time.time()
-        end_profiling(profile_device)
-        if args.use_base_vae:
-            image = torch.from_numpy(images)
-            image = (image.detach().cpu() * 255.0).numpy()
-            images = image.round()
-        end_time = time.time()
-
-        avg_ms = 1000 * avg_ms / args.steps
-        clip_inf_time = (clip_inf_end - clip_inf_start) * 1000
-        vae_inf_time = (vae_end - vae_start) * 1000
-        total_time = end_time - start
-
-        print(f"\nStats for run {run}:")
-        print(f"Average step time: {avg_ms}ms/it")
-        print(f"Clip Inference time (ms) = {clip_inf_time:.3f}")
-        print(f"VAE Inference time (ms): {vae_inf_time:.3f}")
-        print(f"\nTotal image generation time: {total_time}sec")
-
-        images = torch.from_numpy(images).to(torch.uint8).permute(0, 2, 3, 1)
-        pil_images = [Image.fromarray(image) for image in images.numpy()]
-
-        if args.output_dir is not None:
-            output_path = Path(args.output_dir)
-            output_path.mkdir(parents=True, exist_ok=True)
-        else:
-            output_path = Path.cwd()
-        disk_space_check(output_path, lim=5)
-        for i in range(batch_size):
-            json_store = {
-                "prompt": prompt[i],
-                "negative prompt": args.negative_prompts[i],
-                "seed": seed,
-                "hf_model_id": args.hf_model_id,
-                "precision": args.precision,
-                "steps": args.steps,
-                "guidance_scale": args.guidance_scale,
-                "scheduler": args.scheduler,
-            }
-            prompt_slice = re.sub("[^a-zA-Z0-9]", "_", prompt[i][:15])
-            img_name = f"{prompt_slice}_{seed}_{run}_{i}_{dt.now().strftime('%y%m%d_%H%M%S')}"
-            if args.output_img_format == "jpg":
-                pil_images[i].save(
-                    output_path / f"{img_name}.jpg",
-                    quality=95,
-                    subsampling=0,
-                    optimize=True,
-                    progressive=True,
-                )
-            else:
-                pngInfo = PngImagePlugin.PngInfo()
-
-                if args.write_metadata_to_png:
-                    model_name = ""
-                    if args.ckpt_loc:
-                        model_name = Path(args.ckpt_loc).name
-                    else:
-                        model_name = json_store["hf_model_id"]
-                    pngInfo.add_text(
-                        "parameters",
-                        f"{json_store['prompt']}\nNegative prompt: {json_store['negative prompt']}\nSteps:{json_store['steps']}, Sampler: {json_store['scheduler']}, CFG scale: {json_store['guidance_scale']}, Seed: {json_store['seed']}, Size: {args.width}x{args.height}, Model: {model_name}",
-                    )
-
-                pil_images[i].save(
-                    output_path / f"{img_name}.png", "PNG", pnginfo=pngInfo
-                )
-                if args.output_img_format not in ["png", "jpg"]:
-                    print(
-                        f"[ERROR] Format {args.output_img_format} is not supported yet."
-                        "Image saved as png instead. Supported formats: png / jpg"
-                    )
-            if args.save_metadata_to_json:
-                with open(output_path / f"{img_name}.json", "w") as f:
-                    f.write(json.dumps(json_store, indent=4))
--- a/shark/examples/shark_inference/stable_diffusion/model_wrappers.py
+++ b/shark/examples/shark_inference/stable_diffusion/model_wrappers.py
@@ -1,284 +0,0 @@
-import sys
-import os
-
-sys.path.append(os.path.dirname(os.path.realpath(__file__)))
-from diffusers import AutoencoderKL, UNet2DConditionModel
-from transformers import CLIPTextModel
-from utils import compile_through_fx, get_opt_flags
-from resources import base_models
-from collections import defaultdict
-import torch
-
-
-# These shapes are parameter dependent.
-def replace_shape_str(shape, max_len, width, height, batch_size):
-    new_shape = []
-    for i in range(len(shape)):
-        if shape[i] == "max_len":
-            new_shape.append(max_len)
-        elif shape[i] == "height":
-            new_shape.append(height)
-        elif shape[i] == "width":
-            new_shape.append(width)
-        elif isinstance(shape[i], str):
-            if "batch_size" in shape[i]:
-                mul_val = int(shape[i].split("*")[0])
-                new_shape.append(batch_size * mul_val)
-        else:
-            new_shape.append(shape[i])
-    return new_shape
-
-
-# Get the input info for various models i.e. "unet", "clip", "vae".
-def get_input_info(model_info, max_len, width, height, batch_size):
-    dtype_config = {"f32": torch.float32, "i64": torch.int64}
-    input_map = defaultdict(list)
-    for k in model_info:
-        for inp in model_info[k]:
-            shape = model_info[k][inp]["shape"]
-            dtype = dtype_config[model_info[k][inp]["dtype"]]
-            tensor = None
-            if isinstance(shape, list):
-                clean_shape = replace_shape_str(
-                    shape, max_len, width, height, batch_size
-                )
-                if dtype == torch.int64:
-                    tensor = torch.randint(1, 3, tuple(clean_shape))
-                else:
-                    tensor = torch.randn(*clean_shape).to(dtype)
-            elif isinstance(shape, int):
-                tensor = torch.tensor(shape).to(dtype)
-            else:
-                sys.exit("shape isn't specified correctly.")
-            input_map[k].append(tensor)
-    return input_map
-
-
-class SharkifyStableDiffusionModel:
-    def __init__(
-        self,
-        model_id: str,
-        custom_weights: str,
-        precision: str,
-        max_len: int = 64,
-        width: int = 512,
-        height: int = 512,
-        batch_size: int = 1,
-        use_base_vae: bool = False,
-        use_tuned: bool = False,
-        debug: bool = False,
-        sharktank_dir: str = "",
-        generate_vmfb: bool = True,
-    ):
-        self.check_params(max_len, width, height)
-        self.max_len = max_len
-        self.height = height // 8
-        self.width = width // 8
-        self.batch_size = batch_size
-        self.model_id = model_id if custom_weights == "" else custom_weights
-        self.precision = precision
-        self.base_vae = use_base_vae
-        self.model_name = (
-            "_"
-            + str(batch_size)
-            + "_"
-            + str(max_len)
-            + "_"
-            + str(height)
-            + "_"
-            + str(width)
-            + "_"
-            + precision
-        )
-        self.use_tuned = use_tuned
-        self.debug = debug
-        self.sharktank_dir = sharktank_dir
-        self.generate_vmfb = generate_vmfb
-        # We need a better naming convention for the .vmfbs because despite
-        # using the custom model variant the .vmfb names remain the same and
-        # it'll always pick up the compiled .vmfb instead of compiling the
-        # custom model.
-        # So, currently, we add `self.model_id` in the `self.model_name` of
-        # .vmfb file.
-        # TODO: Have a better way of naming the vmfbs using self.model_name.
-        import re
-
-        model_name = re.sub(r"\W+", "_", self.model_id)
-        if model_name[0] == "_":
-            model_name = model_name[1:]
-        self.model_name = self.model_name + "_" + model_name
-
-    def check_params(self, max_len, width, height):
-        if not (max_len >= 32 and max_len <= 77):
-            sys.exit("please specify max_len in the range [32, 77].")
-        if not (width % 8 == 0 and width >= 384):
-            sys.exit("width should be greater than 384 and multiple of 8")
-        if not (height % 8 == 0 and height >= 384):
-            sys.exit("height should be greater than 384 and multiple of 8")
-
-    def get_vae(self):
-        class VaeModel(torch.nn.Module):
-            def __init__(self, model_id=self.model_id, base_vae=self.base_vae):
-                super().__init__()
-                self.vae = AutoencoderKL.from_pretrained(
-                    model_id,
-                    subfolder="vae",
-                )
-                self.base_vae = base_vae
-
-            def forward(self, input):
-                if not self.base_vae:
-                    input = 1 / 0.18215 * input
-                x = self.vae.decode(input, return_dict=False)[0]
-                x = (x / 2 + 0.5).clamp(0, 1)
-                if self.base_vae:
-                    return x
-                x = x * 255.0
-                return x.round()
-
-        vae = VaeModel()
-        inputs = tuple(self.inputs["vae"])
-        is_f16 = True if self.precision == "fp16" else False
-        vae_name = "base_vae" if self.base_vae else "vae"
-        vae_model_name = vae_name + self.model_name
-        if self.debug:
-            os.makedirs(
-                os.path.join(self.sharktank_dir, vae_model_name), exist_ok=True
-            )
-        shark_vae = compile_through_fx(
-            vae,
-            inputs,
-            is_f16=is_f16,
-            use_tuned=self.use_tuned,
-            model_name=vae_model_name,
-            extra_args=get_opt_flags("vae", precision=self.precision),
-            debug=self.debug,
-            generate_vmfb=self.generate_vmfb,
-        )
-        return shark_vae
-
-    def get_unet(self):
-        class UnetModel(torch.nn.Module):
-            def __init__(self, model_id=self.model_id):
-                super().__init__()
-                self.unet = UNet2DConditionModel.from_pretrained(
-                    model_id,
-                    subfolder="unet",
-                )
-                self.in_channels = self.unet.in_channels
-                self.train(False)
-
-            def forward(
-                self, latent, timestep, text_embedding, guidance_scale
-            ):
-                # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-                latents = torch.cat([latent] * 2)
-                unet_out = self.unet.forward(
-                    latents, timestep, text_embedding, return_dict=False
-                )[0]
-                noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
-                noise_pred = noise_pred_uncond + guidance_scale * (
-                    noise_pred_text - noise_pred_uncond
-                )
-                return noise_pred
-
-        unet = UnetModel()
-        is_f16 = True if self.precision == "fp16" else False
-        inputs = tuple(self.inputs["unet"])
-        input_mask = [True, True, True, False]
-        unet_model_name = "unet" + self.model_name
-        if self.debug:
-            os.makedirs(
-                os.path.join(self.sharktank_dir, unet_model_name),
-                exist_ok=True,
-            )
-        shark_unet = compile_through_fx(
-            unet,
-            inputs,
-            model_name=unet_model_name,
-            is_f16=is_f16,
-            f16_input_mask=input_mask,
-            use_tuned=self.use_tuned,
-            extra_args=get_opt_flags("unet", precision=self.precision),
-            debug=self.debug,
-            generate_vmfb=self.generate_vmfb,
-        )
-        return shark_unet
-
-    def get_clip(self):
-        class CLIPText(torch.nn.Module):
-            def __init__(self, model_id=self.model_id):
-                super().__init__()
-                self.text_encoder = CLIPTextModel.from_pretrained(
-                    model_id,
-                    subfolder="text_encoder",
-                )
-
-            def forward(self, input):
-                return self.text_encoder(input)[0]
-
-        clip_model = CLIPText()
-        clip_model_name = "clip" + self.model_name
-        if self.debug:
-            os.makedirs(
-                os.path.join(self.sharktank_dir, clip_model_name),
-                exist_ok=True,
-            )
-
-        shark_clip = compile_through_fx(
-            clip_model,
-            tuple(self.inputs["clip"]),
-            model_name=clip_model_name,
-            extra_args=get_opt_flags("clip", precision="fp32"),
-            debug=self.debug,
-            generate_vmfb=self.generate_vmfb,
-        )
-        return shark_clip
-
-    def __call__(self):
-        from utils import get_vmfb_path_name
-        from stable_args import args
-        import traceback, functools, operator, os
-
-        model_name = ["clip", "base_vae" if self.base_vae else "vae", "unet"]
-        vmfb_path = [
-            get_vmfb_path_name(model + self.model_name)[0]
-            for model in model_name
-        ]
-        for model_id in base_models:
-            self.inputs = get_input_info(
-                base_models[model_id],
-                self.max_len,
-                self.width,
-                self.height,
-                self.batch_size,
-            )
-            try:
-                compiled_unet = self.get_unet()
-                compiled_vae = self.get_vae()
-                compiled_clip = self.get_clip()
-            except Exception as e:
-                if args.enable_stack_trace:
-                    traceback.print_exc()
-                vmfb_present = [os.path.isfile(vmfb) for vmfb in vmfb_path]
-                all_vmfb_present = functools.reduce(
-                    operator.__and__, vmfb_present
-                )
-                # We need to delete vmfbs only if some of the models were compiled.
-                if not all_vmfb_present:
-                    for i in range(len(vmfb_path)):
-                        if vmfb_present[i]:
-                            os.remove(vmfb_path[i])
-                            print("Deleted: ", vmfb_path[i])
-                print("Retrying with a different base model configuration")
-                continue
-            # This is done just because in main.py we are basing the choice of tokenizer and scheduler
-            # on `args.hf_model_id`. Since now, we don't maintain 1:1 mapping of variants and the base
-            # model and rely on retrying method to find the input configuration, we should also update
-            # the knowledge of base model id accordingly into `args.hf_model_id`.
-            if args.ckpt_loc != "":
-                args.hf_model_id = model_id
-            return compiled_clip, compiled_unet, compiled_vae
-        sys.exit(
-            "Cannot compile the model. Please use `enable_stack_trace` and create an issue at https://github.com/nod-ai/SHARK/issues"
-        )
--- a/shark/examples/shark_inference/stable_diffusion/opt_params.py
+++ b/shark/examples/shark_inference/stable_diffusion/opt_params.py
@@ -1,112 +0,0 @@
-import sys
-import resources
-from stable_args import args
-from utils import get_shark_model
-
-models_db = (
-    resources.beta_models_db if args.beta_models else resources.models_db
-)
-BATCH_SIZE = len(args.prompts)
-if BATCH_SIZE != 1:
-    sys.exit("Only batch size 1 is supported.")
-
-hf_model_variant_map = {
-    "Linaqruf/anything-v3.0": ["anythingv3", "v2_1base"],
-    "dreamlike-art/dreamlike-diffusion-1.0": ["dreamlike", "v2_1base"],
-    "prompthero/openjourney": ["openjourney", "v2_1base"],
-    "wavymulder/Analog-Diffusion": ["analogdiffusion", "v2_1base"],
-    "stabilityai/stable-diffusion-2-1": ["stablediffusion", "v2_1"],
-    "stabilityai/stable-diffusion-2-1-base": ["stablediffusion", "v2_1base"],
-    "CompVis/stable-diffusion-v1-4": ["stablediffusion", "v1_4"],
-}
-
-variant, version = hf_model_variant_map[args.hf_model_id]
-
-
-def get_params(bucket_key, model_key, model, is_tuned, precision):
-    iree_flags = []
-    if len(args.iree_vulkan_target_triple) > 0:
-        iree_flags.append(
-            f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-        )
-
-    # Disable bindings fusion to work with moltenVK.
-    if sys.platform == "darwin":
-        iree_flags.append("-iree-stream-fuse-binding=false")
-
-    try:
-        bucket = models_db[0][bucket_key]
-        model_name = models_db[1][model_key]
-        iree_flags += models_db[2][model][is_tuned][precision][
-            "default_compilation_flags"
-        ]
-    except KeyError:
-        raise Exception(
-            f"{bucket_key}/{model_key} is not present in the models database"
-        )
-
-    if (
-        "specified_compilation_flags"
-        in models_db[2][model][is_tuned][precision]
-    ):
-        device = (
-            args.device
-            if "://" not in args.device
-            else args.device.split("://")[0]
-        )
-        if (
-            device
-            not in models_db[2][model][is_tuned][precision][
-                "specified_compilation_flags"
-            ]
-        ):
-            device = "default_device"
-        iree_flags += models_db[2][model][is_tuned][precision][
-            "specified_compilation_flags"
-        ][device]
-
-    return bucket, model_name, iree_flags
-
-
-def get_unet():
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    if "vulkan" not in args.device and args.use_tuned:
-        bucket_key = f"{variant}/{is_tuned}/{args.device}"
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}/{args.device}"
-    else:
-        bucket_key = f"{variant}/{is_tuned}"
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "unet", is_tuned, args.precision
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_vae():
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    is_base = "/base" if args.use_base_vae else ""
-    if "vulkan" not in args.device and args.use_tuned:
-        bucket_key = f"{variant}/{is_tuned}/{args.device}"
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}/{args.device}"
-    else:
-        bucket_key = f"{variant}/{is_tuned}"
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/{is_tuned}{is_base}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "vae", is_tuned, args.precision
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_clip():
-    bucket_key = f"{variant}/untuned"
-    model_key = (
-        f"{variant}/{version}/clip/fp32/length_{args.max_length}/untuned"
-    )
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "clip", "untuned", "fp32"
-    )
-    return get_shark_model(bucket, model_name, iree_flags)
--- a/shark/examples/shark_inference/stable_diffusion/resources.py
+++ b/shark/examples/shark_inference/stable_diffusion/resources.py
@@ -1,38 +0,0 @@
-import os
-import json
-import sys
-
-
-def resource_path(relative_path):
-    """Get absolute path to resource, works for dev and for PyInstaller"""
-    base_path = getattr(
-        sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
-    )
-    return os.path.join(base_path, relative_path)
-
-
-def get_json_file(path):
-    json_var = []
-    loc_json = resource_path(path)
-    if os.path.exists(loc_json):
-        with open(loc_json, encoding="utf-8") as fopen:
-            json_var = json.load(fopen)
-
-    if not json_var:
-        print(f"Unable to fetch {path}")
-
-    return json_var
-
-
-# TODO: This shouldn't be called from here, every time the file imports
-# it will run all the global vars.
-prompts_examples = get_json_file("resources/prompts.json")
-models_db = get_json_file("resources/model_db.json")
-beta_models_db = get_json_file("resources/beta_model_db.json")
-
-# The base_model contains the input configuration for the different
-# models and also helps in providing information for the variants.
-base_models = get_json_file("resources/base_model.json")
-
-# Contains optimization flags for different models.
-opt_flags = get_json_file("resources/opt_flags.json")
--- a/shark/examples/shark_inference/stable_diffusion/resources/base_model.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/base_model.json
@@ -1,98 +0,0 @@
-{
-    "stabilityai/stable-diffusion-2-1": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    4,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    1024
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    },
-    "CompVis/stable-diffusion-v1-4": {
-        "unet": {
-            "latents": {
-                "shape": [
-                    "1*batch_size",
-                    4,
-                    "height",
-                    "width"
-                ],
-                "dtype": "f32"
-            },
-            "timesteps": {
-                "shape": [
-                    1
-                ],
-                "dtype": "f32"
-            },
-            "embedding": {
-                "shape": [
-                    "2*batch_size",
-                    "max_len",
-                    768
-                ],
-                "dtype": "f32"
-            },
-            "guidance_scale": {
-                "shape": 2,
-                "dtype": "f32"
-            }
-        },
-        "vae": {
-            "latents" : {
-                "shape" : [
-                    "1*batch_size",4,"height","width"
-                ],
-                "dtype":"f32"
-            }
-        },
-        "clip": {
-            "token" : {
-                "shape" : [
-                    "2*batch_size",
-                    "max_len"
-                ],
-                "dtype":"i64"
-            }
-        }
-    }
-}
--- a/shark/examples/shark_inference/stable_diffusion/resources/beta_model_db.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/beta_model_db.json
@@ -1,177 +0,0 @@
-[
-  {
-    "stablediffusion/untuned":"gs://shark_tank/latest",
-    "stablediffusion/tuned":"gs://shark_tank/sd_tuned",
-    "stablediffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
-    "anythingv3/tuned":"gs://shark_tank/sd_tuned",
-    "anythingv3/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
-    "analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
-    "analogdiffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "openjourney/untuned":"gs://shark_tank/sd_openjourney",
-    "openjourney/tuned":"gs://shark_tank/sd_tuned",
-    "dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
-  },
-  {
-    "stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned":"unet_8dec_fp16_tuned",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned/cuda":"unet_8dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned":"vae_19dec_fp16_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned/cuda":"vae_19dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
-    "stablediffusion/v1_4/vae/fp32/length_77/untuned":"vae_1dec_fp32",
-    "stablediffusion/v1_4/clip/fp32/length_77/untuned":"clip_18dec_fp32",
-    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"unet2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet64_512_512_fp16_stabilityai_stable_diffusion_2_1_basec",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned/cuda":"unet_19dec_v2p1base_fp16_64_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"vae2base_19dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base/cuda":"vae2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "anythingv3/v2_1base/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
-    "anythingv3/v2_1base/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
-    "anythingv3/v2_1base/unet/fp16/length_77/tuned/cuda":"av3_unet_19dec_fp16_cuda_tuned",
-    "anythingv3/v2_1base/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
-    "anythingv3/v2_1base/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
-    "anythingv3/v2_1base/vae/fp16/length_77/tuned/cuda":"av3_vae_19dec_fp16_cuda_tuned",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
-    "anythingv3/v2_1base/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"ad_unet_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v2_1base/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"ad_vae_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
-    "analogdiffusion/v2_1base/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
-    "openjourney/v2_1base/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
-    "openjourney/v2_1base/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
-    "openjourney/v2_1base/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
-    "dreamlike/v2_1base/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
-    "dreamlike/v2_1base/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
-    "dreamlike/v2_1base/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
-  },
-  {
-    "unet": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": []
-        },
-        "fp32": {
-          "default_compilation_flags": []
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32"
-          ],
-          "specified_compilation_flags": {
-            "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-            "default_device": ["--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "vae": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "clip": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      }
-    }
-  }
-]
--- a/shark/examples/shark_inference/stable_diffusion/resources/model_config.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/model_config.json
@@ -1,21 +0,0 @@
-[
-  {
-    "stablediffusion/v1_4":"CompVis/stable-diffusion-v1-4",
-    "stablediffusion/v2_1base":"stabilityai/stable-diffusion-2-1-base",
-    "stablediffusion/v2_1":"stabilityai/stable-diffusion-2-1",
-    "anythingv3/v1_4":"Linaqruf/anything-v3.0",
-    "analogdiffusion/v1_4":"wavymulder/Analog-Diffusion",
-    "openjourney/v1_4":"prompthero/openjourney",
-    "dreamlike/v1_4":"dreamlike-art/dreamlike-diffusion-1.0"
-  },
-  {
-    "stablediffusion/fp16":"fp16",
-    "stablediffusion/fp32":"main",
-    "anythingv3/fp16":"diffusers",
-    "anythingv3/fp32":"diffusers",
-    "analogdiffusion/fp16":"main",
-    "analogdiffusion/fp32":"main",
-    "openjourney/fp16":"main",
-    "openjourney/fp32":"main"
-  }
-]
--- a/shark/examples/shark_inference/stable_diffusion/resources/model_db.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/model_db.json
@@ -1,177 +0,0 @@
-[
-  {
-    "stablediffusion/untuned":"gs://shark_tank/stable_diffusion",
-    "stablediffusion/tuned":"gs://shark_tank/sd_tuned",
-    "stablediffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
-    "anythingv3/tuned":"gs://shark_tank/sd_tuned",
-    "anythingv3/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
-    "analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
-    "analogdiffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "openjourney/untuned":"gs://shark_tank/sd_openjourney",
-    "openjourney/tuned":"gs://shark_tank/sd_tuned",
-    "dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
-  },
-  {
-    "stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned":"unet_8dec_fp16_tuned",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned/cuda":"unet_8dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned":"vae_19dec_fp16_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned/cuda":"vae_19dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
-    "stablediffusion/v1_4/vae/fp32/length_77/untuned":"vae_1dec_fp32",
-    "stablediffusion/v1_4/clip/fp32/length_77/untuned":"clip_18dec_fp32",
-    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet2base_8dec_fp16",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"unet2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet_19dec_v2p1base_fp16_64",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned/cuda":"unet_19dec_v2p1base_fp16_64_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae2base_19dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"vae2base_19dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae2base_8dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base/cuda":"vae2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip2base_18dec_fp32",
-    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip_19dec_v2p1base_fp32_64",
-    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet2_14dec_fp16",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae2_19dec_fp16",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"vae2_8dec_fp16",
-    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip2_18dec_fp32",
-    "anythingv3/v2_1base/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
-    "anythingv3/v2_1base/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
-    "anythingv3/v2_1base/unet/fp16/length_77/tuned/cuda":"av3_unet_19dec_fp16_cuda_tuned",
-    "anythingv3/v2_1base/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
-    "anythingv3/v2_1base/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
-    "anythingv3/v2_1base/vae/fp16/length_77/tuned/cuda":"av3_vae_19dec_fp16_cuda_tuned",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
-    "anythingv3/v2_1base/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"ad_unet_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v2_1base/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"ad_vae_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
-    "analogdiffusion/v2_1base/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
-    "openjourney/v2_1base/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
-    "openjourney/v2_1base/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
-    "openjourney/v2_1base/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
-    "dreamlike/v2_1base/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
-    "dreamlike/v2_1base/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
-    "dreamlike/v2_1base/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
-  },
-  {
-    "unet": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": []
-        },
-        "fp32": {
-          "default_compilation_flags": []
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32"
-          ],
-          "specified_compilation_flags": {
-            "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-            "default_device": ["--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "vae": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "clip": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      }
-    }
-  }
-]
--- a/shark/examples/shark_inference/stable_diffusion/resources/opt_flags.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/opt_flags.json
@@ -1,101 +0,0 @@
-  {
-    "unet": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": []
-        },
-        "fp32": {
-          "default_compilation_flags": []
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32"
-          ],
-          "specified_compilation_flags": {
-            "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-            "default_device": ["--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "vae": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [],
-          "specified_compilation_flags": {
-            "cuda": [],
-            "default_device": ["--iree-flow-enable-padding-linalg-ops",
-                               "--iree-flow-linalg-ops-padding-size=32",
-                               "--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [],
-          "specified_compilation_flags": {
-            "cuda": [],
-            "default_device": [
-              "--iree-flow-enable-padding-linalg-ops",
-              "--iree-flow-linalg-ops-padding-size=32",
-              "--iree-flow-enable-conv-img2col-transform"
-            ]
-          }
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "clip": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      }
-    }
-  }
--- a/shark/examples/shark_inference/stable_diffusion/resources/prompts.json
+++ b/shark/examples/shark_inference/stable_diffusion/resources/prompts.json
@@ -1,8 +0,0 @@
-[["A high tech solarpunk utopia in the Amazon rainforest"],
-["A pikachu fine dining with a view to the Eiffel Tower"],
-["A mecha robot in a favela in expressionist style"],
-["an insect robot preparing a delicious meal"],
-["A digital Illustration of the Babel tower, 4k, detailed, trending in artstation, fantasy vivid colors"],
-["Cluttered house in the woods, anime, oil painting, high resolution, cottagecore, ghibli inspired, 4k"],
-["A beautiful mansion beside a waterfall in the woods, by josef thoma, matte painting, trending on artstation HQ"],
-["portrait photo of a asia old warrior chief, tribal panther make up, blue on red, side profile, looking away, serious eyes"]]
--- a/shark/examples/shark_inference/stable_diffusion/schedulers.py
+++ b/shark/examples/shark_inference/stable_diffusion/schedulers.py
@@ -1,144 +0,0 @@
-import sys
-import numpy as np
-from typing import List, Optional, Tuple, Union
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    EulerDiscreteScheduler,
-)
-from diffusers.configuration_utils import register_to_config
-from utils import compile_through_fx, get_shark_model
-from stable_args import args
-import torch
-
-SCHEDULER_BUCKET = "gs://shark_tank/stable_diffusion/schedulers"
-
-
-BATCH_SIZE = len(args.prompts)
-if len(args.prompts) == 0:
-    BATCH_SIZE = 1
-
-model_input = {
-    "euler": {
-        "latent": torch.randn(
-            BATCH_SIZE, 4, args.height // 8, args.width // 8
-        ),
-        "output": torch.randn(
-            BATCH_SIZE, 4, args.height // 8, args.width // 8
-        ),
-        "sigma": torch.tensor(1).to(torch.float32),
-        "dt": torch.tensor(1).to(torch.float32),
-    },
-}
-
-
-class SharkEulerDiscreteScheduler(EulerDiscreteScheduler):
-    @register_to_config
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        beta_start: float = 0.0001,
-        beta_end: float = 0.02,
-        beta_schedule: str = "linear",
-        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
-        prediction_type: str = "epsilon",
-    ):
-        super().__init__(
-            num_train_timesteps,
-            beta_start,
-            beta_end,
-            beta_schedule,
-            trained_betas,
-            prediction_type,
-        )
-
-    def compile(self):
-        example_latent = model_input["euler"]["latent"]
-        example_output = model_input["euler"]["output"]
-        if args.precision == "fp16":
-            example_latent = example_latent.half()
-            example_output = example_output.half()
-        example_sigma = model_input["euler"]["sigma"]
-        example_dt = model_input["euler"]["dt"]
-
-        class ScalingModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, latent, sigma):
-                return latent / ((sigma**2 + 1) ** 0.5)
-
-        class SchedulerStepModel(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, noise_pred, sigma, latent, dt):
-                pred_original_sample = latent - sigma * noise_pred
-                derivative = (latent - pred_original_sample) / sigma
-                return latent + derivative * dt
-
-        iree_flags = []
-        if len(args.iree_vulkan_target_triple) > 0:
-            iree_flags.append(
-                f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-            )
-        # Disable bindings fusion to work with moltenVK.
-        if sys.platform == "darwin":
-            iree_flags.append("-iree-stream-fuse-binding=false")
-
-        if args.import_mlir:
-            scaling_model = ScalingModel()
-            self.scaling_model = compile_through_fx(
-                scaling_model,
-                (example_latent, example_sigma),
-                model_name=f"euler_scale_model_input_{BATCH_SIZE}_{args.height}_{args.width}"
-                + args.precision,
-                extra_args=iree_flags,
-            )
-
-            step_model = SchedulerStepModel()
-            self.step_model = compile_through_fx(
-                step_model,
-                (example_output, example_sigma, example_latent, example_dt),
-                model_name=f"euler_step_{BATCH_SIZE}_{args.height}_{args.width}"
-                + args.precision,
-                extra_args=iree_flags,
-            )
-        else:
-            self.scaling_model = get_shark_model(
-                SCHEDULER_BUCKET,
-                "euler_scale_model_input_" + args.precision,
-                iree_flags,
-            )
-            self.step_model = get_shark_model(
-                SCHEDULER_BUCKET, "euler_step_" + args.precision, iree_flags
-            )
-
-    def scale_model_input(self, sample, timestep):
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        return self.scaling_model(
-            "forward",
-            (
-                sample,
-                sigma,
-            ),
-            send_to_host=False,
-        )
-
-    def step(self, noise_pred, timestep, latent):
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        dt = self.sigmas[step_index + 1] - sigma
-        return self.step_model(
-            "forward",
-            (
-                noise_pred,
-                sigma,
-                latent,
-                dt,
-            ),
-            send_to_host=False,
-        )
--- a/shark/examples/shark_inference/stable_diffusion/sd_annotation.py
+++ b/shark/examples/shark_inference/stable_diffusion/sd_annotation.py
@@ -1,191 +0,0 @@
-import os
-from shark.model_annotation import model_annotation, create_context
-from shark.iree_utils._common import iree_target_map, run_cmd
-from shark.shark_downloader import (
-    download_model,
-    download_public_file,
-    WORKDIR,
-)
-from shark.parser import shark_args
-from stable_args import args
-
-
-device = (
-    args.device if "://" not in args.device else args.device.split("://")[0]
-)
-
-
-# Download the model (Unet or VAE fp16) from shark_tank
-def load_model_from_tank():
-    from opt_params import get_params, version, variant
-
-    shark_args.local_tank_cache = args.local_tank_cache
-    bucket_key = f"{variant}/untuned"
-    if args.annotation_model == "unet":
-        model_key = f"{variant}/{version}/unet/{args.precision}/length_{args.max_length}/untuned"
-    elif args.annotation_model == "vae":
-        is_base = "/base" if args.use_base_vae else ""
-        model_key = f"{variant}/{version}/vae/{args.precision}/length_77/untuned{is_base}"
-
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, args.annotation_model, "untuned", args.precision
-    )
-    mlir_model, func_name, inputs, golden_out = download_model(
-        model_name,
-        tank_url=bucket,
-        frontend="torch",
-    )
-    return mlir_model, model_name
-
-
-# Download the tuned config files from shark_tank
-def load_winograd_configs():
-    config_bucket = "gs://shark_tank/sd_tuned/configs/"
-    config_name = f"{args.annotation_model}_winograd_{device}.json"
-    full_gs_url = config_bucket + config_name
-    winograd_config_dir = f"{WORKDIR}configs/" + config_name
-    print("Loading Winograd config file from ", winograd_config_dir)
-    download_public_file(full_gs_url, winograd_config_dir, True)
-    return winograd_config_dir
-
-
-def load_lower_configs():
-    from opt_params import version, variant
-
-    config_bucket = "gs://shark_tank/sd_tuned/configs/"
-    config_version = version
-    if variant in ["anythingv3", "analogdiffusion"]:
-        args.max_length = 77
-        config_version = "v1_4"
-    if args.annotation_model == "vae":
-        args.max_length = 77
-    config_name = f"{args.annotation_model}_{config_version}_{args.precision}_len{args.max_length}_{device}.json"
-    full_gs_url = config_bucket + config_name
-    lowering_config_dir = f"{WORKDIR}configs/" + config_name
-    print("Loading lowering config file from ", lowering_config_dir)
-    download_public_file(full_gs_url, lowering_config_dir, True)
-    return lowering_config_dir
-
-
-# Annotate the model with Winograd attribute on selected conv ops
-def annotate_with_winograd(input_mlir, winograd_config_dir, model_name):
-    if model_name.split("_")[-1] != "tuned":
-        out_file_path = (
-            f"{args.annotation_output}/{model_name}_tuned_torch.mlir"
-        )
-    else:
-        out_file_path = f"{args.annotation_output}/{model_name}_torch.mlir"
-
-    with create_context() as ctx:
-        winograd_model = model_annotation(
-            ctx,
-            input_contents=input_mlir,
-            config_path=winograd_config_dir,
-            search_op="conv",
-            winograd=True,
-        )
-        with open(out_file_path, "w") as f:
-            f.write(str(winograd_model))
-            f.close()
-    return winograd_model, out_file_path
-
-
-# For Unet annotate the model with tuned lowering configs
-def annotate_with_lower_configs(
-    input_mlir, lowering_config_dir, model_name, use_winograd
-):
-    if use_winograd:
-        dump_after = "iree-linalg-ext-convert-conv2d-to-winograd"
-    else:
-        dump_after = "iree-flow-pad-linalg-ops"
-
-    # Dump IR after padding/img2col/winograd passes
-    device_spec_args = ""
-    if device == "cuda":
-        from shark.iree_utils.gpu_utils import get_iree_gpu_args
-
-        gpu_flags = get_iree_gpu_args()
-        for flag in gpu_flags:
-            device_spec_args += flag + " "
-    elif device == "vulkan":
-        device_spec_args = (
-            f"--iree-vulkan-target-triple={args.iree_vulkan_target_triple} "
-        )
-    print("Applying tuned configs on", model_name)
-
-    run_cmd(
-        f"iree-compile {input_mlir} "
-        "--iree-input-type=tm_tensor "
-        f"--iree-hal-target-backends={iree_target_map(device)} "
-        f"{device_spec_args}"
-        "--iree-stream-resource-index-bits=64 "
-        "--iree-vm-target-index-bits=64 "
-        "--iree-flow-enable-padding-linalg-ops "
-        "--iree-flow-linalg-ops-padding-size=32 "
-        "--iree-flow-enable-conv-img2col-transform "
-        f"--mlir-print-ir-after={dump_after} "
-        "--compile-to=flow "
-        f"2>{args.annotation_output}/dump_after_winograd.mlir "
-    )
-
-    # Annotate the model with lowering configs in the config file
-    with create_context() as ctx:
-        tuned_model = model_annotation(
-            ctx,
-            input_contents=f"{args.annotation_output}/dump_after_winograd.mlir",
-            config_path=lowering_config_dir,
-            search_op="all",
-        )
-
-    # Remove the intermediate mlir and save the final annotated model
-    os.remove(f"{args.annotation_output}/dump_after_winograd.mlir")
-    if model_name.split("_")[-1] != "tuned":
-        out_file_path = (
-            f"{args.annotation_output}/{model_name}_tuned_torch.mlir"
-        )
-    else:
-        out_file_path = f"{args.annotation_output}/{model_name}_torch.mlir"
-    with open(out_file_path, "w") as f:
-        f.write(str(tuned_model))
-        f.close()
-    return tuned_model, out_file_path
-
-
-def sd_model_annotation(mlir_model, model_name, model_from_tank=False):
-    if args.annotation_model == "unet" and device == "vulkan":
-        use_winograd = True
-        winograd_config_dir = load_winograd_configs()
-        winograd_model, model_path = annotate_with_winograd(
-            mlir_model, winograd_config_dir, model_name
-        )
-        lowering_config_dir = load_lower_configs()
-        tuned_model, output_path = annotate_with_lower_configs(
-            model_path, lowering_config_dir, model_name, use_winograd
-        )
-    elif args.annotation_model == "vae" and device == "vulkan":
-        use_winograd = True
-        winograd_config_dir = load_winograd_configs()
-        tuned_model, output_path = annotate_with_winograd(
-            mlir_model, winograd_config_dir, model_name
-        )
-    else:
-        use_winograd = False
-        if model_from_tank:
-            mlir_model = f"{WORKDIR}{model_name}_torch/{model_name}_torch.mlir"
-        else:
-            # Just use this function to convert bytecode to string
-            orig_model, model_path = annotate_with_winograd(
-                mlir_model, "", model_name
-            )
-            mlir_model = model_path
-        lowering_config_dir = load_lower_configs()
-        tuned_model, output_path = annotate_with_lower_configs(
-            mlir_model, lowering_config_dir, model_name, use_winograd
-        )
-    print(f"Saved the annotated mlir in {output_path}.")
-    return tuned_model, output_path
-
-
-if __name__ == "__main__":
-    mlir_model, model_name = load_model_from_tank()
-    sd_model_annotation(mlir_model, model_name, model_from_tank=True)
--- a/shark/examples/shark_inference/stable_diffusion/shark_sd_cli.spec
+++ b/shark/examples/shark_inference/stable_diffusion/shark_sd_cli.spec
@@ -1,76 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
-from PyInstaller.utils.hooks import copy_metadata
-
-import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
-
-datas = []
-datas += collect_data_files('torch')
-datas += copy_metadata('torch')
-datas += copy_metadata('tqdm')
-datas += copy_metadata('regex')
-datas += copy_metadata('requests')
-datas += copy_metadata('packaging')
-datas += copy_metadata('filelock')
-datas += copy_metadata('numpy')
-datas += copy_metadata('tokenizers')
-datas += copy_metadata('importlib_metadata')
-datas += copy_metadata('torchvision')
-datas += copy_metadata('torch-mlir')
-datas += copy_metadata('diffusers')
-datas += copy_metadata('transformers')
-datas += copy_metadata('omegaconf')
-datas += copy_metadata('safetensors')
-datas += collect_data_files('iree')
-datas += collect_data_files('google-cloud-storage')
-datas += collect_data_files('shark')
-datas += [
-         ( 'resources/prompts.json', 'resources'),
-         ( 'resources/model_db.json', 'resources'),
-         ( 'resources/base_model.json', 'resources'),
-         ( 'resources/opt_flags.json', 'resources'),
-         ]
-
-binaries = []
-
-block_cipher = None
-
-
-a = Analysis(
-    ['main.py'],
-    pathex=['.'],
-    binaries=binaries,
-    datas=datas,
-    hiddenimports=['shark', 'shark.*', 'shark.shark_inference', 'shark_inference', 'iree.tools.core' ],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    win_no_prefer_redirects=False,
-    win_private_assemblies=False,
-    cipher=block_cipher,
-    noarchive=False,
-)
-pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
-
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.zipfiles,
-    a.datas,
-    [],
-    name='shark_sd_cli',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)
--- a/shark/examples/shark_inference/stable_diffusion/stable_args.py
+++ b/shark/examples/shark_inference/stable_diffusion/stable_args.py
@@ -1,386 +0,0 @@
-import os
-import argparse
-from pathlib import Path
-
-
-def path_expand(s):
-    return Path(s).expanduser().resolve()
-
-
-def is_valid_file(arg):
-    if not os.path.exists(arg):
-        return None
-    else:
-        return arg
-
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-##############################################################################
-### Stable Diffusion Params
-##############################################################################
-
-p.add_argument(
-    "-p",
-    "--prompts",
-    action="append",
-    default=[],
-    help="text of which images to be generated.",
-)
-
-p.add_argument(
-    "--negative_prompts",
-    nargs="+",
-    default=[""],
-    help="text you don't want to see in the generated image.",
-)
-
-p.add_argument(
-    "--steps",
-    type=int,
-    default=50,
-    help="the no. of steps to do the sampling.",
-)
-
-p.add_argument(
-    "--seed",
-    type=int,
-    default=42,
-    help="the seed to use.",
-)
-
-p.add_argument(
-    "--batch_size",
-    type=int,
-    default=1,
-    choices=range(1, 4),
-    help="the number of inferences to be made in a single `run`.",
-)
-
-p.add_argument(
-    "--height",
-    type=int,
-    default=512,
-    help="the height of the output image.",
-)
-
-p.add_argument(
-    "--width",
-    type=int,
-    default=512,
-    help="the width of the output image.",
-)
-
-p.add_argument(
-    "--guidance_scale",
-    type=float,
-    default=7.5,
-    help="the value to be used for guidance scaling.",
-)
-
-p.add_argument(
-    "--max_length",
-    type=int,
-    default=64,
-    help="max length of the tokenizer output, options are 64 and 77.",
-)
-
-##############################################################################
-### Model Config and Usage Params
-##############################################################################
-
-p.add_argument(
-    "--device", type=str, default="vulkan", help="device to run the model."
-)
-
-p.add_argument(
-    "--precision", type=str, default="fp16", help="precision to run the model."
-)
-
-p.add_argument(
-    "--import_mlir",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.",
-)
-
-p.add_argument(
-    "--load_vmfb",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="attempts to load the model from a precompiled flatbuffer and compiles + saves it if not found.",
-)
-
-p.add_argument(
-    "--save_vmfb",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="saves the compiled flatbuffer to the local directory",
-)
-
-p.add_argument(
-    "--use_tuned",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="Download and use the tuned version of the model if available",
-)
-
-p.add_argument(
-    "--use_base_vae",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Do conversion from the VAE output to pixel space on cpu.",
-)
-
-p.add_argument(
-    "--scheduler",
-    type=str,
-    default="SharkEulerDiscrete",
-    help="other supported schedulers are [PNDM, DDIM, LMSDiscrete, EulerDiscrete, DPMSolverMultistep]",
-)
-
-p.add_argument(
-    "--output_img_format",
-    type=str,
-    default="png",
-    help="specify the format in which output image is save. Supported options: jpg / png",
-)
-
-p.add_argument(
-    "--output_dir",
-    type=str,
-    default=None,
-    help="Directory path to save the output images and json",
-)
-
-p.add_argument(
-    "--runs",
-    type=int,
-    default=1,
-    help="number of images to be generated with random seeds in single execution",
-)
-
-p.add_argument(
-    "--ckpt_loc",
-    type=str,
-    default="",
-    help="Path to SD's .ckpt file.",
-)
-
-p.add_argument(
-    "--hf_model_id",
-    type=str,
-    default="stabilityai/stable-diffusion-2-1-base",
-    help="The repo-id of hugging face.",
-)
-
-p.add_argument(
-    "--enable_stack_trace",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Enable showing the stack trace when retrying the base model configuration",
-)
-p.add_argument(
-    "--beta_models",
-    default=False,
-    type=bool,
-    help="(False/True), use beta model files",
-)
-##############################################################################
-### IREE - Vulkan supported flags
-##############################################################################
-
-p.add_argument(
-    "--iree-vulkan-target-triple",
-    type=str,
-    default="",
-    help="Specify target triple for vulkan",
-)
-
-p.add_argument(
-    "--vulkan_debug_utils",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Profiles vulkan device and collects the .rdc info",
-)
-
-p.add_argument(
-    "--vulkan_large_heap_block_size",
-    default="4147483648",
-    help="flag for setting VMA preferredLargeHeapBlockSize for vulkan device, default is 4G",
-)
-
-p.add_argument(
-    "--vulkan_validation_layers",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for disabling vulkan validation layers when benchmarking",
-)
-
-##############################################################################
-### Misc. Debug and Optimization flags
-##############################################################################
-
-p.add_argument(
-    "--use_compiled_scheduler",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="use the default scheduler precompiled into the model if available",
-)
-
-p.add_argument(
-    "--local_tank_cache",
-    default="",
-    help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
-)
-
-p.add_argument(
-    "--dump_isa",
-    default=False,
-    action="store_true",
-    help="When enabled call amdllpc to get ISA dumps. use with dispatch benchmarks.",
-)
-
-p.add_argument(
-    "--dispatch_benchmarks",
-    default=None,
-    help='dispatches to return benchamrk data on.  use "All" for all, and None for none.',
-)
-
-p.add_argument(
-    "--dispatch_benchmarks_dir",
-    default="temp_dispatch_benchmarks",
-    help='directory where you want to store dispatch data generated with "--dispatch_benchmarks"',
-)
-
-p.add_argument(
-    "--enable_rgp",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for inserting debug frames between iterations for use with rgp.",
-)
-
-p.add_argument(
-    "--hide_steps",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for hiding the details of iteration/sec for each step.",
-)
-
-p.add_argument(
-    "--warmup_count",
-    type=int,
-    default=0,
-    help="flag setting warmup count for clip and vae [>= 0].",
-)
-
-p.add_argument(
-    "--clear_all",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag to clear all mlir and vmfb from common locations. Recompiling will take several minutes",
-)
-
-p.add_argument(
-    "--save_metadata_to_json",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save a generation information json file with the image.",
-)
-
-p.add_argument(
-    "--write_metadata_to_png",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save generation information in PNG chunk text to generated images.",
-)
-
-##############################################################################
-### Web UI flags
-##############################################################################
-
-p.add_argument(
-    "--progress_bar",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for removing the pregress bar animation during image generation",
-)
-
-p.add_argument(
-    "--share",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for generating a public URL",
-)
-
-p.add_argument(
-    "--server_port",
-    type=int,
-    default=8080,
-    help="flag for setting server port",
-)
-
-##############################################################################
-### SD model auto-annotation flags
-##############################################################################
-
-p.add_argument(
-    "--annotation_output",
-    type=path_expand,
-    default="./",
-    help="Directory to save the annotated mlir file",
-)
-
-p.add_argument(
-    "--annotation_model",
-    type=str,
-    default="unet",
-    help="Options are unet and vae.",
-)
-
-p.add_argument(
-    "--use_winograd",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Apply Winograd on selected conv ops.",
-)
-##############################################################################
-### CI generation tags
-##############################################################################
-
-# TODO: remove from here once argparse is not required by half of sd, none of these are relevant to main.py
-
-p.add_argument(
-    "--ci_tank_dir",
-    default=True,
-    type=bool,
-    help="used for CI generation purposes only.",
-)
-p.add_argument(
-    "--upload",
-    default=False,
-    type=bool,
-    help="upload generated models to shark tank (builder only), irrelevant to main.py",
-)
-p.add_argument(
-    "--torch_model_csv",
-    type=lambda x: is_valid_file(x),
-    default="./tank/torch_model_list.csv",
-    help="""Contains the file with torch_model name and args.
-		 Please see: https://github.com/nod-ai/SHARK/blob/main/tank/torch_model_list.csv""",
-)
-p.add_argument(
-    "--tf_model_csv",
-    type=lambda x: is_valid_file(x),
-    default="./tank/tf_model_list.csv",
-    help="Contains the file with tf model name and args.",
-)
-p.add_argument(
-    "--tflite_model_csv",
-    type=lambda x: is_valid_file(x),
-    default="./tank/tflite/tflite_model_list.csv",
-    help="Contains the file with tf model name and args.",
-)
-args = p.parse_args()
--- a/shark/examples/shark_inference/stable_diffusion/utils.py
+++ b/shark/examples/shark_inference/stable_diffusion/utils.py
@@ -1,416 +0,0 @@
-import os
-import gc
-import tempfile
-import torch
-from shark.shark_inference import SharkInference
-from shark.examples.shark_inference.stable_diffusion.stable_args import args
-from shark.shark_importer import import_with_fx
-from shark.iree_utils.vulkan_utils import (
-    set_iree_vulkan_runtime_flags,
-    get_vulkan_target_triple,
-)
-from shark.iree_utils.gpu_utils import get_cuda_sm_cc
-from resources import opt_flags
-from sd_annotation import sd_model_annotation
-import sys
-from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
-    load_pipeline_from_original_stable_diffusion_ckpt,
-)
-
-
-def get_vmfb_path_name(model_name):
-    device = (
-        args.device
-        if "://" not in args.device
-        else "-".join(args.device.split("://"))
-    )
-    extended_name = "{}_{}".format(model_name, device)
-    vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb")
-    return [vmfb_path, extended_name]
-
-
-def _compile_module(shark_module, model_name, extra_args=[]):
-    if args.load_vmfb or args.save_vmfb:
-        [vmfb_path, extended_name] = get_vmfb_path_name(model_name)
-        if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
-            print(f"loading existing vmfb from: {vmfb_path}")
-            shark_module.load_module(vmfb_path, extra_args=extra_args)
-        else:
-            if args.save_vmfb:
-                print("Saving to {}".format(vmfb_path))
-            else:
-                print(
-                    "No vmfb found. Compiling and saving to {}".format(
-                        vmfb_path
-                    )
-                )
-            path = shark_module.save_module(
-                os.getcwd(), extended_name, extra_args
-            )
-            shark_module.load_module(path, extra_args=extra_args)
-    else:
-        shark_module.compile(extra_args)
-    return shark_module
-
-
-# Downloads the model from shark_tank and returns the shark_module.
-def get_shark_model(tank_url, model_name, extra_args=[]):
-    from shark.shark_downloader import download_model
-    from shark.parser import shark_args
-
-    # Set local shark_tank cache directory.
-    shark_args.local_tank_cache = args.local_tank_cache
-    if "cuda" in args.device:
-        shark_args.enable_tf32 = True
-
-    mlir_model, func_name, inputs, golden_out = download_model(
-        model_name,
-        tank_url=tank_url,
-        frontend="torch",
-    )
-    shark_module = SharkInference(
-        mlir_model, device=args.device, mlir_dialect="linalg"
-    )
-    return _compile_module(shark_module, model_name, extra_args)
-
-
-# Converts the torch-module into a shark_module.
-def compile_through_fx(
-    model,
-    inputs,
-    model_name,
-    is_f16=False,
-    f16_input_mask=None,
-    use_tuned=False,
-    extra_args=[],
-    save_dir=tempfile.gettempdir(),
-    debug=False,
-    generate_vmfb=True,
-):
-    from shark.parser import shark_args
-
-    if "cuda" in args.device:
-        shark_args.enable_tf32 = True
-
-    mlir_module, func_name = import_with_fx(
-        model, inputs, is_f16, f16_input_mask
-    )
-
-    if use_tuned:
-        model_name = model_name + "_tuned"
-        tuned_model_path = f"{args.annotation_output}/{model_name}_torch.mlir"
-        if not os.path.exists(tuned_model_path):
-            if "vae" in model_name.split("_")[0]:
-                args.annotation_model = "vae"
-
-            tuned_model, tuned_model_path = sd_model_annotation(
-                mlir_module, model_name
-            )
-            del mlir_module, tuned_model
-            gc.collect()
-
-        with open(tuned_model_path, "rb") as f:
-            mlir_module = f.read()
-            f.close()
-
-    save_dir = os.path.join(args.local_tank_cache, model_name)
-
-    (
-        mlir_module,
-        func_name,
-    ) = import_with_fx(
-        model=model,
-        inputs=inputs,
-        is_f16=is_f16,
-        f16_input_mask=f16_input_mask,
-        debug=debug,
-        model_name=model_name,
-        save_dir=save_dir,
-    )
-    if generate_vmfb:
-        shark_module = SharkInference(
-            mlir_module,
-            device=args.device,
-            mlir_dialect="linalg",
-        )
-
-        return _compile_module(shark_module, model_name, extra_args)
-
-
-def set_iree_runtime_flags():
-    vulkan_runtime_flags = [
-        f"--vulkan_large_heap_block_size={args.vulkan_large_heap_block_size}",
-        f"--vulkan_validation_layers={'true' if args.vulkan_validation_layers else 'false'}",
-    ]
-    if args.enable_rgp:
-        vulkan_runtime_flags += [
-            f"--enable_rgp=true",
-            f"--vulkan_debug_utils=true",
-        ]
-    set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
-
-
-def get_all_devices(driver_name):
-    """
-    Inputs: driver_name
-    Returns a list of all the available devices for a given driver sorted by
-    the iree path names of the device as in --list_devices option in iree.
-    """
-    from iree.runtime import get_driver
-
-    driver = get_driver(driver_name)
-    device_list_src = driver.query_available_devices()
-    device_list_src.sort(key=lambda d: d["path"])
-    return device_list_src
-
-
-def get_device_mapping(driver, key_combination=3):
-    """This method ensures consistent device ordering when choosing
-    specific devices for execution
-    Args:
-        driver (str): execution driver (vulkan, cuda, rocm, etc)
-        key_combination (int, optional): choice for mapping value for device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Returns:
-        dict: map to possible device names user can input mapped to desired combination of name/path.
-    """
-    from shark.iree_utils._common import iree_device_map
-
-    driver = iree_device_map(driver)
-    device_list = get_all_devices(driver)
-    device_map = dict()
-
-    def get_output_value(dev_dict):
-        if key_combination == 1:
-            return f"{driver}://{dev_dict['path']}"
-        if key_combination == 2:
-            return dev_dict["name"]
-        if key_combination == 3:
-            return (dev_dict["name"], f"{driver}://{dev_dict['path']}")
-
-    # mapping driver name to default device (driver://0)
-    device_map[f"{driver}"] = get_output_value(device_list[0])
-    for i, device in enumerate(device_list):
-        # mapping with index
-        device_map[f"{driver}://{i}"] = get_output_value(device)
-        # mapping with full path
-        device_map[f"{driver}://{device['path']}"] = get_output_value(device)
-    return device_map
-
-
-def map_device_to_name_path(device, key_combination=3):
-    """Gives the appropriate device data (supported name/path) for user selected execution device
-    Args:
-        device (str): user
-        key_combination (int, optional): choice for mapping value for device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Raises:
-        ValueError:
-    Returns:
-        str / tuple: returns the mapping str or tuple of mapping str for the device depending on key_combination value
-    """
-    driver = device.split("://")[0]
-    device_map = get_device_mapping(driver, key_combination)
-    try:
-        device_mapping = device_map[device]
-    except KeyError:
-        raise ValueError(f"Device '{device}' is not a valid device.")
-    return device_mapping
-
-
-def set_init_device_flags():
-    if "vulkan" in args.device:
-        # set runtime flags for vulkan.
-        set_iree_runtime_flags()
-
-        # set triple flag to avoid multiple calls to get_vulkan_triple_flag
-        device_name, args.device = map_device_to_name_path(args.device)
-        if not args.iree_vulkan_target_triple:
-            triple = get_vulkan_target_triple(device_name)
-            if triple is not None:
-                args.iree_vulkan_target_triple = triple
-        print(
-            f"Found device {device_name}. Using target triple {args.iree_vulkan_target_triple}."
-        )
-    elif "cuda" in args.device:
-        args.device = "cuda"
-    elif "cpu" in args.device:
-        args.device = "cpu"
-
-    # set max_length based on availability.
-    if args.hf_model_id in [
-        "Linaqruf/anything-v3.0",
-        "wavymulder/Analog-Diffusion",
-        "dreamlike-art/dreamlike-diffusion-1.0",
-    ]:
-        args.max_length = 77
-    elif args.hf_model_id == "prompthero/openjourney":
-        args.max_length = 64
-
-    # Use tuned models in the case of fp16, vulkan rdna3 or cuda sm devices.
-    if (
-        args.hf_model_id
-        in ["prompthero/openjourney", "dreamlike-art/dreamlike-diffusion-1.0"]
-        or args.precision != "fp16"
-        or args.height != 512
-        or args.width != 512
-        or args.batch_size != 1
-        or ("vulkan" not in args.device and "cuda" not in args.device)
-    ):
-        args.use_tuned = False
-
-    elif (
-        "vulkan" in args.device
-        and "rdna3" not in args.iree_vulkan_target_triple
-    ):
-        args.use_tuned = False
-
-    elif "cuda" in args.device and get_cuda_sm_cc() not in [
-        "sm_80",
-        "sm_84",
-        "sm_86",
-        "sm_89",
-    ]:
-        args.use_tuned = False
-
-    elif args.use_base_vae and args.hf_model_id not in [
-        "stabilityai/stable-diffusion-2-1-base",
-        "CompVis/stable-diffusion-v1-4",
-    ]:
-        args.use_tuned = False
-
-    # Use tuned model in the case of stablediffusion/fp16 and cuda device sm_80
-    if (
-        args.hf_model_id
-        in [
-            "stabilityai/stable-diffusion-2-1-base",
-            "Linaqruf/anything-v3.0",
-            "wavymulder/Analog-Diffusion",
-        ]
-        and args.precision == "fp16"
-        and "cuda" in args.device
-        and get_cuda_sm_cc() in ["sm_80", "sm_89"]
-        and args.use_tuned  # required to avoid always forcing true on these cards
-    ):
-        args.use_tuned = True
-    else:
-        args.use_tuned = False
-
-    if args.use_tuned:
-        print(f"Using {args.device} tuned models for stablediffusion/fp16.")
-    else:
-        print("Tuned models are currently not supported for this setting.")
-
-
-# Utility to get list of devices available.
-def get_available_devices():
-    def get_devices_by_name(driver_name):
-        from shark.iree_utils._common import iree_device_map
-
-        device_list = []
-        try:
-            driver_name = iree_device_map(driver_name)
-            device_list_dict = get_all_devices(driver_name)
-            print(f"{driver_name} devices are available.")
-        except:
-            print(f"{driver_name} devices are not available.")
-        else:
-            for i, device in enumerate(device_list_dict):
-                device_list.append(f"{device['name']} => {driver_name}://{i}")
-        return device_list
-
-    set_iree_runtime_flags()
-
-    available_devices = []
-    vulkan_devices = get_devices_by_name("vulkan")
-    available_devices.extend(vulkan_devices)
-    cuda_devices = get_devices_by_name("cuda")
-    available_devices.extend(cuda_devices)
-    available_devices.append("cpu")
-    return available_devices
-
-
-def disk_space_check(path, lim=20):
-    from shutil import disk_usage
-
-    du = disk_usage(path)
-    free = du.free / (1024 * 1024 * 1024)
-    if free <= lim:
-        print(f"[WARNING] Only {free:.2f}GB space available in {path}.")
-
-
-def get_opt_flags(model, precision="fp16"):
-    iree_flags = []
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    if len(args.iree_vulkan_target_triple) > 0:
-        iree_flags.append(
-            f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-        )
-
-    # Disable bindings fusion to work with moltenVK.
-    if sys.platform == "darwin":
-        iree_flags.append("-iree-stream-fuse-binding=false")
-
-    if "default_compilation_flags" in opt_flags[model][is_tuned][precision]:
-        iree_flags += opt_flags[model][is_tuned][precision][
-            "default_compilation_flags"
-        ]
-
-    if "specified_compilation_flags" in opt_flags[model][is_tuned][precision]:
-        device = (
-            args.device
-            if "://" not in args.device
-            else args.device.split("://")[0]
-        )
-        if (
-            device
-            not in opt_flags[model][is_tuned][precision][
-                "specified_compilation_flags"
-            ]
-        ):
-            device = "default_device"
-        iree_flags += opt_flags[model][is_tuned][precision][
-            "specified_compilation_flags"
-        ][device]
-    return iree_flags
-
-
-def preprocessCKPT():
-    from pathlib import Path
-
-    path = Path(args.ckpt_loc)
-    diffusers_path = path.parent.absolute()
-    diffusers_directory_name = path.stem
-    complete_path_to_diffusers = diffusers_path / diffusers_directory_name
-    complete_path_to_diffusers.mkdir(parents=True, exist_ok=True)
-    print(
-        "Created directory : ",
-        diffusers_directory_name,
-        " at -> ",
-        diffusers_path,
-    )
-    path_to_diffusers = complete_path_to_diffusers.as_posix()
-    from_safetensors = (
-        True if args.ckpt_loc.lower().endswith(".safetensors") else False
-    )
-    # EMA weights usually yield higher quality images for inference but non-EMA weights have
-    # been yielding better results in our case.
-    # TODO: Add an option `--ema` (`--no-ema`) for users to specify if they want to go for EMA
-    #       weight extraction or not.
-    extract_ema = False
-    print("Loading pipeline from original stable diffusion checkpoint")
-    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
-        checkpoint_path=args.ckpt_loc,
-        extract_ema=extract_ema,
-        from_safetensors=from_safetensors,
-    )
-    pipe.save_pretrained(path_to_diffusers)
-    print("Loading complete")
-    args.ckpt_loc = path_to_diffusers
-    print("Custom model path is : ", args.ckpt_loc)
--- a/web/README.md
+++ b/web/README.md
@@ -1,16 +0,0 @@
-In order to launch SHARK-web, from the root SHARK directory, run:
-
-## Linux
-```shell
-IMPORTER=1 ./setup_venv.sh
-source shark.venv/bin/activate
-cd web
-python index.py
-```
-
-## Windows
-```shell
-./setup_venv.ps1
-cd web
-python index.py --local_tank_cache=<current_working_dir>
-```
--- a/web/demo.css
+++ b/web/demo.css
@@ -1,67 +0,0 @@
-.gradio-container {
-    background-color: black
-}
-
-.container {
-    background-color: black !important;
-    padding-top: 20px !important;
-}
-
-#ui_title {
-    padding: 10px !important;
-}
-
-#top_logo {
-    background-color: transparent;
-    border-radius: 0 !important;
-    border: 0;
-}
-
-#demo_title {
-    background-color: black;
-    border-radius: 0 !important;
-    border: 0;
-    padding-top: 50px;
-    padding-bottom: 0px;
-    width: 460px !important;
-}
-
-#demo_title_outer {
-    border-radius: 0;
-}
-
-#prompt_box_outer div:first-child {
-    border-radius: 0 !important
-}
-
-#prompt_box textarea {
-    background-color: #1d1d1d !important
-}
-
-#prompt_examples {
-    margin: 0 !important
-}
-
-#prompt_examples svg {
-    display: none !important;
-}
-
-.gr-sample-textbox {
-    border-radius: 1rem !important;
-    border-color: rgb(31, 41, 55) !important;
-    border-width: 2px !important;
-}
-
-#ui_body {
-    background-color: #111111 !important;
-    padding: 10px !important;
-    border-radius: 0.5em !important;
-}
-
-#img_result+div {
-    display: none !important;
-}
-
-footer {
-    display: none !important;
-}
--- a/web/index.py
+++ b/web/index.py
@@ -1,175 +0,0 @@
-import os
-import sys
-from pathlib import Path
-
-if "AMD_ENABLE_LLPC" not in os.environ:
-    os.environ["AMD_ENABLE_LLPC"] = "1"
-
-if sys.platform == "darwin":
-    os.environ["DYLD_LIBRARY_PATH"] = "/usr/local/lib"
-
-import gradio as gr
-from PIL import Image
-from models.stable_diffusion.resources import resource_path, prompt_examples
-from models.stable_diffusion.main import stable_diff_inf
-from models.stable_diffusion.stable_args import args
-from models.stable_diffusion.utils import get_available_devices
-
-nodlogo_loc = resource_path("logos/nod-logo.png")
-sdlogo_loc = resource_path("logos/sd-demo-logo.png")
-
-
-demo_css = Path(__file__).parent.joinpath("demo.css").resolve()
-
-
-with gr.Blocks(title="Stable Diffusion", css=demo_css) as shark_web:
-    with gr.Row(elem_id="ui_title"):
-        nod_logo = Image.open(nodlogo_loc)
-        logo2 = Image.open(sdlogo_loc)
-        with gr.Row():
-            with gr.Column(scale=1, elem_id="demo_title_outer"):
-                gr.Image(
-                    value=nod_logo,
-                    show_label=False,
-                    interactive=False,
-                    elem_id="top_logo",
-                ).style(width=150, height=100)
-            with gr.Column(scale=5, elem_id="demo_title_outer"):
-                gr.Image(
-                    value=logo2,
-                    show_label=False,
-                    interactive=False,
-                    elem_id="demo_title",
-                ).style(width=150, height=100)
-
-    with gr.Row(elem_id="ui_body"):
-        with gr.Row():
-            with gr.Column(scale=1, min_width=600):
-                with gr.Group(elem_id="prompt_box_outer"):
-                    prompt = gr.Textbox(
-                        label="Prompt",
-                        value="cyberpunk forest by Salvador Dali",
-                        lines=1,
-                        elem_id="prompt_box",
-                    )
-                    negative_prompt = gr.Textbox(
-                        label="Negative Prompt",
-                        value="trees, green",
-                        lines=1,
-                        elem_id="prompt_box",
-                    )
-                with gr.Row():
-                    variant = gr.Dropdown(
-                        label="Model Variant",
-                        value="stablediffusion",
-                        choices=[
-                            "stablediffusion",
-                            "anythingv3",
-                            "analogdiffusion",
-                            "openjourney",
-                            "dreamlike",
-                        ],
-                    )
-                    scheduler_key = gr.Dropdown(
-                        label="Scheduler",
-                        value="SharkEulerDiscrete",
-                        choices=[
-                            "DDIM",
-                            "PNDM",
-                            "LMSDiscrete",
-                            "DPMSolverMultistep",
-                            "EulerDiscrete",
-                            "EulerAncestralDiscrete",
-                            "SharkEulerDiscrete",
-                        ],
-                    )
-                with gr.Row():
-                    steps = gr.Slider(1, 100, value=50, step=1, label="Steps")
-                    guidance_scale = gr.Slider(
-                        0,
-                        50,
-                        value=7.5,
-                        step=0.1,
-                        label="CFG Scale",
-                    )
-                with gr.Row():
-                    seed = gr.Number(value=-1, precision=0, label="Seed")
-                    available_devices = get_available_devices()
-                    device_key = gr.Dropdown(
-                        label="Device",
-                        value=available_devices[0],
-                        choices=available_devices,
-                    )
-                with gr.Row():
-                    random_seed = gr.Button("Randomize Seed")
-                    random_seed.click(
-                        None,
-                        inputs=[],
-                        outputs=[seed],
-                        _js="() => Math.floor(Math.random() * 4294967295)",
-                    )
-                    stable_diffusion = gr.Button("Generate Image")
-                with gr.Accordion(label="Prompt Examples!"):
-                    ex = gr.Examples(
-                        examples=prompt_examples,
-                        inputs=prompt,
-                        cache_examples=False,
-                        elem_id="prompt_examples",
-                    )
-
-            with gr.Column(scale=1, min_width=600):
-                with gr.Group():
-                    generated_img = gr.Image(
-                        type="pil", interactive=False
-                    ).style(height=512)
-                    std_output = gr.Textbox(
-                        value="Nothing to show.",
-                        lines=4,
-                        show_label=False,
-                    )
-                output_dir = args.output_dir if args.output_dir else Path.cwd()
-                output_dir = Path(output_dir, "generated_imgs")
-                output_loc = gr.Textbox(
-                    label="Saving Images at",
-                    value=output_dir,
-                    interactive=False,
-                )
-
-        prompt.submit(
-            stable_diff_inf,
-            inputs=[
-                prompt,
-                negative_prompt,
-                steps,
-                guidance_scale,
-                seed,
-                scheduler_key,
-                variant,
-                device_key,
-            ],
-            outputs=[generated_img, std_output],
-            show_progress=args.progress_bar,
-        )
-        stable_diffusion.click(
-            stable_diff_inf,
-            inputs=[
-                prompt,
-                negative_prompt,
-                steps,
-                guidance_scale,
-                seed,
-                scheduler_key,
-                variant,
-                device_key,
-            ],
-            outputs=[generated_img, std_output],
-            show_progress=args.progress_bar,
-        )
-
-shark_web.queue()
-shark_web.launch(
-    share=args.share,
-    inbrowser=True,
-    server_name="0.0.0.0",
-    server_port=args.server_port,
-)
--- a/web/models/init.py
+++ b/web/models/init.py
--- a/web/models/albert_maskfill.py
+++ b/web/models/albert_maskfill.py
@@ -1,105 +0,0 @@
-from transformers import AutoModelForMaskedLM, AutoTokenizer
-import torch
-from shark.shark_inference import SharkInference
-from shark.shark_importer import SharkImporter
-import numpy as np
-
-################################## Albert Module #########################
-
-
-class AlbertModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.model = AutoModelForMaskedLM.from_pretrained("albert-base-v2")
-        self.model.eval()
-
-    def forward(self, input_ids, attention_mask):
-        return self.model(
-            input_ids=input_ids, attention_mask=attention_mask
-        ).logits
-
-
-################################## Preprocessing inputs ####################
-
-DEBUG = False
-compiled_module = {}
-compiled_module["tokenizer"] = AutoTokenizer.from_pretrained("albert-base-v2")
-
-
-def preprocess_data(text):
-    global compiled_module
-
-    # Preparing Data
-    tokenizer = compiled_module["tokenizer"]
-    encoded_inputs = tokenizer(
-        text,
-        padding="max_length",
-        truncation=True,
-        max_length=512,
-        return_tensors="pt",
-    )
-    inputs = (encoded_inputs["input_ids"], encoded_inputs["attention_mask"])
-    return inputs
-
-
-def top5_possibilities(text, inputs, token_logits, log_write):
-    global DEBUG
-    global compiled_module
-
-    if DEBUG:
-        log_write.write("Retrieving top 5 possible outcomes.\n")
-    tokenizer = compiled_module["tokenizer"]
-    mask_id = torch.where(inputs[0] == tokenizer.mask_token_id)[1]
-    mask_token_logits = token_logits[0, mask_id, :]
-    percentage = torch.nn.functional.softmax(mask_token_logits, dim=1)[0]
-    top_5_tokens = torch.topk(mask_token_logits, 5, dim=1).indices[0].tolist()
-    top5 = {}
-    for token in top_5_tokens:
-        label = text.replace(tokenizer.mask_token, tokenizer.decode(token))
-        top5[label] = percentage[token].item()
-    if DEBUG:
-        log_write.write("Done.\n")
-    return top5
-
-
-##############################################################################
-
-
-def albert_maskfill_inf(masked_text, device):
-    global DEBUG
-    global compiled_module
-
-    DEBUG = False
-    log_write = open(r"logs/albert_maskfill_log.txt", "w")
-    if log_write:
-        DEBUG = True
-
-    inputs = preprocess_data(masked_text)
-    if device not in compiled_module.keys():
-        if DEBUG:
-            log_write.write("Compiling the Albert Maskfill module.\n")
-        mlir_importer = SharkImporter(
-            AlbertModule(),
-            inputs,
-            frontend="torch",
-        )
-        minilm_mlir, func_name = mlir_importer.import_mlir(
-            is_dynamic=False, tracing_required=True
-        )
-        shark_module = SharkInference(
-            minilm_mlir, func_name, mlir_dialect="linalg", device=device
-        )
-        shark_module.compile()
-        compiled_module[device] = shark_module
-        if DEBUG:
-            log_write.write("Compilation successful.\n")
-
-    token_logits = torch.tensor(compiled_module[device].forward(inputs))
-    output = top5_possibilities(masked_text, inputs, token_logits, log_write)
-    log_write.close()
-
-    std_output = ""
-    with open(r"logs/albert_maskfill_log.txt", "r") as log_read:
-        std_output = log_read.read()
-
-    return output, std_output
--- a/web/models/diffusion/init.py
+++ b/web/models/diffusion/init.py
--- a/web/models/diffusion/setup_vdiffusion.sh
+++ b/web/models/diffusion/setup_vdiffusion.sh
@@ -1,5 +0,0 @@
-git clone --recursive https://github.com/crowsonkb/v-diffusion-pytorch.git
-pip install ftfy regex tqdm
-
-mkdir checkpoints
-wget https://the-eye.eu/public/AI/models/v-diffusion/cc12m_1_cfg.pth -P checkpoints/
--- a/web/models/diffusion/v_diffusion.py
+++ b/web/models/diffusion/v_diffusion.py
@@ -1,214 +0,0 @@
-"""classifier-free guidance sampling from a diffusion model."""
-
-from functools import partial
-from pathlib import Path
-
-from PIL import Image
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torchvision import transforms
-from torchvision.transforms import functional as TF
-from tqdm import trange
-
-from shark.shark_inference import SharkInference
-from torch.fx.experimental.proxy_tensor import make_fx
-from torch._decomp import get_decompositions
-import torch_mlir
-
-import sys
-
-sys.path.append("models/diffusion/v-diffusion-pytorch")
-
-from CLIP import clip
-from diffusion import get_model, get_models, sampling, utils
-
-import gradio as gr
-
-MODULE_DIR = Path(__file__).resolve().parent
-
-set_global_parameters = False
-device = None
-model = None
-checkpoint = None
-clip_model = None
-
-
-def parse_prompt(prompt, default_weight=3.0):
-    if prompt.startswith("http://") or prompt.startswith("https://"):
-        vals = prompt.rsplit(":", 2)
-        vals = [vals[0] + ":" + vals[1], *vals[2:]]
-    else:
-        vals = prompt.rsplit(":", 1)
-    vals = vals + ["", default_weight][len(vals) :]
-    print(vals[1])
-    print(vals[0])
-    return vals[0], float(vals[1])
-
-
-def run(x, steps, shark_module, args):
-    def compiled_cfg_model_fn(x, t):
-        x_ny = x.detach().numpy()
-        t_ny = t.detach().numpy()
-        inputs = (x_ny, t_ny)
-        result = shark_module.forward(inputs)
-        return torch.from_numpy(result)
-
-    return sampling.plms_sample(compiled_cfg_model_fn, x, steps, {})
-
-
-def run_all(
-    x,
-    t,
-    steps,
-    n,
-    batch_size,
-    side_x,
-    side_y,
-    shark_module,
-    args,
-):
-    x = torch.randn([n, 3, side_y, side_x], device=device)
-    t = torch.linspace(1, 0, args["steps"] + 1, device=device)[:-1]
-    steps = utils.get_spliced_ddpm_cosine_schedule(t)
-    pil_images = []
-    for i in trange(0, n, batch_size):
-        cur_batch_size = min(n - i, batch_size)
-        outs = run(x[i : i + cur_batch_size], steps, shark_module, args)
-        for j, out in enumerate(outs):
-            pil_images.append(utils.to_pil_image(out))
-    return pil_images[0]
-
-
-def cache_model():
-    global set_global_parameters
-    global device
-    global model
-    global checkpoint
-    global clip_model
-    if not set_global_parameters:
-        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        model = get_model("cc12m_1_cfg")()
-        checkpoint = MODULE_DIR / f"checkpoints/cc12m_1_cfg.pth"
-        model.load_state_dict(torch.load(checkpoint, map_location="cpu"))
-        if device.type == "cuda":
-            model = model.half()
-        model = model.to(device).eval().requires_grad_(False)
-        clip_model_name = (
-            model.clip_model if hasattr(model, "clip_model") else "ViT-B/16"
-        )
-        clip_model = clip.load(clip_model_name, jit=False, device=device)[0]
-        clip_model.eval().requires_grad_(False)
-        set_global_parameters = True
-
-
-def vdiff_inf(prompts: str, n, bs, steps, _device):
-    global device
-    global model
-    global checkpoint
-    global clip_model
-
-    args = {}
-    target_embeds = []
-    weights = []
-    args["prompts"] = prompts
-    args["batch_size"] = int(bs)
-    args["n"] = int(n)
-    args["seed"] = 0
-    args["steps"] = int(steps)
-    args["device"] = _device
-
-    cache_model()
-
-    _, side_y, side_x = model.shape
-    normalize = transforms.Normalize(
-        mean=[0.48145466, 0.4578275, 0.40821073],
-        std=[0.26862954, 0.26130258, 0.27577711],
-    )
-
-    zero_embed = torch.zeros([1, clip_model.visual.output_dim], device=device)
-    target_embeds.append(zero_embed)
-
-    prompt_list = args["prompts"].rsplit(";")
-    for prompt in prompt_list:
-        txt, weight = parse_prompt(prompt)
-        target_embeds.append(
-            clip_model.encode_text(clip.tokenize(txt).to(device)).float()
-        )
-        weights.append(weight)
-    weights = torch.tensor([1 - sum(weights), *weights], device=device)
-
-    torch.manual_seed(args["seed"])
-
-    x = torch.randn([args["n"], 3, side_y, side_x], device=device)
-    t = torch.linspace(1, 0, args["steps"] + 1, device=device)[:-1]
-    steps = utils.get_spliced_ddpm_cosine_schedule(t)
-    min_batch_size = min(args["n"], args["batch_size"])
-    x_in = x[0:min_batch_size, :, :, :]
-    ts = x_in.new_ones([x_in.shape[0]])
-    t_in = t[0] * ts
-
-    def cfg_model_fn(x, t):
-        n = x.shape[0]
-        n_conds = len(target_embeds)
-        x_in = x.repeat([n_conds, 1, 1, 1])
-        t_in = t.repeat([n_conds])
-        clip_embed_in = torch.cat([*target_embeds]).repeat([n, 1])
-        vs = model(x_in, t_in, clip_embed_in).view([n_conds, n, *x.shape[1:]])
-        v = vs.mul(weights[:, None, None, None, None]).sum(0)
-        return v
-
-    fx_g = make_fx(
-        cfg_model_fn,
-        decomposition_table=get_decompositions(
-            [
-                torch.ops.aten.embedding_dense_backward,
-                torch.ops.aten.native_layer_norm_backward,
-                torch.ops.aten.slice_backward,
-                torch.ops.aten.select_backward,
-                torch.ops.aten.norm.ScalarOpt_dim,
-                torch.ops.aten.native_group_norm,
-                torch.ops.aten.upsample_bilinear2d.vec,
-                torch.ops.aten.split.Tensor,
-                torch.ops.aten.split_with_sizes,
-            ]
-        ),
-    )(x_in, t_in)
-
-    fx_g.graph.set_codegen(torch.fx.graph.CodeGen())
-    fx_g.recompile()
-
-    for node in fx_g.graph.nodes:
-        if isinstance(node.target, torch._ops.OpOverload):
-            node.target = node.target.overloadpacket
-    fx_g.recompile()
-
-    ts_g = torch.jit.script(fx_g)
-
-    module = torch_mlir.compile(
-        ts_g,
-        [x_in, t_in],
-        torch_mlir.OutputType.LINALG_ON_TENSORS,
-        use_tracing=False,
-    )
-
-    mlir_model = module
-    func_name = "forward"
-    shark_module = SharkInference(
-        mlir_model, func_name, device=args["device"], mlir_dialect="linalg"
-    )
-    shark_module.compile()
-    return (
-        run_all(
-            x,
-            t,
-            args["steps"],
-            args["n"],
-            args["batch_size"],
-            side_x,
-            side_y,
-            shark_module,
-            args,
-        ),
-        "Testing..",
-    )
--- a/web/models/resnet50.py
+++ b/web/models/resnet50.py
@@ -1,90 +0,0 @@
-from PIL import Image
-import requests
-import torch
-from torchvision import transforms
-from shark.shark_inference import SharkInference
-from shark.shark_downloader import download_model
-
-################################## Preprocessing inputs and helper functions ########
-
-DEBUG = False
-compiled_module = {}
-
-
-def preprocess_image(img):
-    image = Image.fromarray(img)
-    preprocess = transforms.Compose(
-        [
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            transforms.Normalize(
-                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-            ),
-        ]
-    )
-    img_preprocessed = preprocess(image)
-    return torch.unsqueeze(img_preprocessed, 0)
-
-
-def load_labels():
-    classes_text = requests.get(
-        "https://raw.githubusercontent.com/cathyzhyi/ml-data/main/imagenet-classes.txt",
-        stream=True,
-    ).text
-    labels = [line.strip() for line in classes_text.splitlines()]
-    return labels
-
-
-def top3_possibilities(res, log_write):
-    global DEBUG
-
-    if DEBUG:
-        log_write.write("Retrieving top 3 possible outcomes.\n")
-    labels = load_labels()
-    _, indexes = torch.sort(res, descending=True)
-    percentage = torch.nn.functional.softmax(res, dim=1)[0]
-    top3 = dict(
-        [(labels[idx], percentage[idx].item()) for idx in indexes[0][:3]]
-    )
-    if DEBUG:
-        log_write.write("Done.\n")
-    return top3
-
-
-##############################################################################
-
-
-def resnet_inf(numpy_img, device):
-    global DEBUG
-    global compiled_module
-
-    DEBUG = False
-    log_write = open(r"logs/resnet50_log.txt", "w")
-    if log_write:
-        DEBUG = True
-
-    if device not in compiled_module.keys():
-        if DEBUG:
-            log_write.write("Compiling the Resnet50 module.\n")
-        mlir_model, func_name, inputs, golden_out = download_model(
-            "resnet50", frontend="torch"
-        )
-        shark_module = SharkInference(
-            mlir_model, func_name, device=device, mlir_dialect="linalg"
-        )
-        shark_module.compile()
-        compiled_module[device] = shark_module
-        if DEBUG:
-            log_write.write("Compilation successful.\n")
-
-    img = preprocess_image(numpy_img)
-    result = compiled_module[device].forward((img.detach().numpy(),))
-    output = top3_possibilities(torch.from_numpy(result), log_write)
-    log_write.close()
-
-    std_output = ""
-    with open(r"logs/resnet50_log.txt", "r") as log_read:
-        std_output = log_read.read()
-
-    return output, std_output
--- a/web/models/stable_diffusion/init.py
+++ b/web/models/stable_diffusion/init.py
--- a/web/models/stable_diffusion/cache_objects.py
+++ b/web/models/stable_diffusion/cache_objects.py
@@ -1,111 +0,0 @@
-from transformers import CLIPTokenizer
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    EulerDiscreteScheduler,
-    EulerAncestralDiscreteScheduler,
-)
-from models.stable_diffusion.opt_params import get_unet, get_vae, get_clip
-from models.stable_diffusion.utils import (
-    set_init_device_flags,
-    set_iree_runtime_flags,
-)
-from models.stable_diffusion.stable_args import args
-from models.stable_diffusion.schedulers import (
-    SharkEulerDiscreteScheduler,
-)
-import gc
-
-
-model_config = {
-    "v2_1": "stabilityai/stable-diffusion-2-1",
-    "v2_1base": "stabilityai/stable-diffusion-2-1-base",
-    "v1_4": "CompVis/stable-diffusion-v1-4",
-}
-
-
-def get_schedulers(version):
-    schedulers = dict()
-    schedulers["PNDM"] = PNDMScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers["LMSDiscrete"] = LMSDiscreteScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers["DDIM"] = DDIMScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers[
-        "DPMSolverMultistep"
-    ] = DPMSolverMultistepScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers["EulerDiscrete"] = EulerDiscreteScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers[
-        "EulerAncestralDiscrete"
-    ] = EulerAncestralDiscreteScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers[
-        "SharkEulerDiscrete"
-    ] = SharkEulerDiscreteScheduler.from_pretrained(
-        model_config[version],
-        subfolder="scheduler",
-    )
-    schedulers["SharkEulerDiscrete"].compile()
-    return schedulers
-
-
-def get_tokenizer(version):
-    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-    if version != "v1_4":
-        tokenizer = CLIPTokenizer.from_pretrained(
-            model_config[version], subfolder="tokenizer"
-        )
-    return tokenizer
-
-
-class ModelCache:
-    def __init__(self):
-        self.device = None
-        self.variant = None
-        self.version = None
-        self.schedulers = None
-        self.tokenizer = None
-        self.vae = None
-        self.clip = None
-        self.unet = None
-
-    def set_models(self, device_key):
-        if self.device != device_key or self.variant != args.variant:
-            self.device = device_key
-            self.variant = args.variant
-            self.version = args.version
-            args.device = device_key.split("=>", 1)[1].strip()
-            args.max_length = 64
-            args.use_tuned = True
-            set_init_device_flags()
-            del self.schedulers
-            del self.tokenizer
-            del self.vae
-            del self.unet
-            del self.clip
-            gc.collect()
-            self.schedulers = get_schedulers(args.version)
-            self.tokenizer = get_tokenizer(args.version)
-            self.vae = get_vae()
-            self.unet = get_unet()
-            self.clip = get_clip()
-
-
-model_cache = ModelCache()
--- a/web/models/stable_diffusion/logos/Nod_logo.png
+++ b/web/models/stable_diffusion/logos/Nod_logo.png
--- a/web/models/stable_diffusion/logos/nod-logo.png
+++ b/web/models/stable_diffusion/logos/nod-logo.png
--- a/web/models/stable_diffusion/logos/sd-demo-logo.png
+++ b/web/models/stable_diffusion/logos/sd-demo-logo.png
--- a/web/models/stable_diffusion/main.py
+++ b/web/models/stable_diffusion/main.py
@@ -1,299 +0,0 @@
-import torch
-import os
-from PIL import Image, PngImagePlugin
-from tqdm.auto import tqdm
-from models.stable_diffusion.cache_objects import model_cache
-from models.stable_diffusion.stable_args import args
-from models.stable_diffusion.utils import disk_space_check
-from random import randint
-import json
-import numpy as np
-import time
-import sys
-from datetime import datetime as dt
-from csv import DictWriter
-import re
-from pathlib import Path
-
-
-if args.clear_all:
-    print("CLEARING ALL, EXPECT SEVERAL MINUTES TO RECOMPILE")
-    from glob import glob
-    import shutil
-
-    vmfbs = glob(os.path.join(os.getcwd(), "*.vmfb"))
-    for vmfb in vmfbs:
-        if os.path.exists(vmfb):
-            os.remove(vmfb)
-    home = os.path.expanduser("~")
-    if os.name == "nt":  # Windows
-        appdata = os.getenv("LOCALAPPDATA")
-        shutil.rmtree(os.path.join(appdata, "AMD/VkCache"), ignore_errors=True)
-        shutil.rmtree(os.path.join(home, "shark_tank"), ignore_errors=True)
-    elif os.name == "unix":
-        shutil.rmtree(os.path.join(home, ".cache/AMD/VkCache"))
-        shutil.rmtree(os.path.join(home, ".local/shark_tank"))
-
-
-# Helper function to profile the vulkan device.
-def start_profiling(file_path="foo.rdc", profiling_mode="queue"):
-    if args.vulkan_debug_utils and "vulkan" in args.device:
-        import iree
-
-        print(f"Profiling and saving to {file_path}.")
-        vulkan_device = iree.runtime.get_device(args.device)
-        vulkan_device.begin_profiling(mode=profiling_mode, file_path=file_path)
-        return vulkan_device
-    return None
-
-
-def end_profiling(device):
-    if device:
-        return device.end_profiling()
-
-
-def set_ui_params(
-    prompt,
-    negative_prompt,
-    steps,
-    guidance_scale,
-    seed,
-    scheduler_key,
-    variant,
-):
-    args.prompts = [prompt]
-    args.negative_prompts = [negative_prompt]
-    args.steps = steps
-    args.guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
-    args.seed = seed
-    args.scheduler = scheduler_key
-    args.variant = variant
-
-
-# save output images and the inputs correspoding to it.
-def save_output_img(output_img):
-    output_path = args.output_dir if args.output_dir else Path.cwd()
-    disk_space_check(output_path, lim=5)
-    generated_imgs_path = Path(output_path, "generated_imgs")
-    generated_imgs_path.mkdir(parents=True, exist_ok=True)
-    csv_path = Path(generated_imgs_path, "imgs_history.csv")
-
-    prompt_slice = re.sub("[^a-zA-Z0-9]", "_", args.prompts[0][:15])
-    out_img_name = (
-        f"{prompt_slice}_{args.seed}_{dt.now().strftime('%y%m%d_%H%M%S')}"
-    )
-    if args.output_img_format == "jpg":
-        out_img_path = Path(generated_imgs_path, f"{out_img_name}.jpg")
-        output_img.save(
-            out_img_path,
-            quality=95,
-            subsampling=0,
-            optimize=True,
-            progressive=True,
-        )
-    else:
-        out_img_path = Path(generated_imgs_path, f"{out_img_name}.png")
-        pngInfo = PngImagePlugin.PngInfo()
-
-        if args.write_metadata_to_png:
-            pngInfo.add_text(
-                "parameters",
-                f"{args.prompts}\nNegative prompt: {args.negative_prompts}\nSteps:{args.steps}, Sampler: {args.scheduler}, CFG scale: {args.guidance_scale}, Seed: {args.seed}, Size: {args.width}x{args.height}, Model: {args.variant}",
-            )
-
-        output_img.save(
-            output_path / f"{out_img_name}.png", "PNG", pnginfo=pngInfo
-        )
-
-        if args.output_img_format not in ["png", "jpg"]:
-            print(
-                f"[ERROR] Format {args.output_img_format} is not supported yet."
-                "Image saved as png instead. Supported formats: png / jpg"
-            )
-
-    new_entry = {
-        "VARIANT": args.variant,
-        "VERSION": args.version,
-        "SCHEDULER": args.scheduler,
-        "PROMPT": args.prompts[0],
-        "NEG_PROMPT": args.negative_prompts[0],
-        "SEED": args.seed,
-        "CFG_SCALE": float(args.guidance_scale),
-        "PRECISION": args.precision,
-        "STEPS": args.steps,
-        "OUTPUT": out_img_path,
-    }
-
-    with open(csv_path, "a") as csv_obj:
-        dictwriter_obj = DictWriter(csv_obj, fieldnames=list(new_entry.keys()))
-        dictwriter_obj.writerow(new_entry)
-        csv_obj.close()
-
-    if args.save_metadata_to_json:
-        del new_entry["OUTPUT"]
-        with open(f"{output_path}/{out_img_name}.json", "w") as f:
-            json.dump(new_entry, f, indent=4)
-
-
-def stable_diff_inf(
-    prompt: str,
-    negative_prompt: str,
-    steps: int,
-    guidance_scale: float,
-    seed: int,
-    scheduler_key: str,
-    variant: str,
-    device_key: str,
-):
-    # Handle out of range seeds.
-    uint32_info = np.iinfo(np.uint32)
-    uint32_min, uint32_max = uint32_info.min, uint32_info.max
-    if seed < uint32_min or seed >= uint32_max:
-        seed = randint(uint32_min, uint32_max)
-
-    set_ui_params(
-        prompt,
-        negative_prompt,
-        steps,
-        guidance_scale,
-        seed,
-        scheduler_key,
-        variant,
-    )
-    dtype = torch.float32 if args.precision == "fp32" else torch.half
-    generator = torch.manual_seed(
-        args.seed
-    )  # Seed generator to create the inital latent noise
-
-    # set height and width.
-    height = 512  # default height of Stable Diffusion
-    width = 512  # default width of Stable Diffusion
-    if args.version == "v2_1":
-        height = 768
-        width = 768
-
-    # get all cached data.
-    disk_space_check(Path.cwd())
-    model_cache.set_models(device_key)
-    tokenizer = model_cache.tokenizer
-    scheduler = model_cache.schedulers[args.scheduler]
-    vae, unet, clip = model_cache.vae, model_cache.unet, model_cache.clip
-    cpu_scheduling = not args.scheduler.startswith("Shark")
-
-    # create a random initial latent.
-    latents = torch.randn(
-        (1, 4, height // 8, width // 8),
-        generator=generator,
-        dtype=torch.float32,
-    ).to(dtype)
-
-    # Warmup phase to improve performance.
-    if args.warmup_count >= 1:
-        vae_warmup_input = torch.clone(latents).detach().numpy()
-        clip_warmup_input = torch.randint(1, 2, (2, args.max_length))
-    for i in range(args.warmup_count):
-        vae("forward", (vae_warmup_input,))
-        clip("forward", (clip_warmup_input,))
-
-    start = time.time()
-    text_input = tokenizer(
-        args.prompts,
-        padding="max_length",
-        max_length=args.max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
-    max_length = text_input.input_ids.shape[-1]
-    uncond_input = tokenizer(
-        args.negative_prompts,
-        padding="max_length",
-        max_length=max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
-    text_input = torch.cat([uncond_input.input_ids, text_input.input_ids])
-
-    clip_inf_start = time.time()
-    text_embeddings = clip("forward", (text_input,))
-    clip_inf_end = time.time()
-    text_embeddings = torch.from_numpy(text_embeddings).to(dtype)
-    text_embeddings_numpy = text_embeddings.detach().numpy()
-
-    scheduler.set_timesteps(args.steps)
-    scheduler.is_scale_input_called = True
-
-    latents = latents * scheduler.init_noise_sigma
-
-    avg_ms = 0
-    for i, t in tqdm(enumerate(scheduler.timesteps)):
-        step_start = time.time()
-        timestep = torch.tensor([t]).to(dtype).detach().numpy()
-        latent_model_input = scheduler.scale_model_input(latents, t)
-        if cpu_scheduling:
-            latent_model_input = latent_model_input.detach().numpy()
-
-        profile_device = start_profiling(file_path="unet.rdc")
-        noise_pred = unet(
-            "forward",
-            (
-                latent_model_input,
-                timestep,
-                text_embeddings_numpy,
-                args.guidance_scale,
-            ),
-            send_to_host=False,
-        )
-        end_profiling(profile_device)
-
-        if cpu_scheduling:
-            noise_pred = torch.from_numpy(noise_pred.to_host())
-            latents = scheduler.step(noise_pred, t, latents).prev_sample
-        else:
-            latents = scheduler.step(noise_pred, t, latents)
-        step_time = time.time() - step_start
-        avg_ms += step_time
-        step_ms = int((step_time) * 1000)
-        if not args.hide_steps:
-            print(f" \nIteration = {i}, Time = {step_ms}ms")
-
-    # scale and decode the image latents with vae
-    if args.use_base_vae:
-        latents = 1 / 0.18215 * latents
-    latents_numpy = latents
-    if cpu_scheduling:
-        latents_numpy = latents.detach().numpy()
-    profile_device = start_profiling(file_path="vae.rdc")
-    vae_start = time.time()
-    images = vae("forward", (latents_numpy,))
-    vae_end = time.time()
-    end_profiling(profile_device)
-    if args.use_base_vae:
-        image = torch.from_numpy(images)
-        image = (image.detach().cpu() * 255.0).numpy()
-        images = image.round()
-    end_time = time.time()
-
-    avg_ms = 1000 * avg_ms / args.steps
-    clip_inf_time = (clip_inf_end - clip_inf_start) * 1000
-    vae_inf_time = (vae_end - vae_start) * 1000
-    total_time = end_time - start
-    print(f"\nAverage step time: {avg_ms}ms/it")
-    print(f"Clip Inference time (ms) = {clip_inf_time:.3f}")
-    print(f"VAE Inference time (ms): {vae_inf_time:.3f}")
-    print(f"\nTotal image generation time: {total_time}sec")
-
-    # generate outputs to web.
-    images = torch.from_numpy(images).to(torch.uint8).permute(0, 2, 3, 1)
-    pil_images = [Image.fromarray(image) for image in images.numpy()]
-
-    text_output = f"prompt={args.prompts}"
-    text_output += f"\nnegative prompt={args.negative_prompts}"
-    text_output += f"\nvariant={args.variant}, version={args.version}, scheduler={args.scheduler}"
-    text_output += f"\ndevice={device_key}"
-    text_output += f"\nsteps={args.steps}, guidance_scale={args.guidance_scale}, seed={args.seed}, size={height}x{width}"
-    text_output += f"\nAverage step time: {avg_ms:.4f}ms/it"
-    text_output += f"\nTotal image generation time: {total_time:.4f}sec"
-
-    save_output_img(pil_images[0])
-
-    return pil_images[0], text_output
--- a/web/models/stable_diffusion/model_wrappers.py
+++ b/web/models/stable_diffusion/model_wrappers.py
@@ -1,182 +0,0 @@
-from diffusers import AutoencoderKL, UNet2DConditionModel
-from transformers import CLIPTextModel
-from models.stable_diffusion.utils import compile_through_fx
-from models.stable_diffusion.resources import models_config
-from models.stable_diffusion.stable_args import args
-import torch
-
-
-# clip has 2 variants of max length 77 or 64.
-model_clip_max_length = 64 if args.max_length == 64 else 77
-if args.variant in ["anythingv3", "analogdiffusion", "dreamlike"]:
-    model_clip_max_length = 77
-elif args.variant == "openjourney":
-    model_clip_max_length = 64
-
-model_input = {
-    "v2_1": {
-        "clip": (torch.randint(1, 2, (2, model_clip_max_length)),),
-        "vae": (torch.randn(1, 4, 96, 96),),
-        "unet": (
-            torch.randn(1, 4, 96, 96),  # latents
-            torch.tensor([1]).to(torch.float32),  # timestep
-            torch.randn(2, model_clip_max_length, 1024),  # embedding
-            torch.tensor(1).to(torch.float32),  # guidance_scale
-        ),
-    },
-    "v2_1base": {
-        "clip": (torch.randint(1, 2, (2, model_clip_max_length)),),
-        "vae": (torch.randn(1, 4, 64, 64),),
-        "unet": (
-            torch.randn(1, 4, 64, 64),  # latents
-            torch.tensor([1]).to(torch.float32),  # timestep
-            torch.randn(2, model_clip_max_length, 1024),  # embedding
-            torch.tensor(1).to(torch.float32),  # guidance_scale
-        ),
-    },
-    "v1_4": {
-        "clip": (torch.randint(1, 2, (2, model_clip_max_length)),),
-        "vae": (torch.randn(1, 4, 64, 64),),
-        "unet": (
-            torch.randn(1, 4, 64, 64),
-            torch.tensor([1]).to(torch.float32),  # timestep
-            torch.randn(2, model_clip_max_length, 768),
-            torch.tensor(1).to(torch.float32),
-        ),
-    },
-}
-
-version = args.version if args.variant == "stablediffusion" else "v1_4"
-
-
-def get_configs():
-    model_id_key = f"{args.variant}/{version}"
-    revision_key = f"{args.variant}/{args.precision}"
-    try:
-        model_id = models_config[0][model_id_key]
-        revision = models_config[1][revision_key]
-    except KeyError:
-        raise Exception(
-            f"No entry for {model_id_key} or {revision_key} in the models configuration"
-        )
-
-    return model_id, revision
-
-
-def get_clip_mlir(model_name="clip_text", extra_args=[]):
-    model_id, revision = get_configs()
-
-    class CLIPText(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.text_encoder = CLIPTextModel.from_pretrained(
-                model_id,
-                subfolder="text_encoder",
-                revision=revision,
-            )
-
-        def forward(self, input):
-            return self.text_encoder(input)[0]
-
-    clip_model = CLIPText()
-    shark_clip = compile_through_fx(
-        clip_model,
-        model_input[version]["clip"],
-        model_name=model_name,
-        extra_args=extra_args,
-    )
-    return shark_clip
-
-
-def get_shark_module(model_key, module, model_name, extra_args):
-    if args.precision == "fp16":
-        module = module.half().cuda()
-        inputs = tuple(
-            [
-                inputs.half().cuda() if len(inputs.shape) != 0 else inputs
-                for inputs in model_input[version][model_key]
-            ]
-        )
-    else:
-        inputs = model_input[version][model_key]
-
-    shark_module = compile_through_fx(
-        module,
-        inputs,
-        model_name=model_name,
-        extra_args=extra_args,
-    )
-    return shark_module
-
-
-def get_base_vae_mlir(model_name="vae", extra_args=[]):
-    model_id, revision = get_configs()
-
-    class BaseVaeModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.vae = AutoencoderKL.from_pretrained(
-                model_id,
-                subfolder="vae",
-                revision=revision,
-            )
-
-        def forward(self, input):
-            x = self.vae.decode(input, return_dict=False)[0]
-            return (x / 2 + 0.5).clamp(0, 1)
-
-    vae = BaseVaeModel()
-    return get_shark_module("vae", vae, model_name, extra_args)
-
-
-def get_vae_mlir(model_name="vae", extra_args=[]):
-    model_id, revision = get_configs()
-
-    class VaeModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.vae = AutoencoderKL.from_pretrained(
-                model_id,
-                subfolder="vae",
-                revision=revision,
-            )
-
-        def forward(self, input):
-            input = 1 / 0.18215 * input
-            x = self.vae.decode(input, return_dict=False)[0]
-            x = (x / 2 + 0.5).clamp(0, 1)
-            x = x * 255.0
-            return x.round()
-
-    vae = VaeModel()
-    return get_shark_module("vae", vae, model_name, extra_args)
-
-
-def get_unet_mlir(model_name="unet", extra_args=[]):
-    model_id, revision = get_configs()
-
-    class UnetModel(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.unet = UNet2DConditionModel.from_pretrained(
-                model_id,
-                subfolder="unet",
-                revision=revision,
-            )
-            self.in_channels = self.unet.in_channels
-            self.train(False)
-
-        def forward(self, latent, timestep, text_embedding, guidance_scale):
-            # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes.
-            latents = torch.cat([latent] * 2)
-            unet_out = self.unet.forward(
-                latents, timestep, text_embedding, return_dict=False
-            )[0]
-            noise_pred_uncond, noise_pred_text = unet_out.chunk(2)
-            noise_pred = noise_pred_uncond + guidance_scale * (
-                noise_pred_text - noise_pred_uncond
-            )
-            return noise_pred
-
-    unet = UnetModel()
-    return get_shark_module("unet", unet, model_name, extra_args)
--- a/web/models/stable_diffusion/opt_params.py
+++ b/web/models/stable_diffusion/opt_params.py
@@ -1,99 +0,0 @@
-import sys
-from models.stable_diffusion.model_wrappers import (
-    get_base_vae_mlir,
-    get_vae_mlir,
-    get_unet_mlir,
-    get_clip_mlir,
-)
-from models.stable_diffusion.resources import models_db
-from models.stable_diffusion.stable_args import args
-from models.stable_diffusion.utils import get_shark_model
-
-BATCH_SIZE = len(args.prompts)
-if BATCH_SIZE != 1:
-    sys.exit("Only batch size 1 is supported.")
-
-
-def get_params(bucket_key, model_key, model, is_tuned, precision):
-    iree_flags = []
-    if len(args.iree_vulkan_target_triple) > 0:
-        iree_flags.append(
-            f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-        )
-
-    # Disable bindings fusion to work with moltenVK.
-    if sys.platform == "darwin":
-        iree_flags.append("-iree-stream-fuse-binding=false")
-
-    try:
-        bucket = models_db[0][bucket_key]
-        model_name = models_db[1][model_key]
-        iree_flags += models_db[2][model][is_tuned][precision][
-            "default_compilation_flags"
-        ]
-    except KeyError:
-        raise Exception(
-            f" there is no entry for {model_key} in the models database"
-        )
-
-    if (
-        "specified_compilation_flags"
-        in models_db[2][model][is_tuned][precision]
-    ):
-        device = (
-            args.device
-            if "://" not in args.device
-            else args.device.split("://")[0]
-        )
-        if (
-            device
-            not in models_db[2][model][is_tuned][precision][
-                "specified_compilation_flags"
-            ]
-        ):
-            device = "default_device"
-        iree_flags += models_db[2][model][is_tuned][precision][
-            "specified_compilation_flags"
-        ][device]
-
-    return bucket, model_name, iree_flags
-
-
-def get_unet():
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    bucket_key = f"{args.variant}/{is_tuned}"
-    model_key = f"{args.variant}/{args.version}/unet/{args.precision}/length_{args.max_length}/{is_tuned}"
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "unet", is_tuned, args.precision
-    )
-    if not args.use_tuned and args.import_mlir:
-        return get_unet_mlir(model_name, iree_flags)
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_vae():
-    # Tuned model is present only for `fp16` precision.
-    is_tuned = "tuned" if args.use_tuned else "untuned"
-    is_base = "/base" if args.use_base_vae else ""
-    bucket_key = f"{args.variant}/{is_tuned}"
-    model_key = f"{args.variant}/{args.version}/vae/{args.precision}/length_77/{is_tuned}{is_base}"
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "vae", is_tuned, args.precision
-    )
-    if not args.use_tuned and args.import_mlir:
-        if args.use_base_vae:
-            return get_base_vae_mlir(model_name, iree_flags)
-        return get_vae_mlir(model_name, iree_flags)
-    return get_shark_model(bucket, model_name, iree_flags)
-
-
-def get_clip():
-    bucket_key = f"{args.variant}/untuned"
-    model_key = f"{args.variant}/{args.version}/clip/fp32/length_{args.max_length}/untuned"
-    bucket, model_name, iree_flags = get_params(
-        bucket_key, model_key, "clip", "untuned", "fp32"
-    )
-    if args.import_mlir:
-        return get_clip_mlir(model_name, iree_flags)
-    return get_shark_model(bucket, model_name, iree_flags)
--- a/web/models/stable_diffusion/resources.py
+++ b/web/models/stable_diffusion/resources.py
@@ -1,41 +0,0 @@
-import os
-import json
-import sys
-
-
-def resource_path(relative_path):
-    """Get absolute path to resource, works for dev and for PyInstaller"""
-    base_path = getattr(
-        sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))
-    )
-    return os.path.join(base_path, relative_path)
-
-
-prompt_examples = []
-prompts_loc = resource_path("resources/prompts.json")
-if os.path.exists(prompts_loc):
-    with open(prompts_loc, encoding="utf-8") as fopen:
-        prompt_examples = json.load(fopen)
-
-if not prompt_examples:
-    print("Unable to fetch prompt examples.")
-
-
-models_db = []
-models_loc = resource_path("resources/model_db.json")
-if os.path.exists(models_loc):
-    with open(models_loc, encoding="utf-8") as fopen:
-        models_db = json.load(fopen)
-
-if len(models_db) != 3:
-    sys.exit("Error: Unable to load models database.")
-
-
-models_config = []
-modelconfig_loc = resource_path("resources/model_config.json")
-if os.path.exists(modelconfig_loc):
-    with open(modelconfig_loc, encoding="utf-8") as fopen:
-        models_config = json.load(fopen)
-
-if len(models_config) != 2:
-    sys.exit("Error: Unable to load models configuration.")
--- a/web/models/stable_diffusion/resources/model_config.json
+++ b/web/models/stable_diffusion/resources/model_config.json
@@ -1,21 +0,0 @@
-[
-  {
-    "stablediffusion/v1_4":"CompVis/stable-diffusion-v1-4",
-    "stablediffusion/v2_1base":"stabilityai/stable-diffusion-2-1-base",
-    "stablediffusion/v2_1":"stabilityai/stable-diffusion-2-1",
-    "anythingv3/v1_4":"Linaqruf/anything-v3.0",
-    "analogdiffusion/v1_4":"wavymulder/Analog-Diffusion",
-    "openjourney/v1_4":"prompthero/openjourney",
-    "dreamlike/v1_4":"dreamlike-art/dreamlike-diffusion-1.0"
-  },
-  {
-    "stablediffusion/fp16":"fp16",
-    "stablediffusion/fp32":"main",
-    "anythingv3/fp16":"diffusers",
-    "anythingv3/fp32":"diffusers",
-    "analogdiffusion/fp16":"main",
-    "analogdiffusion/fp32":"main",
-    "openjourney/fp16":"main",
-    "openjourney/fp32":"main"
-  }
-]
--- a/web/models/stable_diffusion/resources/model_db.json
+++ b/web/models/stable_diffusion/resources/model_db.json
@@ -1,164 +0,0 @@
-[
-  {
-    "stablediffusion/untuned":"gs://shark_tank/stable_diffusion",
-    "stablediffusion/tuned":"gs://shark_tank/sd_tuned",
-    "anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
-    "anythingv3/tuned":"gs://shark_tank/sd_tuned",
-    "analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
-    "analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
-    "openjourney/untuned":"gs://shark_tank/sd_openjourney",
-    "openjourney/tuned":"gs://shark_tank/sd_tuned",
-    "dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
-  },
-  {
-    "stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
-    "stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
-    "stablediffusion/v1_4/vae/fp32/length_77/untuned":"vae_1dec_fp32",
-    "stablediffusion/v1_4/clip/fp32/length_77/untuned":"clip_18dec_fp32",
-    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet2base_8dec_fp16",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
-    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet_19dec_v2p1base_fp16_64",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae2base_19dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae2base_8dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
-    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip2base_18dec_fp32",
-    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip_19dec_v2p1base_fp32_64",
-    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet2_14dec_fp16",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae2_19dec_fp16",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"vae2_8dec_fp16",
-    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip2_18dec_fp32",
-    "anythingv3/v2_1base/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
-    "anythingv3/v2_1base/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
-    "anythingv3/v2_1base/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
-    "anythingv3/v2_1base/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
-    "anythingv3/v2_1base/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
-    "anythingv3/v2_1base/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
-    "anythingv3/v2_1base/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
-    "analogdiffusion/v2_1base/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
-    "analogdiffusion/v2_1base/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
-    "analogdiffusion/v2_1base/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
-    "analogdiffusion/v2_1base/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
-    "openjourney/v2_1base/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
-    "openjourney/v2_1base/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
-    "openjourney/v2_1base/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
-    "openjourney/v2_1base/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
-    "openjourney/v2_1base/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
-    "dreamlike/v2_1base/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
-    "dreamlike/v2_1base/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
-    "dreamlike/v2_1base/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
-    "dreamlike/v2_1base/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
-  },
-  {
-    "unet": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": []
-        },
-        "fp32": {
-          "default_compilation_flags": []
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32"
-          ],
-          "specified_compilation_flags": {
-            "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-            "default_device": ["--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "vae": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform",
-            "--iree-flow-enable-conv-winograd-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform",
-            "--iree-flow-enable-conv-winograd-transform"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
-      }
-    },
-    "clip": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
-      }
-    }
-  }
-]
--- a/web/models/stable_diffusion/resources/prompts.json
+++ b/web/models/stable_diffusion/resources/prompts.json
@@ -1,8 +0,0 @@
-[["A high tech solarpunk utopia in the Amazon rainforest"],
-["A pikachu fine dining with a view to the Eiffel Tower"],
-["A mecha robot in a favela in expressionist style"],
-["an insect robot preparing a delicious meal"],
-["A digital Illustration of the Babel tower, 4k, detailed, trending in artstation, fantasy vivid colors"],
-["Cluttered house in the woods, anime, oil painting, high resolution, cottagecore, ghibli inspired, 4k"],
-["A beautiful mansion beside a waterfall in the woods, by josef thoma, matte painting, trending on artstation HQ"],
-["portrait photo of a asia old warrior chief, tribal panther make up, blue on red, side profile, looking away, serious eyes"]]
--- a/web/models/stable_diffusion/schedulers.py
+++ b/web/models/stable_diffusion/schedulers.py
@@ -1,133 +0,0 @@
-import sys
-import numpy as np
-from typing import List, Optional, Tuple, Union
-from diffusers import (
-    LMSDiscreteScheduler,
-    PNDMScheduler,
-    DDIMScheduler,
-    DPMSolverMultistepScheduler,
-    EulerDiscreteScheduler,
-)
-from diffusers.configuration_utils import register_to_config
-from models.stable_diffusion.utils import compile_through_fx, get_shark_model
-from models.stable_diffusion.stable_args import args
-import torch
-
-SCHEDULER_BUCKET = "gs://shark_tank/stable_diffusion/schedulers"
-
-model_input = {
-    "euler": {
-        "latent": torch.randn(1, 4, 64, 64),
-        "output": torch.randn(1, 4, 64, 64),
-        "sigma": torch.tensor(1).to(torch.float32),
-        "dt": torch.tensor(1).to(torch.float32),
-    },
-}
-
-
-class SharkEulerDiscreteScheduler(EulerDiscreteScheduler):
-    """Euler discrete scheduler whose per-step math runs in compiled modules.
-
-    ``compile()`` must be called before ``scale_model_input()`` or ``step()``:
-    it creates ``self.scaling_model`` and ``self.step_model``, which those two
-    methods invoke.
-    """
-
-    @register_to_config
-    def __init__(
-        self,
-        num_train_timesteps: int = 1000,
-        beta_start: float = 0.0001,
-        beta_end: float = 0.02,
-        beta_schedule: str = "linear",
-        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
-        prediction_type: str = "epsilon",
-    ):
-        """Forward all arguments, positionally, to ``EulerDiscreteScheduler``."""
-        super().__init__(
-            num_train_timesteps,
-            beta_start,
-            beta_end,
-            beta_schedule,
-            trained_betas,
-            prediction_type,
-        )
-
-    def compile(self):
-        """Create ``self.scaling_model`` and ``self.step_model``.
-
-        With ``args.import_mlir`` set, two small torch modules are compiled
-        locally through ``compile_through_fx`` using the example tensors in
-        ``model_input["euler"]``; otherwise the modules are fetched with
-        ``get_shark_model`` from ``SCHEDULER_BUCKET``. In both cases the model
-        name is suffixed with ``args.precision``.
-        """
-        example_latent = model_input["euler"]["latent"]
-        example_output = model_input["euler"]["output"]
-        # Only latent/output are converted to half precision; sigma and dt
-        # stay float32 for both precisions.
-        if args.precision == "fp16":
-            example_latent = example_latent.half()
-            example_output = example_output.half()
-        example_sigma = model_input["euler"]["sigma"]
-        example_dt = model_input["euler"]["dt"]
-
-        class ScalingModel(torch.nn.Module):
-            """Divides the latent by ``sqrt(sigma**2 + 1)``."""
-
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, latent, sigma):
-                return latent / ((sigma**2 + 1) ** 0.5)
-
-        class SchedulerStepModel(torch.nn.Module):
-            """Single Euler update: ``latent + derivative * dt``."""
-
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, noise_pred, sigma, latent, dt):
-                # Predicted clean sample, then the derivative used for the
-                # Euler step towards the next sigma.
-                pred_original_sample = latent - sigma * noise_pred
-                derivative = (latent - pred_original_sample) / sigma
-                return latent + derivative * dt
-
-        # Extra compiler flags shared by both modules.
-        iree_flags = []
-        if len(args.iree_vulkan_target_triple) > 0:
-            iree_flags.append(
-                f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
-            )
-        # Disable bindings fusion to work with moltenVK.
-        if sys.platform == "darwin":
-            iree_flags.append("-iree-stream-fuse-binding=false")
-
-        if args.import_mlir:
-            scaling_model = ScalingModel()
-            self.scaling_model = compile_through_fx(
-                scaling_model,
-                (example_latent, example_sigma),
-                model_name="euler_scale_model_input_" + args.precision,
-                extra_args=iree_flags,
-            )
-
-            step_model = SchedulerStepModel()
-            self.step_model = compile_through_fx(
-                step_model,
-                (example_output, example_sigma, example_latent, example_dt),
-                model_name="euler_step_" + args.precision,
-                extra_args=iree_flags,
-            )
-        else:
-            self.scaling_model = get_shark_model(
-                SCHEDULER_BUCKET,
-                "euler_scale_model_input_" + args.precision,
-                iree_flags,
-            )
-            self.step_model = get_shark_model(
-                SCHEDULER_BUCKET, "euler_step_" + args.precision, iree_flags
-            )
-
-    def scale_model_input(self, sample, timestep):
-        """Run the compiled scaling module on ``sample`` for ``timestep``.
-
-        The sigma is looked up at the index where ``self.timesteps`` equals
-        ``timestep``. The module is called with ``send_to_host=False``.
-        """
-        # .item() expects exactly one matching timestep.
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        return self.scaling_model(
-            "forward",
-            (
-                sample,
-                sigma,
-            ),
-            send_to_host=False,
-        )
-
-    def step(self, noise_pred, timestep, latent):
-        """Run the compiled step module and return its result.
-
-        ``dt`` is the difference between the next sigma and the current one.
-        The module is called with ``send_to_host=False``.
-        """
-        # .item() expects exactly one matching timestep.
-        step_index = (self.timesteps == timestep).nonzero().item()
-        sigma = self.sigmas[step_index]
-        # NOTE(review): assumes self.sigmas has an entry after step_index for
-        # every valid timestep -- confirm against the parent scheduler.
-        dt = self.sigmas[step_index + 1] - sigma
-        return self.step_model(
-            "forward",
-            (
-                noise_pred,
-                sigma,
-                latent,
-                dt,
-            ),
-            send_to_host=False,
-        )
--- a/web/models/stable_diffusion/stable_args.py
+++ b/web/models/stable_diffusion/stable_args.py
@@ -1,270 +0,0 @@
-import argparse
-
-p = argparse.ArgumentParser(
-    description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
-)
-
-##############################################################################
-### Stable Diffusion Params
-##############################################################################
-
-p.add_argument(
-    "--prompts",
-    nargs="+",
-    default=["cyberpunk forest by Salvador Dali"],
-    help="text of which images to be generated.",
-)
-
-p.add_argument(
-    "--negative-prompts",
-    nargs="+",
-    default=[""],
-    help="text you don't want to see in the generated image.",
-)
-
-p.add_argument(
-    "--steps",
-    type=int,
-    default=50,
-    help="the no. of steps to do the sampling.",
-)
-
-p.add_argument(
-    "--seed",
-    type=int,
-    default=42,
-    help="the seed to use.",
-)
-
-p.add_argument(
-    "--guidance_scale",
-    type=float,
-    default=7.5,
-    help="the value to be used for guidance scaling.",
-)
-
-p.add_argument(
-    "--max_length",
-    type=int,
-    default=64,
-    help="max length of the tokenizer output, options are 64 and 77.",
-)
-
-##############################################################################
-### Model Config and Usage Params
-##############################################################################
-
-p.add_argument(
-    "--device", type=str, default="vulkan", help="device to run the model."
-)
-
-p.add_argument(
-    "--version",
-    type=str,
-    default="v2_1base",
-    help="Specify version of stable diffusion model",
-)
-
-p.add_argument(
-    "--precision", type=str, default="fp16", help="precision to run the model."
-)
-
-p.add_argument(
-    "--import_mlir",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="imports the model from torch module to shark_module otherwise downloads the model from shark_tank.",
-)
-
-p.add_argument(
-    "--load_vmfb",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="attempts to load the model from a precompiled flatbuffer and compiles + saves it if not found.",
-)
-
-p.add_argument(
-    "--save_vmfb",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="saves the compiled flatbuffer to the local directory",
-)
-
-p.add_argument(
-    "--use_tuned",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="Download and use the tuned version of the model if available",
-)
-
-p.add_argument(
-    "--use_base_vae",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Do conversion from the VAE output to pixel space on cpu.",
-)
-
-p.add_argument(
-    "--variant",
-    default="stablediffusion",
-    help="We now support multiple vairants of SD finetuned for different dataset. you can use the following anythingv3, ...",  # TODO add more once supported
-)
-
-p.add_argument(
-    "--scheduler",
-    type=str,
-    default="SharkEulerDiscrete",
-    help="other supported schedulers are [PNDM, DDIM, LMSDiscrete, EulerDiscrete, DPMSolverMultistep]",
-)
-
-p.add_argument(
-    "--output_img_format",
-    type=str,
-    default="png",
-    help="specify the format in which output image is save. Supported options: jpg / png",
-)
-
-p.add_argument(
-    "--output_dir",
-    type=str,
-    default=None,
-    help="Directory path to save the output images and json",
-)
-
-##############################################################################
-### IREE - Vulkan supported flags
-##############################################################################
-
-p.add_argument(
-    "--iree-vulkan-target-triple",
-    type=str,
-    default="",
-    help="Specify target triple for vulkan",
-)
-
-p.add_argument(
-    "--vulkan_debug_utils",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="Profiles vulkan device and collects the .rdc info",
-)
-
-p.add_argument(
-    "--vulkan_large_heap_block_size",
-    default="4147483648",
-    help="flag for setting VMA preferredLargeHeapBlockSize for vulkan device, default is 4G",
-)
-
-p.add_argument(
-    "--vulkan_validation_layers",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for disabling vulkan validation layers when benchmarking",
-)
-
-##############################################################################
-### Misc. Debug and Optimization flags
-##############################################################################
-
-p.add_argument(
-    "--use_compiled_scheduler",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="use the default scheduler precompiled into the model if available",
-)
-
-p.add_argument(
-    "--local_tank_cache",
-    default="",
-    help="Specify where to save downloaded shark_tank artifacts. If this is not set, the default is ~/.local/shark_tank/.",
-)
-
-p.add_argument(
-    "--dump_isa",
-    default=False,
-    action="store_true",
-    help="When enabled call amdllpc to get ISA dumps. use with dispatch benchmarks.",
-)
-
-p.add_argument(
-    "--dispatch_benchmarks",
-    default=None,
-    help='dispatches to return benchamrk data on.  use "All" for all, and None for none.',
-)
-
-p.add_argument(
-    "--dispatch_benchmarks_dir",
-    default="temp_dispatch_benchmarks",
-    help='directory where you want to store dispatch data generated with "--dispatch_benchmarks"',
-)
-
-p.add_argument(
-    "--enable_rgp",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for inserting debug frames between iterations for use with rgp.",
-)
-
-p.add_argument(
-    "--hide_steps",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for hiding the details of iteration/sec for each step.",
-)
-
-p.add_argument(
-    "--warmup_count",
-    type=int,
-    default=0,
-    help="flag setting warmup count for clip and vae [>= 0].",
-)
-
-p.add_argument(
-    "--clear_all",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag to clear all mlir and vmfb from common locations. Recompiling will take several minutes",
-)
-
-p.add_argument(
-    "--save_metadata_to_json",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save a generation information json file with the image.",
-)
-
-p.add_argument(
-    "--write_metadata_to_png",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for whether or not to save generation information in PNG chunk text to generated images.",
-)
-
-##############################################################################
-### Web UI flags
-##############################################################################
-
-p.add_argument(
-    "--progress_bar",
-    default=True,
-    action=argparse.BooleanOptionalAction,
-    help="flag for removing the pregress bar animation during image generation",
-)
-
-p.add_argument(
-    "--share",
-    default=False,
-    action=argparse.BooleanOptionalAction,
-    help="flag for generating a public URL",
-)
-
-p.add_argument(
-    "--server_port",
-    type=int,
-    default=8080,
-    help="flag for setting server port",
-)
-
-##############################################################################
-
-args = p.parse_args()
--- a/web/models/stable_diffusion/utils.py
+++ b/web/models/stable_diffusion/utils.py
@@ -1,241 +0,0 @@
-import os
-import torch
-from shark.shark_inference import SharkInference
-from models.stable_diffusion.stable_args import args
-from shark.shark_importer import import_with_fx
-from shark.iree_utils.vulkan_utils import (
-    set_iree_vulkan_runtime_flags,
-    get_vulkan_target_triple,
-)
-
-
-def _compile_module(shark_module, model_name, extra_args=[]):
-    if args.load_vmfb or args.save_vmfb:
-        device = (
-            args.device
-            if "://" not in args.device
-            else "-".join(args.device.split("://"))
-        )
-        extended_name = "{}_{}".format(model_name, device)
-        vmfb_path = os.path.join(os.getcwd(), extended_name + ".vmfb")
-        if args.load_vmfb and os.path.isfile(vmfb_path) and not args.save_vmfb:
-            print(f"loading existing vmfb from: {vmfb_path}")
-            shark_module.load_module(vmfb_path, extra_args=extra_args)
-        else:
-            if args.save_vmfb:
-                print("Saving to {}".format(vmfb_path))
-            else:
-                print(
-                    "No vmfb found. Compiling and saving to {}".format(
-                        vmfb_path
-                    )
-                )
-            path = shark_module.save_module(
-                os.getcwd(), extended_name, extra_args
-            )
-            shark_module.load_module(path, extra_args=extra_args)
-    else:
-        shark_module.compile(extra_args)
-    return shark_module
-
-
-# Downloads the model from shark_tank and returns the shark_module.
-def get_shark_model(tank_url, model_name, extra_args=[]):
-    from shark.shark_downloader import download_model
-    from shark.parser import shark_args
-
-    # Set local shark_tank cache directory.
-    shark_args.local_tank_cache = args.local_tank_cache
-
-    mlir_model, func_name, inputs, golden_out = download_model(
-        model_name,
-        tank_url=tank_url,
-        frontend="torch",
-    )
-    shark_module = SharkInference(
-        mlir_model, device=args.device, mlir_dialect="linalg"
-    )
-    return _compile_module(shark_module, model_name, extra_args)
-
-
-# Converts the torch-module into a shark_module.
-def compile_through_fx(model, inputs, model_name, extra_args=[]):
-    mlir_module, func_name = import_with_fx(model, inputs)
-
-    shark_module = SharkInference(
-        mlir_module,
-        device=args.device,
-        mlir_dialect="linalg",
-    )
-
-    return _compile_module(shark_module, model_name, extra_args)
-
-
-def set_iree_runtime_flags():
-    vulkan_runtime_flags = [
-        f"--vulkan_large_heap_block_size={args.vulkan_large_heap_block_size}",
-        f"--vulkan_validation_layers={'true' if args.vulkan_validation_layers else 'false'}",
-    ]
-    if args.enable_rgp:
-        vulkan_runtime_flags += [
-            f"--enable_rgp=true",
-            f"--vulkan_debug_utils=true",
-        ]
-    set_iree_vulkan_runtime_flags(flags=vulkan_runtime_flags)
-
-
-def get_all_devices(driver_name):
-    """
-    Inputs: driver_name
-    Returns a list of all the available devices for a given driver sorted by
-    the iree path names of the device as in --list_devices option in iree.
-    """
-    from iree.runtime import get_driver
-
-    driver = get_driver(driver_name)
-    device_list_src = driver.query_available_devices()
-    device_list_src.sort(key=lambda d: d["path"])
-    return device_list_src
-
-
-def get_device_mapping(driver, key_combination=3):
-    """This method ensures consistent device ordering when choosing
-    specific devices for execution
-    Args:
-        driver (str): execution driver (vulkan, cuda, rocm, etc)
-        key_combination (int, optional): choice for mapping value for device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Returns:
-        dict: map to possible device names user can input mapped to desired combination of name/path.
-    """
-    from shark.iree_utils._common import iree_device_map
-
-    driver = iree_device_map(driver)
-    device_list = get_all_devices(driver)
-    device_map = dict()
-
-    def get_output_value(dev_dict):
-        if key_combination == 1:
-            return f"{driver}://{dev_dict['path']}"
-        if key_combination == 2:
-            return dev_dict["name"]
-        if key_combination == 3:
-            return (dev_dict["name"], f"{driver}://{dev_dict['path']}")
-
-    # mapping driver name to default device (driver://0)
-    device_map[f"{driver}"] = get_output_value(device_list[0])
-    for i, device in enumerate(device_list):
-        # mapping with index
-        device_map[f"{driver}://{i}"] = get_output_value(device)
-        # mapping with full path
-        device_map[f"{driver}://{device['path']}"] = get_output_value(device)
-    return device_map
-
-
-def map_device_to_name_path(device, key_combination=3):
-    """Gives the appropriate device data (supported name/path) for user selected execution device
-    Args:
-        device (str): user
-        key_combination (int, optional): choice for mapping value for device name.
-        1 : path
-        2 : name
-        3 : (name, path)
-        Defaults to 3.
-    Raises:
-        ValueError:
-    Returns:
-        str / tuple: returns the mapping str or tuple of mapping str for the device depending on key_combination value
-    """
-    driver = device.split("://")[0]
-    device_map = get_device_mapping(driver, key_combination)
-    try:
-        device_mapping = device_map[device]
-    except KeyError:
-        raise ValueError(f"Device '{device}' is not a valid device.")
-    return device_mapping
-
-
-def set_init_device_flags():
-    """Normalise the global ``args`` in place, based on the selected device.
-
-    Rewrites ``args.device``, may fill in ``args.iree_vulkan_target_triple``,
-    overrides ``args.max_length`` for some versions/variants, and clears
-    ``args.use_tuned`` for configurations without tuned models. Statement
-    order matters: the tuned-model check reads the device and target triple
-    set earlier in this function.
-    """
-    if "vulkan" in args.device:
-        # set runtime flags for vulkan.
-        set_iree_runtime_flags()
-
-        # set triple flag to avoid multiple calls to get_vulkan_triple_flag
-        # args.device is replaced by the resolved "driver://path" string.
-        device_name, args.device = map_device_to_name_path(args.device)
-        # A triple supplied by the user is never overridden.
-        if not args.iree_vulkan_target_triple:
-            triple = get_vulkan_target_triple(device_name)
-            if triple is not None:
-                args.iree_vulkan_target_triple = triple
-        print(
-            f"Found device {device_name}. Using target triple {args.iree_vulkan_target_triple}."
-        )
-    elif "cuda" in args.device:
-        args.device = "cuda"
-    elif "cpu" in args.device:
-        args.device = "cpu"
-
-    # set max_length based on availability.
-    if args.version == "v1_4":
-        args.max_length = 77
-    elif args.variant in ["anythingv3", "analogdiffusion", "dreamlike"]:
-        args.max_length = 77
-    elif args.variant == "openjourney":
-        args.max_length = 64
-
-    # use tuned models only in the case of stablediffusion/fp16 and rdna3 cards.
-    # NOTE(review): the message below is printed even when the user already
-    # passed --no-use_tuned.
-    if (
-        args.variant in ["openjourney", "dreamlike"]
-        or args.precision != "fp16"
-        or args.version == "v1_4"
-        or "vulkan" not in args.device
-        or "rdna3" not in args.iree_vulkan_target_triple
-    ):
-        args.use_tuned = False
-        print("Tuned models are currently not supported for this setting.")
-
-    # The base VAE is only combined with tuned models for the default variant.
-    elif args.use_base_vae and args.variant != "stablediffusion":
-        args.use_tuned = False
-        print("Tuned models are currently not supported for this setting.")
-
-    if args.use_tuned:
-        print("Using tuned models for stablediffusion/fp16 and rdna3 card.")
-
-
-# Utility to get list of devices available.
-def get_available_devices():
-    def get_devices_by_name(driver_name):
-        from shark.iree_utils._common import iree_device_map
-
-        device_list = []
-        try:
-            driver_name = iree_device_map(driver_name)
-            device_list_dict = get_all_devices(driver_name)
-            print(f"{driver_name} devices are available.")
-        except:
-            print(f"{driver_name} devices are not available.")
-        else:
-            for i, device in enumerate(device_list_dict):
-                device_list.append(f"{device['name']} => {driver_name}://{i}")
-        return device_list
-
-    set_iree_runtime_flags()
-
-    available_devices = []
-    vulkan_devices = get_devices_by_name("vulkan")
-    available_devices.extend(vulkan_devices)
-    cuda_devices = get_devices_by_name("cuda")
-    available_devices.extend(cuda_devices)
-    #  available_devices.append("cpu")
-    return available_devices
-
-
-def disk_space_check(path, lim=20):
-    from shutil import disk_usage
-
-    du = disk_usage(path)
-    free = du.free / (1024 * 1024 * 1024)
-    if free <= lim:
-        print(f"[WARNING] Only {free:.2f}GB space available in {path}.")
--- a/web/shark_sd.spec
+++ b/web/shark_sd.spec
@@ -1,78 +0,0 @@
-# -*- mode: python ; coding: utf-8 -*-
-from PyInstaller.utils.hooks import collect_data_files
-from PyInstaller.utils.hooks import copy_metadata
-
-import sys ; sys.setrecursionlimit(sys.getrecursionlimit() * 5)
-
-datas = []
-datas += collect_data_files('torch')
-datas += copy_metadata('torch')
-datas += copy_metadata('tqdm')
-datas += copy_metadata('regex')
-datas += copy_metadata('requests')
-datas += copy_metadata('packaging')
-datas += copy_metadata('filelock')
-datas += copy_metadata('numpy')
-datas += copy_metadata('tokenizers')
-datas += copy_metadata('importlib_metadata')
-datas += copy_metadata('torchvision')
-datas += copy_metadata('torch-mlir')
-datas += copy_metadata('diffusers')
-datas += copy_metadata('transformers')
-datas += copy_metadata('omegaconf')
-datas += copy_metadata('safetensors')
-datas += collect_data_files('gradio')
-datas += collect_data_files('iree')
-datas += collect_data_files('google-cloud-storage')
-datas += collect_data_files('shark')
-datas += [
-         ( 'models/stable_diffusion/resources/prompts.json', 'resources' ),
-         ( 'models/stable_diffusion/resources/model_db.json', 'resources' ),
-         ( 'models/stable_diffusion/resources/model_config.json', 'resources' ),
-         ( 'models/stable_diffusion/logos/*', 'logos' )
-         ]
-datas += [('demo.css', '.')]
-
-binaries = []
-
-block_cipher = None
-
-
-# Analysis / PYZ / EXE are not imported in this file; presumably PyInstaller
-# injects them when it executes the spec -- TODO confirm.
-# Analyse index.py, bundling the data files collected above.
-a = Analysis(
-    ['index.py'],
-    pathex=['.'],
-    binaries=binaries,
-    datas=datas,
-    # Modules named explicitly in addition to whatever the analysis finds.
-    hiddenimports=['shark', 'shark.*', 'shark.shark_inference', 'shark_inference', 'iree.tools.core', 'gradio'],
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=[],
-    excludes=[],
-    win_no_prefer_redirects=False,
-    win_private_assemblies=False,
-    cipher=block_cipher,
-    noarchive=False,
-)
-# Archive built from the pure-Python modules of the analysis.
-pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
-
-# Executable named 'shark_sd'. Scripts, binaries, zipfiles and datas are all
-# passed to EXE directly.
-exe = EXE(
-    pyz,
-    a.scripts,
-    a.binaries,
-    a.zipfiles,
-    a.datas,
-    [],
-    name='shark_sd',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=True,
-    upx_exclude=[],
-    runtime_tmpdir=None,
-    # Console application (console=True).
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-)