pin diffusers to e47459c (#1279 )

[SD] Add weight emphasis to prompts encoder (#1276 )
update model db to reflect changes (#1277 )
2026-04-20 03:00:34 -04:00 · 2023-04-04 18:29:21 -07:00 · 2023-04-04 09:47:04 -07:00 · 2023-04-04 11:46:55 -05:00 · 2023-04-03 22:30:09 -05:00 · 2023-04-03 18:36:23 -07:00
16 changed files with 819 additions and 399 deletions
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_img2img.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_img2img.py
@@ -154,8 +154,8 @@ class Image2ImagePipeline(StableDiffusionPipeline):
            seed = randint(uint32_min, uint32_max)
        generator = torch.manual_seed(seed)

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # guidance scale as a float32 tensor.
        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_inpaint.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_inpaint.py
@@ -406,8 +406,8 @@ class InpaintPipeline(StableDiffusionPipeline):
            dtype=dtype,
        )

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # guidance scale as a float32 tensor.
        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_outpaint.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_outpaint.py
@@ -407,8 +407,8 @@ class OutpaintPipeline(StableDiffusionPipeline):
            dtype=dtype,
        )

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # guidance scale as a float32 tensor.
        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_stencil.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_stencil.py
@@ -228,8 +228,8 @@ class StencilPipeline(StableDiffusionPipeline):
            seed = randint(uint32_min, uint32_max)
        generator = torch.manual_seed(seed)

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # guidance scale as a float32 tensor.
        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_txt2img.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_txt2img.py
@@ -1,5 +1,4 @@
 import torch
-from tqdm.auto import tqdm
 import numpy as np
 from random import randint
 from transformers import CLIPTokenizer
@@ -111,8 +110,8 @@ class Text2ImagePipeline(StableDiffusionPipeline):
            dtype=dtype,
        )

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # guidance scale as a float32 tensor.
        guidance_scale = torch.tensor(guidance_scale).to(torch.float32)
@@ -130,7 +129,7 @@ class Text2ImagePipeline(StableDiffusionPipeline):
        # Img latents -> PIL images
        all_imgs = []
        self.load_vae()
-        for i in tqdm(range(0, latents.shape[0], batch_size)):
+        for i in range(0, latents.shape[0], batch_size):
            imgs = self.decode_latents(
                latents=latents[i : i + batch_size],
                use_base_vae=use_base_vae,
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_upscaler.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_upscaler.py
@@ -255,8 +255,8 @@ class UpscalerPipeline(StableDiffusionPipeline):
            seed = randint(uint32_min, uint32_max)
        generator = torch.manual_seed(seed)

-        # Get text embeddings from prompts
-        text_embeddings = self.encode_prompts(prompts, neg_prompts, max_length)
+        # Get text embeddings with weight emphasis from prompts
+        text_embeddings = self.encode_prompts_weight(prompts, neg_prompts)

        # 4. Preprocess image
        image = preprocess(image).to(dtype)
--- a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py
+++ b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py
@@ -328,3 +328,503 @@ class StableDiffusionPipeline:
            )

        return cls(scheduler, sd_model, import_mlir, use_lora, ondemand)
+
+    # #####################################################
+    # Implements text embeddings with weights from prompts
+    # https://huggingface.co/AlanB/lpw_stable_diffusion_mod
+    # #####################################################
+    def encode_prompts_weight(
+        self,
+        prompt,
+        negative_prompt,
+        do_classifier_free_guidance=True,
+        max_embeddings_multiples=1,
+        num_images_per_prompt=1,
+    ):
+        r"""
+        Turn prompt(s) with weight-emphasis markup into text embeddings.
+        Args:
+            prompt (`str` or `List[str]`):
+                prompt or prompts to be encoded
+            negative_prompt (`str` or `List[str]`, may be `None`):
+                The prompt or prompts not to guide the image generation.
+                `None` is replaced by one empty string per prompt; a single
+                string is repeated once per prompt.
+            do_classifier_free_guidance (`bool`, defaults to `True`):
+                whether to use classifier free guidance or not,
+                SHARK: must be set to True as we always expect neg embeddings
+            max_embeddings_multiples (`int`, *optional*, defaults to `1`):
+                The max multiple length of prompt embeddings compared to the max output length of text encoder.
+                SHARK: max_embeddings_multiples>1 produce a tensor shape error (defaulted to 1)
+            num_images_per_prompt (`int`, defaults to `1`):
+                SHARK: accepted for interface compatibility but not used
+        Returns:
+            The embeddings converted with `.numpy()`. With classifier free
+            guidance the negative embeddings are concatenated in front of
+            the prompt embeddings.
+        Raises:
+            ValueError: if the number of negative prompts differs from the
+                number of prompts.
+        Side effects:
+            Calls `self.load_clip()` and appends the elapsed time to `self.log`.
+        """
+
+        # SHARK: make sure clip is loaded before the timer starts
+        self.load_clip()
+        clip_inf_start = time.time()
+
+        if isinstance(prompt, list):
+            batch_size = len(prompt)
+        else:
+            batch_size = 1
+
+        # Bring the negative prompt to one entry per prompt.
+        if negative_prompt is None:
+            negative_prompt = [""] * batch_size
+        elif isinstance(negative_prompt, str):
+            negative_prompt = [negative_prompt] * batch_size
+        if len(negative_prompt) != batch_size:
+            raise ValueError(
+                f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+                f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+                " the batch size of `prompt`."
+            )
+
+        # Without guidance no unconditional prompt is encoded at all.
+        uncond_prompt = None
+        if do_classifier_free_guidance:
+            uncond_prompt = negative_prompt
+
+        text_embeddings, uncond_embeddings = get_weighted_text_embeddings(
+            pipe=self,
+            prompt=prompt,
+            uncond_prompt=uncond_prompt,
+            max_embeddings_multiples=max_embeddings_multiples,
+        )
+
+        # SHARK: num_images_per_prompt is not applied, so the embeddings are
+        # used as returned (no repeat/view per image).
+        if do_classifier_free_guidance:
+            text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+
+        # SHARK: Report clip inference time (clip is left loaded)
+        clip_inf_time = (time.time() - clip_inf_start) * 1000
+        self.log += f"\nClip Inference time (ms) = {clip_inf_time:.3f}"
+
+        return text_embeddings.numpy()
+
+
+from typing import List, Optional, Union
+import re
+
+# Tokenizer for the prompt-attention syntax consumed by
+# parse_prompt_attention. Compiled in verbose mode (re.X), so the line
+# breaks inside the pattern are not part of it. Alternatives, in order:
+#   escaped bracket (\( \) \[ \]), escaped backslash, lone backslash,
+#   opening "(" or "[", an explicit weight ":<number>)" whose number is
+#   captured as group 1, closing ")" or "]", a run of characters that are
+#   not backslash/bracket/colon, and finally a lone ":".
+re_attention = re.compile(
+    r"""
+\\\(|
+\\\)|
+\\\[|
+\\]|
+\\\\|
+\\|
+\(|
+\[|
+:([+-]?[.\d]+)\)|
+\)|
+]|
+[^\\()\[\]:]+|
+:
+""",
+    re.X,
+)
+
+
+def parse_prompt_attention(text):
+    r"""
+    Parses a string with attention tokens and returns a list of pairs: text and its associated weight.
+
+    Accepted tokens are:
+      (abc) - increases attention to abc by a multiplier of 1.1
+      (abc:3.12) - increases attention to abc by a multiplier of 3.12
+      [abc] - decreases attention to abc (multiplies its weight by 1 / 1.1)
+      \( - literal character '('
+      \[ - literal character '['
+      \) - literal character ')'
+      \] - literal character ']'
+      \\ - literal character '\'
+      anything else - just text
+
+    Brackets that are never closed are applied as if they were closed at
+    the end of the text. Adjacent pieces that end up with the same weight
+    are merged into one piece.
+
+    Args:
+        text (`str`): the prompt to parse.
+    Returns:
+        A list of ``[text, weight]`` lists; ``[["", 1.0]]`` when nothing
+        was parsed.
+
+    >>> parse_prompt_attention('normal text')
+    [['normal text', 1.0]]
+    >>> parse_prompt_attention('an (important) word')
+    [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
+    >>> parse_prompt_attention('(unbalanced')
+    [['unbalanced', 1.1]]
+    >>> parse_prompt_attention(r'\(literal\]')
+    [['(literal]', 1.0]]
+    >>> parse_prompt_attention('(unnecessary)(parens)')
+    [['unnecessaryparens', 1.1]]
+    >>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).')
+    [['a ', 1.0],
+     ['house', 1.5730000000000004],
+     [' ', 1.1],
+     ['on', 1.0],
+     [' a ', 1.1],
+     ['hill', 0.55],
+     [', sun, ', 1.1],
+     ['sky', 1.4641000000000006],
+     ['.', 1.1]]
+    """
+
+    res = []
+    # Stacks of positions in `res` at which a still-open bracket started.
+    round_brackets = []
+    square_brackets = []
+
+    round_bracket_multiplier = 1.1
+    square_bracket_multiplier = 1 / 1.1
+
+    def multiply_range(start_position, multiplier):
+        # Scale every piece appended since the bracket was opened.
+        for p in range(start_position, len(res)):
+            res[p][1] *= multiplier
+
+    for m in re_attention.finditer(text):
+        # Use a dedicated name so the `text` parameter is not rebound.
+        token = m.group(0)
+        weight = m.group(1)
+
+        if token.startswith("\\"):
+            # Escaped character: keep it literally, without the backslash.
+            res.append([token[1:], 1.0])
+        elif token == "(":
+            round_brackets.append(len(res))
+        elif token == "[":
+            square_brackets.append(len(res))
+        elif weight is not None and len(round_brackets) > 0:
+            # ":<number>)" closes the innermost "(" with an explicit weight.
+            multiply_range(round_brackets.pop(), float(weight))
+        elif token == ")" and len(round_brackets) > 0:
+            multiply_range(round_brackets.pop(), round_bracket_multiplier)
+        elif token == "]" and len(square_brackets) > 0:
+            multiply_range(square_brackets.pop(), square_bracket_multiplier)
+        else:
+            # Plain text, or a closing token with nothing open to match.
+            res.append([token, 1.0])
+
+    # Unbalanced openers behave as if closed at the end of the prompt.
+    for pos in round_brackets:
+        multiply_range(pos, round_bracket_multiplier)
+
+    for pos in square_brackets:
+        multiply_range(pos, square_bracket_multiplier)
+
+    if len(res) == 0:
+        return [["", 1.0]]
+
+    # Merge runs of identical weights in one linear pass.
+    merged = [res[0]]
+    for piece, piece_weight in res[1:]:
+        if merged[-1][1] == piece_weight:
+            merged[-1][0] += piece
+        else:
+            merged.append([piece, piece_weight])
+
+    return merged
+
+
+def get_prompts_with_weights(
+    pipe: StableDiffusionPipeline, prompt: List[str], max_length: int
+):
+    r"""
+    Tokenize each prompt in `prompt` and pair every token with a weight.
+
+    Each prompt is split by `parse_prompt_attention`, each piece is run
+    through `pipe.tokenizer`, and the first and last ids of every
+    tokenizer result are dropped, so no starting, ending or padding token
+    is included. Token lists longer than `max_length` are cut to
+    `max_length` and a notice is printed once.
+
+    Returns:
+        `(tokens, weights)`: two lists with one entry per prompt, where
+        `weights[i][j]` is the weight of `tokens[i][j]`.
+    """
+    all_tokens, all_weights = [], []
+    any_truncated = False
+    for entry in prompt:
+        ids = []
+        id_weights = []
+        for fragment, fragment_weight in parse_prompt_attention(entry):
+            # keep only the ids between the starting and the ending token
+            fragment_ids = pipe.tokenizer(fragment).input_ids[1:-1]
+            ids.extend(fragment_ids)
+            # every id of the fragment inherits the fragment's weight
+            id_weights.extend([fragment_weight] * len(fragment_ids))
+            # no point tokenizing further once the limit is exceeded
+            if len(ids) > max_length:
+                break
+        if len(ids) > max_length:
+            any_truncated = True
+            ids = ids[:max_length]
+            id_weights = id_weights[:max_length]
+        all_tokens.append(ids)
+        all_weights.append(id_weights)
+    if any_truncated:
+        print(
+            "Prompt was truncated. Try to shorten the prompt or increase max_embeddings_multiples"
+        )
+    return all_tokens, all_weights
+
+
+def pad_tokens_and_weights(
+    tokens,
+    weights,
+    max_length,
+    bos,
+    eos,
+    no_boseos_middle=True,
+    chunk_length=77,
+):
+    r"""
+    Frame every token list with `bos`/`eos` and pad the weights with 1.0.
+
+    Each entry of `tokens` becomes `[bos] + entry`, followed by `eos`
+    repeated up to `max_length`. With `no_boseos_middle` the weights get
+    a 1.0 in front and 1.0 padding up to `max_length`; otherwise the
+    weights are cut into pieces of `chunk_length - 2`, each piece is
+    wrapped in a leading and trailing 1.0, and the result is padded with
+    1.0 to `chunk_length` times the number of chunks.
+
+    Both input lists are updated in place and also returned.
+    """
+    payload = chunk_length - 2
+    num_chunks = (max_length - 2) // payload
+    padded_weight_len = (
+        max_length if no_boseos_middle else num_chunks * chunk_length
+    )
+    for idx in range(len(tokens)):
+        eos_fill = [eos] * (max_length - 1 - len(tokens[idx]))
+        tokens[idx] = [bos] + tokens[idx] + eos_fill
+
+        row = weights[idx]
+        if no_boseos_middle:
+            weights[idx] = [1.0] + row + [1.0] * (max_length - 1 - len(row))
+            continue
+        if len(row) == 0:
+            weights[idx] = [1.0] * padded_weight_len
+            continue
+
+        framed = []
+        for chunk in range(num_chunks):
+            lo = chunk * payload
+            hi = min(len(row), (chunk + 1) * payload)
+            framed.append(1.0)  # starting token of this chunk
+            framed.extend(row[lo:hi])
+            framed.append(1.0)  # ending token of this chunk
+        framed.extend([1.0] * (padded_weight_len - len(framed)))
+        weights[idx] = framed
+
+    return tokens, weights
+
+
+def get_unweighted_text_embeddings(
+    pipe: StableDiffusionPipeline,
+    text_input: torch.Tensor,
+    chunk_length: int,
+    no_boseos_middle: Optional[bool] = True,
+):
+    """
+    Run the token ids in `text_input` through `pipe.text_encoder`.
+
+    When the length of tokens is a multiple of the capacity of the text encoder,
+    it should be split into chunks and sent to the text encoder individually.
+
+    Args:
+        pipe: object providing `text_encoder`, called here as
+            `pipe.text_encoder("forward", (tokens,))`.
+        text_input: token ids; `shape[1]` is used as the token length.
+        chunk_length: number of tokens per encoder call, including the
+            starting and the ending token.
+        no_boseos_middle: in the multi-chunk path, whether the starting/
+            ending positions between neighbouring chunks are dropped.
+    Returns:
+        A torch tensor built from the encoder output with a new leading
+        dimension added via `[None, :]`.
+    """
+    # Number of chunks once the two framing tokens are discounted.
+    max_embeddings_multiples = (text_input.shape[1] - 2) // (chunk_length - 2)
+    if max_embeddings_multiples > 1:
+        # NOTE(review): encode_prompts_weight documents that
+        # max_embeddings_multiples>1 produces a tensor shape error, so this
+        # branch looks unverified under SHARK - confirm before relying on it.
+        text_embeddings = []
+        for i in range(max_embeddings_multiples):
+            # extract the i-th chunk
+            text_input_chunk = text_input[
+                :, i * (chunk_length - 2) : (i + 1) * (chunk_length - 2) + 2
+            ].clone()
+
+            # cover the head and the tail by the starting and the ending tokens
+            text_input_chunk[:, 0] = text_input[0, 0]
+            text_input_chunk[:, -1] = text_input[0, -1]
+            # text_embedding = pipe.text_encoder(text_input_chunk)[0]
+            # SHARK: duplicate the text_input as Shark runner expects tokens and neg tokens
+            formatted_text_input_chunk = torch.cat(
+                [text_input_chunk, text_input_chunk]
+            )
+            text_embedding = pipe.text_encoder(
+                "forward", (formatted_text_input_chunk,)
+            )[0]
+
+            # NOTE(review): the slices below index the second axis of
+            # `text_embedding`; whether that axis is the token axis depends
+            # on what the runner's `[0]` returns - TODO confirm.
+            if no_boseos_middle:
+                if i == 0:
+                    # discard the ending token
+                    text_embedding = text_embedding[:, :-1]
+                elif i == max_embeddings_multiples - 1:
+                    # discard the starting token
+                    text_embedding = text_embedding[:, 1:]
+                else:
+                    # discard both starting and ending tokens
+                    text_embedding = text_embedding[:, 1:-1]
+
+            text_embeddings.append(text_embedding)
+        # SHARK: Convert the result to tensor
+        # text_embeddings = torch.concat(text_embeddings, axis=1)
+        # NOTE(review): np.concatenate joins along its default axis here,
+        # whereas the commented-out torch call above used axis=1 - confirm
+        # this is intended.
+        text_embeddings_np = np.concatenate(np.array(text_embeddings))
+        text_embeddings = torch.from_numpy(text_embeddings_np)[None, :]
+    else:
+        # SHARK: duplicate the text_input as Shark runner expects tokens and neg tokens
+        # Convert the result to tensor
+        # text_embeddings = pipe.text_encoder(text_input)[0]
+        formatted_text_input = torch.cat([text_input, text_input])
+        text_embeddings = pipe.text_encoder(
+            "forward", (formatted_text_input,)
+        )[0]
+        # The encoder result is fed to torch.from_numpy, so it is presumably
+        # a numpy array - verify against the runner.
+        text_embeddings = torch.from_numpy(text_embeddings)[None, :]
+    return text_embeddings
+
+
+def get_weighted_text_embeddings(
+    pipe: StableDiffusionPipeline,
+    prompt: Union[str, List[str]],
+    uncond_prompt: Optional[Union[str, List[str]]] = None,
+    max_embeddings_multiples: Optional[int] = 3,
+    no_boseos_middle: Optional[bool] = False,
+    skip_parsing: Optional[bool] = False,
+    skip_weighting: Optional[bool] = False,
+):
+    r"""
+    Compute text embeddings for prompts that may carry bracket weights.
+
+    Prompts can be assigned with local weights using brackets. For example,
+    prompt 'A (very beautiful) masterpiece' highlights the words 'very beautiful',
+    and the embedding tokens corresponding to the words get multiplied by a constant, 1.1.
+    Also, to regularize the embedding, the weighted embedding is rescaled to preserve the original mean.
+    Args:
+        pipe (`StableDiffusionPipeline`):
+            Pipe to provide access to the tokenizer and the text encoder.
+        prompt (`str` or `List[str]`):
+            The prompt or prompts to guide the image generation.
+        uncond_prompt (`str` or `List[str]`):
+            The unconditional prompt or prompts to guide the image generation. If given,
+            its embeddings are computed the same way and returned as the second value.
+        max_embeddings_multiples (`int`, *optional*, defaults to `3`):
+            The max multiple length of prompt embeddings compared to the max output length of text encoder.
+        no_boseos_middle (`bool`, *optional*, defaults to `False`):
+            If the length of text token is multiples of the capacity of text encoder, whether reserve the starting and
+            ending token in each of the chunk in the middle.
+        skip_parsing (`bool`, *optional*, defaults to `False`):
+            Skip the parsing of brackets.
+        skip_weighting (`bool`, *optional*, defaults to `False`):
+            Skip the weighting. When the parsing is skipped, it is forced True.
+    Returns:
+        `(text_embeddings, uncond_embeddings)`; `uncond_embeddings` is `None`
+        when no `uncond_prompt` was given.
+    """
+    # Upper bound on tokens per prompt, including the two framing tokens.
+    max_length = (
+        pipe.tokenizer.model_max_length - 2
+    ) * max_embeddings_multiples + 2
+    if isinstance(prompt, str):
+        prompt = [prompt]
+
+    if not skip_parsing:
+        # Bracket-aware path: per-token weights come from the prompt markup.
+        prompt_tokens, prompt_weights = get_prompts_with_weights(
+            pipe, prompt, max_length - 2
+        )
+        if uncond_prompt is not None:
+            if isinstance(uncond_prompt, str):
+                uncond_prompt = [uncond_prompt]
+            uncond_tokens, uncond_weights = get_prompts_with_weights(
+                pipe, uncond_prompt, max_length - 2
+            )
+    else:
+        # Plain path: tokenize directly and give every token weight 1.0.
+        prompt_tokens = [
+            token[1:-1]
+            for token in pipe.tokenizer(
+                prompt, max_length=max_length, truncation=True
+            ).input_ids
+        ]
+        prompt_weights = [[1.0] * len(token) for token in prompt_tokens]
+        if uncond_prompt is not None:
+            if isinstance(uncond_prompt, str):
+                uncond_prompt = [uncond_prompt]
+            uncond_tokens = [
+                token[1:-1]
+                for token in pipe.tokenizer(
+                    uncond_prompt, max_length=max_length, truncation=True
+                ).input_ids
+            ]
+            uncond_weights = [[1.0] * len(token) for token in uncond_tokens]
+
+    # round up the longest length of tokens to a multiple of (model_max_length - 2)
+    max_length = max([len(token) for token in prompt_tokens])
+    if uncond_prompt is not None:
+        max_length = max(
+            max_length, max([len(token) for token in uncond_tokens])
+        )
+
+    # Use only as many chunks as the longest prompt needs, never more than
+    # requested and never fewer than one.
+    max_embeddings_multiples = min(
+        max_embeddings_multiples,
+        (max_length - 1) // (pipe.tokenizer.model_max_length - 2) + 1,
+    )
+    max_embeddings_multiples = max(1, max_embeddings_multiples)
+    max_length = (
+        pipe.tokenizer.model_max_length - 2
+    ) * max_embeddings_multiples + 2
+
+    # pad the length of tokens and weights
+    bos = pipe.tokenizer.bos_token_id
+    eos = pipe.tokenizer.eos_token_id
+    prompt_tokens, prompt_weights = pad_tokens_and_weights(
+        prompt_tokens,
+        prompt_weights,
+        max_length,
+        bos,
+        eos,
+        no_boseos_middle=no_boseos_middle,
+        chunk_length=pipe.tokenizer.model_max_length,
+    )
+    # SHARK: tensors are created on "cpu" instead of pipe.device
+    # prompt_tokens = torch.tensor(prompt_tokens, dtype=torch.long, device=pipe.device)
+    prompt_tokens = torch.tensor(prompt_tokens, dtype=torch.long, device="cpu")
+    if uncond_prompt is not None:
+        uncond_tokens, uncond_weights = pad_tokens_and_weights(
+            uncond_tokens,
+            uncond_weights,
+            max_length,
+            bos,
+            eos,
+            no_boseos_middle=no_boseos_middle,
+            chunk_length=pipe.tokenizer.model_max_length,
+        )
+        # uncond_tokens = torch.tensor(uncond_tokens, dtype=torch.long, device=pipe.device)
+        uncond_tokens = torch.tensor(
+            uncond_tokens, dtype=torch.long, device="cpu"
+        )
+
+    # get the embeddings
+    text_embeddings = get_unweighted_text_embeddings(
+        pipe,
+        prompt_tokens,
+        pipe.tokenizer.model_max_length,
+        no_boseos_middle=no_boseos_middle,
+    )
+    # prompt_weights = torch.tensor(prompt_weights, dtype=text_embeddings.dtype, device=pipe.device)
+    prompt_weights = torch.tensor(
+        prompt_weights, dtype=torch.float, device="cpu"
+    )
+    if uncond_prompt is not None:
+        uncond_embeddings = get_unweighted_text_embeddings(
+            pipe,
+            uncond_tokens,
+            pipe.tokenizer.model_max_length,
+            no_boseos_middle=no_boseos_middle,
+        )
+        # uncond_weights = torch.tensor(uncond_weights, dtype=uncond_embeddings.dtype, device=pipe.device)
+        uncond_weights = torch.tensor(
+            uncond_weights, dtype=torch.float, device="cpu"
+        )
+
+    # assign weights to the prompts and normalize in the sense of mean
+    # TODO: should we normalize by chunk or in a whole (current implementation)?
+    if (not skip_parsing) and (not skip_weighting):
+        # Mean over the last two axes before weighting, used to rescale the
+        # weighted embeddings back to the same mean (in-place updates).
+        previous_mean = (
+            text_embeddings.float()
+            .mean(axis=[-2, -1])
+            .to(text_embeddings.dtype)
+        )
+        text_embeddings *= prompt_weights.unsqueeze(-1)
+        current_mean = (
+            text_embeddings.float()
+            .mean(axis=[-2, -1])
+            .to(text_embeddings.dtype)
+        )
+        text_embeddings *= (
+            (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1)
+        )
+        if uncond_prompt is not None:
+            # Same weighting and mean-preserving rescale for the
+            # unconditional embeddings.
+            previous_mean = (
+                uncond_embeddings.float()
+                .mean(axis=[-2, -1])
+                .to(uncond_embeddings.dtype)
+            )
+            uncond_embeddings *= uncond_weights.unsqueeze(-1)
+            current_mean = (
+                uncond_embeddings.float()
+                .mean(axis=[-2, -1])
+                .to(uncond_embeddings.dtype)
+            )
+            uncond_embeddings *= (
+                (previous_mean / current_mean).unsqueeze(-1).unsqueeze(-1)
+            )
+
+    if uncond_prompt is not None:
+        return text_embeddings, uncond_embeddings
+    return text_embeddings, None
--- a/apps/stable_diffusion/src/utils/resources/model_db.json
+++ b/apps/stable_diffusion/src/utils/resources/model_db.json
@@ -1,85 +1,19 @@
 [
  {
-    "stablediffusion/untuned":"gs://shark_tank/sd_untuned",
-    "stablediffusion/tuned":"gs://shark_tank/sd_tuned",
-    "stablediffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "anythingv3/untuned":"gs://shark_tank/sd_anythingv3",
-    "anythingv3/tuned":"gs://shark_tank/sd_tuned",
-    "anythingv3/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "analogdiffusion/untuned":"gs://shark_tank/sd_analog_diffusion",
-    "analogdiffusion/tuned":"gs://shark_tank/sd_tuned",
-    "analogdiffusion/tuned/cuda":"gs://shark_tank/sd_tuned/cuda",
-    "openjourney/untuned":"gs://shark_tank/sd_openjourney",
-    "openjourney/tuned":"gs://shark_tank/sd_tuned",
-    "dreamlike/untuned":"gs://shark_tank/sd_dreamlike_diffusion"
+    "stablediffusion/untuned":"gs://shark_tank/nightly"
  },
  {
-    "stablediffusion/v1_4/unet/fp16/length_77/untuned":"unet_8dec_fp16",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned":"unet_8dec_fp16_tuned",
-    "stablediffusion/v1_4/unet/fp16/length_77/tuned/cuda":"unet_8dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/unet/fp32/length_77/untuned":"unet_1dec_fp32",
-    "stablediffusion/v1_4/unet/fp32/length_64/untuned":"unet_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_19dec_fp16",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned":"vae_19dec_fp16_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/tuned/cuda":"vae_19dec_fp16_cuda_tuned",
-    "stablediffusion/v1_4/vae/fp16/length_77/untuned/base":"vae_8dec_fp16",
-    "stablediffusion/v1_4/vae/fp32/length_77/untuned":"vae_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
-    "stablediffusion/v1_4/vae/fp32/length_64/untuned":"vae_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
-    "stablediffusion/v1_4/clip/fp32/length_77/untuned":"clip_18dec_fp32",
-    "stablediffusion/v1_4/clip/fp32/length_64/untuned":"clip_1_64_512_512_fp32_CompVis_stable_diffusion_v1_4",
-    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned":"unet2base_8dec_fp16_tuned_v2",
-    "stablediffusion/v2_1base/unet/fp16/length_77/tuned/cuda":"unet2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned":"unet_19dec_v2p1base_fp16_64_tuned",
-    "stablediffusion/v2_1base/unet/fp16/length_64/tuned/cuda":"unet_19dec_v2p1base_fp16_64_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned":"vae2base_19dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/cuda":"vae2base_19dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/untuned/base":"vae2base_8dec_fp16",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base":"vae2base_8dec_fp16_tuned",
-    "stablediffusion/v2_1base/vae/fp16/length_77/tuned/base/cuda":"vae2base_8dec_fp16_cuda_tuned",
-    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip64_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "stablediffusion/v2_1/vae/fp16/length_77/untuned/base":"vae2_8dec_fp16",
-    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip77_512_512_fp16_stabilityai_stable_diffusion_2_1_base",
-    "anythingv3/v1_4/unet/fp16/length_77/untuned":"av3_unet_19dec_fp16",
-    "anythingv3/v1_4/unet/fp16/length_77/tuned":"av3_unet_19dec_fp16_tuned",
-    "anythingv3/v1_4/unet/fp16/length_77/tuned/cuda":"av3_unet_19dec_fp16_cuda_tuned",
-    "anythingv3/v1_4/unet/fp32/length_77/untuned":"av3_unet_19dec_fp32",
-    "anythingv3/v1_4/vae/fp16/length_77/untuned":"av3_vae_19dec_fp16",
-    "anythingv3/v1_4/vae/fp16/length_77/tuned":"av3_vae_19dec_fp16_tuned",
-    "anythingv3/v1_4/vae/fp16/length_77/tuned/cuda":"av3_vae_19dec_fp16_cuda_tuned",
-    "anythingv3/v1_4/vae/fp16/length_77/untuned/base":"av3_vaebase_22dec_fp16",
-    "anythingv3/v1_4/vae/fp32/length_77/untuned":"av3_vae_19dec_fp32",
-    "anythingv3/v1_4/vae/fp32/length_77/untuned/base":"av3_vaebase_22dec_fp32",
-    "anythingv3/v1_4/clip/fp32/length_77/untuned":"av3_clip_19dec_fp32",
-    "analogdiffusion/v1_4/unet/fp16/length_77/untuned":"ad_unet_19dec_fp16",
-    "analogdiffusion/v1_4/unet/fp16/length_77/tuned":"ad_unet_19dec_fp16_tuned",
-    "analogdiffusion/v1_4/unet/fp16/length_77/tuned/cuda":"ad_unet_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v1_4/unet/fp32/length_77/untuned":"ad_unet_19dec_fp32",
-    "analogdiffusion/v1_4/vae/fp16/length_77/untuned":"ad_vae_19dec_fp16",
-    "analogdiffusion/v1_4/vae/fp16/length_77/tuned":"ad_vae_19dec_fp16_tuned",
-    "analogdiffusion/v1_4/vae/fp16/length_77/tuned/cuda":"ad_vae_19dec_fp16_cuda_tuned",
-    "analogdiffusion/v1_4/vae/fp16/length_77/untuned/base":"ad_vaebase_22dec_fp16",
-    "analogdiffusion/v1_4/vae/fp32/length_77/untuned":"ad_vae_19dec_fp32",
-    "analogdiffusion/v1_4/vae/fp32/length_77/untuned/base":"ad_vaebase_22dec_fp32",
-    "analogdiffusion/v1_4/clip/fp32/length_77/untuned":"ad_clip_19dec_fp32",
-    "openjourney/v1_4/unet/fp16/length_64/untuned":"oj_unet_22dec_fp16_64",
-    "openjourney/v1_4/unet/fp32/length_64/untuned":"oj_unet_22dec_fp32_64",
-    "openjourney/v1_4/vae/fp16/length_77/untuned":"oj_vae_22dec_fp16",
-    "openjourney/v1_4/vae/fp16/length_77/untuned/base":"oj_vaebase_22dec_fp16",
-    "openjourney/v1_4/vae/fp32/length_77/untuned":"oj_vae_22dec_fp32",
-    "openjourney/v1_4/vae/fp32/length_77/untuned/base":"oj_vaebase_22dec_fp32",
-    "openjourney/v1_4/clip/fp32/length_64/untuned":"oj_clip_22dec_fp32_64",
-    "dreamlike/v1_4/unet/fp16/length_77/untuned":"dl_unet_23dec_fp16_77",
-    "dreamlike/v1_4/unet/fp32/length_77/untuned":"dl_unet_23dec_fp32_77",
-    "dreamlike/v1_4/vae/fp16/length_77/untuned":"dl_vae_23dec_fp16",
-    "dreamlike/v1_4/vae/fp16/length_77/untuned/base":"dl_vaebase_23dec_fp16",
-    "dreamlike/v1_4/vae/fp32/length_77/untuned":"dl_vae_23dec_fp32",
-    "dreamlike/v1_4/vae/fp32/length_77/untuned/base":"dl_vaebase_23dec_fp32",
-    "dreamlike/v1_4/clip/fp32/length_77/untuned":"dl_clip_23dec_fp32_77"
+    "stablediffusion/v1_4/unet/fp16/length_64/untuned":"unet_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v1_4/vae/fp16/length_77/untuned":"vae_1_64_512_512_fp16_stable-diffusion-v1-4_vulkan",
+    "stablediffusion/v1_4/vae/fp16/length_64/untuned":"vae_1_64_512_512_fp16_stable-diffusion-v1-4_vulkan",
+    "stablediffusion/v1_4/clip/fp32/length_64/untuned":"clip_1_64_512_512_fp16_stable-diffusion-v1-4_vulkan",
+    "stablediffusion/v2_1base/unet/fp16/length_77/untuned":"unet_1_77_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1base/unet/fp16/length_64/untuned":"unet_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1base/vae/fp16/length_77/untuned":"vae_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1base/clip/fp32/length_77/untuned":"clip_1_77_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1base/clip/fp32/length_64/untuned":"clip_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1/unet/fp16/length_77/untuned":"unet_1_77_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1/vae/fp16/length_77/untuned":"vae_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan",
+    "stablediffusion/v2_1/clip/fp32/length_77/untuned":"clip_1_64_512_512_fp16_stable-diffusion-2-1-base_vulkan"
  }
 ]
--- a/apps/stable_diffusion/web/index.py
+++ b/apps/stable_diffusion/web/index.py
@@ -45,7 +45,12 @@ if __name__ == "__main__":
    dark_theme = resource_path("ui/css/sd_dark_theme.css")

    from apps.stable_diffusion.web.ui import (
-        get_txt2img_web,
+        txt2img_web,
+        txt2img_gallery,
+        txt2img_sendto_img2img,
+        txt2img_sendto_inpaint,
+        txt2img_sendto_outpaint,
+        txt2img_sendto_upscaler,
        img2img_web,
        img2img_gallery,
        img2img_init_image,
@@ -91,14 +96,6 @@ if __name__ == "__main__":
    ) as sd_web:
        with gr.Tabs() as tabs:
            with gr.TabItem(label="Text-to-Image", id=0):
-                (
-                    txt2img_web,
-                    txt2img_gallery,
-                    txt2img_sendto_img2img,
-                    txt2img_sendto_inpaint,
-                    txt2img_sendto_outpaint,
-                    txt2img_sendto_upscaler,
-                ) = get_txt2img_web()
                txt2img_web.render()
            with gr.TabItem(label="Image-to-Image", id=1):
                img2img_web.render()
--- a/apps/stable_diffusion/web/ui/init.py
+++ b/apps/stable_diffusion/web/ui/init.py
@@ -1,6 +1,11 @@
 from apps.stable_diffusion.web.ui.txt2img_ui import (
    txt2img_inf,
-    get_txt2img_web,
+    txt2img_web,
+    txt2img_gallery,
+    txt2img_sendto_img2img,
+    txt2img_sendto_inpaint,
+    txt2img_sendto_outpaint,
+    txt2img_sendto_upscaler,
 )
 from apps.stable_diffusion.web.ui.img2img_ui import (
    img2img_web,
--- a/apps/stable_diffusion/web/ui/txt2img_ui.py
+++ b/apps/stable_diffusion/web/ui/txt2img_ui.py
@@ -4,6 +4,15 @@ import torch
 import time
 import gradio as gr
 from PIL import Image
+from apps.stable_diffusion.web.ui.utils import (
+    available_devices,
+    nodlogo_loc,
+    get_custom_model_path,
+    get_custom_model_files,
+    scheduler_list_txt2img,
+    predefined_models,
+    cancel_sd,
+)
 from apps.stable_diffusion.src import (
    args,
    Text2ImagePipeline,
@@ -184,306 +193,280 @@ def txt2img_inf(
    return generated_imgs, text_output


-def get_txt2img_web():
-    from apps.stable_diffusion.web.ui.utils import (
-        available_devices,
-        nodlogo_loc,
-        get_custom_model_path,
-        get_custom_model_files,
-        scheduler_list_txt2img,
-        predefined_models,
-        cancel_sd,
-    )
-
-    with gr.Blocks(title="Text-to-Image") as txt2img_web:
-        with gr.Row(elem_id="ui_title"):
-            nod_logo = Image.open(nodlogo_loc)
-            with gr.Row():
-                with gr.Column(scale=1, elem_id="demo_title_outer"):
-                    gr.Image(
-                        value=nod_logo,
-                        show_label=False,
-                        interactive=False,
-                        elem_id="top_logo",
-                    ).style(width=150, height=50)
-        with gr.Row(elem_id="ui_body"):
-            with gr.Row():
-                with gr.Column(scale=1, min_width=600):
-                    with gr.Row():
-                        with gr.Column(scale=10):
-                            with gr.Row():
-                                custom_model = gr.Dropdown(
-                                    label=f"Models (Custom Model path: {get_custom_model_path()})",
-                                    elem_id="custom_model",
-                                    value=os.path.basename(args.ckpt_loc)
-                                    if args.ckpt_loc
-                                    else "None",
-                                    choices=["None"]
-                                    + get_custom_model_files()
-                                    + predefined_models,
-                                )
-                                hf_model_id = gr.Textbox(
-                                    elem_id="hf_model_id",
-                                    placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
-                                    value="",
-                                    label="HuggingFace Model ID",
-                                    lines=3,
-                                )
-                        with gr.Column(scale=1, min_width=170):
-                            png_info_img = gr.Image(
-                                label="Import PNG info",
-                                elem_id="txt2img_prompt_image",
-                                type="pil",
-                                tool="None",
-                                visible=True,
-                            )
-
-                    with gr.Group(elem_id="prompt_box_outer"):
-                        prompt = gr.Textbox(
-                            label="Prompt",
-                            value=args.prompts[0],
-                            lines=1,
-                            elem_id="prompt_box",
-                        )
-                        negative_prompt = gr.Textbox(
-                            label="Negative Prompt",
-                            value=args.negative_prompts[0],
-                            lines=1,
-                            elem_id="negative_prompt_box",
-                        )
-                    with gr.Accordion(label="LoRA Options", open=False):
+with gr.Blocks(title="Text-to-Image") as txt2img_web:
+    with gr.Row(elem_id="ui_title"):
+        nod_logo = Image.open(nodlogo_loc)
+        with gr.Row():
+            with gr.Column(scale=1, elem_id="demo_title_outer"):
+                gr.Image(
+                    value=nod_logo,
+                    show_label=False,
+                    interactive=False,
+                    elem_id="top_logo",
+                ).style(width=150, height=50)
+    with gr.Row(elem_id="ui_body"):
+        with gr.Row():
+            with gr.Column(scale=1, min_width=600):
+                with gr.Row():
+                    with gr.Column(scale=10):
                        with gr.Row():
-                            lora_weights = gr.Dropdown(
-                                label=f"Standlone LoRA weights (Path: {get_custom_model_path('lora')})",
-                                elem_id="lora_weights",
-                                value="None",
+                            custom_model = gr.Dropdown(
+                                label=f"Models (Custom Model path: {get_custom_model_path()})",
+                                elem_id="custom_model",
+                                value=os.path.basename(args.ckpt_loc)
+                                if args.ckpt_loc
+                                else "None",
                                choices=["None"]
-                                + get_custom_model_files("lora"),
+                                + get_custom_model_files()
+                                + predefined_models,
                            )
-                            lora_hf_id = gr.Textbox(
-                                elem_id="lora_hf_id",
-                                placeholder="Select 'None' in the Standlone LoRA weights dropdown on the left if you want to use a standalone HuggingFace model ID for LoRA here e.g: sayakpaul/sd-model-finetuned-lora-t4",
+                            hf_model_id = gr.Textbox(
+                                elem_id="hf_model_id",
+                                placeholder="Select 'None' in the Models dropdown on the left and enter model ID here e.g: SG161222/Realistic_Vision_V1.3",
                                value="",
                                label="HuggingFace Model ID",
                                lines=3,
                            )
-                    with gr.Accordion(label="Advanced Options", open=False):
-                        with gr.Row():
-                            scheduler = gr.Dropdown(
-                                elem_id="scheduler",
-                                label="Scheduler",
-                                value=args.scheduler,
-                                choices=scheduler_list_txt2img,
-                            )
-                            with gr.Group():
-                                save_metadata_to_png = gr.Checkbox(
-                                    label="Save prompt information to PNG",
-                                    value=args.write_metadata_to_png,
-                                    interactive=True,
-                                )
-                                save_metadata_to_json = gr.Checkbox(
-                                    label="Save prompt information to JSON file",
-                                    value=args.save_metadata_to_json,
-                                    interactive=True,
-                                )
-                        with gr.Row():
-                            height = gr.Slider(
-                                384,
-                                768,
-                                value=args.height,
-                                step=8,
-                                label="Height",
-                            )
-                            width = gr.Slider(
-                                384,
-                                768,
-                                value=args.width,
-                                step=8,
-                                label="Width",
-                            )
-                            precision = gr.Radio(
-                                label="Precision",
-                                value=args.precision,
-                                choices=[
-                                    "fp16",
-                                    "fp32",
-                                ],
-                                visible=False,
-                            )
-                            max_length = gr.Radio(
-                                label="Max Length",
-                                value=args.max_length,
-                                choices=[
-                                    64,
-                                    77,
-                                ],
-                                visible=False,
-                            )
-                        with gr.Row():
-                            steps = gr.Slider(
-                                1, 100, value=args.steps, step=1, label="Steps"
-                            )
-                            guidance_scale = gr.Slider(
-                                0,
-                                50,
-                                value=args.guidance_scale,
-                                step=0.1,
-                                label="CFG Scale",
-                            )
-                        with gr.Row():
-                            with gr.Column(scale=3):
-                                batch_count = gr.Slider(
-                                    1,
-                                    100,
-                                    value=args.batch_count,
-                                    step=1,
-                                    label="Batch Count",
-                                    interactive=True,
-                                )
-                            with gr.Column(scale=3):
-                                batch_size = gr.Slider(
-                                    1,
-                                    4,
-                                    value=args.batch_size,
-                                    step=1,
-                                    label="Batch Size",
-                                    interactive=True,
-                                )
-                            stop_batch = gr.Button("Stop Batch")
-                    with gr.Row():
-                        seed = gr.Number(
-                            value=args.seed, precision=0, label="Seed"
-                        )
-                        device = gr.Dropdown(
-                            elem_id="device",
-                            label="Device",
-                            value=available_devices[0],
-                            choices=available_devices,
-                        )
-                    with gr.Row():
-                        with gr.Column(scale=2):
-                            random_seed = gr.Button("Randomize Seed")
-                            random_seed.click(
-                                None,
-                                inputs=[],
-                                outputs=[seed],
-                                _js="() => -1",
-                            )
-                        with gr.Column(scale=6):
-                            stable_diffusion = gr.Button("Generate Image(s)")
-
-                    with gr.Accordion(label="Prompt Examples!", open=False):
-                        ex = gr.Examples(
-                            examples=prompt_examples,
-                            inputs=prompt,
-                            cache_examples=False,
-                            elem_id="prompt_examples",
+                    with gr.Column(scale=1, min_width=170):
+                        png_info_img = gr.Image(
+                            label="Import PNG info",
+                            elem_id="txt2img_prompt_image",
+                            type="pil",
+                            tool="None",
+                            visible=True,
                        )

-                with gr.Column(scale=1, min_width=600):
-                    with gr.Group():
-                        txt2img_gallery = gr.Gallery(
-                            label="Generated images",
-                            show_label=False,
-                            elem_id="gallery",
-                        ).style(grid=[2])
-                        std_output = gr.Textbox(
-                            value="Nothing to show.",
-                            lines=1,
-                            show_label=False,
+                with gr.Group(elem_id="prompt_box_outer"):
+                    prompt = gr.Textbox(
+                        label="Prompt",
+                        value=args.prompts[0],
+                        lines=1,
+                        elem_id="prompt_box",
+                    )
+                    negative_prompt = gr.Textbox(
+                        label="Negative Prompt",
+                        value=args.negative_prompts[0],
+                        lines=1,
+                        elem_id="negative_prompt_box",
+                    )
+                with gr.Accordion(label="LoRA Options", open=False):
+                    with gr.Row():
+                        lora_weights = gr.Dropdown(
+                            label=f"Standlone LoRA weights (Path: {get_custom_model_path('lora')})",
+                            elem_id="lora_weights",
+                            value="None",
+                            choices=["None"] + get_custom_model_files("lora"),
+                        )
+                        lora_hf_id = gr.Textbox(
+                            elem_id="lora_hf_id",
+                            placeholder="Select 'None' in the Standlone LoRA weights dropdown on the left if you want to use a standalone HuggingFace model ID for LoRA here e.g: sayakpaul/sd-model-finetuned-lora-t4",
+                            value="",
+                            label="HuggingFace Model ID",
+                            lines=3,
+                        )
+                with gr.Accordion(label="Advanced Options", open=False):
+                    with gr.Row():
+                        scheduler = gr.Dropdown(
+                            elem_id="scheduler",
+                            label="Scheduler",
+                            value=args.scheduler,
+                            choices=scheduler_list_txt2img,
+                        )
+                        with gr.Group():
+                            save_metadata_to_png = gr.Checkbox(
+                                label="Save prompt information to PNG",
+                                value=args.write_metadata_to_png,
+                                interactive=True,
+                            )
+                            save_metadata_to_json = gr.Checkbox(
+                                label="Save prompt information to JSON file",
+                                value=args.save_metadata_to_json,
+                                interactive=True,
+                            )
+                    with gr.Row():
+                        height = gr.Slider(
+                            384,
+                            768,
+                            value=args.height,
+                            step=8,
+                            label="Height",
+                        )
+                        width = gr.Slider(
+                            384,
+                            768,
+                            value=args.width,
+                            step=8,
+                            label="Width",
+                        )
+                        precision = gr.Radio(
+                            label="Precision",
+                            value=args.precision,
+                            choices=[
+                                "fp16",
+                                "fp32",
+                            ],
+                            visible=False,
+                        )
+                        max_length = gr.Radio(
+                            label="Max Length",
+                            value=args.max_length,
+                            choices=[
+                                64,
+                                77,
+                            ],
+                            visible=False,
+                        )
+                    with gr.Row():
+                        steps = gr.Slider(
+                            1, 100, value=args.steps, step=1, label="Steps"
+                        )
+                        guidance_scale = gr.Slider(
+                            0,
+                            50,
+                            value=args.guidance_scale,
+                            step=0.1,
+                            label="CFG Scale",
                        )
                        ondemand = gr.Checkbox(
                            value=args.ondemand,
                            label="Low VRAM",
                            interactive=True,
                        )
-                    output_dir = (
-                        args.output_dir if args.output_dir else Path.cwd()
-                    )
-                    output_dir = Path(output_dir, "generated_imgs")
-                    output_loc = gr.Textbox(
-                        label="Saving Images at",
-                        value=output_dir,
-                        interactive=False,
-                    )
                    with gr.Row():
-                        txt2img_sendto_img2img = gr.Button(
-                            value="SendTo Img2Img"
-                        )
-                        txt2img_sendto_inpaint = gr.Button(
-                            value="SendTo Inpaint"
-                        )
-                        txt2img_sendto_outpaint = gr.Button(
-                            value="SendTo Outpaint"
-                        )
-                        txt2img_sendto_upscaler = gr.Button(
-                            value="SendTo Upscaler"
+                        with gr.Column(scale=3):
+                            batch_count = gr.Slider(
+                                1,
+                                100,
+                                value=args.batch_count,
+                                step=1,
+                                label="Batch Count",
+                                interactive=True,
+                            )
+                        with gr.Column(scale=3):
+                            batch_size = gr.Slider(
+                                1,
+                                4,
+                                value=args.batch_size,
+                                step=1,
+                                label="Batch Size",
+                                interactive=True,
+                            )
+                        stop_batch = gr.Button("Stop Batch")
+                with gr.Row():
+                    seed = gr.Number(
+                        value=args.seed, precision=0, label="Seed"
+                    )
+                    device = gr.Dropdown(
+                        elem_id="device",
+                        label="Device",
+                        value=available_devices[0],
+                        choices=available_devices,
+                    )
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        random_seed = gr.Button("Randomize Seed")
+                        random_seed.click(
+                            None,
+                            inputs=[],
+                            outputs=[seed],
+                            _js="() => -1",
                        )
+                    with gr.Column(scale=6):
+                        stable_diffusion = gr.Button("Generate Image(s)")

-            kwargs = dict(
-                fn=txt2img_inf,
-                inputs=[
-                    prompt,
-                    negative_prompt,
-                    height,
-                    width,
-                    steps,
-                    guidance_scale,
-                    seed,
-                    batch_count,
-                    batch_size,
-                    scheduler,
-                    custom_model,
-                    hf_model_id,
-                    precision,
-                    device,
-                    max_length,
-                    save_metadata_to_json,
-                    save_metadata_to_png,
-                    lora_weights,
-                    lora_hf_id,
-                    ondemand,
-                ],
-                outputs=[txt2img_gallery, std_output],
-                show_progress=args.progress_bar,
-            )
+                with gr.Accordion(label="Prompt Examples!", open=False):
+                    ex = gr.Examples(
+                        examples=prompt_examples,
+                        inputs=prompt,
+                        cache_examples=False,
+                        elem_id="prompt_examples",
+                    )

-            prompt_submit = prompt.submit(**kwargs)
-            neg_prompt_submit = negative_prompt.submit(**kwargs)
-            generate_click = stable_diffusion.click(**kwargs)
-            stop_batch.click(
-                fn=cancel_sd,
-                cancels=[prompt_submit, neg_prompt_submit, generate_click],
-            )
+            with gr.Column(scale=1, min_width=600):
+                with gr.Group():
+                    txt2img_gallery = gr.Gallery(
+                        label="Generated images",
+                        show_label=False,
+                        elem_id="gallery",
+                    ).style(grid=[2])
+                    std_output = gr.Textbox(
+                        value="Nothing to show.",
+                        lines=1,
+                        show_label=False,
+                    )
+                output_dir = args.output_dir if args.output_dir else Path.cwd()
+                output_dir = Path(output_dir, "generated_imgs")
+                output_loc = gr.Textbox(
+                    label="Saving Images at",
+                    value=output_dir,
+                    interactive=False,
+                )
+                with gr.Row():
+                    txt2img_sendto_img2img = gr.Button(value="SendTo Img2Img")
+                    txt2img_sendto_inpaint = gr.Button(value="SendTo Inpaint")
+                    txt2img_sendto_outpaint = gr.Button(
+                        value="SendTo Outpaint"
+                    )
+                    txt2img_sendto_upscaler = gr.Button(
+                        value="SendTo Upscaler"
+                    )

-            from apps.stable_diffusion.web.utils.png_metadata import (
-                import_png_metadata,
-            )
+        kwargs = dict(
+            fn=txt2img_inf,
+            inputs=[
+                prompt,
+                negative_prompt,
+                height,
+                width,
+                steps,
+                guidance_scale,
+                seed,
+                batch_count,
+                batch_size,
+                scheduler,
+                custom_model,
+                hf_model_id,
+                precision,
+                device,
+                max_length,
+                save_metadata_to_json,
+                save_metadata_to_png,
+                lora_weights,
+                lora_hf_id,
+                ondemand,
+            ],
+            outputs=[txt2img_gallery, std_output],
+            show_progress=args.progress_bar,
+        )

-            png_info_img.change(
-                fn=import_png_metadata,
-                inputs=[
-                    png_info_img,
-                ],
-                outputs=[
-                    png_info_img,
-                    prompt,
-                    negative_prompt,
-                    steps,
-                    scheduler,
-                    guidance_scale,
-                    seed,
-                    width,
-                    height,
-                    custom_model,
-                    hf_model_id,
-                ],
-            )
-    return (
-        txt2img_web,
-        txt2img_gallery,
-        txt2img_sendto_img2img,
-        txt2img_sendto_inpaint,
-        txt2img_sendto_outpaint,
-        txt2img_sendto_upscaler,
-    )
+        prompt_submit = prompt.submit(**kwargs)
+        neg_prompt_submit = negative_prompt.submit(**kwargs)
+        generate_click = stable_diffusion.click(**kwargs)
+        stop_batch.click(
+            fn=cancel_sd,
+            cancels=[prompt_submit, neg_prompt_submit, generate_click],
+        )
+
+        from apps.stable_diffusion.web.utils.png_metadata import (
+            import_png_metadata,
+        )
+
+        png_info_img.change(
+            fn=import_png_metadata,
+            inputs=[
+                png_info_img,
+            ],
+            outputs=[
+                png_info_img,
+                prompt,
+                negative_prompt,
+                steps,
+                scheduler,
+                guidance_scale,
+                seed,
+                width,
+                height,
+                custom_model,
+                hf_model_id,
+            ],
+        )
--- a/build_tools/stable_diffusion_testing.py
+++ b/build_tools/stable_diffusion_testing.py
@@ -188,9 +188,7 @@ def test_loop(device="vulkan", beta=False, extra_flags=[]):
                    with open(dumpfile_name, "r+") as f:
                        output = f.readlines()
                        print("\n".join(output))
-                    if model_name == "CompVis/stable-diffusion-v1-4":
-                        print("failed a known successful model.")
-                        exit(1)
+                    exit(1)
                if os.name == "nt":
                    counter += 1
                    if counter % 2 == 0:
--- a/conftest.py
+++ b/conftest.py
@@ -71,8 +71,8 @@ def pytest_addoption(parser):
    parser.addoption(
        "--tank_prefix",
        type=str,
-        default="nightly",
-        help="Prefix to gs://shark_tank/ model directories from which to download SHARK tank artifacts. Default is 'latest'.",
+        default=None,
+        help="Prefix to gs://shark_tank/ model directories from which to download SHARK tank artifacts. Default is nightly.",
    )
    parser.addoption(
        "--benchmark_dispatches",
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ parameterized

 # Add transformers, diffusers and scipy since it most commonly used
 transformers
-diffusers @ git+https://github.com/huggingface/diffusers@main
+diffusers @ git+https://github.com/huggingface/diffusers@e47459c80f6f6a5a1c19d32c3fd74edf94f47aa2
 scipy
 ftfy
 gradio
--- a/shark/shark_downloader.py
+++ b/shark/shark_downloader.py
@@ -150,11 +150,14 @@ def get_git_revision_short_hash() -> str:
    if shark_args.shark_prefix is not None:
        prefix_kw = shark_args.shark_prefix
    else:
-        prefix_kw = (
-            subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
-            .decode("ascii")
-            .strip()
-        )
+        import json
+
+        dir_path = os.path.dirname(os.path.realpath(__file__))
+        src = os.path.join(dir_path, "..", "tank_version.json")
+        with open(src, "r") as f:
+            data = json.loads(f.read())
+            prefix_kw = data["version"]
+    print(f"Checking for updates from gs://shark_tank/{prefix_kw}")
    return prefix_kw


@@ -186,9 +189,6 @@ def get_sharktank_prefix():
    return tank_prefix


-shark_args.shark_prefix = get_sharktank_prefix()
-
-
 # Downloads the torch model from gs://shark_tank dir.
 def download_model(
    model_name,
@@ -201,6 +201,7 @@ def download_model(
    model_name = model_name.replace("/", "_")
    dyn_str = "_dynamic" if dynamic else ""
    os.makedirs(WORKDIR, exist_ok=True)
+    shark_args.shark_prefix = get_sharktank_prefix()
    if import_args["batch_size"] != 1:
        model_dir_name = (
            model_name
--- a/tank_version.json
+++ b/tank_version.json
@@ -0,0 +1,3 @@
+{
+	"version": "2023-03-31_02d52bb"
+}
Author	SHA1	Message	Date
Ean Garvey	e5a69a7c36	pin diffusers to e47459c (#1279 )	2023-04-04 18:29:21 -07:00
m68k-fr	450b6cafc4	[SD] Add weight emphasis to prompts encoder (#1276 )	2023-04-04 09:47:04 -07:00
Daniel Garvey	237d26baa2	update model db to reflect changes (#1277 ) * remove 1/1 tqdm progress bar * update model_db to reflect changes	2023-04-04 11:46:55 -05:00
Daniel Garvey	67d6ee1104	remove 1/1 tqdm progress bar (#1274 )	2023-04-03 22:30:09 -05:00
Ean Garvey	98b069488e	Add tank_version.json (#1272 )	2023-04-03 18:36:23 -07:00
jinchen62	e0f227643a	Fix webui circular import issue (#1271 )	2023-04-03 16:00:10 -07:00