Compare commits


2 Commits

Author: Ean Garvey
SHA1: ee0233e370
Date: 2023-11-13 20:01:28 -06:00
Message: Fix formatting.

Author: Daniel Garvey
SHA1: a3deeec870
Date: 2023-11-13 19:07:28 -06:00
Message: Dan shark studio (#1970)

* Fix issue in Falcon-GPTQ

* initial webui and llama2

---------

Co-authored-by: Vivek Khandelwal <vivekkhandelwal1424@gmail.com>
21 changed files with 138 additions and 388 deletions

View File

@@ -112,7 +112,7 @@ jobs:
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} IMPORTER=1 ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark=native --update_tank -k cpu
pytest --forked --benchmark=native --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cpu
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cpu_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cpu_latest.csv
python build_tools/vicuna_testing.py
@@ -123,7 +123,7 @@ jobs:
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest --benchmark=native --update_tank -k cuda
pytest --forked --benchmark=native --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k cuda
gsutil cp ./bench_results.csv gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv
gsutil cp gs://shark-public/builder/bench_results/${DATE}/bench_results_cuda_${SHORT_SHA}.csv gs://shark-public/builder/bench_results/latest/bench_results_cuda_latest.csv
# Disabled due to black image bug
@@ -146,7 +146,7 @@ jobs:
cd $GITHUB_WORKSPACE
PYTHON=python${{ matrix.python-version }} ./setup_venv.sh
source shark.venv/bin/activate
pytest --update_tank -k vulkan
pytest --forked --benchmark="native" --ci --ci_sha=${SHORT_SHA} --update_tank --tank_url="gs://shark_tank/nightly/" -k vulkan
python build_tools/stable_diffusion_testing.py --device=vulkan
- name: Validate Vulkan Models (Windows)
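
The pytest invocations changed above mix standard pytest with project-defined options: --benchmark, --ci, --ci_sha, --update_tank, and --tank_url come from SHARK's own test configuration, while --forked is provided by the pytest-forked plugin. As a rough sketch of how such options are typically registered (the option names mirror the workflow, but the defaults, help text, and hook body are illustrative and may not match SHARK's actual conftest.py):

```python
# conftest.py -- illustrative sketch only, not taken from the SHARK repository
def pytest_addoption(parser):
    # benchmark backend to exercise, e.g. "native"
    parser.addoption("--benchmark", action="store", default=None)
    # tag CI result uploads with the short commit SHA of the run
    parser.addoption("--ci", action="store_true", default=False)
    parser.addoption("--ci_sha", action="store", default="latest")
    # refresh cached model artifacts and point at a specific tank bucket
    parser.addoption("--update_tank", action="store_true", default=False)
    parser.addoption("--tank_url", action="store", default="gs://shark_tank/latest/")
```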

View File

@@ -25,7 +25,7 @@ from apps.stable_diffusion.src import args
# Brevitas
from typing import List, Tuple
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl
@@ -101,7 +101,7 @@ class H2OGPTModel(torch.nn.Module):
dtype=torch.float32,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=128,
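
The two edits in this hunk recur across the PR: quantize_model is imported from brevitas_examples.llm.llm_quant.quantize instead of brevitas_examples.common.generative.quantize, and weight_scale_precision switches between "float_scale" and "float". A minimal sketch of how the keyword arguments shown above fit into a quantize_model call, assuming the llm_quant API that pairs with get_model_impl (the checkpoint name and the positional argument are illustrative, not taken from the diff):

```python
import torch
from transformers import AutoModelForCausalLM
# import path on one side of the diff; the other side pulls the same symbol
# from brevitas_examples.common.generative.quantize
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl

# illustrative checkpoint; the H2OGPT wrapper above loads its own model
model = AutoModelForCausalLM.from_pretrained("h2oai/h2ogpt-oig-oasst1-512-6.9b")

# keyword arguments mirror the hunk above; passing the decoder layers from
# get_model_impl as the first argument is an assumption, not shown in the diff
quantize_model(
    get_model_impl(model).layers,
    dtype=torch.float32,
    weight_bit_width=4,
    weight_param_method="stats",
    weight_scale_precision="float",  # "float_scale" on the other side of the diff
    weight_quant_type="asym",
    weight_quant_granularity="per_group",
    weight_group_size=128,
    quantize_weight_zero_point=False,
)
```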

View File

@@ -244,8 +244,7 @@ class VicunaBase(SharkLLMBase):
print(f"[DEBUG] output_name = {output_name}")
maps1 = []
maps2 = []
constants_1 = set()
constants_2 = set()
constants = set()
f1 = []
f2 = []
@@ -256,7 +255,7 @@ class VicunaBase(SharkLLMBase):
if re.search("#map\d*\s*=", line):
maps1.append(line)
elif re.search("arith.constant", line):
constants_1.add(line)
constants.add(line)
elif not re.search("module", line):
line = re.sub("forward", "first_vicuna_forward", line)
f1.append(line)
@@ -282,7 +281,7 @@ class VicunaBase(SharkLLMBase):
elif "global_seed" in line:
continue
elif re.search("arith.constant", line):
constants_2.add(line)
constants.add(line)
elif not re.search("module", line):
line = re.sub("forward", "second_vicuna_forward", line)
f2.append(line)
@@ -305,21 +304,15 @@ class VicunaBase(SharkLLMBase):
module_end = "}"
global_vars = []
global_var_loading1 = dict()
global_var_loading2 = dict()
vnames = []
global_var_loading1 = []
global_var_loading2 = []
print(f"[DEBUG] processing constants")
# in both 1 and 2
constants = [(e , "") for e in list(constants_1 & constants_2)]
# only in 1
constants.extend([(e, "_1") for e in list(constants_1.difference(constants_2))])
# only in 2
constants.extend([(e, "_2") for e in list(constants_2.difference(constants_1))])
del constants_1, constants_2
gc.collect()
counter = 0
constants = list(constants)
while constants:
constant, vname_suf = constants.pop(0)
constant = constants.pop(0)
vname, vbody = constant.split("=")
vname = re.sub("%", "", vname)
vname = vname.strip()
@@ -329,34 +322,35 @@ class VicunaBase(SharkLLMBase):
print(constant)
vdtype = vbody.split(":")[-1].strip()
fixed_vdtype = vdtype
noinline = "{noinline}" if "tensor" in fixed_vdtype else ""
if "c1_i64" in vname:
print(constant)
counter += 1
if counter == 2:
counter = 0
print("detected duplicate")
continue
vnames.append(vname)
if "true" not in vname:
global_vars.append(
f"ml_program.global private @{vname}{vname_suf}({vbody}) : {fixed_vdtype}"
f"ml_program.global private @{vname}({vbody}) : {fixed_vdtype}"
)
global_var_loading1.append(
f"\t\t%{vname} = ml_program.global_load_const @{vname} : {fixed_vdtype}"
)
global_var_loading2.append(
f"\t\t%{vname} = ml_program.global_load_const @{vname} : {fixed_vdtype}"
)
if vname_suf != "_2":
global_var_loading1[
f"\t\t%{vname} = ml_program.global_load_const @{vname}{vname_suf} : {fixed_vdtype}"
] = ""
if vname_suf != "_1":
global_var_loading2[
f"\t\t%{vname} = ml_program.global_load_const @{vname}{vname_suf} : {fixed_vdtype}"
] = ""
else:
global_vars.append(
f"ml_program.global private @{vname}{vname_suf}({vbody}) : i1"
f"ml_program.global private @{vname}({vbody}) : i1"
)
global_var_loading1.append(
f"\t\t%{vname} = ml_program.global_load_const @{vname} : i1"
)
global_var_loading2.append(
f"\t\t%{vname} = ml_program.global_load_const @{vname} : i1"
)
if vname_suf != "_2":
global_var_loading1[
f"\t\t%{vname} = ml_program.global_load_const @{vname}{vname_suf} : i1"
] = ""
if vname_suf != "_1":
global_var_loading2[
f"\t\t%{vname} = ml_program.global_load_const @{vname}{vname_suf} : i1"
] = ""
del constants
gc.collect()
new_f1, new_f2 = [], []
@@ -364,7 +358,7 @@ class VicunaBase(SharkLLMBase):
for line in f1:
if "func.func" in line:
new_f1.append(line)
for global_var in global_var_loading1.keys():
for global_var in global_var_loading1:
new_f1.append(global_var)
else:
new_f1.append(line)
@@ -373,7 +367,7 @@ class VicunaBase(SharkLLMBase):
for line in f2:
if "func.func" in line:
new_f2.append(line)
for global_var in global_var_loading2.keys():
for global_var in global_var_loading2:
if (
"c20_i64 = arith.addi %dim_i64, %c1_i64 : i64"
in global_var
@@ -874,7 +868,7 @@ class ShardedVicuna(VicunaBase):
layer0, inputs0[0], inputs0[1], inputs0[2]
)
if self.precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl
module0 = torch_mlir.compile(
ts_g,
@@ -1075,7 +1069,7 @@ class ShardedVicuna(VicunaBase):
)
if self.precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl
print("Applying weight quantization..")
weight_bit_width = 4 if self.precision == "int4" else 8
@@ -1085,7 +1079,7 @@ class ShardedVicuna(VicunaBase):
weight_quant_type="asym",
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_granularity="per_group",
weight_group_size=self.weight_group_size,
quantize_weight_zero_point=False,
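
Most of this file's diff is the constant-hoisting refactor shown earlier: instead of tracking constants_1 and constants_2 separately and suffixing the generated globals, every arith.constant line from both forward modules goes into one set, each unique constant becomes a single ml_program.global, and the same list of global_load_const lines is spliced into both functions. A compact sketch of that single-set flow, treating the modules as plain text the way the diff does (function and variable names are illustrative):

```python
import re

def hoist_constants(f1_lines, f2_lines):
    """Collect unique arith.constant lines from both modules and turn them
    into module-level globals plus the load lines both functions will share."""
    constants = set()
    for line in f1_lines + f2_lines:
        if re.search(r"arith\.constant", line):
            constants.add(line)

    global_vars, loads = [], []
    for constant in constants:
        vname, vbody = constant.split("=", 1)
        vname = vname.strip().lstrip("%")
        vbody = vbody.strip()
        vdtype = vbody.split(":")[-1].strip()
        global_vars.append(
            f"ml_program.global private @{vname}({vbody}) : {vdtype}"
        )
        loads.append(
            f"\t\t%{vname} = ml_program.global_load_const @{vname} : {vdtype}"
        )
    # the same `loads` list is inserted right after each func.func header, so
    # first_vicuna_forward and second_vicuna_forward both see every value
    return global_vars, loads
```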

View File

@@ -5,7 +5,7 @@ from typing import List, Any
from transformers import StoppingCriteria
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl
@@ -37,7 +37,7 @@ class VisionModel(torch.nn.Module):
dtype=torch.float32,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -52,7 +52,7 @@ class VisionModel(torch.nn.Module):
dtype=torch.float32,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -93,7 +93,7 @@ class FirstLlamaModel(torch.nn.Module):
dtype=torch.float32,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -157,7 +157,7 @@ class SecondLlamaModel(torch.nn.Module):
dtype=torch.float32,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,

View File

@@ -24,9 +24,7 @@ class FirstVicuna(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -38,7 +36,7 @@ class FirstVicuna(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -81,9 +79,7 @@ class SecondVicuna7B(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -95,7 +91,7 @@ class SecondVicuna7B(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -333,9 +329,7 @@ class SecondVicuna13B(torch.nn.Module):
torch.float32 if accumulates == "fp32" else torch.float16
)
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -347,7 +341,7 @@ class SecondVicuna13B(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -633,9 +627,7 @@ class SecondVicuna70B(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -647,7 +639,7 @@ class SecondVicuna70B(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,

View File

@@ -24,9 +24,7 @@ class FirstVicunaGPU(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -38,7 +36,7 @@ class FirstVicunaGPU(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -80,9 +78,7 @@ class SecondVicuna7BGPU(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -94,7 +90,7 @@ class SecondVicuna7BGPU(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -331,9 +327,7 @@ class SecondVicuna13BGPU(torch.nn.Module):
torch.float32 if accumulates == "fp32" else torch.float16
)
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -345,7 +339,7 @@ class SecondVicuna13BGPU(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
@@ -631,9 +625,7 @@ class SecondVicuna70BGPU(torch.nn.Module):
)
print(f"[DEBUG] model_path : {model_path}")
if precision in ["int4", "int8"]:
from brevitas_examples.common.generative.quantize import (
quantize_model,
)
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import (
get_model_impl,
)
@@ -645,7 +637,7 @@ class SecondVicuna70BGPU(torch.nn.Module):
dtype=self.accumulates,
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_type="asym",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,

View File

@@ -132,7 +132,7 @@ import torch_mlir
from torch_mlir.compiler_utils import run_pipeline_with_repro_report
from typing import List, Tuple
from io import BytesIO
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl

View File

@@ -1,10 +1,6 @@
import numpy as np
from PIL import Image
import torch
import os
from pathlib import Path
import torchvision
import time
from apps.stable_diffusion.src.utils.stencils import (
CannyDetector,
OpenposeDetector,
@@ -14,33 +10,6 @@ from apps.stable_diffusion.src.utils.stencils import (
stencil = {}
def save_img(img):
from apps.stable_diffusion.src.utils import (
get_generated_imgs_path,
get_generated_imgs_todays_subdir,
)
subdir = Path(
get_generated_imgs_path(), get_generated_imgs_todays_subdir()
)
os.makedirs(subdir, exist_ok=True)
if isinstance(img, Image.Image):
img.save(
os.path.join(
subdir, "controlnet_" + str(int(time.time())) + ".png"
)
)
elif isinstance(img, np.ndarray):
img = Image.fromarray(img)
img.save(os.path.join(subdir, str(int(time.time())) + ".png"))
else:
converter = torchvision.transforms.ToPILImage()
for i in img:
converter(i).save(
os.path.join(subdir, str(int(time.time())) + ".png")
)
def HWC3(x):
assert x.dtype == np.uint8
if x.ndim == 2:
@@ -192,7 +161,6 @@ def hint_canny(
detected_map = stencil["canny"](
input_image, low_threshold, high_threshold
)
save_img(detected_map)
detected_map = HWC3(detected_map)
return detected_map
@@ -208,7 +176,6 @@ def hint_openpose(
stencil["openpose"] = OpenposeDetector()
detected_map, _ = stencil["openpose"](input_image)
save_img(detected_map)
detected_map = HWC3(detected_map)
return detected_map
@@ -220,7 +187,6 @@ def hint_scribble(image: Image.Image):
detected_map = np.zeros_like(input_image, dtype=np.uint8)
detected_map[np.min(input_image, axis=2) < 127] = 255
save_img(detected_map)
return detected_map
@@ -233,6 +199,5 @@ def hint_zoedepth(image: Image.Image):
stencil["depth"] = ZoeDetector()
detected_map = stencil["depth"](input_image)
save_img(detected_map)
detected_map = HWC3(detected_map)
return detected_map

View File

@@ -1,55 +0,0 @@
from apps.stable_diffusion.web.ui.utils import (
HSLHue,
hsl_color,
get_lora_metadata,
)
# Returns HTML showing the most frequent tags used when a LoRA was trained,
# taken from the metadata of its .safetensors file.
def lora_changed(lora_file):
# tag frequency percentage that gets the maximum amount of the starting hue
TAG_COLOR_THRESHOLD = 0.55
# tag frequency percentage, above which a tag is displayed
TAG_DISPLAY_THRESHOLD = 0.65
# template for the html used to display a tag
TAG_HTML_TEMPLATE = '<span class="lora-tag" style="border: 1px solid {color};">{tag}</span>'
if lora_file == "None":
return ["<div><i>No LoRA selected</i></div>"]
elif not lora_file.lower().endswith(".safetensors"):
return [
"<div><i>Only metadata queries for .safetensors files are currently supported</i></div>"
]
else:
metadata = get_lora_metadata(lora_file)
if metadata:
frequencies = metadata["frequencies"]
return [
"".join(
[
f'<div class="lora-model">Trained against weights in: {metadata["model"]}</div>'
]
+ [
TAG_HTML_TEMPLATE.format(
color=hsl_color(
(tag[1] - TAG_COLOR_THRESHOLD)
/ (1 - TAG_COLOR_THRESHOLD),
start=HSLHue.RED,
end=HSLHue.GREEN,
),
tag=tag[0],
)
for tag in frequencies
if tag[1] > TAG_DISPLAY_THRESHOLD
],
)
]
elif metadata is None:
return [
"<div><i>This LoRA does not publish tag frequency metadata</i></div>"
]
else:
return [
"<div><i>This LoRA has empty tag frequency metadata, or we could not parse it</i></div>"
]
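
To make the two thresholds concrete: with TAG_COLOR_THRESHOLD = 0.55 and TAG_DISPLAY_THRESHOLD = 0.65, a tag present in 80% of the training images is shown and gets a yellow-green border. A short worked example using the hsl_color and HSLHue values this file imported from utils (the 0.80 frequency is illustrative):

```python
import math

RED, GREEN = 0, 120          # HSLHue values used by lora_changed
TAG_COLOR_THRESHOLD = 0.55
frequency = 0.80             # illustrative tag frequency

alpha = (frequency - TAG_COLOR_THRESHOLD) / (1 - TAG_COLOR_THRESHOLD)  # ~0.556
hue = (GREEN - RED) * max(alpha, 0) + RED                              # ~66.7
print(f"hsl({math.floor(hue)}, 80%, 35%)")                             # hsl(66, 80%, 35%)
```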

View File

@@ -246,39 +246,10 @@ footer {
background-color: var(--block-label-background-fill);
}
/* lora tag pills */
.lora-tags {
border: 1px solid var(--border-color-primary);
color: var(--block-info-text-color) !important;
padding: var(--block-padding);
}
.lora-tag {
display: inline-block;
height: 2em;
color: rgb(212 212 212) !important;
margin-right: 5pt;
margin-bottom: 5pt;
padding: 2pt 5pt;
border-radius: 5pt;
white-space: nowrap;
}
.lora-model {
margin-bottom: var(--spacing-lg);
color: var(--block-info-text-color) !important;
line-height: var(--line-sm);
}
/* output gallery tab */
.output_parameters_dataframe table.table {
/* works around a gradio bug that always shows scrollbars */
overflow: clip auto;
}
.output_parameters_dataframe tbody td {
font-size: small;
line-height: var(--line-xs);
line-height: var(--line-xs)
}
.output_icon_button {

View File

@@ -5,7 +5,6 @@ import gradio as gr
import PIL
from math import ceil
from PIL import Image
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -15,7 +14,6 @@ from apps.stable_diffusion.web.ui.utils import (
predefined_models,
cancel_sd,
)
from apps.stable_diffusion.web.ui.common_ui_events import lora_changed
from apps.stable_diffusion.src import (
args,
Image2ImagePipeline,
@@ -438,11 +436,6 @@ with gr.Blocks(title="Image-to-Image") as img2img_web:
label="HuggingFace Model ID",
lines=3,
)
with gr.Row():
lora_tags = gr.HTML(
value="<div><i>No LoRA selected</i></div>",
elem_classes="lora-tags",
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
@@ -645,10 +638,3 @@ with gr.Blocks(title="Image-to-Image") as img2img_web:
fn=cancel_sd,
cancels=[prompt_submit, neg_prompt_submit, generate_click],
)
lora_weights.change(
fn=lora_changed,
inputs=[lora_weights],
outputs=[lora_tags],
queue=True,
)

View File

@@ -4,7 +4,6 @@ import time
import sys
import gradio as gr
from PIL import Image
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -14,7 +13,6 @@ from apps.stable_diffusion.web.ui.utils import (
predefined_paint_models,
cancel_sd,
)
from apps.stable_diffusion.web.ui.common_ui_events import lora_changed
from apps.stable_diffusion.src import (
args,
InpaintPipeline,
@@ -321,11 +319,6 @@ with gr.Blocks(title="Inpainting") as inpaint_web:
label="HuggingFace Model ID",
lines=3,
)
with gr.Row():
lora_tags = gr.HTML(
value="<div><i>No LoRA selected</i></div>",
elem_classes="lora-tags",
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
@@ -525,10 +518,3 @@ with gr.Blocks(title="Inpainting") as inpaint_web:
fn=cancel_sd,
cancels=[prompt_submit, neg_prompt_submit, generate_click],
)
lora_weights.change(
fn=lora_changed,
inputs=[lora_weights],
outputs=[lora_tags],
queue=True,
)

View File

@@ -3,8 +3,9 @@ import torch
import time
import gradio as gr
from PIL import Image
from apps.stable_diffusion.web.ui.common_ui_events import lora_changed
import base64
from io import BytesIO
from fastapi.exceptions import HTTPException
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -322,11 +323,6 @@ with gr.Blocks(title="Outpainting") as outpaint_web:
label="HuggingFace Model ID",
lines=3,
)
with gr.Row():
lora_tags = gr.HTML(
value="<div><i>No LoRA selected</i></div>",
elem_classes="lora-tags",
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
@@ -550,10 +546,3 @@ with gr.Blocks(title="Outpainting") as outpaint_web:
fn=cancel_sd,
cancels=[prompt_submit, neg_prompt_submit, generate_click],
)
lora_weights.change(
fn=lora_changed,
inputs=[lora_weights],
outputs=[lora_tags],
queue=True,
)

View File

@@ -91,7 +91,7 @@ with gr.Blocks() as outputgallery_web:
value=gallery_files.value,
visible=False,
show_label=True,
columns=4,
columns=2,
)
with gr.Column(scale=4):
@@ -204,9 +204,6 @@ with gr.Blocks() as outputgallery_web:
),
]
def on_image_columns_change(columns):
return gr.Gallery.update(columns=columns)
def on_select_subdir(subdir) -> list:
# evt.value is the subdirectory name
new_images = outputgallery_filenames(subdir)
@@ -368,6 +365,53 @@ with gr.Blocks() as outputgallery_web:
gr.update(),
)
# Unfortunately as of gradio 3.34.0 gr.update against Galleries doesn't
# support things set with .style, nor the elem_classes kwarg, so we have
# to directly set things up via JavaScript if we want the client to take
# notice of our changes to the number of columns after it decides to put
# them back to the original number when we change something
def js_set_columns_in_browser(timeout_length):
return f"""
(new_cols) => {{
setTimeout(() => {{
required_style = "auto ".repeat(new_cols).trim();
gallery = document.querySelector('#outputgallery_gallery .grid-container');
if (gallery) {{
gallery.style.gridTemplateColumns = required_style
}}
}}, {timeout_length});
return []; // prevents console error from gradio
}}
"""
# --- Wire handlers up to the actions
# Many actions reset the number of columns shown in the gallery on the
# browser end, so we have to set them back to what we think they should
# be after the initial action.
#
# None of the actions on this tab trigger inference, and we want the
# user to be able to do them whilst other tabs have ongoing inference
# running. Waiting in the queue behind inference jobs would mean the UI
# can't fully respond until the inference tasks complete,
# hence queue=False on all of these.
set_gallery_columns_immediate = dict(
fn=None,
inputs=[image_columns],
# gradio blanks the UI on Chrome on Linux on gallery select if
# I don't put an output here
outputs=[dev_null],
_js=js_set_columns_in_browser(0),
queue=False,
)
# setting columns after selecting a gallery item needs a real
# timeout length for the number of columns to actually be applied.
# Not really sure why, maybe something has to finish animating?
set_gallery_columns_delayed = dict(
set_gallery_columns_immediate, _js=js_set_columns_in_browser(250)
)
# clearing images when we need to completely change what's in the
# gallery avoids current images being shown replacing piecemeal and
# prevents weirdness and errors if the user selects an image during the
@@ -379,35 +423,32 @@ with gr.Blocks() as outputgallery_web:
queue=False,
)
image_columns.change(**set_gallery_columns_immediate)
subdirectories.select(**clear_gallery).then(
on_select_subdir,
[subdirectories],
[gallery_files, gallery, logo],
queue=False,
)
).then(**set_gallery_columns_immediate)
open_subdir.click(on_open_subdir, inputs=[subdirectories], queue=False)
open_subdir.click(
on_open_subdir, inputs=[subdirectories], queue=False
).then(**set_gallery_columns_immediate)
refresh.click(**clear_gallery).then(
on_refresh,
[subdirectories],
[subdirectories, subdirectory_paths, gallery_files, gallery, logo],
queue=False,
)
image_columns.change(
fn=on_image_columns_change,
inputs=[image_columns],
outputs=[gallery],
queue=False,
)
).then(**set_gallery_columns_immediate)
gallery.select(
on_select_image,
[gallery_files],
[outputgallery_filename, image_parameters],
queue=False,
)
).then(**set_gallery_columns_delayed)
outputgallery_filename.change(
on_outputgallery_filename_change,
@@ -436,7 +477,7 @@ with gr.Blocks() as outputgallery_web:
open_subdir,
],
queue=False,
)
).then(**set_gallery_columns_immediate)
# We should have been passed a list of components on other tabs that update
# when a new image has generated on that tab, so set things up so the user
@@ -448,4 +489,4 @@ with gr.Blocks() as outputgallery_web:
inputs=[subdirectories, subdirectory_paths, component],
outputs=[gallery_files, gallery, logo],
queue=False,
)
).then(**set_gallery_columns_immediate)
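
The wiring above relies on two gradio 3.x conveniences: collecting event kwargs in a plain dict so the same browser-side column fix can be splatted into several listeners, and chaining it with .then() after handlers that reset the gallery layout. A stripped-down sketch of the same pattern outside SHARK (component names and the JavaScript body are illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    dev_null = gr.Textbox(visible=False)
    image_columns = gr.Slider(1, 8, value=4, step=1, label="Columns")
    gallery = gr.Gallery(elem_id="example_gallery")

    # one dict of kwargs, reused by every event that needs to re-apply the
    # column count after gradio puts the gallery layout back to its default
    set_columns = dict(
        fn=None,                 # no Python handler, browser-side only
        inputs=[image_columns],
        outputs=[dev_null],
        _js="(cols) => { console.log('columns:', cols); return []; }",
        queue=False,
    )

    image_columns.change(**set_columns)
    gallery.select(
        lambda cols: "", [image_columns], [dev_null], queue=False
    ).then(**set_columns)
```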

View File

@@ -5,7 +5,6 @@ import sys
import gradio as gr
from PIL import Image
from math import ceil
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -16,7 +15,6 @@ from apps.stable_diffusion.web.ui.utils import (
predefined_models,
cancel_sd,
)
from apps.stable_diffusion.web.ui.common_ui_events import lora_changed
from apps.stable_diffusion.web.utils.metadata import import_png_metadata
from apps.stable_diffusion.web.utils.common_label_calc import status_label
from apps.stable_diffusion.src import (
@@ -398,11 +396,6 @@ with gr.Blocks(title="Text-to-Image") as txt2img_web:
label="HuggingFace Model ID",
lines=3,
)
with gr.Row():
lora_tags = gr.HTML(
value="<div><i>No LoRA selected</i></div>",
elem_classes="lora-tags",
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
@@ -696,10 +689,3 @@ with gr.Blocks(title="Text-to-Image") as txt2img_web:
outputs=[scheduler],
queue=False,
)
lora_weights.change(
fn=lora_changed,
inputs=[lora_weights],
outputs=[lora_tags],
queue=True,
)

View File

@@ -3,7 +3,6 @@ import torch
import time
import gradio as gr
from PIL import Image
from apps.stable_diffusion.web.ui.utils import (
available_devices,
nodlogo_loc,
@@ -13,7 +12,6 @@ from apps.stable_diffusion.web.ui.utils import (
predefined_upscaler_models,
cancel_sd,
)
from apps.stable_diffusion.web.ui.common_ui_events import lora_changed
from apps.stable_diffusion.web.utils.common_label_calc import status_label
from apps.stable_diffusion.src import (
args,
@@ -342,11 +340,6 @@ with gr.Blocks(title="Upscaler") as upscaler_web:
label="HuggingFace Model ID",
lines=3,
)
with gr.Row():
lora_tags = gr.HTML(
value="<div><i>No LoRA selected</i></div>",
elem_classes="lora-tags",
)
with gr.Accordion(label="Advanced Options", open=False):
with gr.Row():
scheduler = gr.Dropdown(
@@ -544,10 +537,3 @@ with gr.Blocks(title="Upscaler") as upscaler_web:
fn=cancel_sd,
cancels=[prompt_submit, neg_prompt_submit, generate_click],
)
lora_weights.change(
fn=lora_changed,
inputs=[lora_weights],
outputs=[lora_tags],
queue=True,
)

View File

@@ -1,16 +1,10 @@
import os
import sys
from apps.stable_diffusion.src import get_available_devices
import glob
import math
import json
import safetensors
from pathlib import Path
from apps.stable_diffusion.src import args
from dataclasses import dataclass
from enum import IntEnum
from apps.stable_diffusion.src import get_available_devices
import apps.stable_diffusion.web.utils.global_obj as global_obj
from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
SD_STATE_CANCEL,
@@ -34,15 +28,6 @@ class Config:
ondemand: str # should this be expecting a bool instead?
class HSLHue(IntEnum):
RED = 0
YELLOW = 60
GREEN = 120
CYAN = 180
BLUE = 240
MAGENTA = 300
custom_model_filetypes = (
"*.ckpt",
"*.safetensors",
@@ -176,69 +161,6 @@ def get_custom_vae_or_lora_weights(weights, hf_id, model):
return use_weight
def hsl_color(alpha: float, start, end):
b = (end - start) * (alpha if alpha > 0 else 0)
result = b + start
# Return a CSS HSL string
return f"hsl({math.floor(result)}, 80%, 35%)"
def get_lora_metadata(lora_filename):
# get the metadata from the file
filename = get_custom_model_pathfile(lora_filename, "lora")
with safetensors.safe_open(filename, framework="pt", device="cpu") as f:
metadata = f.metadata()
# guard clause for if there isn't any metadata
if not metadata:
return None
# metadata is a dictionary of strings, the values of the keys we're
# interested in are actually json, and need to be loaded as such
tag_frequencies = json.loads(metadata.get("ss_tag_frequency", str("{}")))
dataset_dirs = json.loads(metadata.get("ss_dataset_dirs", str("{}")))
tag_dirs = [dir for dir in tag_frequencies.keys()]
# gather the tag frequency information for all the datasets trained
all_frequencies = {}
for dataset in tag_dirs:
frequencies = sorted(
[entry for entry in tag_frequencies[dataset].items()],
reverse=True,
key=lambda x: x[1],
)
# get a figure for the total number of images processed for this dataset
# either the number actually listed in its dataset_dir entry or
# the highest frequency's number if that doesn't exist
img_count = dataset_dirs.get(dir, {}).get(
"img_count", frequencies[0][1]
)
# add the dataset frequencies to the overall frequencies replacing the
# frequency counts on the tags with a percentage/ratio
all_frequencies.update(
[(entry[0], entry[1] / img_count) for entry in frequencies]
)
trained_model_id = " ".join(
[
metadata.get("ss_sd_model_hash", ""),
metadata.get("ss_sd_model_name", ""),
metadata.get("ss_base_model_version", ""),
]
).strip()
# return the topmost <count> of all frequencies in all datasets
return {
"model": trained_model_id,
"frequencies": sorted(
all_frequencies.items(), reverse=True, key=lambda x: x[1]
),
}
def cancel_sd():
# Try catch it, as gc can delete global_obj.sd_obj while switching model
try:

View File

@@ -78,10 +78,7 @@ def test_loop(
os.mkdir("./test_images/golden")
get_inpaint_inputs()
hf_model_names = model_config_dicts[0].values()
tuned_options = [
"--no-use_tuned",
"--use_tuned",
]
tuned_options = ["--no-use_tuned", "--use_tuned"]
import_options = ["--import_mlir", "--no-import_mlir"]
prompt_text = "--prompt=cyberpunk forest by Salvador Dali"
inpaint_prompt_text = "--prompt=Face of a yellow cat, high resolution, sitting on a park bench"
@@ -115,8 +112,6 @@ def test_loop(
and use_tune == tuned_options[1]
):
continue
elif use_tune == tuned_options[1]:
continue
command = (
[
executable, # executable is the python from the venv used to run this

View File

@@ -22,33 +22,33 @@ This does mean however, that on a brand new fresh install of SHARK that has not
* Make sure you have suitable drivers for your graphics card installed. See the prerequisites section of the [README](https://github.com/nod-ai/SHARK#readme).
* Download the latest SHARK studio .exe from [here](https://github.com/nod-ai/SHARK/releases) or follow the instructions in the [README](https://github.com/nod-ai/SHARK#readme) for an advanced, Linux or Mac install.
* Run SHARK from terminal/PowerShell with the `--api` flag. Since koboldcpp also expects both CORS support and the image generator to be running on port `7860` rather than SHARK default of `8080`, also include both the `--api_accept_origin` flag with a suitable origin (use `="*"` to enable all origins) and `--server_port=7860` on the command line. (See the if you want to run SHARK on a different port)
* Run SHARK from terminal/PowerShell with the `--api` flag. Since koboldcpp also expects both CORS support and the image generator to be running on port `7860` rather than SHARK default of `8080`, also include both the `--api_cors_origin` flag with a suitable origin (use `="*"` to enable all origins) and `--server_port=7860` on the command line. (See the if you want to run SHARK on a different port)
```powershell
## Run the .exe in API mode, with CORS support, on the A1111 endpoint port:
.\node_ai_shark_studio_<date>_<ver>.exe --api --api_accept_origin="*" --server_port=7860
.\node_ai_shark_studio_<date>_<ver>.exe --api --api_cors_origin="*" --server_port=7860
## Run from the base directory of a source clone of SHARK on Windows:
.\setup_venv.ps1
python .\apps\stable_diffusion\web\index.py --api --api_accept_origin="*" --server_port=7860
python .\apps\stable_diffusion\web\index.py --api --api_cors_origin="*" --server_port=7860
## Run from the base directory of a source clone of SHARK on Linux:
./setup_venv.sh
source shark.venv/bin/activate
python ./apps/stable_diffusion/web/index.py --api --api_accept_origin="*" --server_port=7860
python ./apps/stable_diffusion/web/index.py --api --api_cors_origin="*" --server_port=7860
## An example giving improved performance on AMD cards using vulkan, that runs on the same port as A1111
.\node_ai_shark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860
.\node_ai_shark_studio_20320901_2525.exe --api --api_cors_origin="*" --device_allocator="caching" --server_port=7860
## Since the api respects most applicable SHARK command line arguments for options not specified,
## or currently unimplemented by API, there might be some you want to set, as listed in `--help`
.\node_ai_shark_studio_20320901_2525.exe --help
## For instance, the example above, but with a custom VAE specified
.\node_ai_shark_studio_20320901_2525.exe --api --api_accept_origin="*" --device_allocator="caching" --server_port=7860 --custom_vae="clearvae_v23.safetensors"
.\node_ai_shark_studio_20320901_2525.exe --api --api_cors_origin="*" --device_allocator="caching" --server_port=7860 --custom_vae="clearvae_v23.safetensors"
## An example with multiple specific CORS origins
python apps/stable_diffusion/web/index.py --api --api_accept_origin="koboldcpp.example.com:7001" --api_accept_origin="koboldcpp.example.com:7002" --server_port=7860
python apps/stable_diffusion/web/index.py --api --api_cors_origin="koboldcpp.example.com:7001" --api_cors_origin="koboldcpp.example.com:7002" --server_port=7860
```
SHARK should start in server mode, and you should see something like this:

View File

@@ -111,7 +111,7 @@ else
fi
if [[ -z "${NO_BACKEND}" ]]; then
echo "Installing ${RUNTIME}..."
$PYTHON -m pip install --pre --upgrade --no-index --find-links ${RUNTIME} iree-compiler iree-runtime
$PYTHON -m pip install --pre --upgrade --find-links ${RUNTIME} iree-compiler iree-runtime
else
echo "Not installing a backend, please make sure to add your backend to PYTHONPATH"
fi

View File

@@ -7,7 +7,7 @@ import torch_mlir
from torch_mlir.compiler_utils import run_pipeline_with_repro_report
from typing import List, Tuple
from io import BytesIO
from brevitas_examples.common.generative.quantize import quantize_model
from brevitas_examples.llm.llm_quant.quantize import quantize_model
from brevitas_examples.llm.llm_quant.run_utils import get_model_impl
@@ -84,7 +84,7 @@ def compile_int_precision(
weight_quant_type="asym",
weight_bit_width=weight_bit_width,
weight_param_method="stats",
weight_scale_precision="float_scale",
weight_scale_precision="float",
weight_quant_granularity="per_group",
weight_group_size=weight_group_size,
quantize_weight_zero_point=False,