[SD-CLI] Add support for .safetensors + Use diffusers pipeline to load SD

-- This commit uses `load_pipeline_from_original_stable_diffusion_ckpt`
   as exposed by [Diffusers PR](https://github.com/huggingface/diffusers/pull/2019).
-- It also adds support for end users to use `.safetensors` along
   with `.ckpt` files.

Signed-off-by: Abhishek Varma <abhishek@nod-labs.com>
This commit is contained in:
Abhishek Varma
2023-01-30 12:09:16 +00:00
committed by Abhishek Varma
parent c3c701e654
commit fcd62513cf
3 changed files with 35 additions and 23 deletions

View File

@@ -17,7 +17,8 @@ use the flag `--hf_model_id=` to specify the repo-id of the model to be used.
python .\shark\examples\shark_inference\stable_diffusion\main.py --hf_model_id="Linaqruf/anything-v3.0" --max_length=77 --prompt="1girl, brown hair, green eyes, colorful, autumn, cumulonimbus clouds, lighting, blue sky, falling leaves, garden" --no-use_tuned
```
## Run a custom model using a HuggingFace `.ckpt` file:
## Run a custom model using a `.ckpt` / `.safetensors` checkpoint file:
* Ensure you don't have any `.yaml` file at the root directory of SHARK - best would be to ensure you're on the latest `main` branch and use `--clear_all` the first time you're running the command for inference.
* Install `pytorch_lightning` by running :-
```shell
pip install pytorch_lightning
@@ -30,9 +31,13 @@ NOTE: This is needed to process [ckpt file of runwayml/stable-diffusion-v1-5](ht
python3.10 main.py --precision=fp16 --device=vulkan --prompt="tajmahal, oil on canvas, sunflowers, 4k, uhd" --max_length=64 --import_mlir --ckpt_loc="/path/to/.ckpt/file" --no-use_tuned
```
* We use a combination of 2 flags to make this feature work : `import_mlir` and `ckpt_loc`.
* In case `ckpt_loc` is NOT specified then a [default](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) HuggingFace repo-id is run via `hf_model_id`. So, you can use `import_mlir` and `hf_model_id` to run HuggingFace's StableDiffusion variants.
* In case `ckpt_loc` is NOT specified then a [default](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) HuggingFace repo-id is run via `hf_model_id`. So, there are two ways to use `import_mlir`:
- With `hf_model_id` to run HuggingFace's StableDiffusion variants.
- With `ckpt_loc` to run a StableDiffusion variant with a `.ckpt` or `.safetensors` checkpoint file.
* Use custom model `.ckpt` files from [HuggingFace-StableDiffusion](https://huggingface.co/models?other=stable-diffusion) to generate images.
* You may also try out [.safetensors file of Protogen x3.4 of civitai.com](https://civitai.com/models/3666/protogen-x34-photorealism-official-release) and provide the `.safetensors` path to `ckpt_loc` flag.
* NOTE: Ensure that the `.ckpt` or `.safetensors` file name is part of the path passed to the `ckpt_loc` flag. Eg: `--ckpt_loc="/path/to/checkpoint/file/name_of_checkpoint.ckpt"` OR `--ckpt_loc="/path/to/checkpoint/file/name_of_checkpoint.safetensors"`. Also ensure that you're using the `--no-use_tuned` flag in your run command.
## Running the model for a `batch_size` and for a set of `runs`:

View File

@@ -37,6 +37,12 @@ if args.clear_all:
for vmfb in vmfbs:
if os.path.exists(vmfb):
os.remove(vmfb)
# Temporary workaround of deleting yaml files to incorporate diffusers' pipeline.
# TODO: Remove this once we have better weight-update logic.
inference_yaml = ["v2-inference-v.yaml", "v1-inference.yaml"]
for yaml in inference_yaml:
if os.path.exists(yaml):
os.remove(yaml)
home = os.path.expanduser("~")
if os.name == "nt": # Windows
appdata = os.getenv("LOCALAPPDATA")
@@ -114,7 +120,10 @@ if __name__ == "__main__":
unet = get_unet()
vae = get_vae()
else:
if ".ckpt" in args.ckpt_loc:
if args.ckpt_loc != "":
assert args.ckpt_loc.lower().endswith(
(".ckpt", ".safetensors")
), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
preprocessCKPT()
mlir_import = SharkifyStableDiffusionModel(
args.hf_model_id,

View File

@@ -1,6 +1,5 @@
import os
import gc
import torch
from shark.shark_inference import SharkInference
from stable_args import args
from shark.shark_importer import import_with_fx
@@ -12,6 +11,9 @@ from shark.iree_utils.gpu_utils import get_cuda_sm_cc
from resources import opt_flags
from sd_annotation import sd_model_annotation
import sys
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
load_pipeline_from_original_stable_diffusion_ckpt,
)
def get_vmfb_path_name(model_name):
@@ -359,25 +361,21 @@ def preprocessCKPT():
diffusers_path,
)
path_to_diffusers = complete_path_to_diffusers.as_posix()
# TODO: Use the SD to Diffusers CKPT pipeline once it's included in the release.
sd_to_diffusers = os.path.join(os.getcwd(), "sd_to_diffusers.py")
if not os.path.isfile(sd_to_diffusers):
url = "https://raw.githubusercontent.com/huggingface/diffusers/8a3f0c1f7178f4a3d5a5b21ae8c2906f473e240d/scripts/convert_original_stable_diffusion_to_diffusers.py"
import requests
req = requests.get(url)
open(sd_to_diffusers, "wb").write(req.content)
print("Downloaded SD to Diffusers converter")
else:
print("SD to Diffusers converter already exists")
os.system(
"python "
+ sd_to_diffusers
+ " --checkpoint_path="
+ args.ckpt_loc
+ " --dump_path="
+ path_to_diffusers
from_safetensors = (
True if args.ckpt_loc.lower().endswith(".safetensors") else False
)
# EMA weights usually yield higher quality images for inference but non-EMA weights have
# been yielding better results in our case.
# TODO: Add an option `--ema` (`--no-ema`) for users to specify if they want to go for EMA
# weight extraction or not.
extract_ema = False
print("Loading pipeline from original stable diffusion checkpoint")
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
checkpoint_path=args.ckpt_loc,
extract_ema=extract_ema,
from_safetensors=from_safetensors,
)
pipe.save_pretrained(path_to_diffusers)
print("Loading complete")
args.ckpt_loc = path_to_diffusers
print("Custom model path is : ", args.ckpt_loc)