[SD] Add Stable Diffusion text2image REST API (#1265)

Signed-off-by: Gaurav Shukla <gaurav@nod-labs.com>
2026-04-03 03:00:17 -04:00 · 2023-04-04 00:32:24 +05:30
parent 2191fc8952
commit f49d41a807
6 changed files with 650 additions and 607 deletions
--- a/apps/stable_diffusion/scripts/init.py
+++ b/apps/stable_diffusion/scripts/init.py
@@ -1,4 +1,3 @@
-from apps.stable_diffusion.scripts.txt2img import txt2img_inf
 from apps.stable_diffusion.scripts.img2img import img2img_inf
 from apps.stable_diffusion.scripts.inpaint import inpaint_inf
 from apps.stable_diffusion.scripts.outpaint import outpaint_inf
--- a/apps/stable_diffusion/scripts/txt2img.py
+++ b/apps/stable_diffusion/scripts/txt2img.py
@@ -10,174 +10,6 @@ from apps.stable_diffusion.src import (
    clear_all,
    save_output_img,
 )
-from apps.stable_diffusion.src.utils import get_generation_text_info
-
-
-# set initial values of iree_vulkan_target_triple, use_tuned and import_mlir.
-init_iree_vulkan_target_triple = args.iree_vulkan_target_triple
-init_use_tuned = args.use_tuned
-init_import_mlir = args.import_mlir
-
-
-# Exposed to UI.
-def txt2img_inf(
-    prompt: str,
-    negative_prompt: str,
-    height: int,
-    width: int,
-    steps: int,
-    guidance_scale: float,
-    seed: int,
-    batch_count: int,
-    batch_size: int,
-    scheduler: str,
-    custom_model: str,
-    hf_model_id: str,
-    precision: str,
-    device: str,
-    max_length: int,
-    save_metadata_to_json: bool,
-    save_metadata_to_png: bool,
-    lora_weights: str,
-    lora_hf_id: str,
-):
-    from apps.stable_diffusion.web.ui.utils import (
-        get_custom_model_pathfile,
-        get_custom_vae_or_lora_weights,
-        Config,
-    )
-    import apps.stable_diffusion.web.utils.global_obj as global_obj
-    from apps.stable_diffusion.src.pipelines.pipeline_shark_stable_diffusion_utils import (
-        SD_STATE_CANCEL,
-    )
-
-    args.prompts = [prompt]
-    args.negative_prompts = [negative_prompt]
-    args.guidance_scale = guidance_scale
-    args.steps = steps
-    args.scheduler = scheduler
-
-    # set ckpt_loc and hf_model_id.
-    args.ckpt_loc = ""
-    args.hf_model_id = ""
-    if custom_model == "None":
-        if not hf_model_id:
-            return (
-                None,
-                "Please provide either custom model or huggingface model ID, both must not be empty",
-            )
-        args.hf_model_id = hf_model_id
-    elif ".ckpt" in custom_model or ".safetensors" in custom_model:
-        args.ckpt_loc = get_custom_model_pathfile(custom_model)
-    else:
-        args.hf_model_id = custom_model
-
-    args.save_metadata_to_json = save_metadata_to_json
-    args.write_metadata_to_png = save_metadata_to_png
-
-    args.use_lora = get_custom_vae_or_lora_weights(
-        lora_weights, lora_hf_id, "lora"
-    )
-
-    dtype = torch.float32 if precision == "fp32" else torch.half
-    cpu_scheduling = not scheduler.startswith("Shark")
-    new_config_obj = Config(
-        "txt2img",
-        args.hf_model_id,
-        args.ckpt_loc,
-        precision,
-        batch_size,
-        max_length,
-        height,
-        width,
-        device,
-        use_lora=args.use_lora,
-        use_stencil=None,
-    )
-    if (
-        not global_obj.get_sd_obj()
-        or global_obj.get_cfg_obj() != new_config_obj
-    ):
-        global_obj.clear_cache()
-        global_obj.set_cfg_obj(new_config_obj)
-        args.precision = precision
-        args.batch_count = batch_count
-        args.batch_size = batch_size
-        args.max_length = max_length
-        args.height = height
-        args.width = width
-        args.device = device.split("=>", 1)[1].strip()
-        args.iree_vulkan_target_triple = init_iree_vulkan_target_triple
-        args.use_tuned = init_use_tuned
-        args.import_mlir = init_import_mlir
-        args.img_path = None
-        set_init_device_flags()
-        model_id = (
-            args.hf_model_id
-            if args.hf_model_id
-            else "stabilityai/stable-diffusion-2-1-base"
-        )
-        global_obj.set_schedulers(get_schedulers(model_id))
-        scheduler_obj = global_obj.get_scheduler(scheduler)
-        global_obj.set_sd_obj(
-            Text2ImagePipeline.from_pretrained(
-                scheduler=scheduler_obj,
-                import_mlir=args.import_mlir,
-                model_id=args.hf_model_id,
-                ckpt_loc=args.ckpt_loc,
-                precision=args.precision,
-                max_length=args.max_length,
-                batch_size=args.batch_size,
-                height=args.height,
-                width=args.width,
-                use_base_vae=args.use_base_vae,
-                use_tuned=args.use_tuned,
-                custom_vae=args.custom_vae,
-                low_cpu_mem_usage=args.low_cpu_mem_usage,
-                debug=args.import_debug if args.import_mlir else False,
-                use_lora=args.use_lora,
-            )
-        )
-
-    global_obj.set_sd_scheduler(scheduler)
-
-    start_time = time.time()
-    global_obj.get_sd_obj().log = ""
-    generated_imgs = []
-    seeds = []
-    img_seed = utils.sanitize_seed(seed)
-    text_output = ""
-    for i in range(batch_count):
-        if i > 0:
-            img_seed = utils.sanitize_seed(-1)
-        out_imgs = global_obj.get_sd_obj().generate_images(
-            prompt,
-            negative_prompt,
-            batch_size,
-            height,
-            width,
-            steps,
-            guidance_scale,
-            img_seed,
-            args.max_length,
-            dtype,
-            args.use_base_vae,
-            cpu_scheduling,
-        )
-        seeds.append(img_seed)
-        total_time = time.time() - start_time
-        text_output = get_generation_text_info(seeds, device)
-        text_output += "\n" + global_obj.get_sd_obj().log
-        text_output += f"\nTotal image(s) generation time: {total_time:.4f}sec"
-
-        if global_obj.get_sd_status() == SD_STATE_CANCEL:
-            break
-        else:
-            save_output_img(out_imgs[0], img_seed)
-            generated_imgs.extend(out_imgs)
-            yield generated_imgs, text_output
-
-    return generated_imgs, text_output


 def main():