From 83a3cc9eb480a0cedf4b7ff73a8427880af0171a Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 00:30:48 -0400 Subject: [PATCH 01/15] start support for 1.5 inpainting model, not complete --- ldm/generate.py | 11 ++++- ldm/invoke/generator/base.py | 19 ++++---- ldm/invoke/model_cache.py | 2 + ldm/models/autoencoder.py | 2 +- ldm/models/diffusion/ddim.py | 14 +++++- ldm/models/diffusion/ddpm.py | 79 +++++++++++++++++++++++++++++++- ldm/models/diffusion/ksampler.py | 2 + ldm/models/diffusion/sampler.py | 30 +++++++++++- 8 files changed, 145 insertions(+), 14 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 8ffb7110a3..a834844e7e 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -404,7 +404,10 @@ class Generate: ) # TODO: Hacky selection of operation to perform. Needs to be refactored. - if (init_image is not None) and (mask_image is not None): + if self.sampler.conditioning_key() in ('hybrid','concat'): + print(f'** Inpainting model detected. Will try it! **') + generator = self._make_omnibus() + elif (init_image is not None) and (mask_image is not None): generator = self._make_inpaint() elif (embiggen != None or embiggen_tiles != None): generator = self._make_embiggen() @@ -690,6 +693,12 @@ class Generate: self.generators['inpaint'] = Inpaint(self.model, self.precision) return self.generators['inpaint'] + def _make_omnibus(self): + if not self.generators.get('omnibus'): + from ldm.invoke.generator.omnibus import Omnibus + self.generators['omnibus'] = Omnibus(self.model, self.precision) + return self.generators['omnibus'] + def load_model(self): ''' preload model identified in self.model_name diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 89476cd216..c70924449b 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -40,12 +40,13 @@ class Generator(): self.variation_amount = variation_amount self.with_variations = with_variations - def generate(self,prompt,init_image,width,height,iterations=1,seed=None, + def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None, image_callback=None, step_callback=None, threshold=0.0, perlin=0.0, **kwargs): scope = choose_autocast(self.precision) - make_image = self.get_make_image( + make_image = self.get_make_image( prompt, + sampler = sampler, init_image = init_image, width = width, height = height, @@ -54,13 +55,16 @@ class Generator(): perlin = perlin, **kwargs ) - results = [] seed = seed if seed is not None else self.new_seed() first_seed = seed seed, initial_noise = self.generate_initial_noise(seed, width, height) - with scope(self.model.device.type), self.model.ema_scope(): + + scope = (scope(self.model.device.type), self.model.ema_scope()) if sampler.conditioning_key() not in ('hybrid','concat') else scope(self.model.device.type) + + with scope: for n in trange(iterations, desc='Generating'): + print('DEBUG: in iterations loop() called') x_T = None if self.variation_amount > 0: seed_everything(seed) @@ -75,7 +79,6 @@ class Generator(): x_T = self.get_noise(width,height) except: pass - image = make_image(x_T) results.append([image, seed]) if image_callback is not None: @@ -83,10 +86,10 @@ class Generator(): seed = self.new_seed() return results - def sample_to_image(self,samples): + def sample_to_image(self,samples)->Image.Image: """ - Returns a function returning an image derived from the prompt and the initial image - Return value depends on the seed at the time you call it + Given samples returned from a sampler, converts + it into 
a PIL Image """ x_samples = self.model.decode_first_stage(samples) x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index f580dfba25..f972a9eb16 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -13,6 +13,7 @@ import gc import hashlib import psutil import transformers +import traceback import os from sys import getrefcount from omegaconf import OmegaConf @@ -73,6 +74,7 @@ class ModelCache(object): self.models[model_name]['hash'] = hash except Exception as e: print(f'** model {model_name} could not be loaded: {str(e)}') + print(traceback.format_exc()) print(f'** restoring {self.current_model}') self.get_model(self.current_model) return None diff --git a/ldm/models/autoencoder.py b/ldm/models/autoencoder.py index 359f5688d1..3db7b6fd73 100644 --- a/ldm/models/autoencoder.py +++ b/ldm/models/autoencoder.py @@ -66,7 +66,7 @@ class VQModel(pl.LightningModule): self.use_ema = use_ema if self.use_ema: self.model_ema = LitEma(self) - print(f'Keeping EMAs of {len(list(self.model_ema.buffers()))}.') + print(f'>> Keeping EMAs of {len(list(self.model_ema.buffers()))}.') if ckpt_path is not None: self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py index f5dada8627..bfb78c1397 100644 --- a/ldm/models/diffusion/ddim.py +++ b/ldm/models/diffusion/ddim.py @@ -41,7 +41,19 @@ class DDIMSampler(Sampler): else: x_in = torch.cat([x] * 2) t_in = torch.cat([t] * 2) - c_in = torch.cat([unconditional_conditioning, c]) + if isinstance(c, dict): + assert isinstance(unconditional_conditioning, dict) + c_in = dict() + for k in c: + if isinstance(c[k], list): + c_in[k] = [ + torch.cat([unconditional_conditioning[k][i], c[k][i]]) + for i in range(len(c[k])) + ] + else: + c_in[k] = torch.cat([unconditional_conditioning[k], c[k]]) + else: + c_in = torch.cat([unconditional_conditioning, c]) e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2) e_t = e_t_uncond + unconditional_guidance_scale * ( e_t - e_t_uncond diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py index 4b62b5e393..fd3b6688f3 100644 --- a/ldm/models/diffusion/ddpm.py +++ b/ldm/models/diffusion/ddpm.py @@ -19,6 +19,7 @@ from functools import partial from tqdm import tqdm from torchvision.utils import make_grid from pytorch_lightning.utilities.distributed import rank_zero_only +from omegaconf import ListConfig import urllib from ldm.util import ( @@ -120,7 +121,7 @@ class DDPM(pl.LightningModule): self.use_ema = use_ema if self.use_ema: self.model_ema = LitEma(self.model) - print(f'Keeping EMAs of {len(list(self.model_ema.buffers()))}.') + print(f' | Keeping EMAs of {len(list(self.model_ema.buffers()))}.') self.use_scheduler = scheduler_config is not None if self.use_scheduler: @@ -1883,6 +1884,24 @@ class LatentDiffusion(DDPM): return samples, intermediates + @torch.no_grad() + def get_unconditional_conditioning(self, batch_size, null_label=None): + if null_label is not None: + xc = null_label + if isinstance(xc, ListConfig): + xc = list(xc) + if isinstance(xc, dict) or isinstance(xc, list): + c = self.get_learned_conditioning(xc) + else: + if hasattr(xc, "to"): + xc = xc.to(self.device) + c = self.get_learned_conditioning(xc) + else: + # todo: get null label from cond_stage_model + raise NotImplementedError() + c = repeat(c, "1 ... 
-> b ...", b=batch_size).to(self.device) + return c + @torch.no_grad() def log_images( self, @@ -2138,6 +2157,7 @@ class DiffusionWrapper(pl.LightningModule): ] def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): + print(f'DEBUG (ddpm) c_concat = {c_concat}') if self.conditioning_key is None: out = self.diffusion_model(x, t) elif self.conditioning_key == 'concat': @@ -2147,8 +2167,8 @@ class DiffusionWrapper(pl.LightningModule): cc = torch.cat(c_crossattn, 1) out = self.diffusion_model(x, t, context=cc) elif self.conditioning_key == 'hybrid': - xc = torch.cat([x] + c_concat, dim=1) cc = torch.cat(c_crossattn, 1) + xc = torch.cat([x] + c_concat, dim=1) out = self.diffusion_model(xc, t, context=cc) elif self.conditioning_key == 'adm': cc = c_crossattn[0] @@ -2187,3 +2207,58 @@ class Layout2ImgDiffusion(LatentDiffusion): cond_img = torch.stack(bbox_imgs, dim=0) logs['bbox_image'] = cond_img return logs + +class LatentInpaintDiffusion(LatentDiffusion): + def __init__( + self, + concat_keys=("mask", "masked_image"), + masked_image_key="masked_image", + finetune_keys=None, + *args, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.masked_image_key = masked_image_key + assert self.masked_image_key in concat_keys + self.concat_keys = concat_keys + + + @torch.no_grad() + def get_input( + self, batch, k, cond_key=None, bs=None, return_first_stage_outputs=False + ): + # note: restricted to non-trainable encoders currently + assert ( + not self.cond_stage_trainable + ), "trainable cond stages not yet supported for inpainting" + z, c, x, xrec, xc = super().get_input( + batch, + self.first_stage_key, + return_first_stage_outputs=True, + force_c_encode=True, + return_original_cond=True, + bs=bs, + ) + + assert exists(self.concat_keys) + c_cat = list() + for ck in self.concat_keys: + cc = ( + rearrange(batch[ck], "b h w c -> b c h w") + .to(memory_format=torch.contiguous_format) + .float() + ) + if bs is not None: + cc = cc[:bs] + cc = cc.to(self.device) + bchw = z.shape + if ck != self.masked_image_key: + cc = torch.nn.functional.interpolate(cc, size=bchw[-2:]) + else: + cc = self.get_first_stage_encoding(self.encode_first_stage(cc)) + c_cat.append(cc) + c_cat = torch.cat(c_cat, dim=1) + all_conds = {"c_concat": [c_cat], "c_crossattn": [c]} + if return_first_stage_outputs: + return z, all_conds, x, xrec, xc + return z, all_conds diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index ac0615b30c..68c26b5d6c 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -281,3 +281,5 @@ class KSampler(Sampler): ''' return self.model.inner_model.q_sample(x0,ts) + def conditioning_key(self)->str: + return self.model.inner_model.model.conditioning_key diff --git a/ldm/models/diffusion/sampler.py b/ldm/models/diffusion/sampler.py index ff705513f8..9e57bc25d4 100644 --- a/ldm/models/diffusion/sampler.py +++ b/ldm/models/diffusion/sampler.py @@ -158,6 +158,18 @@ class Sampler(object): **kwargs, ): + if conditioning is not None: + if isinstance(conditioning, dict): + ctmp = conditioning[list(conditioning.keys())[0]] + while isinstance(ctmp, list): + ctmp = ctmp[0] + cbs = ctmp.shape[0] + if cbs != batch_size: + print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}") + else: + if conditioning.shape[0] != batch_size: + print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}") + # check to see if make_schedule() has run, and if not, run it if self.ddim_timesteps is None: 
self.make_schedule( @@ -193,7 +205,7 @@ class Sampler(object): ) return samples, intermediates - #torch.no_grad() + @torch.no_grad() def do_sampling( self, cond, @@ -307,6 +319,19 @@ class Sampler(object): mask = None, ): + print(f'DEBUG(sampler): cond = {cond}') + if cond is not None: + if isinstance(cond, dict): + ctmp = cond[list(cond.keys())[0]] + while isinstance(ctmp, list): + ctmp = ctmp[0] + cbs = ctmp.shape[0] + if cbs != batch_size: + print(f"Warning: Got {cbs} conds but batch-size is {batch_size}") + else: + if cond.shape[0] != batch_size: + print(f"Warning: Got {cond.shape[0]} conditionings but batch-size is {batch_size}") + timesteps = ( np.arange(self.ddpm_num_timesteps) if use_original_steps @@ -411,3 +436,6 @@ class Sampler(object): return self.model.inner_model.q_sample(x0,ts) ''' return self.model.q_sample(x0,ts) + + def conditioning_key(self)->str: + return self.model.model.conditioning_key From be8a992b85f9c10eac380161849b9e4e69978908 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 00:38:24 -0400 Subject: [PATCH 02/15] add missing file --- ldm/invoke/generator/omnibus.py | 144 ++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 ldm/invoke/generator/omnibus.py diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py new file mode 100644 index 0000000000..43192cd152 --- /dev/null +++ b/ldm/invoke/generator/omnibus.py @@ -0,0 +1,144 @@ +"""omnibus module to be used with the runwayml 9-channel custom inpainting model""" + +import torch +import numpy as np +from PIL import Image +from ldm.invoke.generator.base import downsampling +from ldm.invoke.generator.img2img import Img2Img +from ldm.invoke.generator.txt2img import Txt2Img + +class Omnibus(Img2Img,Txt2Img): + def __init__(self, model, precision): + super().__init__(model, precision) + + def get_make_image( + self, + prompt, + sampler, + steps, + cfg_scale, + ddim_eta, + conditioning, + width, + height, + init_image = None, + mask_image = None, + strength = None, + step_callback=None, + threshold=0.0, + perlin=0.0, + **kwargs): + """ + Returns a function returning an image derived from the prompt and the initial image + Return value depends on the seed at the time you call it. 
+ """ + self.perlin = perlin + + sampler.make_schedule( + ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False + ) + + if isinstance(init_image, Image.Image): + init_image = self._image_to_tensor(init_image) + + if isinstance(mask_image, Image.Image): + mask_image = self._image_to_tensor(mask_image,normalize=False) + + t_enc = steps + + if init_image is not None and mask_image is not None: # inpainting + masked_image = init_image * (1 - mask_image) # masked image is the image masked by mask - masked regions zero + + elif init_image is not None: # img2img + scope = choose_autocast(self.precision) + with scope(self.model.device.type): + self.init_latent = self.model.get_first_stage_encoding( + self.model.encode_first_stage(init_image) + ) # move to latent space + # create a completely black mask (1s) + mask_image = torch.ones(init_image.shape[0], 3, init_image.width, init_image.height, device=self.model.device) + # and the masked image is just a copy of the original + masked_image = init_image + t_enc = int(strength * steps) + + else: # txt2img + mask_image = torch.zeros(init_image.shape[0], 3, init_image.width, init_image.height, device=self.model.device) + masked_image = mask_image + + model = self.model + + def make_image(x_T): + with torch.no_grad(): + with torch.autocast("cuda"): + + batch = self.make_batch_sd( + init_image, + mask_image, + masked_image, + prompt=prompt, + device=model.device, + num_samples=1 + ) + + c = model.cond_stage_model.encode(batch["txt"]) + + c_cat = list() + for ck in model.concat_keys: + cc = batch[ck].float() + if ck != model.masked_image_key: + bchw = [num_samples, 4, h//8, w//8] + cc = torch.nn.functional.interpolate(cc, size=bchw[-2:]) + else: + cc = model.get_first_stage_encoding(model.encode_first_stage(cc)) + c_cat.append(cc) + c_cat = torch.cat(c_cat, dim=1) + + # cond + cond={"c_concat": [c_cat], "c_crossattn": [c]} + + # uncond cond + uc_cross = model.get_unconditional_conditioning(num_samples, "") + uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} + shape = [model.channels, h//8, w//8] + + samples, = sampler.sample( + batch_size = 1, + S = t_enc, + x_T = x_T, + conditioning = cond, + shape = shape, + verbose = False, + unconditional_guidance_scale = cfg_scale, + unconditional_conditioning = uc_full, + eta = 1.0, + img_callback = step_callback, + threshold = threshold, + ) + if self.free_gpu_mem: + self.model.model.to("cpu") + return self.sample_to_image(samples) + + return make_image + + def make_batch_sd( + image, + mask, + masked_image, + prompt, + device, + num_samples=1): + batch = { + "image": repeat(image.to(device=device), "1 ... -> n ...", n=num_samples), + "txt": num_samples * [prompt], + "mask": repeat(mask.to(device=device), "1 ... -> n ...", n=num_samples), + "masked_image": repeat(masked_image.to(device=device), "1 ... 
-> n ...", n=num_samples), + } + return batch + + def get_noise(self, width:int, height:int): + if self.init_latent: + print('DEBUG: returning Img2Img.getnoise()') + return super(Img2Img,self).get_noise(width,height) + else: + print('DEBUG: returning Txt2Img.getnoise()') + return super(Txt2Img,self).get_noise(width,height) From a2e53892ec97fa4057e2a2a022ffd92c372bad17 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 00:47:13 -0400 Subject: [PATCH 03/15] fixed synax errors; now channel mismatch issue --- ldm/invoke/generator/omnibus.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index 43192cd152..b6ddbfdb03 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -2,6 +2,7 @@ import torch import numpy as np +from einops import repeat from PIL import Image from ldm.invoke.generator.base import downsampling from ldm.invoke.generator.img2img import Img2Img @@ -33,6 +34,7 @@ class Omnibus(Img2Img,Txt2Img): Return value depends on the seed at the time you call it. """ self.perlin = perlin + num_samples = 1 sampler.make_schedule( ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False @@ -77,7 +79,7 @@ class Omnibus(Img2Img,Txt2Img): masked_image, prompt=prompt, device=model.device, - num_samples=1 + num_samples=num_samples, ) c = model.cond_stage_model.encode(batch["txt"]) @@ -86,7 +88,7 @@ class Omnibus(Img2Img,Txt2Img): for ck in model.concat_keys: cc = batch[ck].float() if ck != model.masked_image_key: - bchw = [num_samples, 4, h//8, w//8] + bchw = [num_samples, 4, height//8, width//8] cc = torch.nn.functional.interpolate(cc, size=bchw[-2:]) else: cc = model.get_first_stage_encoding(model.encode_first_stage(cc)) @@ -99,7 +101,7 @@ class Omnibus(Img2Img,Txt2Img): # uncond cond uc_cross = model.get_unconditional_conditioning(num_samples, "") uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} - shape = [model.channels, h//8, w//8] + shape = [model.channels, height//8, width//8] samples, = sampler.sample( batch_size = 1, @@ -121,6 +123,7 @@ class Omnibus(Img2Img,Txt2Img): return make_image def make_batch_sd( + self, image, mask, masked_image, From 175c7bddfc531fe59ded96ef881186ff06331c48 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 07:12:31 -0400 Subject: [PATCH 04/15] add missing inpainting yaml file --- .../v1-inpainting-inference.yaml | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 configs/stable-diffusion/v1-inpainting-inference.yaml diff --git a/configs/stable-diffusion/v1-inpainting-inference.yaml b/configs/stable-diffusion/v1-inpainting-inference.yaml new file mode 100644 index 0000000000..5652e04374 --- /dev/null +++ b/configs/stable-diffusion/v1-inpainting-inference.yaml @@ -0,0 +1,79 @@ +model: + base_learning_rate: 7.5e-05 + target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: hybrid # important + monitor: val/loss_simple_ema + scale_factor: 0.18215 + finetune_keys: null + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 2500 ] # NOTE for resuming. 
use 10000 if starting from scratch + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + personalization_config: + target: ldm.modules.embedding_manager.EmbeddingManager + params: + placeholder_strings: ["*"] + initializer_words: ['face', 'man', 'photo', 'africanmale'] + per_image_tokens: false + num_vectors_per_token: 1 + progressive_words: False + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 9 # 4 data + 4 downscaled image + 1 mask + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder From aaf7a4f1d3573f832a4231f4e2f7976f09ec3a89 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 10:00:28 -0400 Subject: [PATCH 05/15] inpaint and txt2img working with ddim sampler --- ldm/generate.py | 5 +++++ ldm/invoke/generator/img2img.py | 5 ++++- ldm/invoke/generator/omnibus.py | 18 ++++++++++++------ ldm/models/diffusion/ddpm.py | 1 - 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index a834844e7e..e1d35e6607 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -655,6 +655,7 @@ class Generate: return init_image,init_mask + # lots o' repeated code here! 
Turn into a make_func() def _make_base(self): if not self.generators.get('base'): from ldm.invoke.generator import Generator @@ -665,6 +666,7 @@ class Generate: if not self.generators.get('img2img'): from ldm.invoke.generator.img2img import Img2Img self.generators['img2img'] = Img2Img(self.model, self.precision) + self.generators['img2img'].free_gpu_mem = self.free_gpu_mem return self.generators['img2img'] def _make_embiggen(self): @@ -693,10 +695,13 @@ class Generate: self.generators['inpaint'] = Inpaint(self.model, self.precision) return self.generators['inpaint'] + # "omnibus" supports the runwayML custom inpainting model, which does + # txt2img, img2img and inpainting using slight variations on the same code def _make_omnibus(self): if not self.generators.get('omnibus'): from ldm.invoke.generator.omnibus import Omnibus self.generators['omnibus'] = Omnibus(self.model, self.precision) + self.generators['omnibus'].free_gpu_mem = self.free_gpu_mem return self.generators['omnibus'] def load_model(self): diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 613f1aca31..31c3ca256e 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -77,7 +77,10 @@ class Img2Img(Generator): def _image_to_tensor(self, image:Image, normalize:bool=True)->Tensor: image = np.array(image).astype(np.float32) / 255.0 - image = image[None].transpose(0, 3, 1, 2) + if len(image.shape) == 2: # 'L' image, as in a mask + image = image[None,None] + else: # 'RGB' image + image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) if normalize: image = 2.0 * image - 1.0 diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index b6ddbfdb03..fd3d3d47f2 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -3,7 +3,7 @@ import torch import numpy as np from einops import repeat -from PIL import Image +from PIL import Image, ImageOps from ldm.invoke.generator.base import downsampling from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.generator.txt2img import Txt2Img @@ -44,7 +44,7 @@ class Omnibus(Img2Img,Txt2Img): init_image = self._image_to_tensor(init_image) if isinstance(mask_image, Image.Image): - mask_image = self._image_to_tensor(mask_image,normalize=False) + mask_image = self._image_to_tensor(ImageOps.invert(mask_image).convert('L'),normalize=False) t_enc = steps @@ -53,10 +53,12 @@ class Omnibus(Img2Img,Txt2Img): elif init_image is not None: # img2img scope = choose_autocast(self.precision) + with scope(self.model.device.type): self.init_latent = self.model.get_first_stage_encoding( self.model.encode_first_stage(init_image) ) # move to latent space + # create a completely black mask (1s) mask_image = torch.ones(init_image.shape[0], 3, init_image.width, init_image.height, device=self.model.device) # and the masked image is just a copy of the original @@ -64,8 +66,9 @@ class Omnibus(Img2Img,Txt2Img): t_enc = int(strength * steps) else: # txt2img - mask_image = torch.zeros(init_image.shape[0], 3, init_image.width, init_image.height, device=self.model.device) - masked_image = mask_image + init_image = torch.zeros(1, 3, width, height, device=self.model.device) + mask_image = torch.ones(1, 1, width, height, device=self.model.device) + masked_image = init_image model = self.model @@ -102,8 +105,8 @@ class Omnibus(Img2Img,Txt2Img): uc_cross = model.get_unconditional_conditioning(num_samples, "") uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} shape = [model.channels, 
height//8, width//8] - - samples, = sampler.sample( + + samples, _ = sampler.sample( batch_size = 1, S = t_enc, x_T = x_T, @@ -136,6 +139,9 @@ class Omnibus(Img2Img,Txt2Img): "mask": repeat(mask.to(device=device), "1 ... -> n ...", n=num_samples), "masked_image": repeat(masked_image.to(device=device), "1 ... -> n ...", n=num_samples), } + print(f'DEBUG: image = {batch["image"]} shape={batch["image"].shape}') + print(f'DEBUG: mask = {batch["mask"]} shape={batch["mask"].shape}') + print(f'DEBUG: masked_image = {batch["masked_image"]} shape={batch["masked_image"].shape}') return batch def get_noise(self, width:int, height:int): diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py index fd3b6688f3..26492baf7c 100644 --- a/ldm/models/diffusion/ddpm.py +++ b/ldm/models/diffusion/ddpm.py @@ -2157,7 +2157,6 @@ class DiffusionWrapper(pl.LightningModule): ] def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): - print(f'DEBUG (ddpm) c_concat = {c_concat}') if self.conditioning_key is None: out = self.diffusion_model(x, t) elif self.conditioning_key == 'concat': From b101be041bb1de73f637711b40936aff8c82aceb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 10:45:12 -0400 Subject: [PATCH 06/15] add support for runwayML custom inpainting model This is still a work in progress but seems functional. It supports inpainting, txt2img and img2img on the ddim and k* samplers (plms still needs work, but I know what to do). To test this, get the file `sd-v1-5-inpainting.ckpt' from https://huggingface.co/runwayml/stable-diffusion-inpainting and place it at `models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt` Launch invoke.py with --model inpainting-1.5 and proceed as usual. Caveats: 1. The inpainting model takes about 800 Mb more memory than the standard 1.5 model. This model will not work on 4 GB cards. 2. The inpainting model is temperamental. It wants you to describe the entire scene and not just the masked area to replace. So if you want to replace the parrot on a man's shoulder with a crow, the prompt "crow" may fail. Try "man with a crow on shoulder" instead. The symptom of a failed inpainting is that the area will be erased and replaced with background. 3. This has not been tested well. Please report bugs. 
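For example, assuming the checkpoint has been downloaded from the
HuggingFace page above and that you start the CLI via scripts/invoke.py
(the copy step below is only illustrative), the workflow is roughly:

    # put the weights where configs/models.yaml expects them
    cp sd-v1-5-inpainting.ckpt models/ldm/stable-diffusion-v1/

    # select the inpainting model at startup
    python scripts/invoke.py --model inpainting-1.5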
--- configs/models.yaml | 7 +++++++ ldm/invoke/generator/omnibus.py | 11 ++++------- ldm/models/diffusion/ksampler.py | 22 +++++++++++++++++++--- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/configs/models.yaml b/configs/models.yaml index 46bc2df8b4..9522f025b1 100644 --- a/configs/models.yaml +++ b/configs/models.yaml @@ -13,6 +13,13 @@ stable-diffusion-1.4: default: true width: 512 height: 512 +inpainting-1.5: + description: runwayML tuned inpainting model v1.5 + weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt + config: configs/stable-diffusion/v1-inpainting-inference.yaml +# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt + width: 512 + height: 512 stable-diffusion-1.5: config: configs/stable-diffusion/v1-inference.yaml weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index fd3d3d47f2..99fe046654 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -4,6 +4,7 @@ import torch import numpy as np from einops import repeat from PIL import Image, ImageOps +from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import downsampling from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.generator.txt2img import Txt2Img @@ -60,7 +61,7 @@ class Omnibus(Img2Img,Txt2Img): ) # move to latent space # create a completely black mask (1s) - mask_image = torch.ones(init_image.shape[0], 3, init_image.width, init_image.height, device=self.model.device) + mask_image = torch.ones(1, 1, init_image.shape[2], init_image.shape[3], device=self.model.device) # and the masked image is just a copy of the original masked_image = init_image t_enc = int(strength * steps) @@ -74,7 +75,8 @@ class Omnibus(Img2Img,Txt2Img): def make_image(x_T): with torch.no_grad(): - with torch.autocast("cuda"): + scope = choose_autocast(self.precision) + with scope(self.model.device.type): batch = self.make_batch_sd( init_image, @@ -139,15 +141,10 @@ class Omnibus(Img2Img,Txt2Img): "mask": repeat(mask.to(device=device), "1 ... -> n ...", n=num_samples), "masked_image": repeat(masked_image.to(device=device), "1 ... 
-> n ...", n=num_samples), } - print(f'DEBUG: image = {batch["image"]} shape={batch["image"].shape}') - print(f'DEBUG: mask = {batch["mask"]} shape={batch["mask"].shape}') - print(f'DEBUG: masked_image = {batch["masked_image"]} shape={batch["masked_image"].shape}') return batch def get_noise(self, width:int, height:int): if self.init_latent: - print('DEBUG: returning Img2Img.getnoise()') return super(Img2Img,self).get_noise(width,height) else: - print('DEBUG: returning Txt2Img.getnoise()') return super(Txt2Img,self).get_noise(width,height) diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index 68c26b5d6c..59a3bebe4d 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -12,6 +12,22 @@ from ldm.modules.diffusionmodules.util import ( extract_into_tensor, ) +def make_cond_in(uncond, cond): + if isinstance(cond, dict): + assert isinstance(uncond, dict) + cond_in = dict() + for k in cond: + if isinstance(cond[k], list): + cond_in[k] = [ + torch.cat([uncond[k][i], cond[k][i]]) + for i in range(len(cond[k])) + ] + else: + cond_in[k] = torch.cat([uncond[k], cond[k]]) + else: + cond_in = torch.cat([uncond, cond]) + return cond_in + def cfg_apply_threshold(result, threshold = 0.0, scale = 0.7): if threshold <= 0.0: return result @@ -37,7 +53,7 @@ class CFGDenoiser(nn.Module): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = torch.cat([uncond, cond]) + cond_in = make_cond_in(uncond,cond) uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) if self.warmup < self.warmup_max: thresh = max(1, 1 + (self.threshold - 1) * (self.warmup / self.warmup_max)) @@ -64,13 +80,12 @@ class KSampler(Sampler): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = torch.cat([uncond, cond]) + cond_in = make_cond_in(uncond, cond) uncond, cond = self.inner_model( x_in, sigma_in, cond=cond_in ).chunk(2) return uncond + (cond - uncond) * cond_scale - def make_schedule( self, ddim_num_steps, @@ -283,3 +298,4 @@ class KSampler(Sampler): def conditioning_key(self)->str: return self.model.inner_model.model.conditioning_key + From 83e1c39ab85de131175edef209ff416ee1fd8814 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 11:42:30 -0400 Subject: [PATCH 07/15] plms works, bugs quashed - The plms sampler now works with custom inpainting model - Quashed bug that was causing generation on normal models to fail (oops!) - Can now generate non-square images with custom inpainting model --- ldm/invoke/generator/base.py | 7 +++---- ldm/invoke/generator/omnibus.py | 4 ++-- ldm/models/diffusion/ksampler.py | 27 ++++++--------------------- ldm/models/diffusion/plms.py | 2 +- ldm/models/diffusion/sampler.py | 21 +++++++++++++++++++++ 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index c70924449b..03f066323c 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -60,11 +60,10 @@ class Generator(): first_seed = seed seed, initial_noise = self.generate_initial_noise(seed, width, height) - scope = (scope(self.model.device.type), self.model.ema_scope()) if sampler.conditioning_key() not in ('hybrid','concat') else scope(self.model.device.type) - - with scope: + # There used to be an additional self.model.ema_scope() here, but it breaks + # the inpaint-1.5 model. Not sure what it did.... ? 
+ with scope(self.model.device.type): for n in trange(iterations, desc='Generating'): - print('DEBUG: in iterations loop() called') x_T = None if self.variation_amount > 0: seed_everything(seed) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index 99fe046654..c8de01addb 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -67,8 +67,8 @@ class Omnibus(Img2Img,Txt2Img): t_enc = int(strength * steps) else: # txt2img - init_image = torch.zeros(1, 3, width, height, device=self.model.device) - mask_image = torch.ones(1, 1, width, height, device=self.model.device) + init_image = torch.zeros(1, 3, height, width, device=self.model.device) + mask_image = torch.ones(1, 1, height, width, device=self.model.device) masked_image = init_image model = self.model diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index 59a3bebe4d..5f223cdf46 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -12,22 +12,6 @@ from ldm.modules.diffusionmodules.util import ( extract_into_tensor, ) -def make_cond_in(uncond, cond): - if isinstance(cond, dict): - assert isinstance(uncond, dict) - cond_in = dict() - for k in cond: - if isinstance(cond[k], list): - cond_in[k] = [ - torch.cat([uncond[k][i], cond[k][i]]) - for i in range(len(cond[k])) - ] - else: - cond_in[k] = torch.cat([uncond[k], cond[k]]) - else: - cond_in = torch.cat([uncond, cond]) - return cond_in - def cfg_apply_threshold(result, threshold = 0.0, scale = 0.7): if threshold <= 0.0: return result @@ -43,9 +27,10 @@ def cfg_apply_threshold(result, threshold = 0.0, scale = 0.7): class CFGDenoiser(nn.Module): - def __init__(self, model, threshold = 0, warmup = 0): + def __init__(self, sampler, threshold = 0, warmup = 0): super().__init__() - self.inner_model = model + self.inner_model = sampler.model + self.sampler = sampler self.threshold = threshold self.warmup_max = warmup self.warmup = max(warmup / 10, 1) @@ -53,7 +38,7 @@ class CFGDenoiser(nn.Module): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = make_cond_in(uncond,cond) + cond_in = self.sampler.make_cond_in(uncond,cond) uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) if self.warmup < self.warmup_max: thresh = max(1, 1 + (self.threshold - 1) * (self.warmup / self.warmup_max)) @@ -80,7 +65,7 @@ class KSampler(Sampler): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = make_cond_in(uncond, cond) + cond_in = self.make_cond_in(uncond, cond) uncond, cond = self.inner_model( x_in, sigma_in, cond=cond_in ).chunk(2) @@ -209,7 +194,7 @@ class KSampler(Sampler): else: x = torch.randn([batch_size, *shape], device=self.device) * sigmas[0] - model_wrap_cfg = CFGDenoiser(self.model, threshold=threshold, warmup=max(0.8*S,S-10)) + model_wrap_cfg = CFGDenoiser(self, threshold=threshold, warmup=max(0.8*S,S-10)) extra_args = { 'cond': conditioning, 'uncond': unconditional_conditioning, diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py index 9e722eb932..4261f549d2 100644 --- a/ldm/models/diffusion/plms.py +++ b/ldm/models/diffusion/plms.py @@ -45,7 +45,7 @@ class PLMSSampler(Sampler): else: x_in = torch.cat([x] * 2) t_in = torch.cat([t] * 2) - c_in = torch.cat([unconditional_conditioning, c]) + c_in = self.make_cond_in(unconditional_conditioning, c) e_t_uncond, e_t = self.model.apply_model( x_in, 
t_in, c_in ).chunk(2) diff --git a/ldm/models/diffusion/sampler.py b/ldm/models/diffusion/sampler.py index 9e57bc25d4..fd7ba106c1 100644 --- a/ldm/models/diffusion/sampler.py +++ b/ldm/models/diffusion/sampler.py @@ -439,3 +439,24 @@ class Sampler(object): def conditioning_key(self)->str: return self.model.model.conditioning_key + + def make_cond_in(self, uncond, cond): + ''' + This handles the choice between a conditional conditioning + that is a tensor (used by cross attention) vs one that is a dict + used by 'hybrid' + ''' + if isinstance(cond, dict): + assert isinstance(uncond, dict) + cond_in = dict() + for k in cond: + if isinstance(cond[k], list): + cond_in[k] = [ + torch.cat([uncond[k][i], cond[k][i]]) + for i in range(len(cond[k])) + ] + else: + cond_in[k] = torch.cat([uncond[k], cond[k]]) + else: + cond_in = torch.cat([uncond, cond]) + return cond_in From e33971fe2cb046d08548366bc775a4091b0739ea Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 11:42:30 -0400 Subject: [PATCH 08/15] plms works, bugs quashed - The plms sampler now works with custom inpainting model - Quashed bug that was causing generation on normal models to fail (oops!) - Can now generate non-square images with custom inpainting model Credits for advice and assistance during porting: @any-winter-4079 (http://github.com/any-winter-4079) @db3000 (Danny Beer http://github.com/db3000) --- ldm/invoke/generator/base.py | 7 +++---- ldm/invoke/generator/omnibus.py | 4 ++-- ldm/models/diffusion/ksampler.py | 27 ++++++--------------------- ldm/models/diffusion/plms.py | 2 +- ldm/models/diffusion/sampler.py | 21 +++++++++++++++++++++ 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index c70924449b..03f066323c 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -60,11 +60,10 @@ class Generator(): first_seed = seed seed, initial_noise = self.generate_initial_noise(seed, width, height) - scope = (scope(self.model.device.type), self.model.ema_scope()) if sampler.conditioning_key() not in ('hybrid','concat') else scope(self.model.device.type) - - with scope: + # There used to be an additional self.model.ema_scope() here, but it breaks + # the inpaint-1.5 model. Not sure what it did.... ? 
+ with scope(self.model.device.type): for n in trange(iterations, desc='Generating'): - print('DEBUG: in iterations loop() called') x_T = None if self.variation_amount > 0: seed_everything(seed) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index 99fe046654..c8de01addb 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -67,8 +67,8 @@ class Omnibus(Img2Img,Txt2Img): t_enc = int(strength * steps) else: # txt2img - init_image = torch.zeros(1, 3, width, height, device=self.model.device) - mask_image = torch.ones(1, 1, width, height, device=self.model.device) + init_image = torch.zeros(1, 3, height, width, device=self.model.device) + mask_image = torch.ones(1, 1, height, width, device=self.model.device) masked_image = init_image model = self.model diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index 59a3bebe4d..5f223cdf46 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -12,22 +12,6 @@ from ldm.modules.diffusionmodules.util import ( extract_into_tensor, ) -def make_cond_in(uncond, cond): - if isinstance(cond, dict): - assert isinstance(uncond, dict) - cond_in = dict() - for k in cond: - if isinstance(cond[k], list): - cond_in[k] = [ - torch.cat([uncond[k][i], cond[k][i]]) - for i in range(len(cond[k])) - ] - else: - cond_in[k] = torch.cat([uncond[k], cond[k]]) - else: - cond_in = torch.cat([uncond, cond]) - return cond_in - def cfg_apply_threshold(result, threshold = 0.0, scale = 0.7): if threshold <= 0.0: return result @@ -43,9 +27,10 @@ def cfg_apply_threshold(result, threshold = 0.0, scale = 0.7): class CFGDenoiser(nn.Module): - def __init__(self, model, threshold = 0, warmup = 0): + def __init__(self, sampler, threshold = 0, warmup = 0): super().__init__() - self.inner_model = model + self.inner_model = sampler.model + self.sampler = sampler self.threshold = threshold self.warmup_max = warmup self.warmup = max(warmup / 10, 1) @@ -53,7 +38,7 @@ class CFGDenoiser(nn.Module): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = make_cond_in(uncond,cond) + cond_in = self.sampler.make_cond_in(uncond,cond) uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) if self.warmup < self.warmup_max: thresh = max(1, 1 + (self.threshold - 1) * (self.warmup / self.warmup_max)) @@ -80,7 +65,7 @@ class KSampler(Sampler): def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) - cond_in = make_cond_in(uncond, cond) + cond_in = self.make_cond_in(uncond, cond) uncond, cond = self.inner_model( x_in, sigma_in, cond=cond_in ).chunk(2) @@ -209,7 +194,7 @@ class KSampler(Sampler): else: x = torch.randn([batch_size, *shape], device=self.device) * sigmas[0] - model_wrap_cfg = CFGDenoiser(self.model, threshold=threshold, warmup=max(0.8*S,S-10)) + model_wrap_cfg = CFGDenoiser(self, threshold=threshold, warmup=max(0.8*S,S-10)) extra_args = { 'cond': conditioning, 'uncond': unconditional_conditioning, diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py index 9e722eb932..4261f549d2 100644 --- a/ldm/models/diffusion/plms.py +++ b/ldm/models/diffusion/plms.py @@ -45,7 +45,7 @@ class PLMSSampler(Sampler): else: x_in = torch.cat([x] * 2) t_in = torch.cat([t] * 2) - c_in = torch.cat([unconditional_conditioning, c]) + c_in = self.make_cond_in(unconditional_conditioning, c) e_t_uncond, e_t = self.model.apply_model( x_in, 
t_in, c_in ).chunk(2) diff --git a/ldm/models/diffusion/sampler.py b/ldm/models/diffusion/sampler.py index 9e57bc25d4..fd7ba106c1 100644 --- a/ldm/models/diffusion/sampler.py +++ b/ldm/models/diffusion/sampler.py @@ -439,3 +439,24 @@ class Sampler(object): def conditioning_key(self)->str: return self.model.model.conditioning_key + + def make_cond_in(self, uncond, cond): + ''' + This handles the choice between a conditional conditioning + that is a tensor (used by cross attention) vs one that is a dict + used by 'hybrid' + ''' + if isinstance(cond, dict): + assert isinstance(uncond, dict) + cond_in = dict() + for k in cond: + if isinstance(cond[k], list): + cond_in[k] = [ + torch.cat([uncond[k][i], cond[k][i]]) + for i in range(len(cond[k])) + ] + else: + cond_in[k] = torch.cat([uncond[k], cond[k]]) + else: + cond_in = torch.cat([uncond, cond]) + return cond_in From 4352eb6628bcc0369a739c5b1055cc2c7e407da6 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 13:17:06 -0400 Subject: [PATCH 09/15] stop crashes on non-square images --- ldm/invoke/generator/omnibus.py | 3 ++- ldm/invoke/restoration/outcrop.py | 7 ++++++- ldm/models/diffusion/sampler.py | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index c8de01addb..2fccd7c6f7 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -71,6 +71,8 @@ class Omnibus(Img2Img,Txt2Img): mask_image = torch.ones(1, 1, height, width, device=self.model.device) masked_image = init_image + height = init_image.shape[2] + width = init_image.shape[3] model = self.model def make_image(x_T): @@ -88,7 +90,6 @@ class Omnibus(Img2Img,Txt2Img): ) c = model.cond_stage_model.encode(batch["txt"]) - c_cat = list() for ck in model.concat_keys: cc = batch[ck].float() diff --git a/ldm/invoke/restoration/outcrop.py b/ldm/invoke/restoration/outcrop.py index 017d9de7e1..0c4831dd84 100644 --- a/ldm/invoke/restoration/outcrop.py +++ b/ldm/invoke/restoration/outcrop.py @@ -89,6 +89,9 @@ class Outcrop(object): def _extend(self,image:Image,pixels:int)-> Image: extended_img = Image.new('RGBA',(image.width,image.height+pixels)) + mask_height = pixels if self.generate.model.model.conditioning_key in ('hybrid','concat') \ + else pixels *2 + # first paste places old image at top of extended image, stretch # it, and applies a gaussian blur to it # take the top half region, stretch and paste it @@ -105,7 +108,9 @@ class Outcrop(object): # now make the top part transparent to use as a mask alpha = extended_img.getchannel('A') - alpha.paste(0,(0,0,extended_img.width,pixels*2)) + alpha.paste(0,(0,0,extended_img.width,mask_height)) extended_img.putalpha(alpha) + extended_img.save('outputs/curly_extended.png') + return extended_img diff --git a/ldm/models/diffusion/sampler.py b/ldm/models/diffusion/sampler.py index fd7ba106c1..4e9fce0102 100644 --- a/ldm/models/diffusion/sampler.py +++ b/ldm/models/diffusion/sampler.py @@ -265,6 +265,7 @@ class Sampler(object): ) if mask is not None: + print('DEBUG: in masking routine') assert x0 is not None img_orig = self.model.q_sample( x0, ts From 3c1ef48fe24c027131b7c846970bec963b6c2b69 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 13:57:42 -0400 Subject: [PATCH 10/15] fix crash when doing img2img with ddim sampler and SD 1.5 --- ldm/models/diffusion/sampler.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/ldm/models/diffusion/sampler.py b/ldm/models/diffusion/sampler.py index 
4e9fce0102..ccebed3bfc 100644 --- a/ldm/models/diffusion/sampler.py +++ b/ldm/models/diffusion/sampler.py @@ -319,20 +319,6 @@ class Sampler(object): init_latent = None, mask = None, ): - - print(f'DEBUG(sampler): cond = {cond}') - if cond is not None: - if isinstance(cond, dict): - ctmp = cond[list(cond.keys())[0]] - while isinstance(ctmp, list): - ctmp = ctmp[0] - cbs = ctmp.shape[0] - if cbs != batch_size: - print(f"Warning: Got {cbs} conds but batch-size is {batch_size}") - else: - if cond.shape[0] != batch_size: - print(f"Warning: Got {cond.shape[0]} conditionings but batch-size is {batch_size}") - timesteps = ( np.arange(self.ddpm_num_timesteps) if use_original_steps From ca2f579f434269a8061f3fa85c9b2772330cbd30 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 15:56:07 -0400 Subject: [PATCH 11/15] prevent crash when providing empty quoted prompt ("") --- ldm/invoke/args.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 2f8e2303cb..7068bd83c1 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -181,7 +181,9 @@ class Args(object): switches_started = False for element in elements: - if element[0] == '-' and not switches_started: + if len(element) == 0: # empty prompt + pass + elif element[0] == '-' and not switches_started: switches_started = True if switches_started: switches.append(element) From 8d5a2250117b8542bddce3e1733207382051f326 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 17:26:00 -0400 Subject: [PATCH 12/15] allow for empty prompts (useful for inpaint removal) --- scripts/invoke.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/invoke.py b/scripts/invoke.py index faa85de80e..466536bc46 100644 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -171,9 +171,9 @@ def main_loop(gen, opt): except (OSError, AttributeError, KeyError): pass - if len(opt.prompt) == 0: - print('\nTry again with a prompt!') - continue +# if len(opt.prompt) == 0: +# print('\nTry again with a prompt!') +# continue # width and height are set by model if not specified if not opt.width: From d3047c7cb096681226df5e2f5615923891af41e1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 25 Oct 2022 22:44:42 -0400 Subject: [PATCH 13/15] do not encode init image in starting latent --- ldm/invoke/generator/omnibus.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index 2fccd7c6f7..d841cac64f 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -64,7 +64,6 @@ class Omnibus(Img2Img,Txt2Img): mask_image = torch.ones(1, 1, init_image.shape[2], init_image.shape[3], device=self.model.device) # and the masked image is just a copy of the original masked_image = init_image - t_enc = int(strength * steps) else: # txt2img init_image = torch.zeros(1, 3, height, width, device=self.model.device) @@ -111,7 +110,7 @@ class Omnibus(Img2Img,Txt2Img): samples, _ = sampler.sample( batch_size = 1, - S = t_enc, + S = steps, x_T = x_T, conditioning = cond, shape = shape, @@ -145,7 +144,4 @@ class Omnibus(Img2Img,Txt2Img): return batch def get_noise(self, width:int, height:int): - if self.init_latent: - return super(Img2Img,self).get_noise(width,height) - else: - return super(Txt2Img,self).get_noise(width,height) + return super(Txt2Img,self).get_noise(width,height) From 906dafe3cd286a0c7fc079d1d37fcec6756e4aec Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: 
Wed, 26 Oct 2022 00:18:31 -0400 Subject: [PATCH 14/15] make variations work with inpainting model --- ldm/invoke/generator/base.py | 5 ++++- ldm/invoke/generator/img2img.py | 2 +- ldm/invoke/generator/omnibus.py | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index e326bcfe8d..2e96c93cbb 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -6,6 +6,7 @@ import torch import numpy as np import random import os +import traceback from tqdm import tqdm, trange from PIL import Image, ImageFilter from einops import rearrange, repeat @@ -82,7 +83,9 @@ class Generator(): try: x_T = self.get_noise(width,height) except: - pass + print('** An error occurred while getting initial noise **') + print(traceback.format_exc()) + image = make_image(x_T) if self.safety_checker is not None: diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 31c3ca256e..73eb2e6a06 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -14,7 +14,7 @@ from ldm.models.diffusion.ddim import DDIMSampler class Img2Img(Generator): def __init__(self, model, precision): super().__init__(model, precision) - self.init_latent = None # by get_noise() + self.init_latent = None # by get_noise() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, conditioning,init_image,strength,step_callback=None,threshold=0.0,perlin=0.0,**kwargs): diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index d841cac64f..e0705ec397 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -70,6 +70,7 @@ class Omnibus(Img2Img,Txt2Img): mask_image = torch.ones(1, 1, height, width, device=self.model.device) masked_image = init_image + self.init_latent = init_image height = init_image.shape[2] width = init_image.shape[3] model = self.model @@ -144,4 +145,7 @@ class Omnibus(Img2Img,Txt2Img): return batch def get_noise(self, width:int, height:int): - return super(Txt2Img,self).get_noise(width,height) + if self.init_latent is not None: + height = self.init_latent.shape[2] + width = self.init_latent.shape[3] + return Txt2Img.get_noise(self,width,height) From b1da13a984b7e0c8e5d7fa09f250b355040db303 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 26 Oct 2022 08:29:56 -0400 Subject: [PATCH 15/15] minor cleanups - change default model back to 1.4 - remove --fnformat from canonicalized dream prompt arguments (not needed for image reproducibility) - add -tm to canonicalized dream prompt arguments (definitely needed for image reproducibility) --- configs/models.yaml | 2 +- ldm/invoke/args.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/configs/models.yaml b/configs/models.yaml index 40bdc88cd8..162da38da2 100644 --- a/configs/models.yaml +++ b/configs/models.yaml @@ -12,6 +12,7 @@ stable-diffusion-1.4: description: Stable Diffusion inference model version 1.4 width: 512 height: 512 + default: true inpainting-1.5: description: runwayML tuned inpainting model v1.5 weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt @@ -19,7 +20,6 @@ inpainting-1.5: # vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt width: 512 height: 512 - default: true stable-diffusion-1.5: config: configs/stable-diffusion/v1-inference.yaml weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 7068bd83c1..a57928e22f 100644 --- 
a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -219,7 +219,6 @@ class Args(object): switches.append(f'-W {a["width"]}') switches.append(f'-H {a["height"]}') switches.append(f'-C {a["cfg_scale"]}') - switches.append(f'--fnformat {a["fnformat"]}') if a['perlin'] > 0: switches.append(f'--perlin {a["perlin"]}') if a['threshold'] > 0: @@ -245,6 +244,8 @@ class Args(object): switches.append(f'-f {a["strength"]}') if a['inpaint_replace']: switches.append(f'--inpaint_replace') + if a['text_mask']: + switches.append(f'-tm {" ".join([str(u) for u in a["text_mask"]])}') else: switches.append(f'-A {a["sampler_name"]}')