diff --git a/README.md b/README.md index 2af4a2a53a..6260032d3f 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,36 @@ You may also pass a -v option to generate count variants on the original passing the first generated image back into img2img the requested number of times. It generates interesting variants. +## GFPGAN Support + +This script also provides the ability to invoke GFPGAN after image generation. Doing so will enhance faces +and optionally upscale the image to a higher resolution. + +To use the ability, clone the [GFPGAN repository](https://github.com/TencentARC/GFPGAN) and follow their +installation instructions. By default, we expect GFPGAN to be installed in a 'gfpgan' sibling directory. + +If you want to enhance non-face regions in the image, you may also want to install Real-ESRGAN by installing +the pip Real-ESRGAN package. +``` +pip install realesrgan + +``` + +Now, you can run this script by adding the --gfpgan option. Any issues with GFPGAN will be reported on initialization. + +When generating prompts, add a -G or --gfpgan_strength option to control the strength of the GFPGAN enhancement. +0.0 is no enhancement, 1.0 is maximum enhancement. + +So for instance, to apply the maximum strength: +~~~~ +dream> a man wearing a pineapple hat -G 1 +~~~~ + +That's it! + +There's also a bunch of options to control GFPGAN settings when starting the script for different configs that you can +read about in the help text. This will let you control where GFPGAN is installed, if upsampling is enabled, the upsampler to use and the model path. + ## Barebones Web Server As of version 1.10, this distribution comes with a bare bones web server (see screenshot). To use it, diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 0ec3d60d98..ab40330e43 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -132,7 +132,8 @@ The vast majority of these arguments default to reasonable values. 
strength=0.75, # default in scripts/img2img.py embedding_path=None, latent_diffusion_weights=False, # just to keep track of this parameter when regenerating prompt - device='cuda' + device='cuda', + gfpgan=None, ): self.batch_size = batch_size self.iterations = iterations @@ -154,6 +155,7 @@ The vast majority of these arguments default to reasonable values. self.sampler = None self.latent_diffusion_weights=latent_diffusion_weights self.device = device + self.gfpgan = gfpgan if seed is None: self.seed = self._new_seed() else: @@ -199,6 +201,7 @@ The vast majority of these arguments default to reasonable values. # these are specific to img2img init_img=None, strength=None, + gfpgan_strength=None, variants=None, **args): # eat up additional cruft ''' @@ -214,6 +217,7 @@ The vast majority of these arguments default to reasonable values. cfg_scale // how strongly the prompt influences the image (7.5) (must be >1) init_img // path to an initial image - its dimensions override width and height strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely + gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image) variants // if >0, the 1st generated image will be passed back to img2img to generate the requested number of variants callback // a function or method that will be called each time an image is generated @@ -260,7 +264,8 @@ The vast majority of these arguments default to reasonable values. 
batch_size=batch_size,iterations=iterations, steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, - init_img=init_img,strength=strength,variants=variants, + init_img=init_img,strength=strength, + gfpgan_strength=gfpgan_strength,variants=variants, callback=image_callback) else: results = self._txt2img(prompt, @@ -268,6 +273,7 @@ The vast majority of these arguments default to reasonable values. batch_size=batch_size,iterations=iterations, steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, + gfpgan_strength=gfpgan_strength, width=width,height=height, callback=image_callback) toc = time.time() @@ -280,6 +286,7 @@ The vast majority of these arguments default to reasonable values. batch_size,iterations, steps,seed,cfg_scale,ddim_eta, skip_normalize, + gfpgan_strength, width,height, callback): # the callback is called each time a new Image is generated """ @@ -335,6 +342,8 @@ The vast majority of these arguments default to reasonable values. for x_sample in x_samples_ddim: x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c') image = Image.fromarray(x_sample.astype(np.uint8)) + if gfpgan_strength > 0: + image = self._run_gfpgan(image, gfpgan_strength) images.append([image,seed]) if callback is not None: callback(image,seed) @@ -354,6 +363,7 @@ The vast majority of these arguments default to reasonable values. batch_size,iterations, steps,seed,cfg_scale,ddim_eta, skip_normalize, + gfpgan_strength, init_img,strength,variants, callback): """ @@ -419,6 +429,8 @@ The vast majority of these arguments default to reasonable values. for x_sample in x_samples: x_sample = 255. 
def _run_gfpgan(self, image, strength):
    """
    Restore faces in a generated image with GFPGAN and blend the result
    with the original image.

    image     // PIL Image to enhance (converted to RGB before processing)
    strength  // blend weight of the restored image: values below 1.0 mix the
                 restored image with the original, 1.0 or more returns the
                 restored image as-is

    Returns the enhanced PIL Image. If no GFPGAN instance has been attached
    to self.gfpgan, a notice is printed and the input image is returned
    unchanged.
    """
    if self.gfpgan is None:
        print("GFPGAN not initialized, it must be loaded via the --gfpgan argument")
        return image

    image = image.convert("RGB")

    # GFPGANer.enhance follows the OpenCV convention (BGR channel order) for
    # both its input and its output, so flip the channel axis on the way in
    # and again on the way out. ascontiguousarray() removes the negative
    # stride left by the [..., ::-1] view, which OpenCV-based code rejects.
    bgr_input = np.ascontiguousarray(np.array(image, dtype=np.uint8)[..., ::-1])
    _, _, restored_bgr = self.gfpgan.enhance(
        bgr_input,
        has_aligned=False,
        only_center_face=False,
        paste_back=True,
    )
    res = Image.fromarray(np.ascontiguousarray(restored_bgr[..., ::-1]))

    if strength < 1.0:
        # GFPGAN may have upscaled the picture; Image.blend() raises unless
        # both images have the same size, so bring the original up to the
        # restored image's dimensions first.
        if image.size != res.size:
            image = image.resize(res.size)
        res = Image.blend(image, res, strength)

    return res
def load_gfpgan_bg_upsampler(bg_upsampler, bg_tile=400):
    """
    Build the optional background upsampler handed to GFPGAN.

    bg_upsampler  // name of the upsampler; only 'realesrgan' is supported
                     (matched case-insensitively). None, '' and 'none'
                     disable background upsampling.
    bg_tile       // tile size passed to RealESRGANer

    Returns a RealESRGANer instance, or None when background upsampling is
    disabled or cannot be used (unknown name, no CUDA device, or the
    Real-ESRGAN packages are not installed). In the latter cases a warning
    is issued instead of raising, so that face restoration can still be
    loaded without the optional upsampler.
    """
    import warnings

    name = bg_upsampler.strip().lower() if isinstance(bg_upsampler, str) else None
    if name != 'realesrgan':
        if name not in (None, '', 'none'):
            warnings.warn(f'Unknown GFPGAN background upsampler {bg_upsampler!r}. '
                          'Background upsampling is disabled.')
        return None

    # Imported lazily: torch is only needed once Real-ESRGAN was requested.
    import torch

    if not torch.cuda.is_available():  # CPU
        warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. '
                      'If you really want to use it, please modify the corresponding codes.')
        return None

    # Real-ESRGAN is an optional extra; do not let its absence abort the
    # whole GFPGAN initialization in the caller.
    try:
        from basicsr.archs.rrdbnet_arch import RRDBNet
        from realesrgan import RealESRGANer
    except ImportError as err:
        warnings.warn(f'Real-ESRGAN is not available ({err}). '
                      'Background upsampling is disabled.')
        return None

    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
    return RealESRGANer(
        scale=2,
        model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
        model=model,
        tile=bg_tile,
        tile_pad=10,
        pre_pad=0,
        half=True)  # need to set False in CPU mode
Options: realesrgan, none. Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_bg_tile", + type=int, + default=400, + help="Tile size for background sampler, 0 for no tile during testing. Default: 400. Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_model_path", + type=str, + default='experiments/pretrained_models/GFPGANv1.3.pth', + help="indicates the path to the GFPGAN model, relative to --gfpgan_dir. Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_dir", + type=str, + default='../gfpgan', + help="indicates the directory containing the GFPGAN code. Only used if --gfpgan is specified") return parser @@ -278,6 +352,7 @@ def create_cmd_parser(): parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)") parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)") parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely") + parser.add_argument('-G','--gfpgan_strength', default=0.5, type=float, help="The strength at which to apply the GFPGAN model to the result, in order to improve faces.") # variants is going to be superseded by a generalized "prompt-morph" function # parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants") parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")