diff --git a/README.md b/README.md index 843c2dcb44..a206bc8d44 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ you can try starting `dream.py` with the `--precision=float32` flag: - [Interactive Command Line Interface](docs/features/CLI.md) - [Image To Image](docs/features/IMG2IMG.md) - [Inpainting Support](docs/features/INPAINTING.md) +- [Outpainting Support](docs/features/OUTPAINTING.md) - [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md) - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling) - [Google Colab](docs/features/OTHER.md#google-colab) @@ -157,7 +158,7 @@ For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md) Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation problems and other issues. -### Contributing +# Contributing Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how diff --git a/ldm/dream/args.py b/ldm/dream/args.py index 03b931808f..eb1913d1cf 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -74,9 +74,10 @@ To retrieve a (series of) opt objects corresponding to the metadata, do this: opt_list = metadata_loads(metadata) The metadata should be pulled out of the PNG image. pngwriter has a method -retrieve_metadata that will do this. 
- +retrieve_metadata that will do this, or you can do it in one swell foop +with metadata_from_png(): + opt_list = metadata_from_png('/path/to/image_file.png') """ import argparse @@ -87,6 +88,7 @@ import hashlib import os import copy import base64 +import ldm.dream.pngwriter from ldm.dream.conditioning import split_weighted_subprompts SAMPLER_CHOICES = [ @@ -208,10 +210,16 @@ class Args(object): # esrgan-specific parameters if a['upscale']: switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}') + + # embiggen parameters if a['embiggen']: switches.append(f'--embiggen {" ".join([str(u) for u in a["embiggen"]])}') if a['embiggen_tiles']: switches.append(f'--embiggen_tiles {" ".join([str(u) for u in a["embiggen_tiles"]])}') + + # outpainting parameters + if a['out_direction']: + switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}') if a['with_variations']: formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"])) switches.append(f'-V {formatted_variations}') @@ -546,6 +554,14 @@ class Args(object): help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely', default=0.75, ) + img2img_group.add_argument( + '-D', + '--out_direction', + nargs='+', + type=str, + metavar=('direction', 'pixels'), + help='Direction to extend the given image (left|right|top|bottom). 
def metadata_from_png(png_file_path):
    """
    Read the metadata stored in a PNG file created by dream.py and
    return the Args object built from it.

    png_file_path -- path of the PNG file to inspect.

    The raw metadata is fetched with ldm.dream.pngwriter.retrieve_metadata()
    and decoded with metadata_loads(). Only element 0 of the decoded result
    is returned, so the caller receives a single object, not a list.
    """
    raw_metadata = ldm.dream.pngwriter.retrieve_metadata(png_file_path)
    return metadata_loads(raw_metadata)[0]
generator = self._make_inpaint() elif (embiggen != None or embiggen_tiles != None): @@ -469,16 +475,17 @@ class Generate: ) return results - # this needs to be generalized to all sorts of postprocessors, but for now - # sufficient to support most use cases + # this needs to be generalized to all sorts of postprocessors, which should be wrapped + # in a nice harmonized call signature. For now we have a bunch of if/elses! def apply_postprocessor( self, image_path, - tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen' + tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen' gfpgan_strength = 0.0, codeformer_fidelity = 0.75, - save_original = True, # to get new name upscale = None, + out_direction = None, + save_original = True, # to get new name callback = None, opt = None, ): @@ -489,8 +496,7 @@ class Generate: image_metadata = None prompt = None try: - meta = retrieve_metadata(image_path) - args = metadata_loads(meta) + args = metadata_from_png(image_path) if len(args) > 1: print("* Can't postprocess a grid") return @@ -556,22 +562,56 @@ class Generate: embiggen_tiles = opt.embiggen_tiles, image_callback = callback, ) - + elif tool == 'outpaint': + oldargs = metadata_from_png(image_path) + opt.strength = 0.83 + opt.init_img = image_path + return self.prompt2image( + oldargs.prompt, + out_direction = opt.out_direction, + sampler = self.sampler, + steps = opt.steps, + cfg_scale = opt.cfg_scale, + ddim_eta = self.ddim_eta, + conditioning= get_uc_and_c( + oldargs.prompt, model =self.model, + skip_normalize=opt.skip_normalize, + log_tokens =opt.log_tokenization + ), + width = opt.width, + height = opt.height, + init_img = image_path, # not the Image! 
# TODO: outpainting is a post-processing application and should be made to behave
# like the other ones.
def _create_outpaint_image(self, image, direction_args):
    """
    Prepare an image for outpainting along one of its edges.

    The returned picture has the same size as the input. The original
    content is shifted away from the requested edge by `pixels`; whatever
    is pushed past the opposite edge is dropped. The vacated strip is
    filled with a mirrored copy of the content next to it, with its alpha
    set to 0. Every other column of a band straddling the seam is also
    made transparent (the "dither").

    Parameters
      image          -- PIL Image; it is converted to RGBA.
      direction_args -- the values given to -D: [direction] or
                        [direction, pixels]. direction is one of 'top',
                        'left', 'bottom', 'right'. pixels is converted
                        with int() and defaults to half the image extent
                        along the extension axis.

    Returns
      A new RGBA PIL Image.

    Raises
      AssertionError -- wrong number of arguments, unknown direction, or
                        pixels outside 0 < pixels < extent.
      ValueError     -- pixels cannot be converted with int().
    """
    assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'

    direction = direction_args[0]
    pixels = int(direction_args[1]) if len(direction_args) == 2 else None

    assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'

    # We always extend the top edge. For the other directions the image is
    # rotated so the requested edge is on top, and rotated back at the end.
    # Each entry is (rotation applied before, rotation applied after).
    rotations = {
        'left': (Image.Transpose.ROTATE_270, Image.Transpose.ROTATE_90),
        'bottom': (Image.Transpose.ROTATE_180, Image.Transpose.ROTATE_180),
        'right': (Image.Transpose.ROTATE_90, Image.Transpose.ROTATE_270),
    }
    rotate_in, rotate_out = rotations.get(direction, (None, None))

    image = image.convert("RGBA")
    if rotate_in is not None:
        image = image.transpose(rotate_in)

    if pixels is None:
        pixels = image.height // 2
    assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'

    # The new top strip is the source image mirrored vertically and cropped
    # to its last `pixels` rows. Coordinates (0,0) are the upper left corner.
    top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM)
    top = top.crop((0, top.height - pixels, top.width, top.height))
    # make the whole strip fully transparent
    top.putalpha(0)

    # the rest of the canvas is the original image shifted down by `pixels`
    bottom = image.crop((0, 0, image.width, image.height - pixels))

    new_img = image.copy()
    new_img.paste(top, (0, 0))
    new_img.paste(bottom, (0, pixels))

    # Create a 10% dither around the seam: every other column becomes
    # transparent. The band is clamped to the image height; without the
    # clamp a large `pixels` value indexes rows past the bottom edge.
    dither = min(image.height // 10, pixels)
    first_row = pixels - dither
    last_row = min(pixels + dither, new_img.height)
    px = new_img.load()
    for x in range(0, new_img.width, 2):
        for y in range(first_row, last_row):
            r, g, b, _ = px[x, y]
            px[x, y] = (r, g, b, 0)

    # let's rotate back again
    if rotate_out is not None:
        new_img = new_img.transpose(rotate_out)

    return new_img
def write_log_message(results, log_path):
    """
    Report generated images on the terminal and append them to the log file.

    results  -- iterable of (path, prompt) pairs
    log_path -- path of the log file; it is opened in append mode as utf-8

    Each pair is rendered as "<path>: <prompt>". The terminal copy of every
    line is prefixed with the module-level counter output_cntr, which is
    incremented once per line. The log file receives the same lines without
    the counter prefix.
    """
    global output_cntr

    # build every line first so a malformed pair fails before any output
    entries = []
    for path, prompt in results:
        entries.append(f'{path}: {prompt}\n')

    for entry in entries:
        output_cntr += 1
        print(f'[{output_cntr}] {entry}', end='')

    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write(''.join(entries))