diff --git a/docs/features/CLI.md b/docs/features/CLI.md
index d809d48841..e50c5833c9 100644
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@@ -1,17 +1,30 @@
 # **Interactive Command-Line Interface**
 
-The `dream.py` script, located in `scripts/dream.py`, provides an interactive interface to image generation similar to the "dream mothership" bot that Stable AI provided on its Discord server.
+The `dream.py` script, located in `scripts/dream.py`, provides an
+interactive interface to image generation similar to the "dream
+mothership" bot that Stability AI provided on its Discord server.
 
-Unlike the txt2img.py and img2img.py scripts provided in the original CompViz/stable-diffusion source code repository, the time-consuming initialization of the AI model initialization only happens once. After that image generation
-from the command-line interface is very fast.
+Unlike the txt2img.py and img2img.py scripts provided in the original
+CompVis/stable-diffusion source code repository, the time-consuming
+initialization of the AI model happens only once. After that, image
+generation from the command-line interface is very fast.
 
-The script uses the readline library to allow for in-line editing, command history (up and down arrows), autocompletion, and more. To help keep track of which prompts generated which images, the script writes a log file of image names and prompts to the selected output directory.
+The script uses the readline library to allow for in-line editing,
+command history (up and down arrows), autocompletion, and more. To
+help keep track of which prompts generated which images, the script
+writes a log file of image names and prompts to the selected output
+directory.
 
-In addition, as of version 1.02, it also writes the prompt into the PNG file's metadata where it can be retrieved using scripts/images2prompt.py
+In addition, as of version 1.02, it also writes the prompt into the
+PNG file's metadata, where it can be retrieved using
+scripts/images2prompt.py.
 
 The script is confirmed to work on Linux, Windows and Mac systems.
 
-_Note:_ This script runs from the command-line or can be used as a Web application. The Web GUI is currently rudimentary, but a much better replacement is on its way.
+_Note:_ This script runs from the command line or can be used as a Web
+application. The Web GUI is currently rudimentary, but a much better
+replacement is on its way.
+
 
 ```
 (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
@@ -183,6 +196,56 @@ well as the --mask (-M) argument:
 
 | --init_mask | -M | None |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
 
+# Shortcuts
+
+Because you will often want to refer back to a previously generated
+seed or image, dream.py provides shortcuts that save you from cutting
+and pasting these values.
+
+Here's how it works. Say you generated 6 images of a man-eating snail:
+
+~~~~
+dream> man-eating snail -n6
+...
+>> Usage stats:
+>> 6 image(s) generated in 79.85s
+>> Max VRAM used for this generation: 3.36G. Current VRAM utilization:2.21G
+>> Max VRAM used since script start: 3.36G
+Outputs:
+[1] outputs/img-samples/000210.1414805682.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1414805682
+[2] outputs/img-samples/000210.3312885013.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S3312885013
+[3] outputs/img-samples/000210.1398528919.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1398528919
+[4] outputs/img-samples/000210.92626031.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S92626031
+[5] outputs/img-samples/000210.1733666373.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S1733666373
+[6] outputs/img-samples/000210.2453524229.png: "man-eating snail" -s50 -W512 -H512 -C7.5 -Ak_lms -S2453524229
+~~~~
+
+The last image generated (with seed 2453524229) looks really good, so
+let's use it as the basis for some variations. Instead of cutting and
+pasting the argument -S2453524229, we can simply refer to the most
+recent seed as -1 and write:
+
+~~~~
+dream> man-eating snail -v0.1 -n10 -S-1
+>> Reusing previous seed 2453524229
+...etc...
+~~~~
+
+You can use -2 to refer to the second-to-last seed, -3 to the
+third-to-last, and so on. This works with both individual images and
+grids; however, the numbering only extends across the most recent
+group of images generated and doesn't reach back to earlier commands.
+
+The initial image (-I or --init_img) argument works in a similar
+way. To use the second-most-recent snail image as the initial image
+for an img2img render, you could refer to it as -I-2:
+
+~~~~
+dream> glowing science-fiction snail -I -2 -n4
+>> Reusing previous image outputs/img-samples/000213.2150458613.png
+...etc...
+~~~~
+
 # Command-line editing and completion
 
 If you are on a Macintosh or Linux machine, the command-line offers
diff --git a/ldm/dream/devices.py b/ldm/dream/devices.py
index 90bc9e97dd..3b85a7420c 100644
--- a/ldm/dream/devices.py
+++ b/ldm/dream/devices.py
@@ -13,8 +13,9 @@ def choose_torch_device() -> str:
 def choose_autocast_device(device):
     '''Returns an autocast compatible device from a torch device'''
     device_type = device.type # this returns 'mps' on M1
-    # autocast only supports cuda or cpu
-    if device_type in ('cuda','cpu'):
+    if device_type == 'cuda':
         return device_type,autocast
+    elif device_type == 'cpu':
+        return device_type,nullcontext
     else:
         return 'cpu',nullcontext
diff --git a/ldm/generate.py b/ldm/generate.py
index 7aa93432fd..857f325a25 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -111,7 +111,6 @@ class Generate:
         height              = 512,
         sampler_name        = 'k_lms',
         ddim_eta            = 0.0,  # deterministic
-        precision           = 'autocast',
         full_precision      = False,
         strength            = 0.75, # default in scripts/img2img.py
         seamless            = False,
@@ -129,7 +128,6 @@ class Generate:
         self.sampler_name = sampler_name
         self.grid = grid
         self.ddim_eta = ddim_eta
-        self.precision = precision
         self.full_precision = True if choose_torch_device() == 'mps' else full_precision
         self.strength = strength
         self.seamless = seamless
diff --git a/ldm/modules/diffusionmodules/model.py b/ldm/modules/diffusionmodules/model.py
index 970f6aad8f..a3598c40ef 100644
--- a/ldm/modules/diffusionmodules/model.py
+++ b/ldm/modules/diffusionmodules/model.py
@@ -121,30 +121,17 @@ class ResnetBlock(nn.Module):
                                                 padding=0)
 
     def forward(self, x, temb):
-        h1 = x
-        h2 = self.norm1(h1)
-        del h1
-
-        h3 = nonlinearity(h2)
-        del h2
-
-        h4 = self.conv1(h3)
-        del h3
+        h = self.norm1(x)
+        h = nonlinearity(h)
+        h = self.conv1(h)
 
         if temb is not None:
-            h4 = h4 + self.temb_proj(nonlinearity(temb))[:,:,None,None]
+            h = h + self.temb_proj(nonlinearity(temb))[:,:,None,None]
 
-        h5 = self.norm2(h4)
-        del h4
-
-        h6 = nonlinearity(h5)
-        del h5
-
-        h7 = self.dropout(h6)
-        del h6
-
-        h8 = self.conv2(h7)
-        del h7
+        h = self.norm2(h)
+        h = nonlinearity(h)
+        h = self.dropout(h)
+        h = self.conv2(h)
 
         if self.in_channels != self.out_channels:
             if self.use_conv_shortcut:
@@ -152,7 +139,7 @@ class ResnetBlock(nn.Module):
             else:
                 x = self.nin_shortcut(x)
 
-        return x + h8
+        return x + h
 
 class LinAttnBlock(LinearAttention):
     """to match AttnBlock usage"""
@@ -209,8 +196,7 @@ class AttnBlock(nn.Module):
 
         h_ = torch.zeros_like(k, device=q.device)
 
-        device_type = 'mps' if q.device.type == 'mps' else 'cuda'
-        if device_type == 'cuda':
+        if q.device.type == 'cuda':
             stats = torch.cuda.memory_stats(q.device)
             mem_active = stats['active_bytes.all.current']
             mem_reserved = stats['reserved_bytes.all.current']
@@ -599,22 +585,16 @@ class Decoder(nn.Module):
         temb = None
 
         # z to block_in
-        h1 = self.conv_in(z)
+        h = self.conv_in(z)
 
         # middle
-        h2 = self.mid.block_1(h1, temb)
-        del h1
-
-        h3 = self.mid.attn_1(h2)
-        del h2
-
-        h = self.mid.block_2(h3, temb)
-        del h3
+        h = self.mid.block_1(h, temb)
+        h = self.mid.attn_1(h)
+        h = self.mid.block_2(h, temb)
 
         # prepare for up sampling
-        device_type = 'mps' if h.device.type == 'mps' else 'cuda'
         gc.collect()
-        if device_type == 'cuda':
+        if h.device.type == 'cuda':
             torch.cuda.empty_cache()
 
         # upsampling
@@ -622,33 +602,19 @@
             for i_block in range(self.num_res_blocks+1):
                 h = self.up[i_level].block[i_block](h, temb)
                 if len(self.up[i_level].attn) > 0:
-                    t = h
-                    h = self.up[i_level].attn[i_block](t)
-                    del t
-
+                    h = self.up[i_level].attn[i_block](h)
             if i_level != 0:
-                t = h
-                h = self.up[i_level].upsample(t)
-                del t
+                h = self.up[i_level].upsample(h)
 
         # end
         if self.give_pre_end:
             return h
 
-        h1 = self.norm_out(h)
-        del h
-
-        h2 = nonlinearity(h1)
-        del h1
-
-        h = self.conv_out(h2)
-        del h2
-
+        h = self.norm_out(h)
+        h = nonlinearity(h)
+        h = self.conv_out(h)
         if self.tanh_out:
-            t = h
-            h = torch.tanh(t)
-            del t
-
+            h = torch.tanh(h)
         return h
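A note on the seed and image shortcuts documented in the CLI.md hunk above: negative -S and -I values behave like Python negative list indices over the most recent batch. The sketch below is a hypothetical illustration of that rule only, not dream.py's actual implementation; `last_batch_seeds` and `resolve_seed` are invented names.

~~~~python
# Hypothetical sketch of the -S shortcut rule; names are invented.
# Seeds of the most recent batch, in generation order (oldest first).
last_batch_seeds = [
    1414805682, 3312885013, 1398528919,
    92626031, 1733666373, 2453524229,
]

def resolve_seed(requested: int) -> int:
    """Interpret a negative -S value as an index into the last batch."""
    if requested < 0:
        # -1 is the most recent seed, -2 the second-to-last, and so on.
        return last_batch_seeds[requested]
    return requested  # non-negative values are ordinary seeds

assert resolve_seed(-1) == 2453524229   # matches ">> Reusing previous seed"
assert resolve_seed(-6) == 1414805682
assert resolve_seed(12345) == 12345
~~~~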
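For the ldm/dream/devices.py change, here is a minimal usage sketch of how the (device_type, context manager) pair returned by choose_autocast_device() can be consumed; the `model` and `x` stubs are placeholders, not code from this patch.

~~~~python
import torch

from ldm.dream.devices import choose_autocast_device, choose_torch_device

device = torch.device(choose_torch_device())
device_type, scope = choose_autocast_device(device)

model = torch.nn.Identity()        # placeholder model
x = torch.zeros(1, device=device)  # placeholder input

# On CUDA, `scope` is torch.autocast; on CPU (and on MPS, which falls
# back to 'cpu' here) it is contextlib.nullcontext, so the same call
# site works on every backend.
with scope(device_type):
    samples = model(x)
~~~~

Returning nullcontext for CPU avoids the overhead of torch.autocast('cpu') while keeping the caller's code path identical across devices.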